LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4090 5122 79.9 %
Date: 2024-11-05 10:06:02 Functions: 339 361 93.9 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : #include "spdk/trace.h"
      20             : 
      21             : #include "spdk_internal/assert.h"
      22             : #include "spdk_internal/trace_defs.h"
      23             : #include "spdk/log.h"
      24             : 
      25             : #include "blobstore.h"
      26             : 
      27             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      28             : 
      29             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      30             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      31             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      32             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      33             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      34             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      36             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      37             : 
      38             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      39             :                           uint16_t value_len, bool internal);
      40             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      41             :                                 const void **value, size_t *value_len, bool internal);
      42             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      43             : 
      44             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      45             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      46             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      47             : 
      48             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      49             : 
      50             : /*
      51             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      52             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      53             :  * channel is destroyed, all the channels in the tree are destroyed.
      54             :  */
      55             : 
       56             : struct blob_esnap_channel { /* Node type stored in blob_esnap_channel_tree (generated by RB_GENERATE_STATIC below) */
       57             :         RB_ENTRY(blob_esnap_channel)    node; /* red-black tree linkage */
       58             :         spdk_blob_id                    blob_id; /* NOTE(review): presumably the key compared by blob_esnap_channel_compare - confirm */
       59             :         struct spdk_io_channel          *channel; /* NOTE(review): presumably the back_bs_dev channel cached for blob_id - confirm */
       60             : };
      61             : 
      62             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      63             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      64             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      65             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      66             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
       67       10225 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare) /* Emit the static RB-tree helpers for blob_esnap_channel_tree, linked through 'node' and ordered by blob_esnap_channel_compare */
      68             : 
       69             : static inline bool /* Report whether SPDK_BLOB_EXTERNAL_SNAPSHOT is set in the blob's invalid_flags */
       70       49596 : blob_is_esnap_clone(const struct spdk_blob *blob)
       71             : {
       72       49596 :         assert(blob != NULL);
       73       49596 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT); /* !! collapses the masked bit to 0 or 1 */
       74             : }
      75             : 
       76             : static int /* Three-way comparison of two blobs by id; ordering function for spdk_blob_tree */
       77        2297 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
       78             : {
       79        2297 :         assert(blob1 != NULL && blob2 != NULL);
       80        2297 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id); /* -1 if less, otherwise the '>' result: 1 if greater, 0 if equal */
       81             : }
       82             : 
       83       15017 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp); /* Emit the static RB-tree helpers for spdk_blob_tree, linked through 'link' */
      84             : 
       85             : static void /* Assertion-only precondition check for metadata operations on a blob */
       86       37053 : blob_verify_md_op(struct spdk_blob *blob)
       87             : {
       88       37053 :         assert(blob != NULL);
       89       37053 :         assert(spdk_get_thread() == blob->bs->md_thread); /* caller must be running on the blobstore's md_thread */
       90       37053 :         assert(blob->state != SPDK_BLOB_STATE_LOADING); /* blob must not be in the LOADING state */
       91       37053 : }
      92             : 
       93             : static struct spdk_blob_list * /* Find the entry on bs->snapshots whose id equals blobid */
       94        3828 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
       95             : {
       96        3828 :         struct spdk_blob_list *snapshot_entry = NULL;
       97             : 
       98        4816 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
       99        1764 :                 if (snapshot_entry->id == blobid) {
      100         776 :                         break; /* leave the loop with snapshot_entry pointing at the match */
      101             :                 }
      102             :         }
      103             : 
      104        3828 :         return snapshot_entry; /* NULL when the loop ran off the end of the list without a match */
      105             : }
     106             : 
      107             : static void /* Mark metadata page 'page' as used in bs->used_md_pages */
      108        2906 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
      109             : {
      110        2906 :         assert(spdk_spin_held(&bs->used_lock)); /* caller must already hold used_lock */
      111        2906 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
      112        2906 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false); /* claiming an already-claimed page is a caller bug */
      113             : 
      114        2906 :         spdk_bit_array_set(bs->used_md_pages, page);
      115        2906 : }
     116             : 
      117             : static void /* Mark metadata page 'page' as free in bs->used_md_pages */
      118        2202 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
      119             : {
      120        2202 :         assert(spdk_spin_held(&bs->used_lock)); /* caller must already hold used_lock */
      121        2202 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
      122        2202 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true); /* releasing a page that is not claimed is a caller bug */
      123             : 
      124        2202 :         spdk_bit_array_clear(bs->used_md_pages, page);
      125        2202 : }
     126             : 
      127             : static uint32_t /* Take one cluster from bs->used_clusters; returns its index, or UINT32_MAX when none could be allocated */
      128        8228 : bs_claim_cluster(struct spdk_blob_store *bs)
      129             : {
      130             :         uint32_t cluster_num;
      131             : 
      132        8228 :         assert(spdk_spin_held(&bs->used_lock)); /* caller must already hold used_lock */
      133             : 
      134        8228 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
      135        8228 :         if (cluster_num == UINT32_MAX) {
      136           0 :                 return UINT32_MAX; /* allocation failed: the free-cluster counter is left untouched */
      137             :         }
      138             : 
      139        8228 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
      140        8228 :         bs->num_free_clusters--; /* keep the counter in step with the bit pool */
      141             : 
      142        8228 :         return cluster_num;
      143             : }
     144             : 
      145             : static void /* Return cluster 'cluster_num' to bs->used_clusters and bump the free-cluster counter */
      146        2407 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
      147             : {
      148        2407 :         assert(spdk_spin_held(&bs->used_lock)); /* caller must already hold used_lock */
      149        2407 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
      150        2407 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true); /* releasing an unallocated cluster is a caller bug */
      151        2407 :         assert(bs->num_free_clusters < bs->total_clusters); /* the counter must have room for the increment below */
      152             : 
      153        2407 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
      154             : 
      155        2407 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
      156        2407 :         bs->num_free_clusters++;
      157        2407 : }
     158             : 
      159             : static int /* Record cluster 'cluster' in slot cluster_num of blob->active.clusters; returns 0, or -EEXIST if the slot is already non-zero */
      160        8228 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
      161             : {
      162        8228 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num]; /* NOTE(review): cluster_num is not range-checked here - callers must guarantee it */
      163             : 
      164        8228 :         blob_verify_md_op(blob);
      165             : 
      166        8228 :         if (*cluster_lba != 0) { /* a non-zero slot means the cluster is already mapped */
      167           4 :                 return -EEXIST;
      168             :         }
      169             : 
      170        8224 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster); /* the slot stores the value produced by bs_cluster_to_lba(), not the raw cluster index */
      171        8224 :         blob->active.num_allocated_clusters++;
      172             : 
      173        8224 :         return 0;
      174             : }
     175             : 
     176             : static int
     177        8228 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     178             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     179             : {
     180        8228 :         uint32_t *extent_page = 0;
     181             : 
     182        8228 :         assert(spdk_spin_held(&blob->bs->used_lock));
     183             : 
     184        8228 :         *cluster = bs_claim_cluster(blob->bs);
     185        8228 :         if (*cluster == UINT32_MAX) {
     186             :                 /* No more free clusters. Cannot satisfy the request */
     187           0 :                 return -ENOSPC;
     188             :         }
     189             : 
     190        8228 :         if (blob->use_extent_table) {
     191        4172 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     192        4172 :                 if (*extent_page == 0) {
     193             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     194         730 :                         if (*lowest_free_md_page == 0) {
     195         728 :                                 *lowest_free_md_page = 1;
     196             :                         }
     197             :                         /* No extent_page is allocated for the cluster */
     198         730 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     199             :                                                *lowest_free_md_page);
     200         730 :                         if (*lowest_free_md_page == UINT32_MAX) {
     201             :                                 /* No more free md pages. Cannot satisfy the request */
     202           0 :                                 bs_release_cluster(blob->bs, *cluster);
     203           0 :                                 return -ENOSPC;
     204             :                         }
     205         730 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     206             :                 }
     207             :         }
     208             : 
     209        8228 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     210             :                       blob->id);
     211             : 
     212        8228 :         if (update_map) {
     213        7404 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     214        7404 :                 if (blob->use_extent_table && *extent_page == 0) {
     215         644 :                         *extent_page = *lowest_free_md_page;
     216             :                 }
     217             :         }
     218             : 
     219        8228 :         return 0;
     220             : }
     221             : 
      222             : static void /* Reset an xattr options structure to the empty state: no names, no context, no value getter */
      223        5582 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
      224             : {
      225        5582 :         xattrs->count = 0;
      226        5582 :         xattrs->names = NULL;
      227        5582 :         xattrs->ctx = NULL;
      228        5582 :         xattrs->get_value = NULL;
      229        5582 : }
     230             : 
      231             : void /* Fill *opts with defaults, touching only the first opts_size bytes; logs and returns without changes when opts is NULL or opts_size is 0 */
      232        3688 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
      233             : {
      234        3688 :         if (!opts) {
      235           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
      236           0 :                 return;
      237             :         }
      238             : 
      239        3688 :         if (!opts_size) {
      240           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
      241           0 :                 return;
      242             :         }
      243             : 
      244        3688 :         memset(opts, 0, opts_size); /* zero only the bytes the caller said it owns */
      245        3688 :         opts->opts_size = opts_size;
      246             : 
      247             : #define FIELD_OK(field) \
      248             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
      249             : 
      250             : #define SET_FIELD(field, value) \
      251             :         if (FIELD_OK(field)) { \
      252             :                 opts->field = value; \
      253             :         } \
      254             : 
      255        3688 :         SET_FIELD(num_clusters, 0); /* each SET_FIELD writes only if the whole field lies within opts_size */
      256        3688 :         SET_FIELD(thin_provision, false);
      257        3688 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
      258             : 
      259        3688 :         if (FIELD_OK(xattrs)) {
      260        3688 :                 blob_xattrs_init(&opts->xattrs);
      261             :         }
      262             : 
      263        3688 :         SET_FIELD(use_extent_table, true); /* the only default here that is not a zero value */
      264             : 
      265             : #undef FIELD_OK
      266             : #undef SET_FIELD
      267             : }
     268             : 
     269             : void
     270        3478 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     271             : {
     272        3478 :         if (!opts) {
     273           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     274           0 :                 return;
     275             :         }
     276             : 
     277        3478 :         if (!opts_size) {
     278           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     279           0 :                 return;
     280             :         }
     281             : 
     282        3478 :         memset(opts, 0, opts_size);
     283        3478 :         opts->opts_size = opts_size;
     284             : 
     285             : #define FIELD_OK(field) \
     286             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     287             : 
     288             : #define SET_FIELD(field, value) \
     289             :         if (FIELD_OK(field)) { \
     290             :                 opts->field = value; \
     291             :         } \
     292             : 
     293        3478 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     294             : 
     295             : #undef FIELD_OK
     296             : #undef SET_FILED
     297             : }
     298             : 
      299             : static struct spdk_blob * /* Allocate and initialise an in-memory blob for 'id' in blobstore 'bs'; returns NULL if either allocation fails */
      300        5368 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
      301             : {
      302             :         struct spdk_blob *blob;
      303             : 
      304        5368 :         blob = calloc(1, sizeof(*blob)); /* zero-filled, so fields not set below start as 0/NULL */
      305        5368 :         if (!blob) {
      306           0 :                 return NULL;
      307             :         }
      308             : 
      309        5368 :         blob->id = id;
      310        5368 :         blob->bs = bs;
      311             : 
      312        5368 :         blob->parent_id = SPDK_BLOBID_INVALID;
      313             : 
      314        5368 :         blob->state = SPDK_BLOB_STATE_DIRTY;
      315        5368 :         blob->extent_rle_found = false;
      316        5368 :         blob->extent_table_found = false;
      317        5368 :         blob->active.num_pages = 1; /* a new blob starts with exactly one metadata page */
      318        5368 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
      319        5368 :         if (!blob->active.pages) {
      320           0 :                 free(blob); /* nothing else has been allocated yet */
      321           0 :                 return NULL;
      322             :         }
      323             : 
      324        5368 :         blob->active.pages[0] = bs_blobid_to_page(id); /* that single page is the one derived from the blob id */
      325             : 
      326        5368 :         TAILQ_INIT(&blob->xattrs);
      327        5368 :         TAILQ_INIT(&blob->xattrs_internal);
      328        5368 :         TAILQ_INIT(&blob->pending_persists);
      329        5368 :         TAILQ_INIT(&blob->persists_to_complete);
      330             : 
      331        5368 :         return blob;
      332             : }
     333             : 
      334             : static void /* Unlink and free every xattr on the list, including each entry's name and value buffers */
      335       10736 : xattrs_free(struct spdk_xattr_tailq *xattrs)
      336             : {
      337             :         struct spdk_xattr       *xattr, *xattr_tmp;
      338             : 
      339       12502 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) { /* _SAFE variant because entries are removed while iterating */
      340        1766 :                 TAILQ_REMOVE(xattrs, xattr, link);
      341        1766 :                 free(xattr->name);
      342        1766 :                 free(xattr->value);
      343        1766 :                 free(xattr);
      344             :         }
      345       10736 : }
     346             : 
      347             : static void /* Call destroy() on the blob's back_bs_dev and clear the pointer */
      348        1116 : blob_unref_back_bs_dev(struct spdk_blob *blob)
      349             : {
      350        1116 :         blob->back_bs_dev->destroy(blob->back_bs_dev); /* no NULL check here: the caller must ensure back_bs_dev is set */
      351        1116 :         blob->back_bs_dev = NULL;
      352        1116 : }
     353             : 
      354             : static void /* Release a blob and everything it owns: page/cluster arrays, both xattr lists and, if set, the back_bs_dev */
      355        5368 : blob_free(struct spdk_blob *blob)
      356             : {
      357        5368 :         assert(blob != NULL);
      358        5368 :         assert(TAILQ_EMPTY(&blob->pending_persists)); /* no persist may still be queued when the blob is freed */
      359        5368 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
      360             : 
      361        5368 :         free(blob->active.extent_pages);
      362        5368 :         free(blob->clean.extent_pages);
      363        5368 :         free(blob->active.clusters);
      364        5368 :         free(blob->clean.clusters);
      365        5368 :         free(blob->active.pages);
      366        5368 :         free(blob->clean.pages);
      367             : 
      368        5368 :         xattrs_free(&blob->xattrs);
      369        5368 :         xattrs_free(&blob->xattrs_internal);
      370             : 
      371        5368 :         if (blob->back_bs_dev) { /* blob_unref_back_bs_dev() dereferences the pointer unconditionally */
      372        1088 :                 blob_unref_back_bs_dev(blob);
      373             :         }
      374             : 
      375        5368 :         free(blob);
      376        5368 : }
     377             : 
      378             : static void /* Callback passed to blob_esnap_destroy_bs_dev_channels() by blob_back_bs_destroy(); destroys the bs_dev handed over as ctx */
      379         324 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
      380             : {
      381         324 :         struct spdk_bs_dev      *bs_dev = ctx; /* the back_bs_dev that blob_back_bs_destroy() passed as cb_arg */
      382             : 
      383         324 :         if (bserrno != 0) {
      384             :                 /*
      385             :                  * This is probably due to a memory allocation failure when creating the
      386             :                  * blob_esnap_destroy_ctx before iterating threads.
      387             :                  */
      388           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
      389             :                             blob->id, bserrno);
      390           0 :                 assert(false); /* when asserts are compiled out, execution continues and the device is still destroyed below */
      391             :         }
      392             : 
      393         324 :         if (bs_dev == NULL) {
      394             :                 /*
      395             :                  * This check exists to make scanbuild happy.
      396             :                  *
      397             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
      398             :                  * the blobstore is being loaded. It could also be NULL if there was an error
      399             :                  * opening the esnap device. In each of these cases, no channels could have been
      400             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
      401             :                  * deref.
      402             :                  */
      403           0 :                 assert(false);
      404             :                 return;
      405             :         }
      406             : 
      407         324 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
      408         324 :         bs_dev->destroy(bs_dev);
      409             : }
     410             : 
      411             : static void /* Detach the blob's back_bs_dev and hand it to the esnap channel teardown, whose callback destroys it */
      412         324 : blob_back_bs_destroy(struct spdk_blob *blob)
      413             : {
      414         324 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
      415             :                       blob->id);
      416             : 
      417         324 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
      418         324 :                                            blob->back_bs_dev); /* the device pointer travels as cb_arg; abort_io is false */
      419         324 :         blob->back_bs_dev = NULL; /* the blob gives up its reference; blob_back_bs_destroy_esnap_done() calls destroy() */
      420         324 : }
     421             : 
      422             : struct blob_parent { /* Describes a blob's parent: either a snapshot blob or an external snapshot (esnap) */
      423             :         union { /* only one of the two members is meaningful for a given parent */
      424             :                 struct {
      425             :                         spdk_blob_id id;
      426             :                         struct spdk_blob *blob;
      427             :                 } snapshot; /* parent is a snapshot blob, identified by id and blob pointer */
      428             : 
      429             :                 struct {
      430             :                         void *id; /* opaque identifier buffer of id_len bytes */
      431             :                         uint32_t id_len;
      432             :                         struct spdk_bs_dev *back_bs_dev;
      433             :                 } esnap; /* parent is an external snapshot backed by back_bs_dev */
      434             :         } u;
      435             : };
     436             : 
      437             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent); /* Hook type stored in set_bs_dev_ctx.parent_refs_cb_fn; returns an int status */
     438             : 
      439             : struct set_bs_dev_ctx { /* State allocated by blob_set_back_bs_dev() and passed to blob_set_back_bs_dev_frozen() */
      440             :         struct spdk_blob        *blob; /* blob whose back_bs_dev is being set */
      441             :         struct spdk_bs_dev      *back_bs_dev; /* the back_bs_dev argument given to blob_set_back_bs_dev() */
      442             : 
      443             :         /*
      444             :          * This callback is used during a set parent operation to change the references
      445             :          * to the parent of the blob.
      446             :          */
      447             :         set_parent_refs_cb      parent_refs_cb_fn;
      448             :         struct blob_parent      *parent_refs_cb_arg;
      449             : 
      450             :         spdk_blob_op_complete   cb_fn; /* caller's completion callback */
      451             :         void                    *cb_arg; /* argument for cb_fn */
      452             :         int                     bserrno; /* NOTE(review): not written in the code visible here; presumably carries an error between asynchronous steps - confirm */
      453             : };
     454             : 
      455             : static void /* Start replacing the blob's back_bs_dev: capture the arguments in a context, then freeze blob I/O */
      456          28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
      457             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
      458             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
      459             : {
      460             :         struct set_bs_dev_ctx   *ctx;
      461             : 
      462          28 :         ctx = calloc(1, sizeof(*ctx));
      463          28 :         if (ctx == NULL) {
      464           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
      465             :                             blob->id);
      466           0 :                 cb_fn(cb_arg, -ENOMEM); /* failure is reported through the callback; nothing has been frozen */
      467           0 :                 return;
      468             :         }
      469             : 
      470          28 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
      471          28 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
      472          28 :         ctx->cb_fn = cb_fn;
      473          28 :         ctx->cb_arg = cb_arg;
      474          28 :         ctx->back_bs_dev = back_bs_dev;
      475          28 :         ctx->blob = blob;
      476             : 
      477          28 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx); /* blob_set_back_bs_dev_frozen() receives ctx; NOTE(review): presumably it also frees ctx - confirm */
      478             : }
     479             : 
      480             : struct freeze_io_ctx { /* Context shared by blob_freeze_io()/blob_unfreeze_io() and their per-channel iteration callbacks */
      481             :         struct spdk_bs_cpl cpl; /* holds the caller's completion callback and argument (u.blob_basic) */
      482             :         struct spdk_blob *blob; /* blob being frozen or unfrozen */
      483             : };
     484             : 
      485             : static void /* Per-channel step used by blob_freeze_io(): does no work, only advances the iteration */
      486         534 : blob_io_sync(struct spdk_io_channel_iter *i)
      487             : {
      488         534 :         spdk_for_each_channel_continue(i, 0); /* always continues with status 0 */
      489         534 : }
     490             : 
      491             : static void /* Per-channel step used by blob_unfreeze_io(): run the queued user ops that target ctx->blob */
      492         522 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
      493             : {
      494         522 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
      495         522 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
      496         522 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
      497             :         struct spdk_bs_request_set      *set;
      498             :         struct spdk_bs_user_op_args     *args;
      499             :         spdk_bs_user_op_t *op, *tmp;
      500             : 
      501         526 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) { /* _SAFE variant because matching ops are removed while iterating */
      502           4 :                 set = (struct spdk_bs_request_set *)op; /* the op handle is reinterpreted as its request set to reach the user-op arguments */
      503           4 :                 args = &set->u.user_op;
      504             : 
      505           4 :                 if (args->blob == ctx->blob) { /* ops queued for other blobs stay on the queue */
      506           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
      507           4 :                         bs_user_op_execute(op);
      508             :                 }
      509             :         }
      510             : 
      511         522 :         spdk_for_each_channel_continue(i, 0);
      512         522 : }
     513             : 
      514             : static void /* Final step of the freeze/unfreeze channel iteration: invoke the stored callback and free the context */
      515        1024 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
      516             : {
      517        1024 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
      518             : 
      519        1024 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0); /* NOTE(review): 'status' is ignored and 0 is always reported; both per-channel steps in this file continue with 0 */
      520             : 
      521        1024 :         free(ctx); /* ctx was allocated by blob_freeze_io() or blob_unfreeze_io() */
      522        1024 : }
     523             : 
      524             : static void /* Increment the blob's frozen_refcnt, then visit every channel of the blobstore before calling cb_fn */
      525         518 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
      526             : {
      527             :         struct freeze_io_ctx *ctx;
      528             : 
      529         518 :         blob_verify_md_op(blob);
      530             : 
      531         518 :         ctx = calloc(1, sizeof(*ctx));
      532         518 :         if (!ctx) {
      533           0 :                 cb_fn(cb_arg, -ENOMEM); /* reported before frozen_refcnt is touched */
      534           0 :                 return;
      535             :         }
      536             : 
      537         518 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; /* NOTE(review): tag says BS_BASIC but the u.blob_basic member is filled below; blob_io_cpl() reads u.blob_basic directly - confirm the tag is never consulted */
      538         518 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
      539         518 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
      540         518 :         ctx->blob = blob;
      541             : 
      542             :         /* Freeze I/O on blob */
      543         518 :         blob->frozen_refcnt++;
      544             : 
      545         518 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl); /* blob_io_cpl() calls cb_fn and frees ctx once every channel has been visited */
      546             : }
     547             : 
      548             : static void /* Decrement the blob's frozen_refcnt, then run this blob's queued I/O on every channel before calling cb_fn */
      549         506 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
      550             : {
      551             :         struct freeze_io_ctx *ctx;
      552             : 
      553         506 :         blob_verify_md_op(blob);
      554             : 
      555         506 :         ctx = calloc(1, sizeof(*ctx));
      556         506 :         if (!ctx) {
      557           0 :                 cb_fn(cb_arg, -ENOMEM); /* reported before frozen_refcnt is touched, so the blob stays frozen */
      558           0 :                 return;
      559             :         }
      560             : 
      561         506 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; /* NOTE(review): tag says BS_BASIC but the u.blob_basic member is filled below; blob_io_cpl() reads u.blob_basic directly - confirm the tag is never consulted */
      562         506 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
      563         506 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
      564         506 :         ctx->blob = blob;
      565             : 
      566         506 :         assert(blob->frozen_refcnt > 0); /* every unfreeze must pair with an earlier freeze */
      567             : 
      568         506 :         blob->frozen_refcnt--;
      569             : 
      570         506 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl); /* NOTE(review): the iteration runs even if frozen_refcnt is still non-zero after the decrement - confirm bs_user_op_execute() copes with a still-frozen blob */
      571             : }
     572             : 
     /* blob_mark_clean() - turn the blob's current "active" metadata arrays into its "clean" copy.
      *
      * Duplicates active.extent_pages, active.clusters and active.pages into freshly
      * calloc()ed arrays, frees the previous clean arrays, hands the old active arrays
      * (and their element counts) over to blob->clean, and installs the duplicates as
      * the new active arrays.
      *
      * All duplicates are allocated before *blob is modified, so an allocation failure
      * leaves the blob untouched.
      *
      * Returns 0 on success, -ENOMEM if a duplicate cannot be allocated.
      */
     573             : static int
     574        8480 : blob_mark_clean(struct spdk_blob *blob)
     575             : {
     576        8480 :         uint32_t *extent_pages = NULL;
     577        8480 :         uint64_t *clusters = NULL;
     578        8480 :         uint32_t *pages = NULL;
     579             : 
     580        8480 :         assert(blob != NULL);
     581             : 
                               /* Each duplicate is only allocated when its count is non-zero; otherwise the
                                * local stays NULL and NULL is what ends up in blob->active below. */
     582        8480 :         if (blob->active.num_extent_pages) {
     583        2861 :                 assert(blob->active.extent_pages);
     584        2861 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     585        2861 :                 if (!extent_pages) {
     586           0 :                         return -ENOMEM;
     587             :                 }
     588        2861 :                 memcpy(extent_pages, blob->active.extent_pages,
     589        2861 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     590             :         }
     591             : 
     592        8480 :         if (blob->active.num_clusters) {
     593        5952 :                 assert(blob->active.clusters);
     594        5952 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     595        5952 :                 if (!clusters) {
     596           0 :                         free(extent_pages);
     597           0 :                         return -ENOMEM;
     598             :                 }
     599        5952 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     600             :         }
     601             : 
     602        8480 :         if (blob->active.num_pages) {
     603        6992 :                 assert(blob->active.pages);
     604        6992 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     605        6992 :                 if (!pages) {
     606           0 :                         free(extent_pages);
     607           0 :                         free(clusters);
     608           0 :                         return -ENOMEM;
     609             :                 }
     610        6992 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     611             :         }
     612             : 
                               /* Nothing below can fail: drop the old clean arrays and take over the active ones. */
     613        8480 :         free(blob->clean.extent_pages);
     614        8480 :         free(blob->clean.clusters);
     615        8480 :         free(blob->clean.pages);
     616             : 
     617        8480 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     618        8480 :         blob->clean.extent_pages = blob->active.extent_pages;
     619        8480 :         blob->clean.num_clusters = blob->active.num_clusters;
     620        8480 :         blob->clean.clusters = blob->active.clusters;
     621        8480 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     622        8480 :         blob->clean.num_pages = blob->active.num_pages;
     623        8480 :         blob->clean.pages = blob->active.pages;
     624             : 
                               /* The active side continues with the duplicates (the counts are unchanged). */
     625        8480 :         blob->active.extent_pages = extent_pages;
     626        8480 :         blob->active.clusters = clusters;
     627        8480 :         blob->active.pages = pages;
     628             : 
     629             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     630             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     631             :          */
                               /* Hence only a blob that is still LOADING is promoted to CLEAN; every other
                                * state is left exactly as it is. */
     632        8480 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     633        3410 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     634             :         }
     635             : 
     636        8480 :         return 0;
     637             : }
     638             : 
     /* blob_deserialize_xattr() - build an in-memory xattr from an on-disk xattr descriptor.
      *
      * blob       - blob whose xattr list receives the new entry.
      * desc_xattr - descriptor to decode; the value bytes follow the name bytes directly.
      * internal   - true appends to blob->xattrs_internal, false to blob->xattrs.
      *
      * Returns 0 on success, -EINVAL if desc_xattr->length is inconsistent with the
      * name/value lengths, -ENOMEM if an allocation fails (nothing is added to the
      * list in either error case).
      *
      * NOTE(review): nothing here checks that name_length + value_length stay inside
      * the metadata page the descriptor came from - presumably the caller is expected
      * to guarantee that; confirm.
      */
     639             : static int
     640        1284 : blob_deserialize_xattr(struct spdk_blob *blob,
     641             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     642             : {
     643             :         struct spdk_xattr                       *xattr;
     644             : 
                               /* The descriptor payload must be exactly the two length fields plus name plus value. */
     645        1284 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     646             :             sizeof(desc_xattr->value_length) +
     647        1284 :             desc_xattr->name_length + desc_xattr->value_length) {
     648           0 :                 return -EINVAL;
     649             :         }
     650             : 
     651        1284 :         xattr = calloc(1, sizeof(*xattr));
     652        1284 :         if (xattr == NULL) {
     653           0 :                 return -ENOMEM;
     654             :         }
     655             : 
                               /* +1 for the terminating NUL added below; the on-disk name is not NUL-terminated
                                * as far as this function is concerned. */
     656        1284 :         xattr->name = malloc(desc_xattr->name_length + 1);
     657        1284 :         if (xattr->name == NULL) {
     658           0 :                 free(xattr);
     659           0 :                 return -ENOMEM;
     660             :         }
     661             : 
                               /* NOTE(review): malloc(0) is allowed to return NULL, so a zero-length value
                                * could be reported as -ENOMEM on some C libraries - confirm whether
                                * zero-length values can occur. */
     662        1284 :         xattr->value = malloc(desc_xattr->value_length);
     663        1284 :         if (xattr->value == NULL) {
     664           0 :                 free(xattr->name);
     665           0 :                 free(xattr);
     666           0 :                 return -ENOMEM;
     667             :         }
     668             : 
     669        1284 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     670        1284 :         xattr->name[desc_xattr->name_length] = '\0';
     671        1284 :         xattr->value_len = desc_xattr->value_length;
                               /* The value starts immediately after the name inside the descriptor. */
     672        1284 :         memcpy(xattr->value,
     673        1284 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     674        1284 :                desc_xattr->value_length);
     675             : 
     676        1284 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     677             : 
     678        1284 :         return 0;
     679             : }
     680             : 
     681             : 
     /* blob_parse_page() - decode every descriptor in one metadata page into *blob.
      *
      * Walks page->descriptors as a sequence of (header, payload) records and
      * dispatches on desc->type:
      *   PADDING         - length 0 ends the page, otherwise the record is skipped
      *   FLAGS           - validates and stores invalid/data_ro/md_ro flags
      *   EXTENT_RLE      - appends run-length-encoded clusters to active.clusters
      *   EXTENT_TABLE    - appends extent page indexes to active.extent_pages
      *   EXTENT_PAGE     - appends the clusters listed in one extent page
      *   XATTR(_INTERNAL)- handed to blob_deserialize_xattr()
      *   anything else   - ignored
      *
      * Returns 0 on success, -EINVAL for malformed or contradictory descriptors,
      * -ENOMEM on allocation failure, or the error from blob_deserialize_xattr().
      * On error the blob may already have been partially updated by earlier
      * descriptors; nothing is rolled back here.
      */
     682             : static int
     683        4590 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     684             : {
     685             :         struct spdk_blob_md_descriptor *desc;
     686        4590 :         size_t  cur_desc = 0;
     687             :         void *tmp;
     688             : 
                               /* NOTE(review): the loop only verifies that the *header* of the next descriptor
                                * fits in the page (see the advance step at the bottom). desc->length itself is
                                * never compared against the bytes remaining in page->descriptors before a
                                * descriptor body is read - confirm the page contents are trusted/validated
                                * (e.g. by a CRC) before this function runs. */
     689        4590 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     690       13480 :         while (cur_desc < sizeof(page->descriptors)) {
     691       13480 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     692        4542 :                         if (desc->length == 0) {
     693             :                                 /* If padding and length are 0, this terminates the page */
     694        4542 :                                 break;
     695             :                         }
                                               /* Padding with a non-zero length falls through to the advance step. */
     696        8938 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     697             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     698             : 
     699        3442 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     700             : 
     701        3442 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     702           0 :                                 return -EINVAL;
     703             :                         }
     704             : 
                                               /* (flags | MASK) != MASK is true exactly when a bit outside MASK is set.
                                                * Such a bit in invalid_flags makes the blob unloadable... */
     705        3442 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     706             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     707           8 :                                 return -EINVAL;
     708             :                         }
     709             : 
                                               /* ...in data_ro_flags it forces the blob read-only for data and metadata... */
     710        3434 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     711             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     712          12 :                                 blob->data_ro = true;
     713          12 :                                 blob->md_ro = true;
     714             :                         }
     715             : 
                                               /* ...and in md_ro_flags it forces the metadata read-only. */
     716        3434 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     717             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     718          12 :                                 blob->md_ro = true;
     719             :                         }
     720             : 
     721        3434 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     722         566 :                                 blob->data_ro = true;
     723         566 :                                 blob->md_ro = true;
     724             :                         }
     725             : 
     726        3434 :                         blob->invalid_flags = desc_flags->invalid_flags;
     727        3434 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     728        3434 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     729             : 
     730        5496 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     731             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     732             :                         unsigned int                            i, j;
                                               /* Starts at the clusters already parsed so the realloc below sizes the
                                                * array for old + new entries. */
     733        1396 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     734             : 
     735        1396 :                         if (blob->extent_table_found) {
     736             :                                 /* Extent Table already present in the md,
     737             :                                  * both descriptors should never be at the same time. */
     738           0 :                                 return -EINVAL;
     739             :                         }
     740        1396 :                         blob->extent_rle_found = true;
     741             : 
     742        1396 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     743             : 
     744        1396 :                         if (desc_extent_rle->length == 0 ||
     745        1396 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     746           0 :                                 return -EINVAL;
     747             :                         }
     748             : 
                                               /* Pass 1: count clusters and check that every non-zero cluster index is
                                                * marked allocated in the blobstore's used_clusters pool. */
     749        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     750       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     751       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     752        6692 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     753        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     754           0 :                                                         return -EINVAL;
     755             :                                                 }
     756             :                                         }
     757       19668 :                                         cluster_count++;
     758             :                                 }
     759             :                         }
     760             : 
     761        1396 :                         if (cluster_count == 0) {
     762           0 :                                 return -EINVAL;
     763             :                         }
     764        1396 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     765        1396 :                         if (tmp == NULL) {
     766           0 :                                 return -ENOMEM;
     767             :                         }
     768        1396 :                         blob->active.clusters = tmp;
     769        1396 :                         blob->active.cluster_array_size = cluster_count;
     770             : 
                                               /* Pass 2: expand each run into per-cluster LBAs. A cluster index of 0 is
                                                * stored as 0 and is only accepted for thin-provisioned blobs. */
     771        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     772       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     773       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     774        6692 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     775        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     776        6692 :                                                 blob->active.num_allocated_clusters++;
     777       12976 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     778       12976 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     779             :                                         } else {
     780           0 :                                                 return -EINVAL;
     781             :                                         }
     782             :                                 }
     783             :                         }
     784        4100 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     785             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     786        1768 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     787             :                         uint32_t i, j;
     788             :                         size_t extent_pages_length;
     789             : 
     790        1768 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
                                               /* NOTE(review): computed before length is validated; for a length smaller
                                                * than sizeof(num_clusters) this unsigned subtraction wraps. The checks
                                                * below only test length == 0 and divisibility - confirm that is enough
                                                * to reject every such length. */
     791        1768 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     792             : 
     793        1768 :                         if (blob->extent_rle_found) {
     794             :                                 /* This means that Extent RLE is present in MD,
     795             :                                  * both should never be at the same time. */
     796           0 :                                 return -EINVAL;
     797        1768 :                         } else if (blob->extent_table_found &&
     798           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     799             :                                 /* Number of clusters in this ET does not match number
     800             :                                  * from previously read EXTENT_TABLE. */
     801           0 :                                 return -EINVAL;
     802             :                         }
     803             : 
     804        1768 :                         if (desc_extent_table->length == 0 ||
     805        1768 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     806           0 :                                 return -EINVAL;
     807             :                         }
     808             : 
     809        1768 :                         blob->extent_table_found = true;
     810             : 
                                               /* Size the array from the sum of the entries' num_pages fields. */
     811        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     812        1478 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     813             :                         }
     814             : 
     815        1768 :                         if (num_extent_pages > 0) {
     816        1462 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     817        1462 :                                 if (tmp == NULL) {
     818           0 :                                         return -ENOMEM;
     819             :                                 }
     820        1462 :                                 blob->active.extent_pages = tmp;
     821             :                         }
     822        1768 :                         blob->active.extent_pages_array_size = num_extent_pages;
     823             : 
     824        1768 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     825             : 
     826             :                         /* Extent table entries contain md page numbers for extent pages.
     827             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     828             :                          */
     829        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     830        1478 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
                                                               /* NOTE(review): num_pages == 1 is enforced by assert only. With
                                                                * NDEBUG an entry with page_idx != 0 and num_pages == 0 adds nothing
                                                                * to the array size computed above yet still stores one element
                                                                * here - confirm on-disk data cannot contain such an entry. */
     831        1054 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     832        2108 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     833        1054 :                                                 desc_extent_table->extent_page[i].page_idx;
     834         424 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     835         848 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     836         424 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     837             :                                         }
     838             :                                 } else {
     839           0 :                                         return -EINVAL;
     840             :                                 }
     841             :                         }
     842        2332 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     843             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     844             :                         unsigned int                                    i;
     845        1048 :                         unsigned int                                    cluster_count = 0;
     846             :                         size_t                                          cluster_idx_length;
     847             : 
     848        1048 :                         if (blob->extent_rle_found) {
     849             :                                 /* This means that Extent RLE is present in MD,
     850             :                                  * both should never be at the same time. */
     851           0 :                                 return -EINVAL;
     852             :                         }
     853             : 
     854        1048 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
                                               /* May wrap for a too-short descriptor; the "<=" test just below rejects
                                                * that case before cluster_idx_length is used as a loop bound. */
     855        1048 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     856             : 
     857        1048 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     858        1048 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     859           0 :                                 return -EINVAL;
     860             :                         }
     861             : 
                                               /* Pass 1: count entries and verify non-zero cluster indexes are allocated. */
     862       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     863       15296 :                                 if (desc_extent->cluster_idx[i] != 0) {
     864        6962 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     865           0 :                                                 return -EINVAL;
     866             :                                         }
     867             :                                 }
     868       15296 :                                 cluster_count++;
     869             :                         }
     870             : 
     871        1048 :                         if (cluster_count == 0) {
     872           0 :                                 return -EINVAL;
     873             :                         }
     874             : 
     875             :                         /* When reading extent pages sequentially starting cluster idx should match
     876             :                          * current size of a blob.
     877             :                          * If changed to batch reading, this check shall be removed. */
     878        1048 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     879           0 :                                 return -EINVAL;
     880             :                         }
     881             : 
     882        1048 :                         tmp = realloc(blob->active.clusters,
     883        1048 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     884        1048 :                         if (tmp == NULL) {
     885           0 :                                 return -ENOMEM;
     886             :                         }
     887        1048 :                         blob->active.clusters = tmp;
     888        1048 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     889             : 
                                               /* Pass 2: append LBAs; index 0 is stored as 0 and requires thin provisioning. */
     890       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     891       15296 :                                 if (desc_extent->cluster_idx[i] != 0) {
     892        6962 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     893             :                                                         desc_extent->cluster_idx[i]);
     894        6962 :                                         blob->active.num_allocated_clusters++;
     895        8334 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     896        8334 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     897             :                                 } else {
     898           0 :                                         return -EINVAL;
     899             :                                 }
     900             :                         }
     901        1048 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
                                               /* NOTE(review): assert-only guard; with NDEBUG a page holding more
                                                * clusters than remaining_clusters_in_et makes the subtraction below
                                                * wrap - confirm whether this should be a runtime -EINVAL. */
     902        1048 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     903        1048 :                         blob->remaining_clusters_in_et -= cluster_count;
     904        1284 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     905             :                         int rc;
     906             : 
     907         394 :                         rc = blob_deserialize_xattr(blob,
     908             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     909         394 :                         if (rc != 0) {
     910           0 :                                 return rc;
     911             :                         }
     912         890 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     913             :                         int rc;
     914             : 
     915         890 :                         rc = blob_deserialize_xattr(blob,
     916             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     917         890 :                         if (rc != 0) {
     918           0 :                                 return rc;
     919             :                         }
     920             :                 } else {
     921             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     922             :                          *  next descriptor.  If this descriptor is associated with some feature
     923             :                          *  defined in a newer version of blobstore, that version of blobstore
     924             :                          *  should create and set an associated feature flag to specify if this
     925             :                          *  blob can be loaded or not.
     926             :                          */
     927             :                 }
     928             : 
     929             :                 /* Advance to the next descriptor */
     930        8930 :                 cur_desc += sizeof(*desc) + desc->length;
                                       /* Stop when not even a descriptor header fits in what is left of the page. */
     931        8930 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     932          40 :                         break;
     933             :                 }
     934        8890 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     935             :         }
     936             : 
     937        4582 :         return 0;
     938             : }
     939             : 
     /* Forward declaration needed by blob_parse_extent_page() directly below; the
      * definition is not part of this section of the listing. */
     940             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     941             : 
     /* blob_parse_extent_page() - validate one extent page and parse its descriptors into *blob.
      *
      * The blob must be non-NULL and in SPDK_BLOB_STATE_LOADING (checked by assert only).
      *
      * Returns -ENOENT if bs_load_cur_extent_page_valid() rejects the page, otherwise
      * whatever blob_parse_page() returns for it.
      */
     942             : static int
     943        1048 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     944             : {
     945        1048 :         assert(blob != NULL);
     946        1048 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     947             : 
     948        1048 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     949           0 :                 return -ENOENT;
     950             :         }
     951             : 
     952        1048 :         return blob_parse_page(extent_page, blob);
     953             : }
     954             : 
     /* blob_parse() - rebuild a blob's in-memory metadata from its chain of metadata pages.
      *
      * pages      - array of page_count pages; pages[0] must have sequence_num 0.
      * page_count - number of entries in pages, must be > 0.
      * blob       - blob being loaded; must be in SPDK_BLOB_STATE_LOADING with no
      *              active.clusters yet (both checked by assert only).
      *
      * Records the page chain in blob->active.pages / num_pages, then runs
      * blob_parse_page() on every page in order.
      *
      * Returns 0 on success, -ENOENT if blob->id does not match pages[0].id,
      * -ENOMEM if the page array cannot be allocated, or the first non-zero result
      * of blob_parse_page(). On a parse error active.pages/num_pages stay set;
      * cleanup is presumably left to the caller - confirm.
      */
     955             : static int
     956        3446 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     957             :            struct spdk_blob *blob)
     958             : {
     959             :         const struct spdk_blob_md_page *page;
     960             :         uint32_t i;
     961             :         int rc;
     962             :         void *tmp;
     963             : 
     964        3446 :         assert(page_count > 0);
     965        3446 :         assert(pages[0].sequence_num == 0);
     966        3446 :         assert(blob != NULL);
     967        3446 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     968        3446 :         assert(blob->active.clusters == NULL);
     969             : 
     970             :         /* The blobid provided doesn't match what's in the MD, this can
     971             :          * happen for example if a bogus blobid is passed in through open.
     972             :          */
     973        3446 :         if (blob->id != pages[0].id) {
     974           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     975             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     976           4 :                 return -ENOENT;
     977             :         }
     978             : 
     979        3442 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     980        3442 :         if (!tmp) {
     981           0 :                 return -ENOMEM;
     982             :         }
     983        3442 :         blob->active.pages = tmp;
     984             : 
                               /* NOTE(review): the first slot takes pages[0].id rather than a page number;
                                * presumably the low bits of a blob id are the index of its first metadata
                                * page - confirm against the blob id encoding. */
     985        3442 :         blob->active.pages[0] = pages[0].id;
     986             : 
                               /* Every later slot is the "next" link stored in the preceding page. */
     987        3542 :         for (i = 1; i < page_count; i++) {
     988         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     989         100 :                 blob->active.pages[i] = pages[i - 1].next;
     990             :         }
     991        3442 :         blob->active.num_pages = page_count;
     992             : 
     993        6976 :         for (i = 0; i < page_count; i++) {
     994        3542 :                 page = &pages[i];
     995             : 
     996        3542 :                 assert(page->id == blob->id);
     997        3542 :                 assert(page->sequence_num == i);
     998             : 
     999        3542 :                 rc = blob_parse_page(page, blob);
    1000        3542 :                 if (rc != 0) {
    1001           8 :                         return rc;
    1002             :                 }
    1003             :         }
    1004             : 
    1005        3434 :         return 0;
    1006             : }
    1007             : 
    /* blob_serialize_add_page() - append one zeroed metadata page to a growing page array.
     *
     * blob       - supplies the id stamped into the new page.
     * pages      - in/out: the page array; must be NULL when *page_count is 0.
     * page_count - in/out: number of pages in *pages, incremented on success.
     * last_page  - out: the newly added page, or NULL on failure.
     *
     * The first page is obtained with spdk_malloc(), later ones by growing the array
     * with spdk_realloc(). The new page gets id = blob->id, sequence_num = its index
     * and next = SPDK_INVALID_MD_PAGE.
     *
     * Returns 0 on success, -ENOMEM on allocation failure (*pages and *page_count
     * are left unchanged in that case).
     *
     * NOTE(review): presumably spdk_realloc() may move the array like realloc(), in
     * which case pointers to earlier pages held by the caller become stale - confirm.
     */
    1008             : static int
    1009        4378 : blob_serialize_add_page(const struct spdk_blob *blob,
    1010             :                         struct spdk_blob_md_page **pages,
    1011             :                         uint32_t *page_count,
    1012             :                         struct spdk_blob_md_page **last_page)
    1013             : {
    1014             :         struct spdk_blob_md_page *page, *tmp_pages;
    1015             : 
    1016        4378 :         assert(pages != NULL);
    1017        4378 :         assert(page_count != NULL);
    1018             : 
                               /* Cleared up front so every early return reports "no page added". */
    1019        4378 :         *last_page = NULL;
    1020        4378 :         if (*page_count == 0) {
    1021        4290 :                 assert(*pages == NULL);
    1022        4290 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
    1023             :                                      NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1024        4290 :                 if (*pages == NULL) {
    1025           0 :                         return -ENOMEM;
    1026             :                 }
    1027        4290 :                 *page_count = 1;
    1028             :         } else {
    1029          88 :                 assert(*pages != NULL);
                                       /* Result goes to a temporary so *pages is still valid if growing fails. */
    1030          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
    1031          88 :                 if (tmp_pages == NULL) {
    1032           0 :                         return -ENOMEM;
    1033             :                 }
    1034          88 :                 (*page_count)++;
    1035          88 :                 *pages = tmp_pages;
    1036             :         }
    1037             : 
                               /* Initialise the page that was just appended (the last one in the array). */
    1038        4378 :         page = &(*pages)[*page_count - 1];
    1039        4378 :         memset(page, 0, sizeof(*page));
    1040        4378 :         page->id = blob->id;
    1041        4378 :         page->sequence_num = *page_count - 1;
    1042        4378 :         page->next = SPDK_INVALID_MD_PAGE;
    1043        4378 :         *last_page = page;
    1044             : 
    1045        4378 :         return 0;
    1046             : }
    1047             : 
    1048             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
    1049             :  * Update required_sz on both success and failure.
    1050             :  *
                       * xattr       - attribute to serialize; its name is taken up to the terminating NUL.
                       * buf, buf_sz - destination buffer and its size in bytes.
                       * required_sz - out: descriptor header + name length + value length, in bytes.
                       * internal    - selects SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL over ..._XATTR.
                       *
                       * Returns 0 on success, -1 (not an errno value) if buf_sz is smaller than
                       * *required_sz; buf is not written in that case.
    1051             :  */
    1052             : static int
    1053        1787 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1054             :                      uint8_t *buf, size_t buf_sz,
    1055             :                      size_t *required_sz, bool internal)
    1056             : {
    1057             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1058             : 
    1059        3574 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1060        1787 :                        strlen(xattr->name) +
    1061        1787 :                        xattr->value_len;
    1062             : 
    1063        1787 :         if (buf_sz < *required_sz) {
    1064          48 :                 return -1;
    1065             :         }
    1066             : 
    1067        1739 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1068             : 
    1069        1739 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
                               /* length covers only the payload after the generic descriptor header:
                                * the two length fields, then the name bytes, then the value bytes. */
    1070        1739 :         desc->length = sizeof(desc->name_length) +
    1071             :                        sizeof(desc->value_length) +
    1072        1739 :                        strlen(xattr->name) +
    1073        1739 :                        xattr->value_len;
    1074        1739 :         desc->name_length = strlen(xattr->name);
    1075        1739 :         desc->value_length = xattr->value_len;
    1076             : 
                               /* The name is stored without its NUL; the value follows it immediately. */
    1077        1739 :         memcpy(desc->name, xattr->name, desc->name_length);
    1078        1739 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1079        1739 :                xattr->value,
    1080        1739 :                desc->value_length);
    1081             : 
    1082        1739 :         return 0;
    1083             : }
    1084             : 
    1085             : static void /* Write one EXTENT_TABLE descriptor at *buf for the extent pages starting at index start_ep. */
    1086        1697 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1087             :                                   uint64_t start_ep, uint64_t *next_ep, /* next_ep (out): first extent page index not yet serialized */
    1088             :                                   uint8_t **buf, size_t *remaining_sz) /* in/out: advanced / reduced by the bytes written */
    1089             : {
    1090             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1091             :         size_t cur_sz; /* bytes of the descriptor consumed so far, including the common header */
    1092             :         uint64_t i, et_idx; /* et_idx: number of extent_page[] entries written */
    1093             :         uint32_t extent_page, ep_len; /* ep_len: length of the run recorded in the next entry */
    1094             : 
    1095             :         /* The buffer must have room for at least num_clusters entry */
    1096        1697 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1097        1697 :         if (*remaining_sz < cur_sz) {
    1098          20 :                 *next_ep = start_ep; /* nothing written; *buf and *remaining_sz are left untouched */
    1099          20 :                 return;
    1100             :         }
    1101             : 
    1102        1677 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1103        1677 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1104             : 
    1105        1677 :         desc->num_clusters = blob->active.num_clusters;
    1106             : 
    1107        1677 :         ep_len = 1;
    1108        1677 :         et_idx = 0;
    1109        4260 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1110        2583 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1111             :                         /* If we ran out of buffer space, return */
    1112           0 :                         break;
    1113             :                 }
    1114             : 
    1115        2583 :                 extent_page = blob->active.extent_pages[i];
    1116             :                 /* Verify that next extent_page is unallocated */
    1117        2583 :                 if (extent_page == 0 &&
    1118        1522 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1119        1078 :                         ep_len++; /* fold consecutive zero (unallocated) extent pages into one entry */
    1120        1078 :                         continue;
    1121             :                 }
    1122        1505 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1123        1505 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1124        1505 :                 et_idx++;
    1125             : 
    1126        1505 :                 ep_len = 1;
    1127        1505 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1128             :         }
    1129        1677 :         *next_ep = i; /* equals num_extent_pages when the loop ran to completion */
    1130             : /* desc->length excludes the common descriptor header, so the header size is added back when advancing. */
    1131        1677 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1132        1677 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1133        1677 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1134             : }
    1135             : 
    1136             : static int /* Serialize the whole extent table, adding metadata pages as needed. Returns 0, or the negative rc from blob_serialize_add_page(). */
    1137        1679 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1138             :                             struct spdk_blob_md_page **pages,
    1139             :                             struct spdk_blob_md_page *cur_page, /* passed by value: the caller does not see pages added here */
    1140             :                             uint32_t *page_count, uint8_t **buf,
    1141             :                             size_t *remaining_sz)
    1142             : {
    1143        1679 :         uint64_t                                last_extent_page; /* next extent page index still to be serialized */
    1144             :         int                                     rc;
    1145             : 
    1146        1679 :         last_extent_page = 0;
    1147             :         /* At least one extent table descriptor must always be persisted, even when
    1148             :          * num_extent_pages == 0; hence '<=' rather than '<' in the loop condition. */
    1149        1697 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1150        1697 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1151             :                                                   remaining_sz);
    1152             : 
    1153        1697 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1154        1679 :                         break;
    1155             :                 }
    1156             : /* The current page is full: chain a new page and continue in its descriptor area. */
    1157          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1158          18 :                 if (rc < 0) {
    1159           0 :                         return rc; /* *pages is not released on this path */
    1160             :                 }
    1161             : 
    1162          18 :                 *buf = (uint8_t *)cur_page->descriptors;
    1163          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1164             :         }
    1165             : 
    1166        1679 :         return 0;
    1167             : }
    1168             : 
    1169             : static void /* Write one run-length-encoded EXTENT_RLE descriptor at *buf for the clusters starting at start_cluster. */
    1170        1751 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1171             :                           uint64_t start_cluster, uint64_t *next_cluster, /* next_cluster (out): first cluster index not yet serialized */
    1172             :                           uint8_t **buf, size_t *buf_sz) /* in/out: advanced / reduced by the bytes written */
    1173             : {
    1174             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1175             :         size_t cur_sz; /* bytes needed for the header plus all extents up to and including the one being built */
    1176             :         uint64_t i, extent_idx;
    1177             :         uint64_t lba, lba_per_cluster, lba_count; /* lba / lba_count: start and length (in LBAs) of the current run */
    1178             : 
    1179             :         /* The buffer must have room for at least one extent */
    1180        1751 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1181        1751 :         if (*buf_sz < cur_sz) {
    1182          18 :                 *next_cluster = start_cluster; /* nothing written; *buf and *buf_sz are left untouched */
    1183          18 :                 return;
    1184             :         }
    1185             : 
    1186        1733 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1187        1733 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1188             : 
    1189        1733 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1190             :         /* Assert for scan-build false positive */
    1191        1733 :         assert(lba_per_cluster > 0);
    1192             : /* Seed the first run with the starting cluster, then extend or close it for each later cluster. */
    1193        1733 :         lba = blob->active.clusters[start_cluster];
    1194        1733 :         lba_count = lba_per_cluster;
    1195        1733 :         extent_idx = 0;
    1196      810464 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1197      808735 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1198             :                         /* Run-length encode sequential non-zero LBA */
    1199        7276 :                         lba_count += lba_per_cluster;
    1200        7276 :                         continue;
    1201      801459 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1202             :                         /* Run-length encode unallocated clusters */
    1203      800266 :                         lba_count += lba_per_cluster;
    1204      800266 :                         continue;
    1205             :                 }
    1206        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster; /* run ended: emit it in cluster units */
    1207        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1208        1193 :                 extent_idx++;
    1209             : /* Reserve room for the run that starts at cluster i before beginning it. */
    1210        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1211             : 
    1212        1193 :                 if (*buf_sz < cur_sz) {
    1213             :                         /* If we ran out of buffer space, return */
    1214           4 :                         *next_cluster = i;
    1215           4 :                         break;
    1216             :                 }
    1217             : 
    1218        1189 :                 lba = blob->active.clusters[i];
    1219        1189 :                 lba_count = lba_per_cluster;
    1220             :         }
    1221             : /* If the loop did not run out of space, the last run is still pending: emit it and report completion. */
    1222        1733 :         if (*buf_sz >= cur_sz) {
    1223        1729 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1224        1729 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1225        1729 :                 extent_idx++;
    1226             : 
    1227        1729 :                 *next_cluster = blob->active.num_clusters;
    1228             :         }
    1229             : /* length excludes the common descriptor header, so the header size is added back when advancing. */
    1230        1733 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1231        1733 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1232        1733 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1233             : }
    1234             : 
    1235             : static int /* Serialize all clusters as EXTENT_RLE descriptors, adding metadata pages as needed. Returns 0, or the negative rc from blob_serialize_add_page(). */
    1236        1943 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1237             :                            struct spdk_blob_md_page **pages,
    1238             :                            struct spdk_blob_md_page *cur_page, /* passed by value: the caller does not see pages added here */
    1239             :                            uint32_t *page_count, uint8_t **buf,
    1240             :                            size_t *remaining_sz)
    1241             : {
    1242        1943 :         uint64_t                                last_cluster; /* next cluster index still to be serialized */
    1243             :         int                                     rc;
    1244             : 
    1245        1943 :         last_cluster = 0;
    1246        1965 :         while (last_cluster < blob->active.num_clusters) { /* a blob with zero clusters writes no extent descriptor */
    1247        1751 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1248             : 
    1249        1751 :                 if (last_cluster == blob->active.num_clusters) {
    1250        1729 :                         break;
    1251             :                 }
    1252             : /* The current page is full: chain a new page and continue in its descriptor area. */
    1253          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1254          22 :                 if (rc < 0) {
    1255           0 :                         return rc; /* *pages is not released on this path */
    1256             :                 }
    1257             : 
    1258          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1259          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1260             :         }
    1261             : 
    1262        1943 :         return 0;
    1263             : }
    1264             : 
    1265             : static void /* Fill 'page' with a single EXTENT_PAGE descriptor for the group of clusters that contains 'cluster'. */
    1266        1106 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1267             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1268             : {
    1269             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1270             :         uint64_t i, extent_idx;
    1271             :         uint64_t lba, lba_per_cluster;
    1272        1106 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP; /* round down to a multiple of SPDK_EXTENTS_PER_EP */
    1273             : /* The descriptor is placed at the start of the page's descriptor area; no remaining-size check is made here. */
    1274        1106 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1275        1106 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1276             : 
    1277        1106 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1278             : 
    1279        1106 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1280        1106 :         extent_idx = 0;
    1281       42418 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) { /* stops at the blob's last cluster or after SPDK_EXTENTS_PER_EP entries */
    1282       41378 :                 lba = blob->active.clusters[i];
    1283       41378 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster; /* stored in cluster units, not LBAs */
    1284       41378 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1285          66 :                         break;
    1286             :                 }
    1287             :         }
    1288        1106 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1289             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1290        1106 : }
    1291             : 
    1292             : static void /* Write the FLAGS descriptor at 'buf' and reduce *buf_sz by its size. */
    1293        3622 : blob_serialize_flags(const struct spdk_blob *blob,
    1294             :                      uint8_t *buf, size_t *buf_sz) /* buf is passed by value: the caller must advance its own pointer */
    1295             : {
    1296             :         struct spdk_blob_md_descriptor_flags *desc;
    1297             : 
    1298             :         /*
    1299             :          * Flags get serialized first, so we should always have room for the flags
    1300             :          *  descriptor.
    1301             :          */
    1302        3622 :         assert(*buf_sz >= sizeof(*desc));
    1303             : 
    1304        3622 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1305        3622 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1306        3622 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor); /* payload size only, without the common header */
    1307        3622 :         desc->invalid_flags = blob->invalid_flags;
    1308        3622 :         desc->data_ro_flags = blob->data_ro_flags;
    1309        3622 :         desc->md_ro_flags = blob->md_ro_flags;
    1310             : 
    1311        3622 :         *buf_sz -= sizeof(*desc);
    1312        3622 : }
    1313             : 
    1314             : static int /* Serialize every xattr in 'xattrs', adding metadata pages as needed. Returns 0 or a negative value; on failure *pages is freed and reset. */
    1315        7244 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1316             :                       const struct spdk_xattr_tailq *xattrs, bool internal, /* internal: forwarded to blob_serialize_xattr() to pick the descriptor type */
    1317             :                       struct spdk_blob_md_page **pages,
    1318             :                       struct spdk_blob_md_page *cur_page, /* passed by value: the caller does not see pages added here */
    1319             :                       uint32_t *page_count, uint8_t **buf,
    1320             :                       size_t *remaining_sz)
    1321             : {
    1322             :         const struct spdk_xattr *xattr;
    1323             :         int     rc;
    1324             : 
    1325        8983 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1326        1739 :                 size_t required_sz = 0;
    1327             : 
    1328        1739 :                 rc = blob_serialize_xattr(xattr,
    1329             :                                           *buf, *remaining_sz,
    1330             :                                           &required_sz, internal);
    1331        1739 :                 if (rc < 0) {
    1332             :                         /* Need to add a new page to the chain */
    1333          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1334             :                                                      &cur_page);
    1335          48 :                         if (rc < 0) {
    1336           0 :                                 spdk_free(*pages);
    1337           0 :                                 *pages = NULL;
    1338           0 :                                 *page_count = 0;
    1339           0 :                                 return rc;
    1340             :                         }
    1341             : 
    1342          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1343          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1344             : 
    1345             :                         /* Try again */
    1346          48 :                         required_sz = 0;
    1347          48 :                         rc = blob_serialize_xattr(xattr,
    1348             :                                                   *buf, *remaining_sz,
    1349             :                                                   &required_sz, internal);
    1350             : /* A second failure means the xattr does not fit even in an empty page: give up and release everything. */
    1351          48 :                         if (rc < 0) {
    1352           0 :                                 spdk_free(*pages);
    1353           0 :                                 *pages = NULL;
    1354           0 :                                 *page_count = 0;
    1355           0 :                                 return rc;
    1356             :                         }
    1357             :                 }
    1358             : /* Step past the descriptor that was just written. */
    1359        1739 :                 *remaining_sz -= required_sz;
    1360        1739 :                 *buf += required_sz;
    1361             :         }
    1362             : 
    1363        7244 :         return 0;
    1364             : }
    1365             : 
    1366             : static int /* Build the on-disk metadata pages for a dirty blob: flags, xattrs, internal xattrs, then extents. Returns 0 or a negative value. */
    1367        3622 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1368             :                uint32_t *page_count) /* pages / page_count (out): the allocated page array and its length */
    1369             : {
    1370        3622 :         struct spdk_blob_md_page                *cur_page;
    1371             :         int                                     rc;
    1372        3622 :         uint8_t                                 *buf; /* write cursor inside the current page's descriptor area */
    1373        3622 :         size_t                                  remaining_sz; /* bytes left in the current page's descriptor area */
    1374             : 
    1375        3622 :         assert(pages != NULL);
    1376        3622 :         assert(page_count != NULL);
    1377        3622 :         assert(blob != NULL);
    1378        3622 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1379             : 
    1380        3622 :         *pages = NULL;
    1381        3622 :         *page_count = 0;
    1382             : 
    1383             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1384        3622 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1385        3622 :         if (rc < 0) {
    1386           0 :                 return rc;
    1387             :         }
    1388             : 
    1389        3622 :         buf = (uint8_t *)cur_page->descriptors;
    1390        3622 :         remaining_sz = sizeof(cur_page->descriptors);
    1391             : 
    1392             :         /* Serialize flags */
    1393        3622 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1394        3622 :         buf += sizeof(struct spdk_blob_md_descriptor_flags); /* blob_serialize_flags() takes buf by value, so advance it here */
    1395             : 
    1396             :         /* Serialize xattrs. The helpers below take cur_page by value and use it only after
                        * blob_serialize_add_page() has refreshed it, so a stale value here is never dereferenced. */
    1397        3622 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1398             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1399        3622 :         if (rc < 0) {
    1400           0 :                 return rc;
    1401             :         }
    1402             : 
    1403             :         /* Serialize internal xattrs */
    1404        3622 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1405             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1406        3622 :         if (rc < 0) {
    1407           0 :                 return rc;
    1408             :         }
    1409             : /* NOTE(review): unlike the xattr helpers, the extent serializers return errors without releasing *pages; presumably the caller frees them - confirm. */
    1410        3622 :         if (blob->use_extent_table) {
    1411             :                 /* Serialize extent table */
    1412        1679 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1413             :         } else {
    1414             :                 /* Serialize extents */
    1415        1943 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1416             :         }
    1417             : 
    1418        3622 :         return rc;
    1419             : }
    1420             : 
    1421             : struct spdk_blob_load_ctx { /* State carried through the blob load callbacks; released by blob_load_final(). */
    1422             :         struct spdk_blob                *blob; /* blob being loaded */
    1423             : /* pages is a DMA buffer released with spdk_free(); num_pages is the number of pages it holds. */
    1424             :         struct spdk_blob_md_page        *pages;
    1425             :         uint32_t                        num_pages;
    1426             :         uint32_t                        next_extent_page; /* presumably the index of the next extent page to read - TODO confirm */
    1427             :         spdk_bs_sequence_t              *seq; /* passed as the first argument to cb_fn */
    1428             : /* Completion callback and its argument, invoked once by blob_load_final(). */
    1429             :         spdk_bs_sequence_cpl            cb_fn;
    1430             :         void                            *cb_arg;
    1431             : };
    1432             : 
    1433             : static uint32_t /* Return the CRC32C of a metadata page, computed over all but its last 4 bytes. */
    1434       19974 : blob_md_page_calc_crc(void *page) /* page: buffer of at least SPDK_BS_PAGE_SIZE bytes */
    1435             : {
    1436             :         uint32_t                crc;
    1437             : /* Seed with all ones and XOR with all ones at the end. Presumably the skipped trailing 4 bytes hold the stored CRC - TODO confirm. */
    1438       19974 :         crc = BLOB_CRC32C_INITIAL;
    1439       19974 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1440       19974 :         crc ^= BLOB_CRC32C_INITIAL;
    1441             : 
    1442       19974 :         return crc;
    1443             : 
    1444             : }
    1445             : 
    1446             : static void /* Finish a blob load: mark the blob clean on success, invoke the completion callback, then release ctx. */
    1447        3474 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno) /* ctx is freed here and must not be used afterwards */
    1448             : {
    1449        3474 :         struct spdk_blob                *blob = ctx->blob;
    1450             : 
    1451        3474 :         if (bserrno == 0) {
    1452        3410 :                 blob_mark_clean(blob);
    1453             :         }
    1454             : /* The callback runs before the context is freed, so ctx->seq and ctx->cb_arg are still valid for it. */
    1455        3474 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1456             : 
    1457             :         /* Free the memory */
    1458        3474 :         spdk_free(ctx->pages);
    1459        3474 :         free(ctx);
    1460        3474 : }
    1461             : 
    1462             : static void /* Open-blob completion for the parent snapshot: attach it as the blob's back device and finish the load. */
    1463         458 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno) /* cb_arg: the struct spdk_blob_load_ctx of the load in progress */
    1464             : {
    1465         458 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1466         458 :         struct spdk_blob                *blob = ctx->blob;
    1467             : 
    1468         458 :         if (bserrno == 0) {
    1469         452 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1470         452 :                 if (blob->back_bs_dev == NULL) {
    1471           0 :                         bserrno = -ENOMEM; /* NOTE(review): the opened snapshot is not closed on this path - confirm whether that is handled elsewhere */
    1472             :                 }
    1473             :         }
    1474         458 :         if (bserrno != 0) {
    1475           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1476             :         }
    1477             : /* Both success and failure end the load here; blob_load_final() frees ctx. */
    1478         458 :         blob_load_final(ctx, bserrno);
    1479         458 : }
    1480             : 
    1481             : static void blob_update_clear_method(struct spdk_blob *blob);
    1482             : 
    1483             : static int /* Create the external-snapshot back device for an esnap clone. Returns 0, -ENOTSUP, -EINVAL, or the error from the create callback. */
    1484         120 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx) /* blob_ctx: passed through unchanged to bs->esnap_bs_dev_create() */
    1485             : {
    1486         120 :         struct spdk_blob_store *bs = blob->bs;
    1487         120 :         struct spdk_bs_dev *bs_dev = NULL;
    1488         120 :         const void *esnap_id = NULL;
    1489         120 :         size_t id_len = 0;
    1490             :         int rc;
    1491             : /* Without a create callback the blobstore cannot open external snapshots at all. */
    1492         120 :         if (bs->esnap_bs_dev_create == NULL) {
    1493           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1494             :                                "without support for esnap clones\n", blob->id);
    1495           8 :                 return -ENOTSUP;
    1496             :         }
    1497         112 :         assert(blob->back_bs_dev == NULL);
    1498             : /* The esnap ID is read with the 'internal' argument of blob_get_xattr_value() set to true. */
    1499         112 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1500         112 :         if (rc != 0) {
    1501           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1502           0 :                 return -EINVAL;
    1503             :         }
    1504         112 :         assert(id_len > 0 && id_len < UINT32_MAX); /* id_len is narrowed to uint32_t in the call below */
    1505             : 
    1506         112 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1507             : 
    1508         112 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1509             :                                      &bs_dev);
    1510         112 :         if (rc != 0) {
    1511           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1512             :                               "with error %d\n", blob->id, rc);
    1513           0 :                 return rc;
    1514             :         }
    1515             : 
    1516             :         /*
    1517             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1518             :          * This especially might happen during spdk_bs_load() iteration.
    1519             :          */
    1520         112 :         if (bs_dev != NULL) {
    1521         112 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1522         112 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) { /* the io unit size must be a whole multiple of the device block size */
    1523           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1524             :                                        "is not compatible with blobstore block size %u\n",
    1525             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1526           4 :                         bs_dev->destroy(bs_dev); /* the device was created above, so release it before failing */
    1527           4 :                         return -EINVAL;
    1528             :                 }
    1529             :         }
    1530             : /* back_bs_dev may be left NULL here (see the note above); parent_id is set to the external-snapshot marker either way. */
    1531         108 :         blob->back_bs_dev = bs_dev;
    1532         108 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1533             : 
    1534         108 :         return 0;
    1535             : }
    1536             : 
    1537             : static void /* Choose and attach the blob's back device (esnap, parent snapshot, zeroes dev, or none), then finish the load. */
    1538        3428 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg) /* cb_arg: the struct spdk_blob_load_ctx of the load in progress */
    1539             : {
    1540        3428 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1541        3428 :         struct spdk_blob                *blob = ctx->blob;
    1542        3428 :         const void                      *value;
    1543        3428 :         size_t                          len;
    1544             :         int                             rc;
    1545             : /* Esnap clones are handled first; the blob_load_esnap() result is reported directly. */
    1546        3428 :         if (blob_is_esnap_clone(blob)) {
    1547         120 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1548         120 :                 blob_load_final(ctx, rc);
    1549         120 :                 return;
    1550             :         }
    1551             : 
    1552        3308 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1553        1038 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1554        1038 :                 if (rc == 0) {
    1555         458 :                         if (len != sizeof(spdk_blob_id)) { /* the stored value must be exactly one blob id */
    1556           0 :                                 blob_load_final(ctx, -EINVAL);
    1557           0 :                                 return;
    1558             :                         }
    1559             :                         /* open snapshot blob and continue in the callback function */
    1560         458 :                         blob->parent_id = *(spdk_blob_id *)value;
    1561         458 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1562             :                                           blob_load_snapshot_cpl, ctx);
    1563         458 :                         return; /* blob_load_snapshot_cpl() calls blob_load_final() */
    1564             :                 } else {
    1565             :                         /* add zeroes_dev for thin provisioned blob */
    1566         580 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1567             :                 }
    1568             :         } else {
    1569             :                 /* standard blob */
    1570        2270 :                 blob->back_bs_dev = NULL;
    1571             :         }
    1572        2850 :         blob_load_final(ctx, 0);
    1573             : }
    1574             : 
    1575             : static void
    1576        2822 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1577             : {
    1578        2822 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1579        2822 :         struct spdk_blob                *blob = ctx->blob;
    1580             :         struct spdk_blob_md_page        *page;
    1581             :         uint64_t                        i;
    1582             :         uint32_t                        crc;
    1583             :         uint64_t                        lba;
    1584             :         void                            *tmp;
    1585             :         uint64_t                        sz;
    1586             : 
    1587        2822 :         if (bserrno) {
    1588           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1589           6 :                 blob_load_final(ctx, bserrno);
    1590           6 :                 return;
    1591             :         }
    1592             : 
    1593        2816 :         if (ctx->pages == NULL) {
    1594             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1595        1768 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1596             :                                           NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1597        1768 :                 if (!ctx->pages) {
    1598           0 :                         blob_load_final(ctx, -ENOMEM);
    1599           0 :                         return;
    1600             :                 }
    1601        1768 :                 ctx->num_pages = 1;
    1602        1768 :                 ctx->next_extent_page = 0;
    1603             :         } else {
    1604        1048 :                 page = &ctx->pages[0];
    1605        1048 :                 crc = blob_md_page_calc_crc(page);
    1606        1048 :                 if (crc != page->crc) {
    1607           0 :                         blob_load_final(ctx, -EINVAL);
    1608           0 :                         return;
    1609             :                 }
    1610             : 
    1611        1048 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1612           0 :                         blob_load_final(ctx, -EINVAL);
    1613           0 :                         return;
    1614             :                 }
    1615             : 
    1616        1048 :                 bserrno = blob_parse_extent_page(page, blob);
    1617        1048 :                 if (bserrno) {
    1618           0 :                         blob_load_final(ctx, bserrno);
    1619           0 :                         return;
    1620             :                 }
    1621             :         }
    1622             : 
    1623        3240 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1624        1478 :                 if (blob->active.extent_pages[i] != 0) {
    1625             :                         /* Extent page was allocated, read and parse it. */
    1626        1054 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1627        1054 :                         ctx->next_extent_page = i + 1;
    1628             : 
    1629        1054 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1630        1054 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1631             :                                              blob_load_cpl_extents_cpl, ctx);
    1632        1054 :                         return;
    1633             :                 } else {
    1634             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1635             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1636             : 
    1637         424 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1638         424 :                         blob->active.num_clusters += sz;
    1639         424 :                         blob->remaining_clusters_in_et -= sz;
    1640             : 
    1641         424 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1642         424 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1643             : 
    1644         424 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1645         424 :                         if (tmp == NULL) {
    1646           0 :                                 blob_load_final(ctx, -ENOMEM);
    1647           0 :                                 return;
    1648             :                         }
    1649         424 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1650         424 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1651         424 :                         blob->active.clusters = tmp;
    1652         424 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1653             :                 }
    1654             :         }
    1655             : 
    1656        1762 :         blob_load_backing_dev(seq, ctx);
    1657             : }
    1658             : 
    1659             : static void
    1660        3574 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1661             : {
    1662        3574 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1663        3574 :         struct spdk_blob                *blob = ctx->blob;
    1664             :         struct spdk_blob_md_page        *page;
    1665             :         int                             rc;
    1666             :         uint32_t                        crc;
    1667             :         uint32_t                        current_page;
    1668             : 
    1669        3574 :         if (ctx->num_pages == 1) {
    1670        3474 :                 current_page = bs_blobid_to_page(blob->id);
    1671             :         } else {
    1672         100 :                 assert(ctx->num_pages != 0);
    1673         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1674         100 :                 current_page = page->next;
    1675             :         }
    1676             : 
    1677        3574 :         if (bserrno) {
    1678          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1679             :                             current_page, blob->id, bserrno);
    1680          20 :                 blob_load_final(ctx, bserrno);
    1681          20 :                 return;
    1682             :         }
    1683             : 
    1684        3554 :         page = &ctx->pages[ctx->num_pages - 1];
    1685        3554 :         crc = blob_md_page_calc_crc(page);
    1686        3554 :         if (crc != page->crc) {
    1687           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1688             :                             current_page, blob->id);
    1689           8 :                 blob_load_final(ctx, -EINVAL);
    1690           8 :                 return;
    1691             :         }
    1692             : 
    1693        3546 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1694             :                 struct spdk_blob_md_page *tmp_pages;
    1695         100 :                 uint32_t next_page = page->next;
    1696         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1697             : 
    1698             :                 /* Read the next page */
    1699         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1700         100 :                 if (tmp_pages == NULL) {
    1701           0 :                         blob_load_final(ctx, -ENOMEM);
    1702           0 :                         return;
    1703             :                 }
    1704         100 :                 ctx->num_pages++;
    1705         100 :                 ctx->pages = tmp_pages;
    1706             : 
    1707         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1708             :                                      next_lba,
    1709         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1710             :                                      blob_load_cpl, ctx);
    1711         100 :                 return;
    1712             :         }
    1713             : 
    1714             :         /* Parse the pages */
    1715        3446 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1716        3446 :         if (rc) {
    1717          12 :                 blob_load_final(ctx, rc);
    1718          12 :                 return;
    1719             :         }
    1720             : 
    1721        3434 :         if (blob->extent_table_found == true) {
    1722             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1723        1768 :                 assert(blob->extent_rle_found == false);
    1724        1768 :                 blob->use_extent_table = true;
    1725             :         } else {
    1726             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1727             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1728             :                  * and no extent_rle descriptors were persisted for it.
    1729             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1730        1666 :                 blob->use_extent_table = false;
    1731             :         }
    1732             : 
    1733             :         /* Check the clear_method stored in metadata vs what may have been passed
    1734             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1735             :          */
    1736        3434 :         blob_update_clear_method(blob);
    1737             : 
    1738        3434 :         spdk_free(ctx->pages);
    1739        3434 :         ctx->pages = NULL;
    1740             : 
    1741        3434 :         if (blob->extent_table_found) {
    1742        1768 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1743             :         } else {
    1744        1666 :                 blob_load_backing_dev(seq, ctx);
    1745             :         }
    1746             : }
    1747             : 
    1748             : /* Load a blob from disk given a blobid */
    1749             : static void
    1750        3474 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1751             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1752             : {
    1753             :         struct spdk_blob_load_ctx *ctx;
    1754             :         struct spdk_blob_store *bs;
    1755             :         uint32_t page_num;
    1756             :         uint64_t lba;
    1757             : 
    1758        3474 :         blob_verify_md_op(blob);
    1759             : 
    1760        3474 :         bs = blob->bs;
    1761             : 
    1762        3474 :         ctx = calloc(1, sizeof(*ctx));
    1763        3474 :         if (!ctx) {
    1764           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1765           0 :                 return;
    1766             :         }
    1767             : 
    1768        3474 :         ctx->blob = blob;
    1769        3474 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1770        3474 :         if (!ctx->pages) {
    1771           0 :                 free(ctx);
    1772           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1773           0 :                 return;
    1774             :         }
    1775        3474 :         ctx->num_pages = 1;
    1776        3474 :         ctx->cb_fn = cb_fn;
    1777        3474 :         ctx->cb_arg = cb_arg;
    1778        3474 :         ctx->seq = seq;
    1779             : 
    1780        3474 :         page_num = bs_blobid_to_page(blob->id);
    1781        3474 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1782             : 
    1783        3474 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1784             : 
    1785        3474 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1786        3474 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1787             :                              blob_load_cpl, ctx);
    1788             : }
    1789             : 
    1790             : struct spdk_blob_persist_ctx { /* State carried through the blob_persist_* callback chain */
    1791             :         struct spdk_blob                *blob; /* Blob this persist request operates on */
    1792             : 
    1793             :         struct spdk_blob_md_page        *pages; /* Released with spdk_free() in blob_persist_complete_cb() */
    1794             :         uint32_t                        next_extent_page; /* NOTE(review): presumably index of the next extent page to handle - not used in this chunk, confirm */
    1795             :         struct spdk_blob_md_page        *extent_page; /* NOTE(review): presumably buffer for a single extent page - not used in this chunk, confirm */
    1796             : 
    1797             :         spdk_bs_sequence_t              *seq; /* Sequence handed to cb_fn on completion */
    1798             :         spdk_bs_sequence_cpl            cb_fn; /* Completion callback, invoked from blob_persist_complete_cb() */
    1799             :         void                            *cb_arg; /* Opaque argument passed to cb_fn */
    1800             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link; /* Linkage on the blob's pending_persists / persists_to_complete lists */
    1801             : };
    1802             : 
    1803             : static void
    1804        1270 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1805             :                    uint64_t lba_count)
    1806             : {
    1807        1270 :         switch (blob->clear_method) {
    1808        1270 :         case BLOB_CLEAR_WITH_DEFAULT:
    1809             :         case BLOB_CLEAR_WITH_UNMAP:
    1810        1270 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1811        1270 :                 break;
    1812           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1813           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1814           0 :                 break;
    1815           0 :         case BLOB_CLEAR_WITH_NONE:
    1816             :         default:
    1817           0 :                 break;
    1818             :         }
    1819        1270 : }
    1820             : 
    1821             : static int
    1822        1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1823             : {
    1824             :         uint32_t        crc;
    1825             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1826             : 
    1827        1152 :         if (super->version > SPDK_BS_VERSION ||
    1828        1148 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1829           8 :                 return -EILSEQ;
    1830             :         }
    1831             : 
    1832        1144 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1833             :                    sizeof(super->signature)) != 0) {
    1834           0 :                 return -EILSEQ;
    1835             :         }
    1836             : 
    1837        1144 :         crc = blob_md_page_calc_crc(super);
    1838        1144 :         if (crc != super->crc) {
    1839           4 :                 return -EILSEQ;
    1840             :         }
    1841             : 
    1842        1140 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1843        1126 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1844          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1845           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1846             :         } else {
    1847           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1848           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1849           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1850           8 :                 return -ENXIO;
    1851             :         }
    1852             : 
    1853        1132 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1854           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1855             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1856           8 :                 return -EILSEQ;
    1857             :         }
    1858             : 
    1859        1124 :         return 0;
    1860             : }
    1861             : 
    1862             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1863             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg); /* Forward declaration: called by blob_persist_complete() below before it is defined */
    1864             : 
    1865             : static void
    1866        5122 : blob_persist_complete_cb(void *arg)
    1867             : {
    1868        5122 :         struct spdk_blob_persist_ctx *ctx = arg;
    1869             : 
    1870             :         /* Call user callback */
    1871        5122 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1872             : 
    1873             :         /* Free the memory */
    1874        5122 :         spdk_free(ctx->pages);
    1875        5122 :         free(ctx);
    1876        5122 : }
    1877             : 
    1878             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno); /* Forward declaration: passed as the bs_mark_dirty() callback in blob_persist_complete() */
    1879             : 
    1880             : static void
    1881        5122 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1882             : {
    1883             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1884        5122 :         struct spdk_blob                *blob = ctx->blob;
    1885             : 
    1886        5122 :         if (bserrno == 0) {
    1887        5070 :                 blob_mark_clean(blob);
    1888             :         }
    1889             : 
    1890        5122 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1891             : 
    1892             :         /* Complete all persists that were pending when the current persist started */
    1893       10244 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1894        5122 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1895        5122 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1896             :         }
    1897             : 
    1898        5122 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1899        5099 :                 return;
    1900             :         }
    1901             : 
    1902             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1903          23 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1904          23 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1905             : 
    1906          23 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1907          23 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1908             : }
    1909             : 
    1910             : static void
    1911        5070 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1912             : {
    1913        5070 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1914        5070 :         struct spdk_blob                *blob = ctx->blob;
    1915        5070 :         struct spdk_blob_store          *bs = blob->bs;
    1916             :         size_t                          i;
    1917             : 
    1918        5070 :         if (bserrno != 0) {
    1919           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1920           0 :                 return;
    1921             :         }
    1922             : 
    1923        5070 :         spdk_spin_lock(&bs->used_lock);
    1924             : 
    1925             :         /* Release all extent_pages that were truncated */
    1926        6806 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1927             :                 /* Nothing to release if it was not allocated */
    1928        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1929         628 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1930             :                 }
    1931             :         }
    1932             : 
    1933        5070 :         spdk_spin_unlock(&bs->used_lock);
    1934             : 
    1935        5070 :         if (blob->active.num_extent_pages == 0) {
    1936        3655 :                 free(blob->active.extent_pages);
    1937        3655 :                 blob->active.extent_pages = NULL;
    1938        3655 :                 blob->active.extent_pages_array_size = 0;
    1939        1415 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1940             : #ifndef __clang_analyzer__
    1941             :                 void *tmp;
    1942             : 
    1943             :                 /* scan-build really can't figure reallocs, workaround it */
    1944           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1945           2 :                 assert(tmp != NULL);
    1946           2 :                 blob->active.extent_pages = tmp;
    1947             : #endif
    1948           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1949             :         }
    1950             : 
    1951        5070 :         blob_persist_complete(seq, ctx, bserrno);
    1952             : }
    1953             : 
    1954             : static void
    1955        5070 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1956             : {
    1957        5070 :         struct spdk_blob                *blob = ctx->blob;
    1958        5070 :         struct spdk_blob_store          *bs = blob->bs;
    1959             :         size_t                          i;
    1960             :         uint64_t                        lba;
    1961             :         uint64_t                        lba_count;
    1962             :         spdk_bs_batch_t                 *batch;
    1963             : 
    1964        5070 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1965        5070 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1966             : 
    1967             :         /* Clear all extent_pages that were truncated */
    1968        6806 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1969             :                 /* Nothing to clear if it was not allocated */
    1970        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1971         628 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1972         628 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1973             :                 }
    1974             :         }
    1975             : 
    1976        5070 :         bs_batch_close(batch);
    1977        5070 : }
    1978             : 
    1979             : static void
    1980        5070 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1981             : {
    1982        5070 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1983        5070 :         struct spdk_blob                *blob = ctx->blob;
    1984        5070 :         struct spdk_blob_store          *bs = blob->bs;
    1985             :         size_t                          i;
    1986             : 
    1987        5070 :         if (bserrno != 0) {
    1988           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1989           0 :                 return;
    1990             :         }
    1991             : 
    1992        5070 :         spdk_spin_lock(&bs->used_lock);
    1993             :         /* Release all clusters that were truncated */
    1994     1074081 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    1995     1069011 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1996             : 
    1997             :                 /* Nothing to release if it was not allocated */
    1998     1069011 :                 if (blob->active.clusters[i] != 0) {
    1999        2351 :                         bs_release_cluster(bs, cluster_num);
    2000             :                 }
    2001             :         }
    2002        5070 :         spdk_spin_unlock(&bs->used_lock);
    2003             : 
    2004        5070 :         if (blob->active.num_clusters == 0) {
    2005        1944 :                 free(blob->active.clusters);
    2006        1944 :                 blob->active.clusters = NULL;
    2007        1944 :                 blob->active.cluster_array_size = 0;
    2008        3126 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2009             : #ifndef __clang_analyzer__
    2010             :                 void *tmp;
    2011             : 
    2012             :                 /* scan-build really can't figure reallocs, workaround it */
    2013          18 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2014          18 :                 assert(tmp != NULL);
    2015          18 :                 blob->active.clusters = tmp;
    2016             : 
    2017             : #endif
    2018          18 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2019             :         }
    2020             : 
    2021             :         /* Move on to clearing extent pages */
    2022        5070 :         blob_persist_clear_extents(seq, ctx);
    2023             : }
    2024             : 
    2025             : static void
    2026        5070 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2027             : {
    2028        5070 :         struct spdk_blob                *blob = ctx->blob;
    2029        5070 :         struct spdk_blob_store          *bs = blob->bs;
    2030             :         spdk_bs_batch_t                 *batch;
    2031             :         size_t                          i;
    2032             :         uint64_t                        lba;
    2033             :         uint64_t                        lba_count;
    2034             : 
    2035             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2036             :          * at the end, but no changes ever occur in the middle of the list.
    2037             :          */
    2038             : 
    2039        5070 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2040             : 
    2041             :         /* Clear all clusters that were truncated */
    2042        5070 :         lba = 0;
    2043        5070 :         lba_count = 0;
    2044     1074081 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2045     1069011 :                 uint64_t next_lba = blob->active.clusters[i];
    2046     1069011 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2047             : 
    2048     1069011 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2049             :                         /* This cluster is contiguous with the previous one. */
    2050        1085 :                         lba_count += next_lba_count;
    2051        1085 :                         continue;
    2052     1067926 :                 } else if (next_lba == 0) {
    2053     1066660 :                         continue;
    2054             :                 }
    2055             : 
    2056             :                 /* This cluster is not contiguous with the previous one. */
    2057             : 
    2058             :                 /* If a run of LBAs previously existing, clear them now */
    2059        1266 :                 if (lba_count > 0) {
    2060          36 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2061             :                 }
    2062             : 
    2063             :                 /* Start building the next batch */
    2064        1266 :                 lba = next_lba;
    2065        1266 :                 if (next_lba > 0) {
    2066        1266 :                         lba_count = next_lba_count;
    2067             :                 } else {
    2068           0 :                         lba_count = 0;
    2069             :                 }
    2070             :         }
    2071             : 
    2072             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2073        5070 :         if (lba_count > 0) {
    2074        1230 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2075             :         }
    2076             : 
    2077        5070 :         bs_batch_close(batch);
    2078        5070 : }
    2079             : 
    2080             : static void
    2081        5074 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2082             : {
    2083        5074 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2084        5074 :         struct spdk_blob                *blob = ctx->blob;
    2085        5074 :         struct spdk_blob_store          *bs = blob->bs;
    2086             :         size_t                          i;
    2087             : 
    2088        5074 :         if (bserrno != 0) {
    2089           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2090           4 :                 return;
    2091             :         }
    2092             : 
    2093        5070 :         spdk_spin_lock(&bs->used_lock);
    2094             : 
    2095             :         /* This loop starts at 1 because the first page is special and handled
    2096             :          * below. The pages (except the first) are never written in place,
    2097             :          * so any pages in the clean list must be zeroed.
    2098             :          */
    2099        5138 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2100          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2101             :         }
    2102             : 
    2103        5070 :         if (blob->active.num_pages == 0) {
    2104             :                 uint32_t page_num;
    2105             : 
    2106        1488 :                 page_num = bs_blobid_to_page(blob->id);
    2107        1488 :                 bs_release_md_page(bs, page_num);
    2108             :         }
    2109             : 
    2110        5070 :         spdk_spin_unlock(&bs->used_lock);
    2111             : 
    2112             :         /* Move on to clearing clusters */
    2113        5070 :         blob_persist_clear_clusters(seq, ctx);
    2114             : }
    2115             : 
                       /*
                        * Sequence callback: issue zero-writes for the metadata pages recorded in
                        * the blob's clean page list.
                        *
                        * If bserrno is non-zero the persist is completed with that error.
                        * Otherwise a batch is built that writes zeroes over every page in
                        * blob->clean.pages[] except index 0, plus the blob's first metadata page
                        * when blob->active.num_pages == 0 (the delete case).  When the batch
                        * finishes, blob_persist_zero_pages_cpl is invoked with ctx.
                        */
    2116             : static void
    2117        5114 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2118             : {
    2119        5114 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2120        5114 :         struct spdk_blob                *blob = ctx->blob;
    2121        5114 :         struct spdk_blob_store          *bs = blob->bs;
    2122             :         uint64_t                        lba;
    2123             :         uint64_t                        lba_count;
    2124             :         spdk_bs_batch_t                 *batch;
    2125             :         size_t                          i;
    2126             : 
    2127        5114 :         if (bserrno != 0) {
    2128          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2129          40 :                 return;
    2130             :         }
    2131             : 
    2132        5074 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2133             : 
                               /* Every zero-write below covers exactly one metadata page. */
    2134        5074 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2135             : 
    2136             :         /* This loop starts at 1 because the first page is special and handled
    2137             :          * below. The pages (except the first) are never written in place,
    2138             :          * so any pages in the clean list must be zeroed.
    2139             :          */
    2140        5142 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2141          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2142             : 
    2143          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2144             :         }
    2145             : 
    2146             :         /* The first page will only be zeroed if this is a delete. */
    2147        5074 :         if (blob->active.num_pages == 0) {
    2148             :                 uint32_t page_num;
    2149             : 
    2150             :                 /* The first page in the metadata goes where the blobid indicates */
    2151        1492 :                 page_num = bs_blobid_to_page(blob->id);
    2152        1492 :                 lba = bs_md_page_to_lba(bs, page_num);
    2153             : 
    2154        1492 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2155             :         }
    2156             : 
    2157        5074 :         bs_batch_close(batch);
    2158             : }
    2159             : 
                       /*
                        * Sequence callback: write the root (first) metadata page of the blob.
                        *
                        * If bserrno is non-zero the persist is completed with that error.  If the
                        * blob has no active pages, the write is skipped and control passes
                        * directly to blob_persist_zero_pages.  Otherwise ctx->pages[0] is written
                        * to the LBA of the page derived from the blob id, and
                        * blob_persist_zero_pages runs when that write completes.
                        */
    2160             : static void
    2161        3622 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2162             : {
    2163        3622 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2164        3622 :         struct spdk_blob                *blob = ctx->blob;
    2165        3622 :         struct spdk_blob_store          *bs = blob->bs;
    2166             :         uint64_t                        lba;
    2167             :         uint32_t                        lba_count;
    2168             :         struct spdk_blob_md_page        *page;
    2169             : 
    2170        3622 :         if (bserrno != 0) {
    2171           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2172           0 :                 return;
    2173             :         }
    2174             : 
    2175        3622 :         if (blob->active.num_pages == 0) {
    2176             :                 /* Move on to the next step */
    2177           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2178           0 :                 return;
    2179             :         }
    2180             : 
                               /* Number of LBAs occupied by a single metadata page structure. */
    2181        3622 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2182             : 
    2183        3622 :         page = &ctx->pages[0];
    2184             :         /* The first page in the metadata goes where the blobid indicates */
    2185        3622 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2186             : 
    2187        3622 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2188             :                               blob_persist_zero_pages, ctx);
    2189             : }
    2190             : 
                       /*
                        * Write every serialized metadata page except the root page.
                        *
                        * Pages ctx->pages[1 .. num_pages-1] are written in one batch to the LBAs
                        * of the page indices stored in blob->active.pages[].  When the batch
                        * completes, blob_persist_write_page_root is invoked with ctx, so the root
                        * page is only written after the rest of the chain.
                        */
    2191             : static void
    2192        3622 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2193             : {
    2194        3622 :         struct spdk_blob                *blob = ctx->blob;
    2195        3622 :         struct spdk_blob_store          *bs = blob->bs;
    2196             :         uint64_t                        lba;
    2197             :         uint32_t                        lba_count;
    2198             :         struct spdk_blob_md_page        *page;
    2199             :         spdk_bs_batch_t                 *batch;
    2200             :         size_t                          i;
    2201             : 
    2202             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2203             :          * at the end, but no changes ever occur in the middle of the list.
    2204             :          */
    2205             : 
    2206        3622 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2207             : 
    2208        3622 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2209             : 
    2210             :         /* This starts at 1. The root page is not written until
    2211             :          * all of the others are finished
    2212             :          */
    2213        3710 :         for (i = 1; i < blob->active.num_pages; i++) {
    2214          88 :                 page = &ctx->pages[i];
                                       /* Each serialized page is expected to carry its own index in the chain. */
    2215          88 :                 assert(page->sequence_num == i);
    2216             : 
    2217          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2218             : 
    2219          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2220             :         }
    2221             : 
    2222        3622 :         bs_batch_close(batch);
    2223        3622 : }
    2224             : 
                       /*
                        * Change the in-memory size of a blob to sz clusters.
                        *
                        * Growing reallocates (and zero-fills the new tail of) the cluster array
                        * and, when the extent table is in use, the extent page array.  For blobs
                        * that are not thin provisioned, the new clusters are allocated
                        * immediately via bs_allocate_cluster.  Shrinking only adjusts the counters;
                        * the arrays themselves are not shrunk here.
                        *
                        * Returns 0 on success (including when the size is unchanged), -ENOSPC
                        * when the free-cluster or free-md-page pre-check fails, and -ENOMEM when
                        * an array cannot be reallocated.
                        *
                        * bs->used_lock must not be held on entry.  It is taken by the pre-check
                        * for growing, non-thin blobs and stays held until the "out" label.
                        */
    2225             : static int
    2226        3580 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2227             : {
    2228             :         uint64_t        i;
    2229             :         uint64_t        *tmp;
    2230        3580 :         uint64_t        cluster;
    2231        3580 :         uint32_t        lfmd; /*  lowest free md page */
    2232             :         uint64_t        num_clusters;
    2233             :         uint32_t        *ep_tmp;
    2234        3580 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2235             :         struct spdk_blob_store *bs;
    2236             :         int             rc;
    2237             : 
    2238        3580 :         bs = blob->bs;
    2239             : 
    2240        3580 :         blob_verify_md_op(blob);
    2241             : 
    2242        3580 :         if (blob->active.num_clusters == sz) {
    2243         456 :                 return 0;
    2244             :         }
    2245             : 
    2246        3124 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2247             :                 /* If this blob was resized to be larger, then smaller, then
    2248             :                  * larger without syncing, then the cluster array already
    2249             :                  * contains spare assigned clusters we can use.
    2250             :                  */
    2251           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2252             :                                         sz);
    2253             :         } else {
    2254        3124 :                 num_clusters = blob->active.num_clusters;
    2255             :         }
    2256             : 
    2257        3124 :         if (blob->use_extent_table) {
    2258             :                 /* Round up since every cluster beyond current Extent Table size,
    2259             :                  * requires new extent page. */
    2260        1584 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2261        1584 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2262             :         }
    2263             : 
    2264        3124 :         assert(!spdk_spin_held(&bs->used_lock));
    2265             : 
    2266             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2267             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2268             :          * to claim them later in this function.
    2269             :          */
    2270        3124 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2271        1302 :                 spdk_spin_lock(&bs->used_lock);
    2272        1302 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2273           8 :                         rc = -ENOSPC;
    2274           8 :                         goto out;
    2275             :                 }
    2276        1294 :                 lfmd = 0;
                                       /* NOTE(review): lfmd is not advanced past the index that was just
                                        * found, so every iteration appears to restart the search at the same
                                        * clear bit.  If so, this only proves that at least one md page is
                                        * free, not (new_num_ep - current_num_ep) of them.  The comparable
                                        * check in blob_persist_generate_new_md increments its index after
                                        * each hit.  Confirm against spdk_bit_array_find_first_clear semantics.
                                        */
    2277        1938 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2278         644 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2279         644 :                         if (lfmd == UINT32_MAX) {
    2280             :                                 /* No more free md pages. Cannot satisfy the request */
    2281           0 :                                 rc = -ENOSPC;
    2282           0 :                                 goto out;
    2283             :                         }
    2284             :                 }
    2285             :         }
    2286             : 
    2287        3116 :         if (sz > num_clusters) {
    2288             :                 /* Expand the cluster array if necessary.
    2289             :                  * We only shrink the array when persisting.
    2290             :                  */
    2291        1706 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
                                       /* sz > num_clusters holds in this branch, so sz is non-zero here. */
    2292        1706 :                 if (sz > 0 && tmp == NULL) {
    2293           0 :                         rc = -ENOMEM;
    2294           0 :                         goto out;
    2295             :                 }
                                       /* Zero only the newly added tail; existing entries are preserved. */
    2296        1706 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2297        1706 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2298        1706 :                 blob->active.clusters = tmp;
    2299        1706 :                 blob->active.cluster_array_size = sz;
    2300             : 
    2301             :                 /* Expand the extents table, only if enough clusters were added */
    2302        1706 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2303         842 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2304         842 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
                                                       /* The cluster array above has already been grown at this point
                                                        * and is left at its new size.
                                                        */
    2305           0 :                                 rc = -ENOMEM;
    2306           0 :                                 goto out;
    2307             :                         }
    2308         842 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2309         842 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2310         842 :                         blob->active.extent_pages = ep_tmp;
    2311         842 :                         blob->active.extent_pages_array_size = new_num_ep;
    2312             :                 }
    2313             :         }
    2314             : 
    2315        3116 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2316             : 
    2317        3116 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2318        2428 :                 cluster = 0;
    2319        2428 :                 lfmd = 0;
                                       /* The loop body only runs when growing (num_clusters < sz). */
    2320        9832 :                 for (i = num_clusters; i < sz; i++) {
    2321        7404 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2322             :                         /* Do not increment lfmd here.  lfmd will get updated
    2323             :                          * to the md_page allocated (if any) when a new extent
    2324             :                          * page is needed.  Just pass that value again,
    2325             :                          * bs_allocate_cluster will just start at that index
    2326             :                          * to find the next free md_page when needed.
    2327             :                          */
    2328             :                 }
    2329             :         }
    2330             : 
    2331             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2332     1072167 :         for (i = sz; i < num_clusters; i++) {
    2333     1069051 :                 if (blob->active.clusters[i] != 0) {
    2334        2351 :                         blob->active.num_allocated_clusters--;
    2335             :                 }
    2336             :         }
    2337             : 
    2338        3116 :         blob->active.num_clusters = sz;
    2339        3116 :         blob->active.num_extent_pages = new_num_ep;
    2340             : 
    2341        3116 :         rc = 0;
    2342        3124 : out:
                               /* The lock is only held if the pre-check branch above took it. */
    2343        3124 :         if (spdk_spin_held(&bs->used_lock)) {
    2344        1302 :                 spdk_spin_unlock(&bs->used_lock);
    2345             :         }
    2346             : 
    2347        3124 :         return rc;
    2348             : }
    2349             : 
                       /*
                        * Serialize the blob's active metadata and assign it to metadata pages.
                        *
                        * The blob is serialized into ctx->pages, blob->active.pages[] is resized
                        * to match, and every page after the first is given a free index from
                        * bs->used_md_pages.  Each page's "next" field and CRC are filled in, the
                        * blob is marked clean, and the write-out is started via
                        * blob_persist_write_page_chain.
                        *
                        * Any failure completes the persist through blob_persist_complete with the
                        * error code; nothing is returned to the caller.
                        */
    2350             : static void
    2351        3622 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2352             : {
    2353        3622 :         spdk_bs_sequence_t *seq = ctx->seq;
    2354        3622 :         struct spdk_blob *blob = ctx->blob;
    2355        3622 :         struct spdk_blob_store *bs = blob->bs;
    2356             :         uint64_t i;
    2357             :         uint32_t page_num;
    2358             :         void *tmp;
    2359             :         int rc;
    2360             : 
    2361             :         /* Generate the new metadata */
    2362        3622 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2363        3622 :         if (rc < 0) {
    2364           0 :                 blob_persist_complete(seq, ctx, rc);
    2365           0 :                 return;
    2366             :         }
    2367             : 
    2368        3622 :         assert(blob->active.num_pages >= 1);
    2369             : 
    2370             :         /* Resize the cache of page indices */
    2371        3622 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2372        3622 :         if (!tmp) {
    2373           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2374           0 :                 return;
    2375             :         }
    2376        3622 :         blob->active.pages = tmp;
    2377             : 
    2378             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2379             :          * enough pages and a second to actually claim them. The used_lock is held across
    2380             :          * both passes to ensure things don't change in the middle.
    2381             :          */
    2382        3622 :         spdk_spin_lock(&bs->used_lock);
    2383        3622 :         page_num = 0;
    2384             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2385        3710 :         for (i = 1; i < blob->active.num_pages; i++) {
    2386          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2387          88 :                 if (page_num == UINT32_MAX) {
                                               /* NOTE(review): running out of free md pages is reported as
                                                * -ENOMEM here, whereas blob_resize reports -ENOSPC for the
                                                * same condition.
                                                */
    2388           0 :                         spdk_spin_unlock(&bs->used_lock);
    2389           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2390           0 :                         return;
    2391             :                 }
                                       /* Step past the page just found so the next search starts after it. */
    2392          88 :                 page_num++;
    2393             :         }
    2394             : 
                               /* Second pass: repeat the same search, this time claiming each page. */
    2395        3622 :         page_num = 0;
    2396        3622 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2397        3710 :         for (i = 1; i < blob->active.num_pages; i++) {
    2398          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
                                       /* Link the previous page to the one about to be claimed. */
    2399          88 :                 ctx->pages[i - 1].next = page_num;
    2400             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2401          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2402          88 :                 blob->active.pages[i] = page_num;
    2403          88 :                 bs_claim_md_page(bs, page_num);
    2404          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2405             :                               blob->id);
    2406          88 :                 page_num++;
    2407             :         }
    2408        3622 :         spdk_spin_unlock(&bs->used_lock);
                               /* i == blob->active.num_pages here, so this is the CRC of the last page. */
    2409        3622 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2410             :         /* Start writing the metadata from last page to first */
    2411        3622 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2412        3622 :         blob_persist_write_page_chain(seq, ctx);
    2413             : }
    2414             : 
                       /*
                        * Sequence callback: write the blob's extent pages, one per invocation.
                        *
                        * Each call frees the buffer left over from the previous call (if any),
                        * then scans blob->active.extent_pages[] from ctx->next_extent_page for the
                        * next non-zero entry.  That extent page is serialized into a fresh buffer
                        * and written to disk with this same function as the completion callback.
                        * When no entry remains, blob_persist_generate_new_md is called.
                        *
                        * A non-zero bserrno, or a failure to obtain a page buffer, completes the
                        * persist with that error.
                        */
    2415             : static void
    2416        2364 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2417             : {
    2418        2364 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2419        2364 :         struct spdk_blob                *blob = ctx->blob;
    2420             :         size_t                          i;
    2421             :         uint32_t                        extent_page_id;
    2422        2364 :         uint32_t                        page_count = 0;
    2423             :         int                             rc;
    2424             : 
                               /* Release the buffer used by the previous extent page write, if any. */
    2425        2364 :         if (ctx->extent_page != NULL) {
    2426         668 :                 spdk_free(ctx->extent_page);
    2427         668 :                 ctx->extent_page = NULL;
    2428             :         }
    2429             : 
    2430        2364 :         if (bserrno != 0) {
    2431           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2432           0 :                 return;
    2433             :         }
    2434             : 
    2435             :         /* Only write out Extent Pages when blob was resized. */
    2436        4620 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2437        2924 :                 extent_page_id = blob->active.extent_pages[i];
    2438        2924 :                 if (extent_page_id == 0) {
    2439             :                         /* No Extent Page to persist */
    2440        2256 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2441        2256 :                         continue;
    2442             :                 }
    2443         668 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
                                       /* Record where to resume when this function is re-entered as the
                                        * completion of the write issued below.
                                        */
    2444         668 :                 ctx->next_extent_page = i + 1;
    2445         668 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2446         668 :                 if (rc < 0) {
    2447           0 :                         blob_persist_complete(seq, ctx, rc);
    2448           0 :                         return;
    2449             :                 }
    2450             : 
    2451         668 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2452         668 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2453             : 
    2454         668 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2455             : 
    2456         668 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2457         668 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2458             :                                       blob_persist_write_extent_pages, ctx);
    2459         668 :                 return;
    2460             :         }
    2461             : 
                               /* No extent page left to write; continue with the blob metadata itself. */
    2462        1696 :         blob_persist_generate_new_md(ctx);
    2463             : }
    2464             : 
                       /*
                        * Sequence callback: first step of persisting a blob.  blob_persist passes
                        * it to bs_mark_dirty as the completion callback.
                        *
                        * A non-zero bserrno completes the persist with that error.  A blob with
                        * no active pages is treated as a delete and goes straight to
                        * blob_persist_zero_pages.  Otherwise, if the blob was resized, the extent
                        * pages are written first starting at ctx->next_extent_page; if not, the
                        * new metadata is generated immediately.
                        */
    2465             : static void
    2466        5122 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2467             : {
    2468        5122 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2469        5122 :         struct spdk_blob *blob = ctx->blob;
    2470             : 
    2471        5122 :         if (bserrno != 0) {
    2472           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2473           8 :                 return;
    2474             :         }
    2475             : 
    2476        5114 :         if (blob->active.num_pages == 0) {
    2477             :                 /* This is the signal that the blob should be deleted.
    2478             :                  * Immediately jump to the clean up routine. */
    2479        1492 :                 assert(blob->clean.num_pages > 0);
    2480        1492 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2481        1492 :                 blob_persist_zero_pages(seq, ctx, 0);
    2482        1492 :                 return;
    2483             : 
    2484             :         }
    2485             : 
    2486        3622 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2487             :                 /* Blob was resized up */
    2488        1678 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
                                       /* Start at index (clean.num_extent_pages - 1), or at 0 when there
                                        * were no extent pages before.
                                        */
    2489        1678 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2490        1944 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2491             :                 /* Blob was resized down */
    2492          18 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
                                       /* Start at index (active.num_extent_pages - 1), or at 0 when no
                                        * extent pages remain.
                                        */
    2493          18 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2494             :         } else {
    2495             :                 /* No change in size occurred */
    2496        1926 :                 blob_persist_generate_new_md(ctx);
    2497        1926 :                 return;
    2498             :         }
    2499             : 
    2500        1696 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2501             : }
    2502             : 
                       /*
                        * Context for marking a blobstore dirty on disk (see bs_mark_dirty).
                        *
                        * bs     - blobstore being marked dirty
                        * super  - buffer the super block is read into, modified, and written from
                        * cb_fn  - caller's completion callback, invoked with cb_arg
                        * cb_arg - opaque argument passed through to cb_fn
                        */
    2503             : struct spdk_bs_mark_dirty {
    2504             :         struct spdk_blob_store          *bs;
    2505             :         struct spdk_bs_super_block      *super;
    2506             :         spdk_bs_sequence_cpl            cb_fn;
    2507             :         void                            *cb_arg;
    2508             : };
    2509             : 
                       /*
                        * Final step of bs_mark_dirty: record the result and release the context.
                        *
                        * On success the in-memory blobstore is flagged as not clean.  The
                        * caller's callback is invoked with bserrno in either case, after which the
                        * super block buffer and the context are freed.
                        */
    2510             : static void
    2511         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2512             : {
    2513         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2514             : 
    2515         158 :         if (bserrno == 0) {
    2516         150 :                 ctx->bs->clean = 0;
    2517             :         }
    2518             : 
                               /* The callback runs before ctx is released, so ctx is still valid
                                * while the caller's continuation executes.
                                */
    2519         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2520             : 
    2521         158 :         spdk_free(ctx->super);
    2522         158 :         free(ctx);
    2523         158 : }
    2524             : 
                       /* Forward declaration; bs_mark_dirty_write below uses it to write the
                        * modified super block.
                        */
    2525             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2526             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2527             : 
    2528             : 
                       /*
                        * Sequence callback run after bs_mark_dirty has read the super block.
                        *
                        * A read error or a failed bs_super_validate is forwarded to
                        * bs_mark_dirty_write_cpl.  Otherwise the super block's clean flag is
                        * cleared, a zero size field is filled in from the device geometry, and the
                        * super block is written back with bs_mark_dirty_write_cpl as completion.
                        */
    2529             : static void
    2530         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2531             : {
    2532         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2533             :         int rc;
    2534             : 
    2535         158 :         if (bserrno != 0) {
    2536           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2537           4 :                 return;
    2538             :         }
    2539             : 
    2540         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2541         154 :         if (rc != 0) {
    2542           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2543           0 :                 return;
    2544             :         }
    2545             : 
    2546         154 :         ctx->super->clean = 0;
                               /* A size of 0 is replaced by the device size: blockcnt * blocklen. */
    2547         154 :         if (ctx->super->size == 0) {
    2548           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2549             :         }
    2550             : 
    2551         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2552             : }
    2553             : 
                       /*
                        * Mark the blobstore dirty on disk, then invoke cb_fn(seq, cb_arg, rc).
                        *
                        * If bs->clean is already 0 the callback is invoked immediately with 0.
                        * Otherwise a context and a super block buffer are allocated and the super
                        * block is read from page 0; bs_mark_dirty_write continues from there.
                        * Allocation failures are reported to cb_fn as -ENOMEM.
                        */
    2554             : static void
    2555        5560 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2556             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2557             : {
    2558             :         struct spdk_bs_mark_dirty *ctx;
    2559             : 
    2560             :         /* Blobstore is already marked dirty */
    2561        5560 :         if (bs->clean == 0) {
    2562        5402 :                 cb_fn(seq, cb_arg, 0);
    2563        5402 :                 return;
    2564             :         }
    2565             : 
    2566         158 :         ctx = calloc(1, sizeof(*ctx));
    2567         158 :         if (!ctx) {
    2568           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2569           0 :                 return;
    2570             :         }
    2571         158 :         ctx->bs = bs;
    2572         158 :         ctx->cb_fn = cb_fn;
    2573         158 :         ctx->cb_arg = cb_arg;
    2574             : 
                               /* Zeroed buffer for the super block, allocated with SPDK_MALLOC_DMA
                                * and an alignment argument of 0x1000.
                                */
    2575         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2576             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2577         158 :         if (!ctx->super) {
    2578           0 :                 free(ctx);
    2579           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2580           0 :                 return;
    2581             :         }
    2582             : 
    2583         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2584         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2585             :                              bs_mark_dirty_write, ctx);
    2586             : }
    2587             : 
    2588             : /* Write a blob to disk */
                       /*
                        * Entry point for persisting a blob's metadata.
                        *
                        * If the blob is clean and no persist is in flight, cb_fn is invoked
                        * immediately with 0.  Otherwise a persist context is allocated (cb_fn gets
                        * -ENOMEM on failure).  If another persist is already on
                        * blob->persists_to_complete, the new context is queued on
                        * blob->pending_persists and nothing further is started here.  Otherwise
                        * the context is placed on persists_to_complete and the operation starts
                        * with bs_mark_dirty, continuing in blob_persist_start.
                        */
    2589             : static void
    2590        9154 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2591             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2592             : {
    2593             :         struct spdk_blob_persist_ctx *ctx;
    2594             : 
    2595        9154 :         blob_verify_md_op(blob);
    2596             : 
    2597        9154 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2598        4032 :                 cb_fn(seq, cb_arg, 0);
    2599        4032 :                 return;
    2600             :         }
    2601             : 
    2602        5122 :         ctx = calloc(1, sizeof(*ctx));
    2603        5122 :         if (!ctx) {
    2604           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2605           0 :                 return;
    2606             :         }
    2607        5122 :         ctx->blob = blob;
    2608        5122 :         ctx->seq = seq;
    2609        5122 :         ctx->cb_fn = cb_fn;
    2610        5122 :         ctx->cb_arg = cb_arg;
    2611             : 
    2612             :         /* Multiple blob persists can affect one another, via blob->state or
    2613             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2614        5122 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2615          23 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2616          23 :                 return;
    2617             :         }
                               /* persists_to_complete is empty at this point, so ctx becomes its
                                * only element.
                                */
    2618        5099 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2619             : 
    2620        5099 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2621             : }
    2622             : 
                       /*
                        * Context for the allocate-and-copy-cluster operation.
                        *
                        * In the code visible here, buf is released with spdk_free by the
                        * completion callback, seq is cast to a request set to reach its channel,
                        * and new_cluster / new_extent_page are what blob_insert_cluster_revert
                        * releases (new_extent_page == 0 means there is no extent page to release).
                        */
    2623             : struct spdk_blob_copy_cluster_ctx {
    2624             :         struct spdk_blob *blob;
    2625             :         uint8_t *buf;
    2626             :         uint64_t page;
    2627             :         uint64_t new_cluster;
    2628             :         uint32_t new_extent_page;
    2629             :         spdk_bs_sequence_t *seq;
    2630             :         struct spdk_blob_md_page *new_cluster_page;
    2631             : };
    2632             : 
                       /*
                        * Context for the free-cluster operation.  blob_free_cluster_cpl finishes
                        * the sequence stored in seq and then frees this structure.
                        */
    2633             : struct spdk_blob_free_cluster_ctx {
    2634             :         struct spdk_blob *blob;
    2635             :         uint64_t page;
    2636             :         struct spdk_blob_md_page *md_page;
    2637             :         uint64_t cluster_num;
    2638             :         uint32_t extent_page;
    2639             :         spdk_bs_sequence_t *seq;
    2640             : };
    2641             : 
                       /*
                        * Completion of a cluster allocate-and-copy operation.
                        *
                        * All user operations queued on the channel's need_cluster_alloc list are
                        * moved to a local list and then either executed (bserrno == 0) or aborted
                        * with bserrno.  Finally the copy buffer and the context are freed.
                        */
    2642             : static void
    2643         820 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2644             : {
    2645         820 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2646         820 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2647         820 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2648             :         spdk_bs_user_op_t *op;
    2649             : 
                               /* Swap the channel's queue with an empty local list, leaving the
                                * channel's need_cluster_alloc list empty before any op is processed.
                                */
    2650         820 :         TAILQ_INIT(&requests);
    2651         820 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2652             : 
    2653        1640 :         while (!TAILQ_EMPTY(&requests)) {
    2654         820 :                 op = TAILQ_FIRST(&requests);
    2655         820 :                 TAILQ_REMOVE(&requests, op, link);
    2656         820 :                 if (bserrno == 0) {
    2657         820 :                         bs_user_op_execute(op);
    2658             :                 } else {
    2659           0 :                         bs_user_op_abort(op, bserrno);
    2660             :                 }
    2661             :         }
    2662             : 
    2663         820 :         spdk_free(ctx->buf);
    2664         820 :         free(ctx);
    2665         820 : }
    2666             : 
    2667             : static void
    2668          60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2669             : {
    2670          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2671          60 :         spdk_bs_sequence_t *seq = ctx->seq;
    2672             : 
    2673          60 :         bs_sequence_finish(seq, bserrno);
    2674             : 
    2675          60 :         free(ctx);
    2676          60 : }
    2677             : 
    2678             : static void
    2679           4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2680             : {
    2681           4 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2682           4 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2683           4 :         if (ctx->new_extent_page != 0) {
    2684           2 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2685             :         }
    2686           4 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2687           4 : }
    2688             : 
    2689             : static void
    2690           4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2691             : {
    2692           4 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2693             : 
    2694           4 :         if (bserrno) {
    2695           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2696             :         }
    2697             : 
    2698           4 :         blob_insert_cluster_revert(ctx);
    2699           4 :         bs_sequence_finish(ctx->seq, bserrno);
    2700           4 : }
    2701             : 
    2702             : static void
    2703           4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2704             : {
    2705           4 :         struct spdk_bs_cpl cpl;
    2706             :         spdk_bs_batch_t *batch;
    2707           4 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2708             : 
    2709             :         /*
    2710             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2711             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2712             :          * cluster.
    2713             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2714             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2715             :          */
    2716             : 
    2717           4 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2718           4 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2719           4 :         cpl.u.blob_basic.cb_arg = ctx;
    2720             : 
    2721           4 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2722           4 :         if (!batch) {
    2723           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
    2724           0 :                 return;
    2725             :         }
    2726             : 
    2727           4 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2728           4 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2729           4 :         bs_batch_close(batch);
    2730             : }
    2731             : 
    2732             : static void
    2733         820 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2734             : {
    2735         820 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2736             : 
    2737         820 :         if (bserrno) {
    2738           4 :                 if (bserrno == -EEXIST) {
    2739             :                         /* The metadata insert failed because another thread
    2740             :                          * allocated the cluster first. Clear and free our cluster
    2741             :                          * but continue without error. */
    2742           4 :                         blob_insert_cluster_clear(ctx);
    2743           4 :                         return;
    2744             :                 }
    2745             : 
    2746           0 :                 blob_insert_cluster_revert(ctx);
    2747             :         }
    2748             : 
    2749         816 :         bs_sequence_finish(ctx->seq, bserrno);
    2750             : }
    2751             : 
    2752             : static void
    2753         412 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2754             : {
    2755         412 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2756             :         uint32_t cluster_number;
    2757             : 
    2758         412 :         if (bserrno) {
    2759             :                 /* The write failed, so jump to the final completion handler */
    2760           0 :                 bs_sequence_finish(seq, bserrno);
    2761           0 :                 return;
    2762             :         }
    2763             : 
    2764         412 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2765             : 
    2766         412 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2767             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2768             : }
    2769             : 
    2770             : static void
    2771         282 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2772             : {
    2773         282 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2774             : 
    2775         282 :         if (bserrno != 0) {
    2776             :                 /* The read failed, so jump to the final completion handler */
    2777           0 :                 bs_sequence_finish(seq, bserrno);
    2778           0 :                 return;
    2779             :         }
    2780             : 
    2781             :         /* Write whole cluster */
    2782         282 :         bs_sequence_write_dev(seq, ctx->buf,
    2783         282 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2784         282 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2785             :                               blob_write_copy_cpl, ctx);
    2786             : }
    2787             : 
    2788             : static bool
    2789         804 : blob_can_copy(struct spdk_blob *blob, uint64_t cluster_start_page, uint64_t *base_lba)
    2790             : {
    2791         804 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2792             : 
    2793        1158 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2794         354 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2795             : }
    2796             : 
    2797             : static void
    2798         130 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2799             : {
    2800         130 :         struct spdk_blob *blob = ctx->blob;
    2801         130 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2802             : 
    2803         130 :         bs_sequence_copy_dev(ctx->seq,
    2804         130 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2805             :                              src_lba,
    2806             :                              lba_count,
    2807             :                              blob_write_copy_cpl, ctx);
    2808         130 : }
    2809             : 
    2810             : static void
    2811         820 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2812             :                              struct spdk_io_channel *_ch,
    2813             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2814             : {
    2815         820 :         struct spdk_bs_cpl cpl;
    2816             :         struct spdk_bs_channel *ch;
    2817             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2818             :         uint64_t cluster_start_page;
    2819             :         uint32_t cluster_number;
    2820             :         bool is_zeroes;
    2821             :         bool can_copy;
    2822             :         bool is_valid_range;
    2823         820 :         uint64_t copy_src_lba;
    2824             :         int rc;
    2825             : 
    2826         820 :         ch = spdk_io_channel_get_ctx(_ch);
    2827             : 
    2828         820 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2829             :                 /* There are already operations pending. Queue this user op
    2830             :                  * and return because it will be re-executed when the outstanding
    2831             :                  * cluster allocation completes. */
    2832           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2833           0 :                 return;
    2834             :         }
    2835             : 
    2836             :         /* Round the io_unit offset down to the first page in the cluster */
    2837         820 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2838             : 
    2839             :         /* Calculate which index in the metadata cluster array the corresponding
    2840             :          * cluster is supposed to be at. */
    2841         820 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2842             : 
    2843         820 :         ctx = calloc(1, sizeof(*ctx));
    2844         820 :         if (!ctx) {
    2845           0 :                 bs_user_op_abort(op, -ENOMEM);
    2846           0 :                 return;
    2847             :         }
    2848             : 
    2849         820 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2850             : 
    2851         820 :         ctx->blob = blob;
    2852         820 :         ctx->page = cluster_start_page;
    2853         820 :         ctx->new_cluster_page = ch->new_cluster_page;
    2854         820 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2855             : 
    2856             :         /* Check if the cluster that we intend to do CoW for is valid for
    2857             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2858             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2859             :          * the blob has been resized after snapshot was taken. */
    2860        1640 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2861             :                          bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2862         820 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2863             : 
    2864         820 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2865             : 
    2866        1624 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2867             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2868         804 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2869         820 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2870         282 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2871             :                                        NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2872         282 :                 if (!ctx->buf) {
    2873           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2874             :                                     blob->bs->cluster_sz);
    2875           0 :                         free(ctx);
    2876           0 :                         bs_user_op_abort(op, -ENOMEM);
    2877           0 :                         return;
    2878             :                 }
    2879             :         }
    2880             : 
    2881         820 :         spdk_spin_lock(&blob->bs->used_lock);
    2882         820 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2883             :                                  false);
    2884         820 :         spdk_spin_unlock(&blob->bs->used_lock);
    2885         820 :         if (rc != 0) {
    2886           0 :                 spdk_free(ctx->buf);
    2887           0 :                 free(ctx);
    2888           0 :                 bs_user_op_abort(op, rc);
    2889           0 :                 return;
    2890             :         }
    2891             : 
    2892         820 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2893         820 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2894         820 :         cpl.u.blob_basic.cb_arg = ctx;
    2895             : 
    2896         820 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2897         820 :         if (!ctx->seq) {
    2898           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2899           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2900           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2901           0 :                 spdk_free(ctx->buf);
    2902           0 :                 free(ctx);
    2903           0 :                 bs_user_op_abort(op, -ENOMEM);
    2904           0 :                 return;
    2905             :         }
    2906             : 
    2907             :         /* Queue the user op to block other incoming operations */
    2908         820 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2909             : 
    2910         820 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2911         412 :                 if (can_copy) {
    2912         130 :                         blob_copy(ctx, op, copy_src_lba);
    2913             :                 } else {
    2914             :                         /* Read cluster from backing device */
    2915         282 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2916             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2917         282 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2918             :                                                 blob_write_copy, ctx);
    2919             :                 }
    2920             : 
    2921             :         } else {
    2922         408 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2923             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2924             :         }
    2925             : }
    2926             : 
    2927             : static inline bool
    2928       40232 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2929             :                                  uint64_t *lba, uint64_t *lba_count)
    2930             : {
    2931       40232 :         *lba_count = length;
    2932             : 
    2933       40232 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2934        3000 :                 assert(blob->back_bs_dev != NULL);
    2935        3000 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2936        3000 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2937        3000 :                 return false;
    2938             :         } else {
    2939       37232 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2940       37232 :                 return true;
    2941             :         }
    2942             : }
    2943             : 
/*
 * State of an I/O request that is being split into per-cluster pieces by
 * blob_request_submit_op_split() / blob_request_submit_op_split_next().
 */
struct op_split_ctx {
        /* Blob the request targets. */
        struct spdk_blob *blob;
        /* I/O channel every piece is submitted on. */
        struct spdk_io_channel *channel;
        /* Offset, in io_units, of the next piece to submit. */
        uint64_t io_unit_offset;
        /* Number of io_units not yet submitted. */
        uint64_t io_units_remaining;
        /* Payload position for the next piece; advanced only for read and write. */
        void *curr_payload;
        /* Kind of operation being split. */
        enum spdk_blob_op_type op_type;
        /* Sequence finished once all pieces completed or one of them failed. */
        spdk_bs_sequence_t *seq;
        /* True while a piece is being submitted from the submit loop. */
        bool in_submit_ctx;
        /* Set when a piece completed before its submit call returned. */
        bool completed_in_submit_ctx;
        /* Set when the request finished inside a submit call; the submit loop
         * then frees the context after the call returns. */
        bool done;
};
    2956             : 
    2957             : static void
    2958         774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2959             : {
    2960         774 :         struct op_split_ctx     *ctx = cb_arg;
    2961         774 :         struct spdk_blob        *blob = ctx->blob;
    2962         774 :         struct spdk_io_channel  *ch = ctx->channel;
    2963         774 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2964             :         uint8_t                 *buf;
    2965             :         uint64_t                offset;
    2966             :         uint64_t                length;
    2967             :         uint64_t                op_length;
    2968             : 
    2969         774 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2970         178 :                 bs_sequence_finish(ctx->seq, bserrno);
    2971         178 :                 if (ctx->in_submit_ctx) {
    2972             :                         /* Defer freeing of the ctx object, since it will be
    2973             :                          * accessed when this unwinds back to the submission
    2974             :                          * context.
    2975             :                          */
    2976          40 :                         ctx->done = true;
    2977             :                 } else {
    2978         138 :                         free(ctx);
    2979             :                 }
    2980         178 :                 return;
    2981             :         }
    2982             : 
    2983         596 :         if (ctx->in_submit_ctx) {
    2984             :                 /* If this split operation completed in the context
    2985             :                  * of its submission, mark the flag and return immediately
    2986             :                  * to avoid recursion.
    2987             :                  */
    2988          68 :                 ctx->completed_in_submit_ctx = true;
    2989          68 :                 return;
    2990             :         }
    2991             : 
    2992             :         while (true) {
    2993         596 :                 ctx->completed_in_submit_ctx = false;
    2994             : 
    2995         596 :                 offset = ctx->io_unit_offset;
    2996         596 :                 length = ctx->io_units_remaining;
    2997         596 :                 buf = ctx->curr_payload;
    2998         596 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2999             :                                      offset));
    3000             : 
    3001             :                 /* Update length and payload for next operation */
    3002         596 :                 ctx->io_units_remaining -= op_length;
    3003         596 :                 ctx->io_unit_offset += op_length;
    3004         596 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    3005         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    3006             :                 }
    3007             : 
    3008         596 :                 assert(!ctx->in_submit_ctx);
    3009         596 :                 ctx->in_submit_ctx = true;
    3010             : 
    3011         596 :                 switch (op_type) {
    3012         418 :                 case SPDK_BLOB_READ:
    3013         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3014             :                                           blob_request_submit_op_split_next, ctx);
    3015         418 :                         break;
    3016         110 :                 case SPDK_BLOB_WRITE:
    3017         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3018             :                                            blob_request_submit_op_split_next, ctx);
    3019         110 :                         break;
    3020          36 :                 case SPDK_BLOB_UNMAP:
    3021          36 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3022             :                                            blob_request_submit_op_split_next, ctx);
    3023          36 :                         break;
    3024          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    3025          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3026             :                                                   blob_request_submit_op_split_next, ctx);
    3027          32 :                         break;
    3028           0 :                 case SPDK_BLOB_READV:
    3029             :                 case SPDK_BLOB_WRITEV:
    3030           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3031           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3032           0 :                         free(ctx);
    3033           0 :                         return;
    3034             :                 }
    3035             : 
    3036             : #ifndef __clang_analyzer__
    3037             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3038             :                  * forms a path that recursively calls this function, but then says
    3039             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3040             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3041             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3042             :                  * works around the scan-build bug.
    3043             :                  */
    3044         596 :                 assert(ctx->in_submit_ctx);
    3045         596 :                 ctx->in_submit_ctx = false;
    3046             : 
    3047             :                 /* If the operation completed immediately, loop back and submit the
    3048             :                  * next operation.  Otherwise we can return and the next split
    3049             :                  * operation will get submitted when this current operation is
    3050             :                  * later completed asynchronously.
    3051             :                  */
    3052         596 :                 if (ctx->completed_in_submit_ctx) {
    3053          68 :                         continue;
    3054         528 :                 } else if (ctx->done) {
    3055          40 :                         free(ctx);
    3056             :                 }
    3057             : #endif
    3058         528 :                 break;
    3059             :         }
    3060             : }
    3061             : 
    3062             : static void
    3063         178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3064             :                              void *payload, uint64_t offset, uint64_t length,
    3065             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3066             : {
    3067             :         struct op_split_ctx *ctx;
    3068             :         spdk_bs_sequence_t *seq;
    3069         178 :         struct spdk_bs_cpl cpl;
    3070             : 
    3071         178 :         assert(blob != NULL);
    3072             : 
    3073         178 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    3074         178 :         if (ctx == NULL) {
    3075           0 :                 cb_fn(cb_arg, -ENOMEM);
    3076           0 :                 return;
    3077             :         }
    3078             : 
    3079         178 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3080         178 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3081         178 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3082             : 
    3083         178 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3084         178 :         if (!seq) {
    3085           0 :                 free(ctx);
    3086           0 :                 cb_fn(cb_arg, -ENOMEM);
    3087           0 :                 return;
    3088             :         }
    3089             : 
    3090         178 :         ctx->blob = blob;
    3091         178 :         ctx->channel = ch;
    3092         178 :         ctx->curr_payload = payload;
    3093         178 :         ctx->io_unit_offset = offset;
    3094         178 :         ctx->io_units_remaining = length;
    3095         178 :         ctx->op_type = op_type;
    3096         178 :         ctx->seq = seq;
    3097             : 
    3098         178 :         blob_request_submit_op_split_next(ctx, 0);
    3099             : }
    3100             : 
    3101             : static void
    3102          60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3103             : {
    3104          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3105             : 
    3106          60 :         if (bserrno) {
    3107           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3108           0 :                 free(ctx);
    3109           0 :                 return;
    3110             :         }
    3111             : 
    3112          60 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3113             :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3114             : }
    3115             : 
    3116             : static void
    3117       37860 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3118             :                               void *payload, uint64_t offset, uint64_t length,
    3119             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3120             : {
    3121       37860 :         struct spdk_bs_cpl cpl;
    3122       37860 :         uint64_t lba;
    3123       37860 :         uint64_t lba_count;
    3124             :         bool is_allocated;
    3125             : 
    3126       37860 :         assert(blob != NULL);
    3127             : 
    3128       37860 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3129       37860 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3130       37860 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3131             : 
    3132       37860 :         if (blob->frozen_refcnt) {
    3133             :                 /* This blob I/O is frozen */
    3134             :                 spdk_bs_user_op_t *op;
    3135           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3136             : 
    3137           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3138           4 :                 if (!op) {
    3139           0 :                         cb_fn(cb_arg, -ENOMEM);
    3140           0 :                         return;
    3141             :                 }
    3142             : 
    3143           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3144             : 
    3145           4 :                 return;
    3146             :         }
    3147             : 
    3148       37856 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3149             : 
    3150       37856 :         switch (op_type) {
    3151       16893 :         case SPDK_BLOB_READ: {
    3152             :                 spdk_bs_batch_t *batch;
    3153             : 
    3154       16893 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3155       16893 :                 if (!batch) {
    3156           0 :                         cb_fn(cb_arg, -ENOMEM);
    3157           0 :                         return;
    3158             :                 }
    3159             : 
    3160       16893 :                 if (is_allocated) {
    3161             :                         /* Read from the blob */
    3162       15805 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3163             :                 } else {
    3164             :                         /* Read from the backing block device */
    3165        1088 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3166             :                 }
    3167             : 
    3168       16893 :                 bs_batch_close(batch);
    3169       16893 :                 break;
    3170             :         }
    3171       20867 :         case SPDK_BLOB_WRITE:
    3172             :         case SPDK_BLOB_WRITE_ZEROES: {
    3173       20867 :                 if (is_allocated) {
    3174             :                         /* Write to the blob */
    3175             :                         spdk_bs_batch_t *batch;
    3176             : 
    3177       20515 :                         if (lba_count == 0) {
    3178           0 :                                 cb_fn(cb_arg, 0);
    3179           0 :                                 return;
    3180             :                         }
    3181             : 
    3182       20515 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3183       20515 :                         if (!batch) {
    3184           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3185           0 :                                 return;
    3186             :                         }
    3187             : 
    3188       20515 :                         if (op_type == SPDK_BLOB_WRITE) {
    3189       20483 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3190             :                         } else {
    3191          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3192             :                         }
    3193             : 
    3194       20515 :                         bs_batch_close(batch);
    3195             :                 } else {
    3196             :                         /* Queue this operation and allocate the cluster */
    3197             :                         spdk_bs_user_op_t *op;
    3198             : 
    3199         352 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3200         352 :                         if (!op) {
    3201           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3202           0 :                                 return;
    3203             :                         }
    3204             : 
    3205         352 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3206             :                 }
    3207       20867 :                 break;
    3208             :         }
    3209          96 :         case SPDK_BLOB_UNMAP: {
    3210          96 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3211             :                 spdk_bs_batch_t *batch;
    3212             : 
    3213             :                 /* if aligned with cluster release cluster */
    3214         168 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3215         140 :                     blob_backed_with_zeroes_dev(blob) &&
    3216          68 :                     bs_io_units_per_cluster(blob) == length) {
    3217          60 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3218             :                         uint64_t cluster_start_page;
    3219             :                         uint32_t cluster_number;
    3220             : 
    3221          60 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3222             : 
    3223             :                         /* Round the io_unit offset down to the first page in the cluster */
    3224          60 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3225             : 
    3226             :                         /* Calculate which index in the metadata cluster array the corresponding
    3227             :                          * cluster is supposed to be at. */
    3228          60 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3229             : 
    3230          60 :                         ctx = calloc(1, sizeof(*ctx));
    3231          60 :                         if (!ctx) {
    3232           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3233           0 :                                 return;
    3234             :                         }
    3235             :                         /* When freeing a cluster the flow should be (in order):
    3236             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3237             :                          * old data)
    3238             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3239             :                          * cluster), update and sync metadata freeing the cluster
    3240             :                          * 3. Once metadata update is done, complete the user unmap request
    3241             :                          */
    3242          60 :                         ctx->blob = blob;
    3243          60 :                         ctx->page = cluster_start_page;
    3244          60 :                         ctx->cluster_num = cluster_number;
    3245          60 :                         ctx->md_page = bs_channel->new_cluster_page;
    3246          60 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3247          60 :                         if (!ctx->seq) {
    3248           0 :                                 free(ctx);
    3249           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3250           0 :                                 return;
    3251             :                         }
    3252             : 
    3253          60 :                         if (blob->use_extent_table) {
    3254          30 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3255             :                         }
    3256             : 
    3257          60 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3258          60 :                         cpl.u.blob_basic.cb_arg = ctx;
    3259             :                 }
    3260             : 
    3261          96 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3262          96 :                 if (!batch) {
    3263           0 :                         free(ctx);
    3264           0 :                         cb_fn(cb_arg, -ENOMEM);
    3265           0 :                         return;
    3266             :                 }
    3267             : 
    3268          96 :                 if (is_allocated) {
    3269          96 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3270             :                 }
    3271             : 
    3272          96 :                 bs_batch_close(batch);
    3273          96 :                 break;
    3274             :         }
    3275           0 :         case SPDK_BLOB_READV:
    3276             :         case SPDK_BLOB_WRITEV:
    3277           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3278           0 :                 cb_fn(cb_arg, -EINVAL);
    3279           0 :                 break;
    3280             :         }
    3281             : }
    3282             : 
    3283             : static void
    3284       38550 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3285             :                        void *payload, uint64_t offset, uint64_t length,
    3286             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3287             : {
    3288       38550 :         assert(blob != NULL);
    3289             : 
    3290       38550 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3291           4 :                 cb_fn(cb_arg, -EPERM);
    3292           4 :                 return;
    3293             :         }
    3294             : 
    3295       38546 :         if (length == 0) {
    3296         492 :                 cb_fn(cb_arg, 0);
    3297         492 :                 return;
    3298             :         }
    3299             : 
    3300       38054 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3301          24 :                 cb_fn(cb_arg, -EINVAL);
    3302          24 :                 return;
    3303             :         }
    3304       38030 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3305       37852 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3306             :                                               cb_fn, cb_arg, op_type);
    3307             :         } else {
    3308         178 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3309             :                                              cb_fn, cb_arg, op_type);
    3310             :         }
    3311             : }
    3312             : 
    3313             : struct rw_iov_ctx {
    3314             :         struct spdk_blob *blob;
    3315             :         struct spdk_io_channel *channel;
    3316             :         spdk_blob_op_complete cb_fn;
    3317             :         void *cb_arg;
    3318             :         bool read;
    3319             :         int iovcnt;
    3320             :         struct iovec *orig_iov;
    3321             :         uint64_t io_unit_offset;
    3322             :         uint64_t io_units_remaining;
    3323             :         uint64_t io_units_done;
    3324             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3325             :         struct iovec iov[0];
    3326             : };
    3327             : 
    3328             : static void
    3329        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3330             : {
    3331        2360 :         assert(cb_arg == NULL);
    3332        2360 :         bs_sequence_finish(seq, bserrno);
    3333        2360 : }
    3334             : 
    3335             : static void
    3336         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3337             : {
    3338         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3339         744 :         struct spdk_blob *blob = ctx->blob;
    3340             :         struct iovec *iov, *orig_iov;
    3341             :         int iovcnt;
    3342             :         size_t orig_iovoff;
    3343             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3344             :         uint64_t byte_count;
    3345             : 
    3346         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3347         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3348         204 :                 free(ctx);
    3349         204 :                 return;
    3350             :         }
    3351             : 
    3352         540 :         io_unit_offset = ctx->io_unit_offset;
    3353         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3354         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3355             :         /*
    3356             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3357             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3358             :          *  point to the current position in the I/O sequence.
    3359             :          */
    3360         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3361         540 :         orig_iov = &ctx->orig_iov[0];
    3362         540 :         orig_iovoff = 0;
    3363        1148 :         while (byte_count > 0) {
    3364         608 :                 if (byte_count >= orig_iov->iov_len) {
    3365         352 :                         byte_count -= orig_iov->iov_len;
    3366         352 :                         orig_iov++;
    3367             :                 } else {
    3368         256 :                         orig_iovoff = byte_count;
    3369         256 :                         byte_count = 0;
    3370             :                 }
    3371             :         }
    3372             : 
    3373             :         /*
    3374             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3375             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3376             :          */
    3377         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3378         540 :         iov = &ctx->iov[0];
    3379         540 :         iovcnt = 0;
    3380        1380 :         while (byte_count > 0) {
    3381         840 :                 assert(iovcnt < ctx->iovcnt);
    3382         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3383         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3384         840 :                 byte_count -= iov->iov_len;
    3385         840 :                 orig_iovoff = 0;
    3386         840 :                 orig_iov++;
    3387         840 :                 iov++;
    3388         840 :                 iovcnt++;
    3389             :         }
    3390             : 
    3391         540 :         ctx->io_unit_offset += io_units_count;
    3392         540 :         ctx->io_units_remaining -= io_units_count;
    3393         540 :         ctx->io_units_done += io_units_count;
    3394         540 :         iov = &ctx->iov[0];
    3395             : 
    3396         540 :         if (ctx->read) {
    3397         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3398             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3399             :         } else {
    3400         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3401             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3402             :         }
    3403             : }
    3404             : 
    3405             : static void
    3406        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3407             :                            struct iovec *iov, int iovcnt,
    3408             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3409             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3410             : {
    3411        2588 :         struct spdk_bs_cpl      cpl;
    3412             : 
    3413        2588 :         assert(blob != NULL);
    3414             : 
    3415        2588 :         if (!read && blob->data_ro) {
    3416           4 :                 cb_fn(cb_arg, -EPERM);
    3417           4 :                 return;
    3418             :         }
    3419             : 
    3420        2584 :         if (length == 0) {
    3421           0 :                 cb_fn(cb_arg, 0);
    3422           0 :                 return;
    3423             :         }
    3424             : 
    3425        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3426           0 :                 cb_fn(cb_arg, -EINVAL);
    3427           0 :                 return;
    3428             :         }
    3429             : 
    3430             :         /*
    3431             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3432             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3433             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3434             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3435             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3436             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3437             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3438             :          *
    3439             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3440             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3441             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3442             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3443             :          */
    3444        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3445        2376 :                 uint64_t lba_count;
    3446        2376 :                 uint64_t lba;
    3447             :                 bool is_allocated;
    3448             : 
    3449        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3450        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3451        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3452             : 
    3453        2376 :                 if (blob->frozen_refcnt) {
    3454             :                         /* This blob I/O is frozen */
    3455             :                         enum spdk_blob_op_type op_type;
    3456             :                         spdk_bs_user_op_t *op;
    3457           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3458             : 
    3459           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3460           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3461           0 :                         if (!op) {
    3462           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3463           0 :                                 return;
    3464             :                         }
    3465             : 
    3466           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3467             : 
    3468           0 :                         return;
    3469             :                 }
    3470             : 
    3471        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3472             : 
    3473        2376 :                 if (read) {
    3474             :                         spdk_bs_sequence_t *seq;
    3475             : 
    3476        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3477        2084 :                         if (!seq) {
    3478           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3479           0 :                                 return;
    3480             :                         }
    3481             : 
    3482        2084 :                         seq->ext_io_opts = ext_io_opts;
    3483             : 
    3484        2084 :                         if (is_allocated) {
    3485         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3486             :                         } else {
    3487        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3488             :                                                          rw_iov_done, NULL);
    3489             :                         }
    3490             :                 } else {
    3491         292 :                         if (is_allocated) {
    3492             :                                 spdk_bs_sequence_t *seq;
    3493             : 
    3494         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3495         276 :                                 if (!seq) {
    3496           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3497           0 :                                         return;
    3498             :                                 }
    3499             : 
    3500         276 :                                 seq->ext_io_opts = ext_io_opts;
    3501             : 
    3502         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3503             :                         } else {
    3504             :                                 /* Queue this operation and allocate the cluster */
    3505             :                                 spdk_bs_user_op_t *op;
    3506             : 
    3507          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3508             :                                                       length);
    3509          16 :                                 if (!op) {
    3510           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3511           0 :                                         return;
    3512             :                                 }
    3513             : 
    3514          16 :                                 op->ext_io_opts = ext_io_opts;
    3515             : 
    3516          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3517             :                         }
    3518             :                 }
    3519             :         } else {
    3520             :                 struct rw_iov_ctx *ctx;
    3521             : 
    3522         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3523         208 :                 if (ctx == NULL) {
    3524           4 :                         cb_fn(cb_arg, -ENOMEM);
    3525           4 :                         return;
    3526             :                 }
    3527             : 
    3528         204 :                 ctx->blob = blob;
    3529         204 :                 ctx->channel = _channel;
    3530         204 :                 ctx->cb_fn = cb_fn;
    3531         204 :                 ctx->cb_arg = cb_arg;
    3532         204 :                 ctx->read = read;
    3533         204 :                 ctx->orig_iov = iov;
    3534         204 :                 ctx->iovcnt = iovcnt;
    3535         204 :                 ctx->io_unit_offset = offset;
    3536         204 :                 ctx->io_units_remaining = length;
    3537         204 :                 ctx->io_units_done = 0;
    3538         204 :                 ctx->ext_io_opts = ext_io_opts;
    3539             : 
    3540         204 :                 rw_iov_split_next(ctx, 0);
    3541             :         }
    3542             : }
    3543             : 
    3544             : static struct spdk_blob *
    3545        7737 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3546             : {
    3547        7737 :         struct spdk_blob find;
    3548             : 
    3549        7737 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3550        6948 :                 return NULL;
    3551             :         }
    3552             : 
    3553         789 :         find.id = blobid;
    3554         789 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3555             : }
    3556             : 
    3557             : static void
    3558        1806 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3559             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3560             : {
    3561        1806 :         assert(blob != NULL);
    3562        1806 :         *snapshot_entry = NULL;
    3563        1806 :         *clone_entry = NULL;
    3564             : 
    3565        1806 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3566        1522 :                 return;
    3567             :         }
    3568             : 
    3569         428 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3570         376 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3571         232 :                         break;
    3572             :                 }
    3573             :         }
    3574             : 
    3575         284 :         if (*snapshot_entry != NULL) {
    3576         276 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3577         276 :                         if ((*clone_entry)->id == blob->id) {
    3578         232 :                                 break;
    3579             :                         }
    3580             :                 }
    3581             : 
    3582         232 :                 assert(*clone_entry != NULL);
    3583             :         }
    3584             : }
    3585             : 
    3586             : static int
    3587         796 : bs_channel_create(void *io_device, void *ctx_buf)
    3588             : {
    3589         796 :         struct spdk_blob_store          *bs = io_device;
    3590         796 :         struct spdk_bs_channel          *channel = ctx_buf;
    3591             :         struct spdk_bs_dev              *dev;
    3592         796 :         uint32_t                        max_ops = bs->max_channel_ops;
    3593             :         uint32_t                        i;
    3594             : 
    3595         796 :         dev = bs->dev;
    3596             : 
    3597         796 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3598         796 :         if (!channel->req_mem) {
    3599           0 :                 return -1;
    3600             :         }
    3601             : 
    3602         796 :         TAILQ_INIT(&channel->reqs);
    3603             : 
    3604      408348 :         for (i = 0; i < max_ops; i++) {
    3605      407552 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3606             :         }
    3607             : 
    3608         796 :         channel->bs = bs;
    3609         796 :         channel->dev = dev;
    3610         796 :         channel->dev_channel = dev->create_channel(dev);
    3611             : 
    3612         796 :         if (!channel->dev_channel) {
    3613           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3614           0 :                 free(channel->req_mem);
    3615           0 :                 return -1;
    3616             :         }
    3617             : 
    3618         796 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_NUMA_ID_ANY,
    3619             :                                     SPDK_MALLOC_DMA);
    3620         796 :         if (!channel->new_cluster_page) {
    3621           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3622           0 :                 free(channel->req_mem);
    3623           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3624           0 :                 return -1;
    3625             :         }
    3626             : 
    3627         796 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3628         796 :         TAILQ_INIT(&channel->queued_io);
    3629         796 :         RB_INIT(&channel->esnap_channels);
    3630             : 
    3631         796 :         return 0;
    3632             : }
    3633             : 
    3634             : static void
    3635         796 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3636             : {
    3637         796 :         struct spdk_bs_channel *channel = ctx_buf;
    3638             :         spdk_bs_user_op_t *op;
    3639             : 
    3640         796 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3641           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3642           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3643           0 :                 bs_user_op_abort(op, -EIO);
    3644             :         }
    3645             : 
    3646         796 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3647           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3648           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3649           0 :                 bs_user_op_abort(op, -EIO);
    3650             :         }
    3651             : 
    3652         796 :         blob_esnap_destroy_bs_channel(channel);
    3653             : 
    3654         796 :         free(channel->req_mem);
    3655         796 :         spdk_free(channel->new_cluster_page);
    3656         796 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3657         796 : }
    3658             : 
    3659             : static void
    3660         780 : bs_dev_destroy(void *io_device)
    3661             : {
    3662         780 :         struct spdk_blob_store *bs = io_device;
    3663             :         struct spdk_blob        *blob, *blob_tmp;
    3664             : 
    3665         780 :         bs->dev->destroy(bs->dev);
    3666             : 
    3667         780 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3668           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3669           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3670           0 :                 blob_free(blob);
    3671             :         }
    3672             : 
    3673         780 :         spdk_spin_destroy(&bs->used_lock);
    3674             : 
    3675         780 :         spdk_bit_array_free(&bs->open_blobids);
    3676         780 :         spdk_bit_array_free(&bs->used_blobids);
    3677         780 :         spdk_bit_array_free(&bs->used_md_pages);
    3678         780 :         spdk_bit_pool_free(&bs->used_clusters);
    3679             :         /*
    3680             :          * If this function is called for any reason except a successful unload,
    3681             :          * the unload_cpl type will be NONE and this will be a nop.
    3682             :          */
    3683         780 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3684             : 
    3685         780 :         free(bs);
    3686         780 : }
    3687             : 
    3688             : static int
    3689         908 : bs_blob_list_add(struct spdk_blob *blob)
    3690             : {
    3691             :         spdk_blob_id snapshot_id;
    3692         908 :         struct spdk_blob_list *snapshot_entry = NULL;
    3693         908 :         struct spdk_blob_list *clone_entry = NULL;
    3694             : 
    3695         908 :         assert(blob != NULL);
    3696             : 
    3697         908 :         snapshot_id = blob->parent_id;
    3698         908 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3699             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3700         492 :                 return 0;
    3701             :         }
    3702             : 
    3703         416 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3704         416 :         if (snapshot_entry == NULL) {
    3705             :                 /* Snapshot not found */
    3706         288 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3707         288 :                 if (snapshot_entry == NULL) {
    3708           0 :                         return -ENOMEM;
    3709             :                 }
    3710         288 :                 snapshot_entry->id = snapshot_id;
    3711         288 :                 TAILQ_INIT(&snapshot_entry->clones);
    3712         288 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3713             :         } else {
    3714         204 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3715          76 :                         if (clone_entry->id == blob->id) {
    3716           0 :                                 break;
    3717             :                         }
    3718             :                 }
    3719             :         }
    3720             : 
    3721         416 :         if (clone_entry == NULL) {
    3722             :                 /* Clone not found */
    3723         416 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3724         416 :                 if (clone_entry == NULL) {
    3725           0 :                         return -ENOMEM;
    3726             :                 }
    3727         416 :                 clone_entry->id = blob->id;
    3728         416 :                 TAILQ_INIT(&clone_entry->clones);
    3729         416 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3730         416 :                 snapshot_entry->clone_count++;
    3731             :         }
    3732             : 
    3733         416 :         return 0;
    3734             : }
    3735             : 
    3736             : static void
    3737        1728 : bs_blob_list_remove(struct spdk_blob *blob)
    3738             : {
    3739        1728 :         struct spdk_blob_list *snapshot_entry = NULL;
    3740        1728 :         struct spdk_blob_list *clone_entry = NULL;
    3741             : 
    3742        1728 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3743             : 
    3744        1728 :         if (snapshot_entry == NULL) {
    3745        1512 :                 return;
    3746             :         }
    3747             : 
    3748         216 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3749         216 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3750         216 :         free(clone_entry);
    3751             : 
    3752         216 :         snapshot_entry->clone_count--;
    3753             : }
    3754             : 
    3755             : static int
    3756         780 : bs_blob_list_free(struct spdk_blob_store *bs)
    3757             : {
    3758             :         struct spdk_blob_list *snapshot_entry;
    3759             :         struct spdk_blob_list *snapshot_entry_tmp;
    3760             :         struct spdk_blob_list *clone_entry;
    3761             :         struct spdk_blob_list *clone_entry_tmp;
    3762             : 
    3763         924 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3764         296 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3765         152 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3766         152 :                         free(clone_entry);
    3767             :                 }
    3768         144 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3769         144 :                 free(snapshot_entry);
    3770             :         }
    3771             : 
    3772         780 :         return 0;
    3773             : }
    3774             : 
    3775             : static void
    3776         780 : bs_free(struct spdk_blob_store *bs)
    3777             : {
    3778         780 :         bs_blob_list_free(bs);
    3779             : 
    3780         780 :         bs_unregister_md_thread(bs);
    3781         780 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3782         780 : }
    3783             : 
    3784             : void
    3785        1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3786             : {
    3787             : 
    3788        1048 :         if (!opts) {
    3789           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3790           0 :                 return;
    3791             :         }
    3792             : 
    3793        1048 :         if (!opts_size) {
    3794           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3795           0 :                 return;
    3796             :         }
    3797             : 
    3798        1048 :         memset(opts, 0, opts_size);
    3799        1048 :         opts->opts_size = opts_size;
    3800             : 
    3801             : #define FIELD_OK(field) \
    3802             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3803             : 
    3804             : #define SET_FIELD(field, value) \
    3805             :         if (FIELD_OK(field)) { \
    3806             :                 opts->field = value; \
    3807             :         } \
    3808             : 
    3809        1048 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3810        1048 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3811        1048 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3812        1048 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3813        1048 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3814             : 
    3815        1048 :         if (FIELD_OK(bstype)) {
    3816        1048 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3817             :         }
    3818             : 
    3819        1048 :         SET_FIELD(iter_cb_fn, NULL);
    3820        1048 :         SET_FIELD(iter_cb_arg, NULL);
    3821        1048 :         SET_FIELD(force_recover, false);
    3822        1048 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3823        1048 :         SET_FIELD(esnap_ctx, NULL);
    3824             : 
    3825             : #undef FIELD_OK
    3826             : #undef SET_FIELD
    3827             : }
    3828             : 
    3829             : static int
    3830         484 : bs_opts_verify(struct spdk_bs_opts *opts)
    3831             : {
    3832         484 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3833         480 :             opts->max_channel_ops == 0) {
    3834           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3835           4 :                 return -1;
    3836             :         }
    3837             : 
    3838         480 :         return 0;
    3839             : }
    3840             : 
    3841             : /* START spdk_bs_load */
    3842             : 
    3843             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3844             : 
    3845             : struct spdk_bs_load_ctx {
    3846             :         struct spdk_blob_store          *bs;
    3847             :         struct spdk_bs_super_block      *super;
    3848             : 
    3849             :         struct spdk_bs_md_mask          *mask;
    3850             :         bool                            in_page_chain;
    3851             :         uint32_t                        page_index;
    3852             :         uint32_t                        cur_page;
    3853             :         struct spdk_blob_md_page        *page;
    3854             : 
    3855             :         uint64_t                        num_extent_pages;
    3856             :         uint32_t                        *extent_page_num;
    3857             :         struct spdk_blob_md_page        *extent_pages;
    3858             :         struct spdk_bit_array           *used_clusters;
    3859             : 
    3860             :         spdk_bs_sequence_t                      *seq;
    3861             :         spdk_blob_op_with_handle_complete       iter_cb_fn;
    3862             :         void                                    *iter_cb_arg;
    3863             :         struct spdk_blob                        *blob;
    3864             :         spdk_blob_id                            blobid;
    3865             : 
    3866             :         bool                                    force_recover;
    3867             : 
    3868             :         /* These fields are used in the spdk_bs_dump path. */
    3869             :         bool                                    dumping;
    3870             :         FILE                                    *fp;
    3871             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3872             :         char                                    xattr_name[4096];
    3873             : };
    3874             : 
    3875             : static int
    3876         784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3877             :          struct spdk_bs_load_ctx **_ctx)
    3878             : {
    3879             :         struct spdk_blob_store  *bs;
    3880             :         struct spdk_bs_load_ctx *ctx;
    3881             :         uint64_t dev_size;
    3882             :         int rc;
    3883             : 
    3884         784 :         dev_size = dev->blocklen * dev->blockcnt;
    3885         784 :         if (dev_size < opts->cluster_sz) {
    3886             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3887           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3888             :                              dev_size, opts->cluster_sz);
    3889           0 :                 return -ENOSPC;
    3890             :         }
    3891         784 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3892             :                 /* Cluster size cannot be smaller than page size */
    3893           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3894             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3895           4 :                 return -EINVAL;
    3896             :         }
    3897         780 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3898         780 :         if (!bs) {
    3899           0 :                 return -ENOMEM;
    3900             :         }
    3901             : 
    3902         780 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3903         780 :         if (!ctx) {
    3904           0 :                 free(bs);
    3905           0 :                 return -ENOMEM;
    3906             :         }
    3907             : 
    3908         780 :         ctx->bs = bs;
    3909         780 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3910         780 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3911         780 :         ctx->force_recover = opts->force_recover;
    3912             : 
    3913         780 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3914             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    3915         780 :         if (!ctx->super) {
    3916           0 :                 free(ctx);
    3917           0 :                 free(bs);
    3918           0 :                 return -ENOMEM;
    3919             :         }
    3920             : 
    3921         780 :         RB_INIT(&bs->open_blobs);
    3922         780 :         TAILQ_INIT(&bs->snapshots);
    3923         780 :         bs->dev = dev;
    3924         780 :         bs->md_thread = spdk_get_thread();
    3925         780 :         assert(bs->md_thread != NULL);
    3926             : 
    3927             :         /*
    3928             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3929             :          *  even multiple of the cluster size.
    3930             :          */
    3931         780 :         bs->cluster_sz = opts->cluster_sz;
    3932         780 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3933         780 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3934         780 :         if (!ctx->used_clusters) {
    3935           0 :                 spdk_free(ctx->super);
    3936           0 :                 free(ctx);
    3937           0 :                 free(bs);
    3938           0 :                 return -ENOMEM;
    3939             :         }
    3940             : 
    3941         780 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3942         780 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3943         780 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3944             :         }
    3945         780 :         bs->num_free_clusters = bs->total_clusters;
    3946         780 :         bs->io_unit_size = dev->blocklen;
    3947             : 
    3948         780 :         bs->max_channel_ops = opts->max_channel_ops;
    3949         780 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3950         780 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3951         780 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3952         780 :         bs->esnap_ctx = opts->esnap_ctx;
    3953             : 
    3954             :         /* The metadata is assumed to be at least 1 page */
    3955         780 :         bs->used_md_pages = spdk_bit_array_create(1);
    3956         780 :         bs->used_blobids = spdk_bit_array_create(0);
    3957         780 :         bs->open_blobids = spdk_bit_array_create(0);
    3958             : 
    3959         780 :         spdk_spin_init(&bs->used_lock);
    3960             : 
    3961         780 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3962             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3963         780 :         rc = bs_register_md_thread(bs);
    3964         780 :         if (rc == -1) {
    3965           0 :                 spdk_io_device_unregister(bs, NULL);
    3966           0 :                 spdk_spin_destroy(&bs->used_lock);
    3967           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3968           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3969           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3970           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3971           0 :                 spdk_free(ctx->super);
    3972           0 :                 free(ctx);
    3973           0 :                 free(bs);
    3974             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3975           0 :                 return -ENOMEM;
    3976             :         }
    3977             : 
    3978         780 :         *_ctx = ctx;
    3979         780 :         *_bs = bs;
    3980         780 :         return 0;
    3981             : }
    3982             : 
    3983             : static void
    3984          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    3985             : {
    3986          24 :         assert(bserrno != 0);
    3987             : 
    3988          24 :         spdk_free(ctx->super);
    3989          24 :         bs_sequence_finish(ctx->seq, bserrno);
    3990          24 :         bs_free(ctx->bs);
    3991          24 :         spdk_bit_array_free(&ctx->used_clusters);
    3992          24 :         free(ctx);
    3993          24 : }
    3994             : 
    3995             : static void
    3996         824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    3997             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    3998             : {
    3999             :         /* Update the values in the super block */
    4000         824 :         super->super_blob = bs->super_blob;
    4001         824 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    4002         824 :         super->crc = blob_md_page_calc_crc(super);
    4003         824 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    4004         824 :                               bs_byte_to_lba(bs, sizeof(*super)),
    4005             :                               cb_fn, cb_arg);
    4006         824 : }
    4007             : 
    4008             : static void
    4009         760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4010             : {
    4011         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4012             :         uint64_t        mask_size, lba, lba_count;
    4013             : 
    4014             :         /* Write out the used clusters mask */
    4015         760 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4016         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4017             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4018         760 :         if (!ctx->mask) {
    4019           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4020           0 :                 return;
    4021             :         }
    4022             : 
    4023         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4024         760 :         ctx->mask->length = ctx->bs->total_clusters;
    4025             :         /* We could get here through the normal unload path, or through dirty
    4026             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4027             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4028             :          * only the bit array from the load ctx.
    4029             :          */
    4030         760 :         if (ctx->bs->used_clusters) {
    4031         654 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4032         654 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4033             :         } else {
    4034         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4035         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4036             :         }
    4037         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4038         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4039         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4040             : }
    4041             : 
    4042             : static void
    4043         760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4044             : {
    4045         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4046             :         uint64_t        mask_size, lba, lba_count;
    4047             : 
    4048         760 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4049         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4050             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4051         760 :         if (!ctx->mask) {
    4052           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4053           0 :                 return;
    4054             :         }
    4055             : 
    4056         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4057         760 :         ctx->mask->length = ctx->super->md_len;
    4058         760 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4059             : 
    4060         760 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4061         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4062         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4063         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4064             : }
    4065             : 
    4066             : static void
    4067         760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4068             : {
    4069         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4070             :         uint64_t        mask_size, lba, lba_count;
    4071             : 
    4072         760 :         if (ctx->super->used_blobid_mask_len == 0) {
    4073             :                 /*
    4074             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4075             :                  *  written to disk.
    4076             :                  */
    4077          24 :                 cb_fn(seq, arg, 0);
    4078          24 :                 return;
    4079             :         }
    4080             : 
    4081         736 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4082         736 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4083             :                                  SPDK_MALLOC_DMA);
    4084         736 :         if (!ctx->mask) {
    4085           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4086           0 :                 return;
    4087             :         }
    4088             : 
    4089         736 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4090         736 :         ctx->mask->length = ctx->super->md_len;
    4091         736 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4092             : 
    4093         736 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4094         736 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4095         736 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4096         736 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4097             : }
    4098             : 
    4099             : static void
    4100         704 : blob_set_thin_provision(struct spdk_blob *blob)
    4101             : {
    4102         704 :         blob_verify_md_op(blob);
    4103         704 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4104         704 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4105         704 : }
    4106             : 
    4107             : static void
    4108        2094 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4109             : {
    4110        2094 :         blob_verify_md_op(blob);
    4111        2094 :         blob->clear_method = clear_method;
    4112        2094 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4113        2094 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4114        2094 : }
    4115             : 
    4116             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4117             : 
    4118             : static void
    4119          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4120             : {
    4121          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4122             :         spdk_blob_id id;
    4123             :         int64_t page_num;
    4124             : 
    4125             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4126             :          * last blob has been removed */
    4127          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    4128          24 :         page_num++;
    4129          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4130          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4131          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4132          24 :                 return;
    4133             :         }
    4134             : 
    4135           0 :         id = bs_page_to_blobid(page_num);
    4136             : 
    4137           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4138             : }
    4139             : 
    4140             : static void
    4141          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4142             : {
    4143          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4144             : 
    4145          24 :         if (bserrno != 0) {
    4146           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4147           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4148           0 :                 return;
    4149             :         }
    4150             : 
    4151          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4152             : }
    4153             : 
    4154             : static void
    4155          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4156             : {
    4157          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4158             :         uint64_t i;
    4159             : 
    4160          24 :         if (bserrno != 0) {
    4161           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4162           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4163           0 :                 return;
    4164             :         }
    4165             : 
    4166             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4167             :          * at this point. Let's clear both for snapshot now,
    4168             :          * so that it won't be cleared for clone later when we remove snapshot.
    4169             :          * Also set thin provision to pass data corruption check */
    4170         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4171         240 :                 ctx->blob->active.clusters[i] = 0;
    4172             :         }
    4173          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4174          12 :                 ctx->blob->active.extent_pages[i] = 0;
    4175             :         }
    4176             : 
    4177          24 :         ctx->blob->active.num_allocated_clusters = 0;
    4178             : 
    4179          24 :         ctx->blob->md_ro = false;
    4180             : 
    4181          24 :         blob_set_thin_provision(ctx->blob);
    4182             : 
    4183          24 :         ctx->blobid = ctx->blob->id;
    4184             : 
    4185          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4186             : }
    4187             : 
    4188             : static void
    4189          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4190             : {
    4191          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4192             : 
    4193          12 :         if (bserrno != 0) {
    4194           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4195           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4196           0 :                 return;
    4197             :         }
    4198             : 
    4199          12 :         ctx->blob->md_ro = false;
    4200          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4201          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4202          12 :         spdk_blob_set_read_only(ctx->blob);
    4203             : 
    4204          12 :         if (ctx->iter_cb_fn) {
    4205           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4206             :         }
    4207          12 :         bs_blob_list_add(ctx->blob);
    4208             : 
    4209          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4210             : }
    4211             : 
    4212             : static void
    4213          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4214             : {
    4215          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4216             : 
    4217          36 :         if (bserrno != 0) {
    4218           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4219           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4220           0 :                 return;
    4221             :         }
    4222             : 
    4223          36 :         if (blob->parent_id == ctx->blob->id) {
    4224             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4225             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4226          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4227             :         } else {
    4228             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4229             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4230          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4231             :         }
    4232             : }
    4233             : 
    4234             : static void
    4235         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4236             : {
    4237         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4238         720 :         const void *value;
    4239         720 :         size_t len;
    4240         720 :         int rc = 0;
    4241             : 
    4242         720 :         if (bserrno == 0) {
    4243             :                 /* Examine blob if it is corrupted after power failure. Fix
    4244             :                  * the ones that can be fixed and remove any other corrupted
    4245             :                  * ones. If it is not corrupted just process it */
    4246         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4247         440 :                 if (rc != 0) {
    4248         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4249         420 :                         if (rc != 0) {
    4250             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4251         404 :                                 if (ctx->iter_cb_fn) {
    4252          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4253             :                                 }
    4254         404 :                                 bs_blob_list_add(blob);
    4255         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4256         404 :                                 return;
    4257             :                         }
    4258             : 
    4259             :                 }
    4260             : 
    4261          36 :                 assert(len == sizeof(spdk_blob_id));
    4262             : 
    4263          36 :                 ctx->blob = blob;
    4264             : 
    4265             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4266          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4267          36 :                 return;
    4268         280 :         } else if (bserrno == -ENOENT) {
    4269         280 :                 bserrno = 0;
    4270             :         } else {
    4271             :                 /*
    4272             :                  * This case needs to be looked at further.  Same problem
    4273             :                  *  exists with applications that rely on explicit blob
    4274             :                  *  iteration.  We should just skip the blob that failed
    4275             :                  *  to load and continue on to the next one.
    4276             :                  */
    4277           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4278             :         }
    4279             : 
    4280         280 :         ctx->iter_cb_fn = NULL;
    4281             : 
    4282         280 :         spdk_free(ctx->super);
    4283         280 :         spdk_free(ctx->mask);
    4284         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4285         280 :         free(ctx);
    4286             : }
    4287             : 
    4288             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4289             : 
    4290             : static void
    4291         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4292             : {
    4293         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4294         280 :         if (ctx->dumping) {
    4295           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4296           0 :                 return;
    4297             :         }
    4298         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4299             : }
    4300             : 
    4301             : static void
    4302         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4303             : {
    4304         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4305             :         int rc;
    4306             : 
    4307             :         /* The type must be correct */
    4308         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4309             : 
    4310             :         /* The length of the mask (in bits) must not be greater than
    4311             :          * the length of the buffer (converted to bits) */
    4312         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4313             : 
    4314             :         /* The length of the mask must be exactly equal to the size
    4315             :          * (in pages) of the metadata region */
    4316         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4317             : 
    4318         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4319         174 :         if (rc < 0) {
    4320           0 :                 spdk_free(ctx->mask);
    4321           0 :                 bs_load_ctx_fail(ctx, rc);
    4322           0 :                 return;
    4323             :         }
    4324             : 
    4325         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4326         174 :         bs_load_complete(ctx);
    4327             : }
    4328             : 
    4329             : static void
    4330         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4331             : {
    4332         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4333             :         uint64_t                lba, lba_count, mask_size;
    4334             :         int                     rc;
    4335             : 
    4336         174 :         if (bserrno != 0) {
    4337           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4338           0 :                 return;
    4339             :         }
    4340             : 
    4341             :         /* The type must be correct */
    4342         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4343             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4344         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4345             :                                              struct spdk_blob_md_page) * 8));
    4346             :         /*
    4347             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4348             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4349             :          */
    4350         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4351         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4352           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4353           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4354             :         }
    4355             : 
    4356         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4357         174 :         if (rc < 0) {
    4358           0 :                 spdk_free(ctx->mask);
    4359           0 :                 bs_load_ctx_fail(ctx, rc);
    4360           0 :                 return;
    4361             :         }
    4362             : 
    4363         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4364         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4365         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4366             : 
    4367         174 :         spdk_free(ctx->mask);
    4368             : 
    4369             :         /* Read the used blobids mask */
    4370         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4371         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4372             :                                  SPDK_MALLOC_DMA);
    4373         174 :         if (!ctx->mask) {
    4374           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4375           0 :                 return;
    4376             :         }
    4377         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4378         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4379         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4380             :                              bs_load_used_blobids_cpl, ctx);
    4381             : }
    4382             : 
    4383             : static void
    4384         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4385             : {
    4386         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4387             :         uint64_t                lba, lba_count, mask_size;
    4388             :         int                     rc;
    4389             : 
    4390         174 :         if (bserrno != 0) {
    4391           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4392           0 :                 return;
    4393             :         }
    4394             : 
    4395             :         /* The type must be correct */
    4396         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4397             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4398         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4399             :                                      8));
    4400             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4401         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4402           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4403             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4404             :                             ctx->mask->length, ctx->super->md_len);
    4405           0 :                 assert(false);
    4406             :         }
    4407             : 
    4408         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4409         174 :         if (rc < 0) {
    4410           0 :                 spdk_free(ctx->mask);
    4411           0 :                 bs_load_ctx_fail(ctx, rc);
    4412           0 :                 return;
    4413             :         }
    4414             : 
    4415         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4416         174 :         spdk_free(ctx->mask);
    4417             : 
    4418             :         /* Read the used clusters mask */
    4419         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4420         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4421             :                                  SPDK_MALLOC_DMA);
    4422         174 :         if (!ctx->mask) {
    4423           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4424           0 :                 return;
    4425             :         }
    4426         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4427         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4428         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4429             :                              bs_load_used_clusters_cpl, ctx);
    4430             : }
    4431             : 
    4432             : static void
    4433         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4434             : {
    4435             :         uint64_t lba, lba_count, mask_size;
    4436             : 
    4437             :         /* Read the used pages mask */
    4438         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4439         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4440             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4441         174 :         if (!ctx->mask) {
    4442           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4443           0 :                 return;
    4444             :         }
    4445             : 
    4446         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4447         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4448         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4449             :                              bs_load_used_pages_cpl, ctx);
    4450             : }
    4451             : 
    4452             : static int
    4453         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4454             : {
    4455         246 :         struct spdk_blob_store *bs = ctx->bs;
    4456             :         struct spdk_blob_md_descriptor *desc;
    4457         246 :         size_t  cur_desc = 0;
    4458             : 
    4459         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4460         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4461         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4462         226 :                         if (desc->length == 0) {
    4463             :                                 /* If padding and length are 0, this terminates the page */
    4464         226 :                                 break;
    4465             :                         }
    4466         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4467             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4468             :                         unsigned int                            i, j;
    4469          68 :                         unsigned int                            cluster_count = 0;
    4470             :                         uint32_t                                cluster_idx;
    4471             : 
    4472          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4473             : 
    4474         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4475         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4476         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4477             :                                         /*
    4478             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4479             :                                          * in the used cluster map.
    4480             :                                          */
    4481         760 :                                         if (cluster_idx != 0) {
    4482         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4483         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4484         540 :                                                 if (bs->num_free_clusters == 0) {
    4485           0 :                                                         return -ENOSPC;
    4486             :                                                 }
    4487         540 :                                                 bs->num_free_clusters--;
    4488             :                                         }
    4489         760 :                                         cluster_count++;
    4490             :                                 }
    4491             :                         }
    4492          68 :                         if (cluster_count == 0) {
    4493           0 :                                 return -EINVAL;
    4494             :                         }
    4495         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4496             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4497             :                         uint32_t                                        i;
    4498          52 :                         uint32_t                                        cluster_count = 0;
    4499             :                         uint32_t                                        cluster_idx;
    4500             :                         size_t                                          cluster_idx_length;
    4501             : 
    4502          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4503          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4504             : 
    4505          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4506          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4507           0 :                                 return -EINVAL;
    4508             :                         }
    4509             : 
    4510         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4511         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4512             :                                 /*
    4513             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4514             :                                  * in the used cluster map.
    4515             :                                  */
    4516         600 :                                 if (cluster_idx != 0) {
    4517         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4518           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4519           0 :                                                 return -EINVAL;
    4520             :                                         }
    4521         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4522         600 :                                         if (bs->num_free_clusters == 0) {
    4523           0 :                                                 return -ENOSPC;
    4524             :                                         }
    4525         600 :                                         bs->num_free_clusters--;
    4526             :                                 }
    4527         600 :                                 cluster_count++;
    4528             :                         }
    4529             : 
    4530          52 :                         if (cluster_count == 0) {
    4531           0 :                                 return -EINVAL;
    4532             :                         }
    4533         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4534             :                         /* Skip this item */
    4535         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4536             :                         /* Skip this item */
    4537         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4538             :                         /* Skip this item */
    4539          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4540             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4541          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4542             :                         uint32_t i;
    4543             :                         size_t extent_pages_length;
    4544             :                         void *tmp;
    4545             : 
    4546          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4547          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4548             : 
    4549          82 :                         if (desc_extent_table->length == 0 ||
    4550          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4551           0 :                                 return -EINVAL;
    4552             :                         }
    4553             : 
    4554         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4555          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4556          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4557           0 :                                                 return -EINVAL;
    4558             :                                         }
    4559          52 :                                         num_extent_pages += 1;
    4560             :                                 }
    4561             :                         }
    4562             : 
    4563          82 :                         if (num_extent_pages > 0) {
    4564          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4565          52 :                                 if (tmp == NULL) {
    4566           0 :                                         return -ENOMEM;
    4567             :                                 }
    4568          52 :                                 ctx->extent_page_num = tmp;
    4569             : 
    4570             :                                 /* Extent table entries contain md page numbers for extent pages.
    4571             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4572             :                                  */
    4573         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4574          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4575          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4576          52 :                                                 ctx->num_extent_pages += 1;
    4577             :                                         }
    4578             :                                 }
    4579             :                         }
    4580             :                 } else {
    4581             :                         /* Error */
    4582           0 :                         return -EINVAL;
    4583             :                 }
    4584             :                 /* Advance to the next descriptor */
    4585         492 :                 cur_desc += sizeof(*desc) + desc->length;
    4586         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4587          20 :                         break;
    4588             :                 }
    4589         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4590             :         }
    4591         246 :         return 0;
    4592             : }
    4593             : 
    4594             : static bool
    4595        1298 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4596             : {
    4597             :         uint32_t crc;
    4598        1298 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4599             :         size_t desc_len;
    4600             : 
    4601        1298 :         crc = blob_md_page_calc_crc(page);
    4602        1298 :         if (crc != page->crc) {
    4603           0 :                 return false;
    4604             :         }
    4605             : 
    4606             :         /* Extent page should always be of sequence num 0. */
    4607        1298 :         if (page->sequence_num != 0) {
    4608          44 :                 return false;
    4609             :         }
    4610             : 
    4611             :         /* Descriptor type must be EXTENT_PAGE. */
    4612        1254 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4613         154 :                 return false;
    4614             :         }
    4615             : 
    4616             :         /* Descriptor length cannot exceed the page. */
    4617        1100 :         desc_len = sizeof(*desc) + desc->length;
    4618        1100 :         if (desc_len > sizeof(page->descriptors)) {
    4619           0 :                 return false;
    4620             :         }
    4621             : 
    4622             :         /* It has to be the only descriptor in the page. */
    4623        1100 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4624        1100 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4625        1100 :                 if (desc->length != 0) {
    4626           0 :                         return false;
    4627             :                 }
    4628             :         }
    4629             : 
    4630        1100 :         return true;
    4631             : }
    4632             : 
    4633             : static bool
    4634        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4635             : {
    4636             :         uint32_t crc;
    4637        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4638             : 
    4639        6754 :         crc = blob_md_page_calc_crc(page);
    4640        6754 :         if (crc != page->crc) {
    4641        6538 :                 return false;
    4642             :         }
    4643             : 
    4644             :         /* First page of a sequence should match the blobid. */
    4645         216 :         if (page->sequence_num == 0 &&
    4646         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4647          18 :                 return false;
    4648             :         }
    4649         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4650             : 
    4651         198 :         return true;
    4652             : }
    4653             : 
    4654             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4655             : 
    4656             : static void
    4657         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4658             : {
    4659         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4660             : 
    4661         106 :         if (bserrno != 0) {
    4662           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4663           0 :                 return;
    4664             :         }
    4665             : 
    4666         106 :         bs_load_complete(ctx);
    4667             : }
    4668             : 
    4669             : static void
    4670         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4671             : {
    4672         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4673             : 
    4674         106 :         spdk_free(ctx->mask);
    4675         106 :         ctx->mask = NULL;
    4676             : 
    4677         106 :         if (bserrno != 0) {
    4678           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4679           0 :                 return;
    4680             :         }
    4681             : 
    4682         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4683             : }
    4684             : 
    4685             : static void
    4686         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4687             : {
    4688         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4689             : 
    4690         106 :         spdk_free(ctx->mask);
    4691         106 :         ctx->mask = NULL;
    4692             : 
    4693         106 :         if (bserrno != 0) {
    4694           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4695           0 :                 return;
    4696             :         }
    4697             : 
    4698         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4699             : }
    4700             : 
    4701             : static void
    4702         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4703             : {
    4704         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4705         106 : }
    4706             : 
    4707             : static void
    4708        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4709             : {
    4710             :         uint64_t num_md_clusters;
    4711             :         uint64_t i;
    4712             : 
    4713        6714 :         ctx->in_page_chain = false;
    4714             : 
    4715             :         do {
    4716        6784 :                 ctx->page_index++;
    4717        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4718             : 
    4719        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4720        6608 :                 ctx->cur_page = ctx->page_index;
    4721        6608 :                 bs_load_replay_cur_md_page(ctx);
    4722             :         } else {
    4723             :                 /* Claim all of the clusters used by the metadata */
    4724         106 :                 num_md_clusters = spdk_divide_round_up(
    4725         212 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4726         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4727         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4728             :                 }
    4729         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4730         106 :                 spdk_free(ctx->page);
    4731         106 :                 bs_load_write_used_md(ctx);
    4732             :         }
    4733        6714 : }
    4734             : 
    4735             : static void
    4736          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4737             : {
                               /*
                                * Completion callback registered by bs_load_replay_extent_pages()
                                * for the batch of extent page reads. cb_arg is the load context.
                                *
                                * Each page read into ctx->extent_pages is validated, its page
                                * number is marked in used_md_pages, and it is replayed through
                                * bs_load_replay_md_parse_page(). On success the buffers are
                                * released, the extent page bookkeeping is reset and the load
                                * continues in bs_load_replay_md_chain_cpl(). A read error, an
                                * invalid page or a parse failure ends the load through
                                * bs_load_ctx_fail(). The seq argument is not used.
                                *
                                * NOTE(review): the failure paths free ctx->extent_pages but not
                                * ctx->extent_page_num; presumably bs_load_ctx_fail() releases
                                * it - confirm. All three failure paths show a hit count of 0
                                * in this report.
                                */
    4738          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4739             :         uint32_t page_num;
    4740             :         uint64_t i;
    4741             : 
    4742          52 :         if (bserrno != 0) {
    4743           0 :                 spdk_free(ctx->extent_pages);
    4744           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4745           0 :                 return;
    4746             :         }
    4747             : 
    4748         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4749             :                 /* Extent pages are only read when present within chain md.
    4750             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4751          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4752           0 :                         spdk_free(ctx->extent_pages);
    4753           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4754           0 :                         return;
    4755             :                 }
    4756             : 
    4757          52 :                 page_num = ctx->extent_page_num[i];
    4758          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4759          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4760           0 :                         spdk_free(ctx->extent_pages);
    4761           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4762           0 :                         return;
    4763             :                 }
    4764             :         }
    4765             : 
                               /* All pages replayed: drop the read buffer and the page number list. */
    4766          52 :         spdk_free(ctx->extent_pages);
    4767          52 :         free(ctx->extent_page_num);
    4768          52 :         ctx->extent_page_num = NULL;
    4769          52 :         ctx->num_extent_pages = 0;
    4770             : 
    4771          52 :         bs_load_replay_md_chain_cpl(ctx);
    4772             : }
    4773             : 
    4774             : static void
    4775          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4776             : {
                               /*
                                * Read every extent page listed in ctx->extent_page_num in one
                                * batch. A zeroed buffer of ctx->num_extent_pages pages is
                                * allocated into ctx->extent_pages, one page-sized read is queued
                                * per entry, and bs_load_replay_extent_page_cpl() runs when the
                                * batch completes. Allocation failure ends the load with -ENOMEM.
                                *
                                * NOTE(review): the size multiplication is not checked for
                                * overflow, and the only bound on each page number is the
                                * assert() below, which is presumably compiled out in
                                * non-debug builds - confirm the values are validated earlier.
                                */
    4777             :         spdk_bs_batch_t *batch;
    4778             :         uint32_t page;
    4779             :         uint64_t lba;
    4780             :         uint64_t i;
    4781             : 
    4782          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4783             :                                          NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4784          52 :         if (!ctx->extent_pages) {
    4785           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4786           0 :                 return;
    4787             :         }
    4788             : 
    4789          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4790             : 
    4791         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4792          52 :                 page = ctx->extent_page_num[i];
    4793          52 :                 assert(page < ctx->super->md_len);
    4794          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4795          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4796          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4797             :         }
    4798             : 
    4799          52 :         bs_batch_close(batch);
    4800             : }
    4801             : 
    4802             : static void
    4803        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4804             : {
                               /*
                                * Completion callback for the read of metadata page ctx->cur_page
                                * into ctx->page (issued by bs_load_replay_cur_md_page()).
                                *
                                * A page is replayed only when it passes
                                * bs_load_cur_md_page_valid() and either has sequence_num 0 or
                                * the context is already following a page chain. Such a page is
                                * claimed under used_lock and parsed; a page with sequence_num 0
                                * additionally has its page number recorded in used_blobids.
                                * Afterwards the function either follows page->next, reads the
                                * collected extent pages, or falls through to
                                * bs_load_replay_md_chain_cpl(). The seq argument is not used.
                                */
    4805        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4806             :         uint32_t page_num;
    4807             :         struct spdk_blob_md_page *page;
    4808             : 
    4809        6754 :         if (bserrno != 0) {
    4810           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4811           0 :                 return;
    4812             :         }
    4813             : 
    4814        6754 :         page_num = ctx->cur_page;
    4815        6754 :         page = ctx->page;
    4816        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4817         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4818         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4819         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4820         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4821         194 :                         if (page->sequence_num == 0) {
    4822         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4823         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4824             :                         }
    4825         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4826           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4827           0 :                                 return;
    4828             :                         }
                                               /* More pages in this chain: read the next one first. */
    4829         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4830          40 :                                 ctx->in_page_chain = true;
    4831          40 :                                 ctx->cur_page = page->next;
    4832          40 :                                 bs_load_replay_cur_md_page(ctx);
    4833          40 :                                 return;
    4834             :                         }
                                               /* End of chain: replay any extent pages collected so far. */
    4835         154 :                         if (ctx->num_extent_pages != 0) {
    4836          52 :                                 bs_load_replay_extent_pages(ctx);
    4837          52 :                                 return;
    4838             :                         }
    4839             :                 }
    4840             :         }
    4841        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4842             : }
    4843             : 
    4844             : static void
    4845        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4846             : {
                               /*
                                * Issue a one-page read of metadata page ctx->cur_page into
                                * ctx->page on the load sequence; bs_load_replay_md_cpl() is the
                                * completion. The page index is only bounded by the assert()
                                * against the super block's md_len.
                                */
    4847             :         uint64_t lba;
    4848             : 
    4849        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4850        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4851        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4852        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4853             :                              bs_load_replay_md_cpl, ctx);
    4854        6754 : }
    4855             : 
    4856             : static void
    4857         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4858             : {
                               /*
                                * Start the metadata replay: reset page_index and cur_page to 0,
                                * allocate the single zeroed page buffer that the replay reads
                                * into (ctx->page) and issue the first page read. Allocation
                                * failure ends the load with -ENOMEM.
                                */
    4859         106 :         ctx->page_index = 0;
    4860         106 :         ctx->cur_page = 0;
    4861         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4862             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4863         106 :         if (!ctx->page) {
    4864           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4865           0 :                 return;
    4866             :         }
    4867         106 :         bs_load_replay_cur_md_page(ctx);
    4868             : }
    4869             : 
    4870             : static void
    4871         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4872             : {
                               /*
                                * Entry point of the recovery path chosen by bs_load_super_cpl().
                                * Resizes used_md_pages, used_blobids and open_blobids to the
                                * super block's md_len and used_clusters to total_clusters, marks
                                * every cluster free, then starts the metadata replay.
                                *
                                * Any resize failure ends the load with -ENOMEM; the value
                                * returned by spdk_bit_array_resize() itself is not propagated.
                                */
    4873             :         int             rc;
    4874             : 
    4875         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4876         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4877         106 :         if (rc < 0) {
    4878           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4879           0 :                 return;
    4880             :         }
    4881             : 
    4882         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4883         106 :         if (rc < 0) {
    4884           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4885           0 :                 return;
    4886             :         }
    4887             : 
    4888         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4889         106 :         if (rc < 0) {
    4890           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4891           0 :                 return;
    4892             :         }
    4893             : 
    4894         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4895         106 :         if (rc < 0) {
    4896           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4897           0 :                 return;
    4898             :         }
    4899             : 
    4900         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4901         106 :         bs_load_replay_md(ctx);
    4902             : }
    4903             : 
    4904             : static int
    4905         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4906             : {
                               /*
                                * Populate the in-memory blobstore (ctx->bs) from the super block
                                * in ctx->super.
                                *
                                * A zero size in the super block is replaced by the device size
                                * (blockcnt * blocklen) and a zero io_unit_size by
                                * SPDK_BS_PAGE_SIZE; both defaults are written back into
                                * ctx->super. pages_per_cluster_shift is only assigned when
                                * pages_per_cluster is a power of two.
                                *
                                * Returns 0 on success, or -ENOMEM when resizing used_clusters or
                                * open_blobids fails.
                                *
                                * NOTE(review): this function divides by super->cluster_size and
                                * by pages_per_cluster without a local zero check. The visible
                                * caller runs bs_super_validate() first; confirm that it rejects
                                * such super blocks.
                                */
    4907             :         int rc;
    4908             : 
    4909         276 :         if (ctx->super->size == 0) {
    4910           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4911             :         }
    4912             : 
    4913         276 :         if (ctx->super->io_unit_size == 0) {
    4914           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4915             :         }
    4916             : 
    4917         276 :         ctx->bs->clean = 1;
    4918         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4919         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4920         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    4921         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4922         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4923             :         }
    4924         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4925         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4926         276 :         if (rc < 0) {
    4927           0 :                 return -ENOMEM;
    4928             :         }
    4929         276 :         ctx->bs->md_start = ctx->super->md_start;
    4930         276 :         ctx->bs->md_len = ctx->super->md_len;
    4931         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4932         276 :         if (rc < 0) {
    4933           0 :                 return -ENOMEM;
    4934             :         }
    4935             : 
                               /* Data clusters are what remains after the clusters spanned by
                                * pages [0, md_start + md_len), rounded up to whole clusters. */
    4936         828 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4937         552 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4938         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4939         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4940             : 
    4941         276 :         return 0;
    4942             : }
    4943             : 
    4944             : static void
    4945         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4946             : {
                               /*
                                * Completion callback for the super block read issued by
                                * spdk_bs_load(). Validates the super block, copies its contents
                                * into the blobstore via bs_parse_super(), then either runs
                                * recovery or continues the normal load with
                                * bs_load_read_used_pages().
                                *
                                * Recovery is chosen when the super block has a zero
                                * used_blobid_mask_len, is not marked clean, or the caller set
                                * force_recover.
                                *
                                * NOTE(review): bserrno is never read here, so a failed super
                                * block read is not reported as such; the code relies on
                                * bs_super_validate() rejecting the buffer contents - confirm
                                * that this is intended.
                                */
    4947         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4948             :         int rc;
    4949             : 
    4950         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4951         300 :         if (rc != 0) {
    4952          24 :                 bs_load_ctx_fail(ctx, rc);
    4953          24 :                 return;
    4954             :         }
    4955             : 
    4956         276 :         rc = bs_parse_super(ctx);
    4957         276 :         if (rc < 0) {
    4958           0 :                 bs_load_ctx_fail(ctx, rc);
    4959           0 :                 return;
    4960             :         }
    4961             : 
    4962         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4963         106 :                 bs_recover(ctx);
    4964             :         } else {
    4965         170 :                 bs_load_read_used_pages(ctx);
    4966             :         }
    4967             : }
    4968             : 
    4969             : static inline int
    4970         308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4971             : {
                               /*
                                * Copy caller-supplied blobstore options from src into dst,
                                * honouring src->opts_size: a field is copied only when it lies
                                * entirely within the first src->opts_size bytes of the
                                * structure, so dst keeps its existing value for fields the
                                * caller's structure does not cover.
                                *
                                * dst->opts_size is set to src->opts_size.
                                *
                                * Returns 0 on success, or -1 (after logging an error) when
                                * src->opts_size is zero.
                                */
    4972             : 
    4973         308 :         if (!src->opts_size) {
    4974           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4975           0 :                 return -1;
    4976             :         }
    4977             : 
                               /* FIELD_OK expands to an unparenthesized comparison; it is only
                                * used directly as an if () condition below. */
    4978             : #define FIELD_OK(field) \
    4979             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4980             : 
    4981             : #define SET_FIELD(field) \
    4982             :         if (FIELD_OK(field)) { \
    4983             :                 dst->field = src->field; \
    4984             :         } \
    4985             : 
    4986         308 :         SET_FIELD(cluster_sz);
    4987         308 :         SET_FIELD(num_md_pages);
    4988         308 :         SET_FIELD(max_md_ops);
    4989         308 :         SET_FIELD(max_channel_ops);
    4990         308 :         SET_FIELD(clear_method);
    4991             : 
                               /* bstype is copied with memcpy() rather than by assignment. */
    4992         308 :         if (FIELD_OK(bstype)) {
    4993         308 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    4994             :         }
    4995         308 :         SET_FIELD(iter_cb_fn);
    4996         308 :         SET_FIELD(iter_cb_arg);
    4997         308 :         SET_FIELD(force_recover);
    4998         308 :         SET_FIELD(esnap_bs_dev_create);
    4999         308 :         SET_FIELD(esnap_ctx);
    5000             : 
    5001         308 :         dst->opts_size = src->opts_size;
    5002             : 
    5003             :         /* You should not remove this statement, but need to update the assert statement
    5004             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    5005             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    5006             : 
    5007             : #undef FIELD_OK
    5008             : #undef SET_FIELD
    5009             : 
    5010         308 :         return 0;
    5011             : }
    5012             : 
    5013             : void
    5014         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5015             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5016             : {
                               /*
                                * Begin loading a blobstore from dev.
                                *
                                * dev     device to load from; its blocklen must evenly divide
                                *         SPDK_BS_PAGE_SIZE.
                                * o       optional options; when NULL the defaults from
                                *         spdk_bs_opts_init() are used.
                                * cb_fn   completion callback; on the failures reported directly
                                *         from this function it is called with a NULL blobstore
                                *         and a negative errno.
                                * cb_arg  passed through to cb_fn.
                                *
                                * On success this function only starts the work: it allocates
                                * the blobstore and load context, starts a sequence and issues
                                * the super block read, whose completion is bs_load_super_cpl().
                                *
                                * For an unsupported block length, zero max_md_ops or
                                * max_channel_ops, and bs_alloc() failure, dev->destroy(dev) is
                                * called before cb_fn is invoked.
                                *
                                * NOTE(review): when bs_opts_copy() fails the function returns
                                * without calling dev->destroy() or cb_fn, unlike every other
                                * early exit, so the caller is never notified - confirm whether
                                * this is intended. That return shows a hit count of 0 in this
                                * report.
                                * NOTE(review): the sequence-start failure path does not call
                                * dev->destroy() directly; presumably bs_free() takes care of
                                * the device - confirm.
                                */
    5017         312 :         struct spdk_blob_store  *bs;
    5018         312 :         struct spdk_bs_cpl      cpl;
    5019         312 :         struct spdk_bs_load_ctx *ctx;
    5020         312 :         struct spdk_bs_opts     opts = {};
    5021             :         int err;
    5022             : 
    5023         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5024             : 
    5025         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5026           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5027           4 :                 dev->destroy(dev);
    5028           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5029           4 :                 return;
    5030             :         }
    5031             : 
    5032         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5033         308 :         if (o) {
    5034         122 :                 if (bs_opts_copy(o, &opts)) {
    5035           0 :                         return;
    5036             :                 }
    5037             :         }
    5038             : 
    5039         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5040           8 :                 dev->destroy(dev);
    5041           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5042           8 :                 return;
    5043             :         }
    5044             : 
    5045         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5046         300 :         if (err) {
    5047           0 :                 dev->destroy(dev);
    5048           0 :                 cb_fn(cb_arg, NULL, err);
    5049           0 :                 return;
    5050             :         }
    5051             : 
                               /* The completion record handed to the sequence carries the
                                * caller's callback and the new blobstore handle. */
    5052         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5053         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5054         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5055         300 :         cpl.u.bs_handle.bs = bs;
    5056             : 
    5057         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5058         300 :         if (!ctx->seq) {
    5059           0 :                 spdk_free(ctx->super);
    5060           0 :                 free(ctx);
    5061           0 :                 bs_free(bs);
    5062           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5063           0 :                 return;
    5064             :         }
    5065             : 
    5066             :         /* Read the super block */
    5067         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5068         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5069             :                              bs_load_super_cpl, ctx);
    5070             : }
    5071             : 
    5072             : /* END spdk_bs_load */
    5073             : 
    5074             : /* START spdk_bs_dump */
    5075             : 
    5076             : static void
    5077           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5078             : {
                               /*
                                * Tear down a dump operation. Frees the super block buffer, stores
                                * bserrno and a copy of the sequence's completion record in the
                                * blobstore (unload_err / unload_cpl), clears the sequence's own
                                * completion type, then finishes the sequence with status 0 and
                                * frees the blobstore and the context.
                                *
                                * This function has a hit count of 0 throughout this report.
                                */
    5079           0 :         spdk_free(ctx->super);
    5080             : 
    5081             :         /*
    5082             :          * We need to defer calling bs_call_cpl() until after
    5083             :          * dev destruction, so tuck these away for later use.
    5084             :          */
    5085           0 :         ctx->bs->unload_err = bserrno;
    5086           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5087           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5088             : 
    5089           0 :         bs_sequence_finish(seq, 0);
    5090           0 :         bs_free(ctx->bs);
    5091           0 :         free(ctx);
    5092           0 : }
    5093             : 
    5094             : static void
    5095           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5096             : {
                               /*
                                * Print one xattr descriptor to ctx->fp: the descriptor kind
                                * (XATTR or XATTR_INTERNAL), the name, the value as rendered by
                                * ctx->print_xattr_fn, and a hex dump of the value with 16 bytes
                                * per line. The value bytes are read from directly after the
                                * name inside the descriptor.
                                *
                                * NOTE(review): the length consistency check below has an empty
                                * body, so a descriptor whose length does not match its name and
                                * value lengths is still printed.
                                * NOTE(review): name_length bytes plus a terminator are written
                                * to ctx->xattr_name without a visible bound; assumes the buffer
                                * holds at least name_length + 1 bytes - TODO confirm.
                                */
    5097             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5098             :         uint32_t i;
    5099             :         const char *type;
    5100             : 
    5101           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5102             : 
    5103           0 :         if (desc_xattr->length !=
    5104             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5105           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5106             :         }
    5107             : 
    5108           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5109           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5110           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5111           0 :                 type = "XATTR";
    5112           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5113           0 :                 type = "XATTR_INTERNAL";
    5114             :         } else {
    5115           0 :                 assert(false);
    5116             :                 type = "XATTR_?";
    5117             :         }
    5118           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5119           0 :         fprintf(ctx->fp, "       value = \"");
    5120           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5121           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5122           0 :                             desc_xattr->value_length);
    5123           0 :         fprintf(ctx->fp, "\"\n");
                               /* Hex dump: indent at the start of each row, newline after 16 bytes. */
    5124           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5125           0 :                 if (i % 16 == 0) {
    5126           0 :                         fprintf(ctx->fp, "               ");
    5127             :                 }
    5128           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5129           0 :                 if ((i + 1) % 16 == 0) {
    5130           0 :                         fprintf(ctx->fp, "\n");
    5131             :                 }
    5132             :         }
                               /* Terminate a final, partially filled row. */
    5133           0 :         if (i % 16 != 0) {
    5134           0 :                 fprintf(ctx->fp, "\n");
    5135             :         }
    5136           0 : }
    5137             : 
    5138             : struct type_flag_desc {
                               /*
                                * One row of a flag description table used by
                                * bs_dump_print_type_bits(): a row applies to a flags word when
                                * (flags & mask) == val, and name is the label printed for it.
                                */
    5139             :         uint64_t mask;
    5140             :         uint64_t val;
    5141             :         const char *name;
    5142             : };
    5143             : 
    5144             : static void
    5145           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5146             :                         struct type_flag_desc *desc, size_t numflags)
    5147             : {
                               /*
                                * Decode a flags word against a table of numflags descriptions
                                * and print the result to ctx->fp. Every row whose masked flags
                                * equal its value is printed with its value and name; the mask
                                * and value are appended when they differ. Bits not covered by
                                * the mask of any matching row are printed together on a final
                                * "Unknown" line.
                                */
    5148           0 :         uint64_t covered = 0;
    5149             :         size_t i;
    5150             : 
    5151           0 :         for (i = 0; i < numflags; i++) {
    5152           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5153           0 :                         continue;
    5154             :                 }
    5155           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5156           0 :                 if (desc[i].mask != desc[i].val) {
    5157           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5158           0 :                                 desc[i].mask, desc[i].val);
    5159             :                 }
    5160           0 :                 fprintf(ctx->fp, "\n");
    5161           0 :                 covered |= desc[i].mask;
    5162             :         }
    5163           0 :         if ((flags & ~covered) != 0) {
    5164           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5165             :         }
    5166           0 : }
    5167             : 
    5168             : static void
    5169           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5170             : {
                               /*
                                * Print a flags descriptor to ctx->fp: the raw invalid_flags,
                                * data_ro_flags and md_ro_flags words, each followed by its
                                * decoded form from bs_dump_print_type_bits().
                                *
                                * ADD_FLAG(f) builds a table row whose mask and value are both f;
                                * ADD_MASK_VAL(m, v) builds a row that matches value v under mask
                                * m. Both use the stringified name as the label and are
                                * undefined again once the tables are declared.
                                */
    5171             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5172             : #define ADD_FLAG(f) { f, f, #f }
    5173             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5174             :         static struct type_flag_desc invalid[] = {
    5175             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5176             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5177             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5178             :         };
    5179             :         static struct type_flag_desc data_ro[] = {
    5180             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5181             :         };
    5182             :         static struct type_flag_desc md_ro[] = {
    5183             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5184             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5185             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5186             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5187             :         };
    5188             : #undef ADD_FLAG
    5189             : #undef ADD_MASK_VAL
    5190             : 
    5191           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5192           0 :         fprintf(ctx->fp, "Flags:\n");
    5193           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5194           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5195             :                                 SPDK_COUNTOF(invalid));
    5196           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5197           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5198             :                                 SPDK_COUNTOF(data_ro));
    5199           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5200           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5201             :                                 SPDK_COUNTOF(md_ro));
    5202           0 : }
    5203             : 
    5204             : static void
    5205           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5206             : {
                               /*
                                * Print an extent table descriptor to ctx->fp. The number of
                                * entries is derived from the descriptor length minus the
                                * num_clusters field, divided by the entry size. Entries with a
                                * page_idx of 0 are skipped; every other entry is printed with
                                * its page index, num_pages and the LBA of that metadata page.
                                *
                                * NOTE(review): a descriptor length smaller than
                                * sizeof(num_clusters) would presumably make the subtraction
                                * wrap and yield a huge entry count - confirm the length is
                                * validated before this is reached. The loop index is 32-bit
                                * while the count is 64-bit.
                                */
    5207             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5208             :         uint64_t num_extent_pages;
    5209             :         uint32_t et_idx;
    5210             : 
    5211           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5212           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5213             :                            sizeof(et_desc->extent_page[0]);
    5214             : 
    5215           0 :         fprintf(ctx->fp, "Extent table:\n");
    5216           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5217           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5218             :                         /* Zeroes represent unallocated extent pages. */
    5219           0 :                         continue;
    5220             :                 }
    5221           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5222             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5223             :                         et_desc->extent_page[et_idx].num_pages,
    5224             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5225             :         }
    5226           0 : }
    5227             : 
    5228             : static void
    5229           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5230             : {
    5231           0 :         uint32_t page_idx = ctx->cur_page;
    5232           0 :         struct spdk_blob_md_page *page = ctx->page;
    5233             :         struct spdk_blob_md_descriptor *desc;
    5234           0 :         size_t cur_desc = 0;
    5235             :         uint32_t crc;
    5236             : 
    5237           0 :         fprintf(ctx->fp, "=========\n");
    5238           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5239           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5240           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5241           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5242           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5243           0 :                 fprintf(ctx->fp, "Next: None\n");
    5244             :         } else {
    5245           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5246             :         }
    5247           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5248           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5249           0 :                 fprintf(ctx->fp, " md");
    5250             :         }
    5251           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5252           0 :                 fprintf(ctx->fp, " blob");
    5253             :         }
    5254           0 :         fprintf(ctx->fp, "\n");
    5255             : 
    5256           0 :         crc = blob_md_page_calc_crc(page);
    5257           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5258             : 
    5259           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5260           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5261           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5262           0 :                         if (desc->length == 0) {
    5263             :                                 /* If padding and length are 0, this terminates the page */
    5264           0 :                                 break;
    5265             :                         }
    5266           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5267             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5268             :                         unsigned int                            i;
    5269             : 
    5270           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5271             : 
    5272           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5273           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5274           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5275             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5276             :                                 } else {
    5277           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5278             :                                 }
    5279           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5280           0 :                                 fprintf(ctx->fp, "\n");
    5281             :                         }
    5282           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5283             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5284             :                         unsigned int                                    i;
    5285             : 
    5286           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5287             : 
    5288           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5289           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5290           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5291             :                                                 desc_extent->cluster_idx[i]);
    5292             :                                 } else {
    5293           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5294             :                                 }
    5295           0 :                                 fprintf(ctx->fp, "\n");
    5296             :                         }
    5297           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5298           0 :                         bs_dump_print_xattr(ctx, desc);
    5299           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5300           0 :                         bs_dump_print_xattr(ctx, desc);
    5301           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5302           0 :                         bs_dump_print_type_flags(ctx, desc);
    5303           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5304           0 :                         bs_dump_print_extent_table(ctx, desc);
    5305             :                 } else {
    5306             :                         /* Error */
    5307           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5308             :                 }
    5309             :                 /* Advance to the next descriptor */
    5310           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5311           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5312           0 :                         break;
    5313             :                 }
    5314           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5315             :         }
    5316           0 : }
    5317             : 
                       /*
                        * Completion callback for the metadata page read issued by
                        * bs_dump_read_md_page().
                        *
                        * Prints the page that was just read when its id is non-zero, advances
                        * ctx->cur_page, and then either reads the next page or, once cur_page
                        * reaches super->md_len, frees the page buffer and finishes the dump.
                        */
    5318             : static void
    5319           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5320             : {
    5321           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5322             : 
    5323           0 :         if (bserrno != 0) {
                                       /*
                                        * NOTE(review): ctx->page is not freed on this path, unlike the
                                        * final-page branch below - confirm bs_dump_finish() releases it.
                                        */
    5324           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5325           0 :                 return;
    5326             :         }
    5327             : 
                               /* Pages whose id field is zero are skipped without printing. */
    5328           0 :         if (ctx->page->id != 0) {
    5329           0 :                 bs_dump_print_md_page(ctx);
    5330             :         }
    5331             : 
    5332           0 :         ctx->cur_page++;
    5333             : 
    5334           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5335           0 :                 bs_dump_read_md_page(seq, ctx);
    5336             :         } else {
                                       /* All md_len pages have been visited; the dump completes with status 0. */
    5337           0 :                 spdk_free(ctx->page);
    5338           0 :                 bs_dump_finish(seq, ctx, 0);
    5339             :         }
    5340             : }
    5341             : 
                       /*
                        * Read metadata page number ctx->cur_page (counted from super->md_start)
                        * into ctx->page. bs_dump_read_md_page_cpl() runs when the read completes.
                        *
                        * cb_arg is the struct spdk_bs_load_ctx of the dump. The caller must ensure
                        * cur_page < super->md_len; this is only checked by an assert.
                        */
    5342             : static void
    5343           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5344             : {
    5345           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5346             :         uint64_t lba;
    5347             : 
    5348           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5349           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
                               /* The read length is SPDK_BS_PAGE_SIZE bytes converted to an LBA count. */
    5350           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5351           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5352             :                              bs_dump_read_md_page_cpl, ctx);
    5353           0 : }
    5354             : 
                       /*
                        * Completion callback for the super block read started by spdk_bs_dump().
                        *
                        * Prints the super block fields to ctx->fp. If the signature does not match
                        * SPDK_BS_SUPER_BLOCK_SIG the dump stops after reporting the mismatch.
                        * Otherwise a one-page DMA buffer is allocated into ctx->page, the super
                        * block is parsed with bs_parse_super() and processing continues in
                        * bs_load_read_used_pages().
                        *
                        * NOTE(review): bserrno is not checked before ctx->super is printed and
                        * parsed, so a failed read is interpreted as if it had succeeded - confirm
                        * whether an early bs_dump_finish(seq, ctx, bserrno) is wanted here.
                        */
    5355             : static void
    5356           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5357             : {
    5358           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5359             :         int rc;
    5360             : 
                               /* "%.8s" bounds the print to 8 bytes of the signature field. */
    5361           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5362           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5363             :                    sizeof(ctx->super->signature)) != 0) {
    5364           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
                                       /* The incoming bserrno is forwarded unchanged (0 if the read succeeded). */
    5365           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5366           0 :                 return;
    5367             :         } else {
    5368           0 :                 fprintf(ctx->fp, "(OK)\n");
    5369             :         }
    5370           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
                               /* The stored CRC is compared against one recomputed over the super block. */
    5371           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5372           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5373           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5374           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5375           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5376           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5377           0 :                 fprintf(ctx->fp, "(None)\n");
    5378             :         } else {
    5379           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5380             :         }
    5381           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5382           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5383           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5384           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5385           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5386           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5387           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5388           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5389           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5390             : 
                               /* Buffer reused for every metadata page read by bs_dump_read_md_page(). */
    5391           0 :         ctx->cur_page = 0;
    5392           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5393             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5394           0 :         if (!ctx->page) {
    5395           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5396           0 :                 return;
    5397             :         }
    5398             : 
    5399           0 :         rc = bs_parse_super(ctx);
    5400           0 :         if (rc < 0) {
                                       /*
                                        * NOTE(review): ctx->page was allocated just above and is not freed
                                        * here - confirm bs_load_ctx_fail() releases it.
                                        */
    5401           0 :                 bs_load_ctx_fail(ctx, rc);
    5402           0 :                 return;
    5403             :         }
    5404             : 
    5405           0 :         bs_load_read_used_pages(ctx);
    5406             : }
    5407             : 
                       /*
                        * Dump a description of the blobstore found on dev to fp.
                        *
                        * dev            - block device to read from; destroyed here if bs_alloc() fails.
                        * fp             - stream that receives the dump output.
                        * print_xattr_fn - stored in the load context as ctx->print_xattr_fn.
                        * cb_fn/cb_arg   - completion callback; called directly with a negative errno
                        *                  on the synchronous failure paths below, otherwise carried
                        *                  by the sequence completion.
                        *
                        * A blobstore and load context are allocated with default options, the
                        * context is flagged as dumping, and the super block read is started;
                        * bs_dump_super_cpl() continues from there.
                        */
    5408             : void
    5409           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5410             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5411             : {
    5412           0 :         struct spdk_blob_store  *bs;
    5413           0 :         struct spdk_bs_cpl      cpl;
    5414           0 :         struct spdk_bs_load_ctx *ctx;
    5415           0 :         struct spdk_bs_opts     opts = {};
    5416             :         int err;
    5417             : 
    5418           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5419             : 
    5420           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5421             : 
    5422           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5423           0 :         if (err) {
    5424           0 :                 dev->destroy(dev);
    5425           0 :                 cb_fn(cb_arg, err);
    5426           0 :                 return;
    5427             :         }
    5428             : 
    5429           0 :         ctx->dumping = true;
    5430           0 :         ctx->fp = fp;
    5431           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5432             : 
    5433           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5434           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5435           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5436             : 
    5437           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5438           0 :         if (!ctx->seq) {
                                       /*
                                        * NOTE(review): spdk_bs_init() frees ctx->used_clusters on one of
                                        * its error paths after bs_alloc(); nothing frees it here - confirm
                                        * whether it is allocated by bs_alloc() and leaks on this path.
                                        */
    5439           0 :                 spdk_free(ctx->super);
    5440           0 :                 free(ctx);
    5441           0 :                 bs_free(bs);
    5442           0 :                 cb_fn(cb_arg, -ENOMEM);
    5443           0 :                 return;
    5444             :         }
    5445             : 
    5446             :         /* Read the super block */
    5447           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5448           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5449             :                              bs_dump_super_cpl, ctx);
    5450             : }
    5451             : 
    5452             : /* END spdk_bs_dump */
    5453             : 
    5454             : /* START spdk_bs_init */
    5455             : 
                       /*
                        * Completion callback for the super block write issued by bs_init_trim_cpl().
                        *
                        * Builds bs->used_clusters as a bit pool from ctx->used_clusters, releases
                        * the super block buffer and the load context, and finishes the sequence
                        * with the status of the write.
                        *
                        * NOTE(review): the bit pool is created regardless of bserrno and its
                        * return value is not checked - confirm a NULL pool is handled by callers.
                        */
    5456             : static void
    5457         472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5458             : {
    5459         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5460             : 
    5461         472 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5462         472 :         spdk_free(ctx->super);
    5463         472 :         free(ctx);
    5464             : 
    5465         472 :         bs_sequence_finish(seq, bserrno);
    5466         472 : }
    5467             : 
                       /*
                        * Completion callback for the clearing batch queued by spdk_bs_init().
                        * Writes the prepared super block (ctx->super) to page 0 of the device;
                        * bs_init_persist_super_cpl() runs when that write completes.
                        *
                        * NOTE(review): bserrno is ignored, so the super block is written even if
                        * the clearing batch reported an error - confirm this is intended.
                        */
    5468             : static void
    5469         472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5470             : {
    5471         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5472             : 
    5473             :         /* Write super block */
    5474         472 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5475         472 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5476             :                               bs_init_persist_super_cpl, ctx);
    5477         472 : }
    5478             : 
                       /*
                        * Initialize a new blobstore on dev.
                        *
                        * dev          - backing block device; SPDK_BS_PAGE_SIZE must be a multiple of
                        *                its block length.
                        * o            - optional options; NULL selects the defaults set by
                        *                spdk_bs_opts_init().
                        * cb_fn/cb_arg - completion callback. On the synchronous failure paths below
                        *                it is called as cb_fn(cb_arg, NULL, <negative errno>).
                        *
                        * Steps: validate the device and options, allocate the blobstore and load
                        * context, size the metadata bit arrays, fill in the super block and the
                        * on-disk metadata layout, mark the clusters occupied by metadata as used,
                        * then queue a batch that zeroes the metadata region and clears the data
                        * region according to opts.clear_method. bs_init_trim_cpl() writes the
                        * super block once that batch completes.
                        */
    5479             : void
    5480         488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5481             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5482             : {
    5483         488 :         struct spdk_bs_load_ctx *ctx;
    5484         488 :         struct spdk_blob_store  *bs;
    5485         488 :         struct spdk_bs_cpl      cpl;
    5486             :         spdk_bs_sequence_t      *seq;
    5487             :         spdk_bs_batch_t         *batch;
    5488             :         uint64_t                num_md_lba;
    5489             :         uint64_t                num_md_pages;
    5490             :         uint64_t                num_md_clusters;
    5491             :         uint64_t                max_used_cluster_mask_len;
    5492             :         uint32_t                i;
    5493         488 :         struct spdk_bs_opts     opts = {};
    5494             :         int                     rc;
    5495             :         uint64_t                lba, lba_count;
    5496             : 
    5497         488 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5498             : 
    5499         488 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5500           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5501             :                             dev->blocklen);
    5502           4 :                 dev->destroy(dev);
    5503           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5504           4 :                 return;
    5505             :         }
    5506             : 
    5507         484 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5508         484 :         if (o) {
    5509         182 :                 if (bs_opts_copy(o, &opts)) {
                                               /*
                                                * NOTE(review): this path returns without destroying dev or
                                                * invoking cb_fn, unlike every other early error path in
                                                * this function - the caller never sees a completion.
                                                */
    5510           0 :                         return;
    5511             :                 }
    5512             :         }
    5513             : 
    5514         484 :         if (bs_opts_verify(&opts) != 0) {
    5515           4 :                 dev->destroy(dev);
    5516           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5517           4 :                 return;
    5518             :         }
    5519             : 
    5520         480 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5521         480 :         if (rc) {
    5522           4 :                 dev->destroy(dev);
    5523           4 :                 cb_fn(cb_arg, NULL, rc);
    5524           4 :                 return;
    5525             :         }
    5526             : 
    5527         476 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5528             :                 /* By default, allocate 1 page per cluster.
    5529             :                  * Technically, this over-allocates metadata
    5530             :                  * because more metadata will reduce the number
    5531             :                  * of usable clusters. This can be addressed with
    5532             :                  * more complex math in the future.
    5533             :                  */
    5534         468 :                 bs->md_len = bs->total_clusters;
    5535             :         } else {
    5536           8 :                 bs->md_len = opts.num_md_pages;
    5537             :         }
                               /* The three bit arrays below are each sized to one bit per metadata page. */
    5538         476 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5539         476 :         if (rc < 0) {
                                       /*
                                        * NOTE(review): the "metadata cannot use more clusters" path further
                                        * down also frees ctx->used_clusters; this path, the two resize
                                        * failures after it and the sequence-start failure do not - confirm
                                        * whether bs_alloc() allocated it and it leaks here.
                                        */
    5540           0 :                 spdk_free(ctx->super);
    5541           0 :                 free(ctx);
    5542           0 :                 bs_free(bs);
    5543           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5544           0 :                 return;
    5545             :         }
    5546             : 
    5547         476 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5548         476 :         if (rc < 0) {
    5549           0 :                 spdk_free(ctx->super);
    5550           0 :                 free(ctx);
    5551           0 :                 bs_free(bs);
    5552           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5553           0 :                 return;
    5554             :         }
    5555             : 
    5556         476 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5557         476 :         if (rc < 0) {
    5558           0 :                 spdk_free(ctx->super);
    5559           0 :                 free(ctx);
    5560           0 :                 bs_free(bs);
    5561           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5562           0 :                 return;
    5563             :         }
    5564             : 
                               /* Fill in the fixed super block fields; the layout fields follow below. */
    5565         476 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5566             :                sizeof(ctx->super->signature));
    5567         476 :         ctx->super->version = SPDK_BS_VERSION;
    5568         476 :         ctx->super->length = sizeof(*ctx->super);
    5569         476 :         ctx->super->super_blob = bs->super_blob;
    5570         476 :         ctx->super->clean = 0;
    5571         476 :         ctx->super->cluster_size = bs->cluster_sz;
    5572         476 :         ctx->super->io_unit_size = bs->io_unit_size;
    5573         476 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5574             : 
    5575             :         /* Calculate how many pages the metadata consumes at the front
    5576             :          * of the disk.
    5577             :          */
    5578             : 
    5579             :         /* The super block uses 1 page */
    5580         476 :         num_md_pages = 1;
    5581             : 
    5582             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5583             :          * up to the nearest page, plus a header.
    5584             :          */
    5585         476 :         ctx->super->used_page_mask_start = num_md_pages;
    5586         476 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5587         476 :                                          spdk_divide_round_up(bs->md_len, 8),
    5588             :                                          SPDK_BS_PAGE_SIZE);
    5589         476 :         num_md_pages += ctx->super->used_page_mask_len;
    5590             : 
    5591             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5592             :          * up to the nearest page, plus a header.
    5593             :          */
    5594         476 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5595         476 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5596         476 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5597             :                                             SPDK_BS_PAGE_SIZE);
    5598             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5599             :          * Here we calculate the max clusters we can support according to the
    5600             :          * num_md_pages (bs->md_len).
    5601             :          */
    5602         476 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5603         476 :                                     spdk_divide_round_up(bs->md_len, 8),
    5604             :                                     SPDK_BS_PAGE_SIZE);
    5605         476 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5606             :                                              ctx->super->used_cluster_mask_len);
                               /* Space is reserved for the larger of the two lengths, not just the current one. */
    5607         476 :         num_md_pages += max_used_cluster_mask_len;
    5608             : 
    5609             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5610             :          * up to the nearest page, plus a header.
    5611             :          */
    5612         476 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5613         476 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5614         476 :                                            spdk_divide_round_up(bs->md_len, 8),
    5615             :                                            SPDK_BS_PAGE_SIZE);
    5616         476 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5617             : 
    5618             :         /* The metadata region size was chosen above */
    5619         476 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5620         476 :         ctx->super->md_len = bs->md_len;
    5621         476 :         num_md_pages += bs->md_len;
    5622             : 
                               /* num_md_pages now covers super block, the three masks and the metadata pages. */
    5623         476 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5624             : 
    5625         476 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5626             : 
                               /* The CRC is computed last, after all other super block fields are set. */
    5627         476 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5628             : 
    5629         476 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5630         476 :         if (num_md_clusters > bs->total_clusters) {
    5631           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5632             :                             "please decrease number of pages reserved for metadata "
    5633             :                             "or increase cluster size.\n");
    5634           4 :                 spdk_free(ctx->super);
    5635           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5636           4 :                 free(ctx);
    5637           4 :                 bs_free(bs);
    5638           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5639           4 :                 return;
    5640             :         }
    5641             :         /* Claim all of the clusters used by the metadata */
    5642       75700 :         for (i = 0; i < num_md_clusters; i++) {
    5643       75228 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5644             :         }
    5645             : 
                               /* What remains free after the metadata claim is the data capacity. */
    5646         472 :         bs->num_free_clusters -= num_md_clusters;
    5647         472 :         bs->total_data_clusters = bs->num_free_clusters;
    5648             : 
    5649         472 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5650         472 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5651         472 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5652         472 :         cpl.u.bs_handle.bs = bs;
    5653             : 
    5654         472 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5655         472 :         if (!seq) {
    5656           0 :                 spdk_free(ctx->super);
    5657           0 :                 free(ctx);
    5658           0 :                 bs_free(bs);
    5659           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5660           0 :                 return;
    5661             :         }
    5662             : 
    5663         472 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5664             : 
    5665             :         /* Clear metadata space */
    5666         472 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5667             : 
                               /* Everything from the end of the metadata to the end of the device. */
    5668         472 :         lba = num_md_lba;
    5669         472 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5670         472 :         switch (opts.clear_method) {
    5671         456 :         case BS_CLEAR_WITH_UNMAP:
    5672             :                 /* Trim data clusters */
    5673         456 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5674         456 :                 break;
    5675           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5676             :                 /* Write_zeroes to data clusters */
    5677           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5678           0 :                 break;
    5679          16 :         case BS_CLEAR_WITH_NONE:
    5680             :         default:
    5681          16 :                 break;
    5682             :         }
    5683             : 
    5684         472 :         bs_batch_close(batch);
    5685             : }
    5686             : 
    5687             : /* END spdk_bs_init */
    5688             : 
    5689             : /* START spdk_bs_destroy */
    5690             : 
                       /*
                        * Completion callback for the write-zeroes of the super block issued by
                        * spdk_bs_destroy().
                        *
                        * Moves the user completion and status from the sequence into
                        * bs->unload_cpl / bs->unload_err, finishes the sequence with its own
                        * completion type set to SPDK_BS_CPL_TYPE_NONE, then frees the blobstore
                        * and the context.
                        */
    5691             : static void
    5692           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5693             : {
    5694           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5695           4 :         struct spdk_blob_store *bs = ctx->bs;
    5696             : 
    5697             :         /*
    5698             :          * We need to defer calling bs_call_cpl() until after
    5699             :          * dev destruction, so tuck these away for later use.
    5700             :          */
    5701           4 :         bs->unload_err = bserrno;
    5702           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5703           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5704             : 
    5705           4 :         bs_sequence_finish(seq, bserrno);
    5706             : 
                               /* presumably bs_free() is where the stored completion is eventually delivered - confirm */
    5707           4 :         bs_free(bs);
    5708           4 :         free(ctx);
    5709           4 : }
    5710             : 
                       /*
                        * Destroy a blobstore by overwriting its super block with zeroes.
                        *
                        * bs           - blobstore to destroy; must have no open blobs.
                        * cb_fn/cb_arg - completion callback. Called directly with -EBUSY when blobs
                        *                are still open, or -ENOMEM when the context or the sequence
                        *                cannot be allocated; otherwise carried by the sequence
                        *                completion and handled in bs_destroy_trim_cpl().
                        */
    5711             : void
    5712           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5713             :                 void *cb_arg)
    5714             : {
    5715           4 :         struct spdk_bs_cpl      cpl;
    5716             :         spdk_bs_sequence_t      *seq;
    5717             :         struct spdk_bs_load_ctx *ctx;
    5718             : 
    5719           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5720             : 
    5721           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5722           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5723           0 :                 cb_fn(cb_arg, -EBUSY);
    5724           0 :                 return;
    5725             :         }
    5726             : 
    5727           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5728           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5729           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5730             : 
                               /* Only ctx->bs is set here; the context just carries bs to the completion callback. */
    5731           4 :         ctx = calloc(1, sizeof(*ctx));
    5732           4 :         if (!ctx) {
    5733           0 :                 cb_fn(cb_arg, -ENOMEM);
    5734           0 :                 return;
    5735             :         }
    5736             : 
    5737           4 :         ctx->bs = bs;
    5738             : 
    5739           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5740           4 :         if (!seq) {
    5741           0 :                 free(ctx);
    5742           0 :                 cb_fn(cb_arg, -ENOMEM);
    5743           0 :                 return;
    5744             :         }
    5745             : 
    5746             :         /* Write zeroes to the super block */
    5747           4 :         bs_sequence_write_zeroes_dev(seq,
    5748             :                                      bs_page_to_lba(bs, 0),
    5749             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5750             :                                      bs_destroy_trim_cpl, ctx);
    5751             : }
    5752             : 
    5753             : /* END spdk_bs_destroy */
    5754             : 
    5755             : /* START spdk_bs_unload */
    5756             : 
                       /*
                        * Common exit point of the unload callbacks in this section.
                        *
                        * Frees the super block buffer, moves the user completion and bserrno from
                        * the sequence into ctx->bs (unload_cpl / unload_err), finishes the sequence
                        * with its own completion type set to SPDK_BS_CPL_TYPE_NONE, then frees the
                        * blobstore and the context. ctx must not be used after this returns.
                        */
    5757             : static void
    5758         654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5759             : {
    5760         654 :         spdk_bs_sequence_t *seq = ctx->seq;
    5761             : 
    5762         654 :         spdk_free(ctx->super);
    5763             : 
    5764             :         /*
    5765             :          * We need to defer calling bs_call_cpl() until after
    5766             :          * dev destruction, so tuck these away for later use.
    5767             :          */
    5768         654 :         ctx->bs->unload_err = bserrno;
    5769         654 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5770         654 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5771             : 
    5772         654 :         bs_sequence_finish(seq, bserrno);
    5773             : 
    5774         654 :         bs_free(ctx->bs);
    5775         654 :         free(ctx);
    5776         654 : }
    5777             : 
                       /*
                        * Completion callback for the super block write started by
                        * bs_unload_write_used_clusters_cpl(); ends the unload with the write status.
                        */
    5778             : static void
    5779         654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5780             : {
    5781         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5782             : 
    5783         654 :         bs_unload_finish(ctx, bserrno);
    5784         654 : }
    5785             : 
                       /*
                        * Completion callback for bs_write_used_clusters() during unload.
                        *
                        * Frees the mask buffer, aborts the unload on error, otherwise marks the
                        * super block clean and writes it out; bs_unload_write_super_cpl() runs
                        * when that write completes.
                        */
    5786             : static void
    5787         654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5788             : {
    5789         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5790             : 
                               /*
                                * NOTE(review): the sibling callbacks reset ctx->mask to NULL after
                                * freeing it; this one leaves the stale pointer - confirm nothing
                                * reads ctx->mask afterwards.
                                */
    5791         654 :         spdk_free(ctx->mask);
    5792             : 
    5793         654 :         if (bserrno != 0) {
    5794           0 :                 bs_unload_finish(ctx, bserrno);
    5795           0 :                 return;
    5796             :         }
    5797             : 
                               /* The clean flag is set only after all three masks were written without error. */
    5798         654 :         ctx->super->clean = 1;
    5799             : 
    5800         654 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5801             : }
    5802             : 
                       /*
                        * Completion callback for bs_write_used_blobids() during unload.
                        *
                        * Frees and clears ctx->mask, aborts the unload on error, otherwise
                        * continues with bs_write_used_clusters().
                        */
    5803             : static void
    5804         654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5805             : {
    5806         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5807             : 
                               /* presumably ctx->mask was allocated by the write helper that just completed - confirm */
    5808         654 :         spdk_free(ctx->mask);
    5809         654 :         ctx->mask = NULL;
    5810             : 
    5811         654 :         if (bserrno != 0) {
    5812           0 :                 bs_unload_finish(ctx, bserrno);
    5813           0 :                 return;
    5814             :         }
    5815             : 
    5816         654 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5817             : }
    5818             : 
                       /*
                        * Completion callback for bs_write_used_md() during unload.
                        *
                        * Frees and clears ctx->mask, aborts the unload on error, otherwise
                        * continues with bs_write_used_blobids().
                        */
    5819             : static void
    5820         654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5821             : {
    5822         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5823             : 
                               /* presumably ctx->mask was allocated by the write helper that just completed - confirm */
    5824         654 :         spdk_free(ctx->mask);
    5825         654 :         ctx->mask = NULL;
    5826             : 
    5827         654 :         if (bserrno != 0) {
    5828           0 :                 bs_unload_finish(ctx, bserrno);
    5829           0 :                 return;
    5830             :         }
    5831             : 
    5832         654 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5833             : }
    5834             : 
                       /*
                        * Completion callback run once the super block has been read into
                        * ctx->super during unload.
                        *
                        * On a read error, or when bs_super_validate() rejects the super block for
                        * this blobstore, the unload ends with that error. Otherwise the chain of
                        * writes begins with bs_write_used_md(), which completes in
                        * bs_unload_write_used_pages_cpl().
                        */
    5835             : static void
    5836         654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5837             : {
    5838         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5839             :         int rc;
    5840             : 
    5841         654 :         if (bserrno != 0) {
    5842           0 :                 bs_unload_finish(ctx, bserrno);
    5843           0 :                 return;
    5844             :         }
    5845             : 
    5846         654 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5847         654 :         if (rc != 0) {
    5848           0 :                 bs_unload_finish(ctx, rc);
    5849           0 :                 return;
    5850             :         }
    5851             : 
                               /* cb_arg is the same pointer as ctx; it is passed through unchanged. */
    5852         654 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5853             : }
    5854             : 
                       /*
                        * Start unloading a blobstore.
                        *
                        * bs     - blobstore to unload.
                        * cb_fn  - completion callback. It is invoked directly from this function with
                        *          -EBUSY (an unload is already deferred, or blobs are still open) or
                        *          -ENOMEM (context, DMA buffer or sequence allocation failed);
                        *          otherwise it is attached to the sequence started here.
                        * cb_arg - opaque argument handed back to cb_fn.
                        *
                        * While bs->esnap_channels_unloading is non-zero the request is only recorded
                        * in bs->esnap_unload_cb_fn / esnap_unload_cb_arg and the function returns
                        * without calling cb_fn. On the normal path the super block is re-read and
                        * the completion chain (bs_unload_read_super_cpl() and the callbacks it
                        * schedules) performs the remaining steps.
                        */
    5855             : void
    5856         662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5857             : {
    5858         662 :         struct spdk_bs_cpl      cpl;
    5859             :         struct spdk_bs_load_ctx *ctx;
    5860             : 
    5861         662 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5862             : 
    5863             :         /*
    5864             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5865             :          * unload is deferred until after the channel destruction completes.
    5866             :          */
    5867         662 :         if (bs->esnap_channels_unloading != 0) {
    5868           4 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5869           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5870           0 :                         cb_fn(cb_arg, -EBUSY);
    5871           0 :                         return;
    5872             :                 }
    5873           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5874             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5875           4 :                 bs->esnap_unload_cb_fn = cb_fn;
    5876           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5877           4 :                 return;
    5878             :         }
                               /*
                                * A stored callback at this point means a deferred unload is being
                                * resumed. The asserts check that the resuming call supplies the same
                                * callback and argument that were recorded at deferral time.
                                */
    5879         658 :         if (bs->esnap_unload_cb_fn != NULL) {
    5880           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5881           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5882           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5883           4 :                 bs->esnap_unload_cb_fn = NULL;
    5884           4 :                 bs->esnap_unload_cb_arg = NULL;
    5885             :         }
    5886             : 
    5887         658 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5888           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5889           4 :                 cb_fn(cb_arg, -EBUSY);
    5890           4 :                 return;
    5891             :         }
    5892             : 
    5893         654 :         ctx = calloc(1, sizeof(*ctx));
    5894         654 :         if (!ctx) {
    5895           0 :                 cb_fn(cb_arg, -ENOMEM);
    5896           0 :                 return;
    5897             :         }
    5898             : 
    5899         654 :         ctx->bs = bs;
    5900             : 
                               /* Buffer that receives the super block read below. */
    5901         654 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5902             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5903         654 :         if (!ctx->super) {
    5904           0 :                 free(ctx);
    5905           0 :                 cb_fn(cb_arg, -ENOMEM);
    5906           0 :                 return;
    5907             :         }
    5908             : 
    5909         654 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5910         654 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5911         654 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5912             : 
    5913         654 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5914         654 :         if (!ctx->seq) {
    5915           0 :                 spdk_free(ctx->super);
    5916           0 :                 free(ctx);
    5917           0 :                 cb_fn(cb_arg, -ENOMEM);
    5918           0 :                 return;
    5919             :         }
    5920             : 
    5921             :         /* Read super block */
    5922         654 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5923         654 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5924             :                              bs_unload_read_super_cpl, ctx);
    5925             : }
    5926             : 
    5927             : /* END spdk_bs_unload */
    5928             : 
    5929             : /* START spdk_bs_set_super */
    5930             : 
                       /*
                        * Context carried through the spdk_bs_set_super() read/validate/write chain.
                        * It is allocated in spdk_bs_set_super() and freed by the completion
                        * callbacks bs_set_super_read_cpl() / bs_set_super_write_cpl().
                        */
    5931             : struct spdk_bs_set_super_ctx {
                               /* Blobstore whose super block is being updated. */
    5932             :         struct spdk_blob_store          *bs;
                               /* Buffer obtained from spdk_zmalloc(); holds the super block that is read. */
    5933             :         struct spdk_bs_super_block      *super;
    5934             : };
    5935             : 
                       /*
                        * Completion callback for the bs_write_super() call made by
                        * bs_set_super_read_cpl(). Logs a write failure, releases the super block
                        * buffer, finishes the sequence with bserrno and finally frees the context.
                        */
    5936             : static void
    5937           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5938             : {
    5939           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5940             : 
    5941           8 :         if (bserrno != 0) {
    5942           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5943             :         }
    5944             : 
    5945           8 :         spdk_free(ctx->super);
    5946             : 
    5947           8 :         bs_sequence_finish(seq, bserrno);
    5948             : 
    5949           8 :         free(ctx);
    5950           8 : }
    5951             : 
                       /*
                        * Completion callback for the super block read issued by spdk_bs_set_super().
                        * On a read error or a super block rejected by bs_super_validate(), the
                        * buffer and context are released and the sequence is finished with that
                        * error. Otherwise the super block is written back through bs_write_super(),
                        * with bs_set_super_write_cpl() as the next step.
                        */
    5952             : static void
    5953           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5954             : {
    5955           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5956             :         int rc;
    5957             : 
    5958           8 :         if (bserrno != 0) {
    5959           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5960           0 :                 spdk_free(ctx->super);
    5961           0 :                 bs_sequence_finish(seq, bserrno);
    5962           0 :                 free(ctx);
    5963           0 :                 return;
    5964             :         }
    5965             : 
    5966           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5967           8 :         if (rc != 0) {
    5968           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5969           0 :                 spdk_free(ctx->super);
    5970           0 :                 bs_sequence_finish(seq, rc);
    5971           0 :                 free(ctx);
    5972           0 :                 return;
    5973             :         }
    5974             : 
    5975           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    5976             : }
    5977             : 
                       /*
                        * Record blobid as the blobstore's super blob and rewrite the super block.
                        *
                        * bs     - blobstore to update.
                        * blobid - id stored into bs->super_blob.
                        * cb_fn  - completion callback; called directly with -ENOMEM when the context,
                        *          the DMA buffer or the sequence cannot be allocated, otherwise
                        *          attached to the sequence started here.
                        * cb_arg - opaque argument handed back to cb_fn.
                        *
                        * The on-disk super block is read first; bs_set_super_read_cpl() validates it
                        * and then writes it back.
                        *
                        * NOTE(review): bs->super_blob is assigned before the read/write sequence
                        * completes, and none of the completion callbacks visible here restores the
                        * previous value if the sequence fails - confirm this is intended.
                        */
    5978             : void
    5979           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5980             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5981             : {
    5982           8 :         struct spdk_bs_cpl              cpl;
    5983             :         spdk_bs_sequence_t              *seq;
    5984             :         struct spdk_bs_set_super_ctx    *ctx;
    5985             : 
    5986           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    5987             : 
    5988           8 :         ctx = calloc(1, sizeof(*ctx));
    5989           8 :         if (!ctx) {
    5990           0 :                 cb_fn(cb_arg, -ENOMEM);
    5991           0 :                 return;
    5992             :         }
    5993             : 
    5994           8 :         ctx->bs = bs;
    5995             : 
    5996           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5997             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5998           8 :         if (!ctx->super) {
    5999           0 :                 free(ctx);
    6000           0 :                 cb_fn(cb_arg, -ENOMEM);
    6001           0 :                 return;
    6002             :         }
    6003             : 
    6004           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    6005           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    6006           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    6007             : 
    6008           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6009           8 :         if (!seq) {
    6010           0 :                 spdk_free(ctx->super);
    6011           0 :                 free(ctx);
    6012           0 :                 cb_fn(cb_arg, -ENOMEM);
    6013           0 :                 return;
    6014             :         }
    6015             : 
    6016           8 :         bs->super_blob = blobid;
    6017             : 
    6018             :         /* Read super block */
    6019           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6020           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6021             :                              bs_set_super_read_cpl, ctx);
    6022             : }
    6023             : 
    6024             : /* END spdk_bs_set_super */
    6025             : 
                       /*
                        * Report the blobstore's super blob id through cb_fn. The callback is invoked
                        * before this function returns: with (SPDK_BLOBID_INVALID, -ENOENT) when
                        * bs->super_blob is SPDK_BLOBID_INVALID, otherwise with (bs->super_blob, 0).
                        */
    6026             : void
    6027          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6028             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6029             : {
    6030          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6031           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6032             :         } else {
    6033           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6034             :         }
    6035          12 : }
    6036             : 
                       /* Return the blobstore's cluster size as stored in bs->cluster_sz. */
    6037             : uint64_t
    6038         132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6039             : {
    6040         132 :         return bs->cluster_sz;
    6041             : }
    6042             : 
                       /*
                        * Return the blobstore page size. The value is the compile-time constant
                        * SPDK_BS_PAGE_SIZE; the bs argument is not consulted.
                        */
    6043             : uint64_t
    6044          68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6045             : {
    6046          68 :         return SPDK_BS_PAGE_SIZE;
    6047             : }
    6048             : 
                       /* Return the blobstore's I/O unit size as stored in bs->io_unit_size. */
    6049             : uint64_t
    6050         742 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6051             : {
    6052         742 :         return bs->io_unit_size;
    6053             : }
    6054             : 
                       /* Return the current value of bs->num_free_clusters. */
    6055             : uint64_t
    6056         560 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6057             : {
    6058         560 :         return bs->num_free_clusters;
    6059             : }
    6060             : 
                       /* Return the current value of bs->total_data_clusters. */
    6061             : uint64_t
    6062          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6063             : {
    6064          92 :         return bs->total_data_clusters;
    6065             : }
    6066             : 
                       /*
                        * Obtain an I/O channel for the blobstore with spdk_get_io_channel() and keep
                        * it in bs->md_channel.
                        *
                        * Returns 0 on success, or -1 (after logging) when no channel was returned.
                        */
    6067             : static int
    6068         780 : bs_register_md_thread(struct spdk_blob_store *bs)
    6069             : {
    6070         780 :         bs->md_channel = spdk_get_io_channel(bs);
    6071         780 :         if (!bs->md_channel) {
    6072           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6073           0 :                 return -1;
    6074             :         }
    6075             : 
    6076         780 :         return 0;
    6077             : }
    6078             : 
                       /*
                        * Release bs->md_channel with spdk_put_io_channel(). Always returns 0; the
                        * bs->md_channel pointer itself is left unchanged.
                        */
    6079             : static int
    6080         780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6081             : {
    6082         780 :         spdk_put_io_channel(bs->md_channel);
    6083             : 
    6084         780 :         return 0;
    6085             : }
    6086             : 
                       /* Return blob->id. blob must not be NULL (checked with assert only). */
    6087             : spdk_blob_id
    6088         570 : spdk_blob_get_id(struct spdk_blob *blob)
    6089             : {
    6090         570 :         assert(blob != NULL);
    6091             : 
    6092         570 :         return blob->id;
    6093             : }
    6094             : 
                       /*
                        * Return the blob's size in pages, obtained by converting
                        * blob->active.num_clusters with bs_cluster_to_page(). blob must not be NULL
                        * (checked with assert only).
                        */
    6095             : uint64_t
    6096          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    6097             : {
    6098          24 :         assert(blob != NULL);
    6099             : 
    6100          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    6101             : }
    6102             : 
                       /*
                        * Return the blob's size in I/O units: spdk_blob_get_num_pages() multiplied
                        * by bs_io_unit_per_page() for the owning blobstore. blob must not be NULL
                        * (checked with assert only).
                        */
    6103             : uint64_t
    6104          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6105             : {
    6106          24 :         assert(blob != NULL);
    6107             : 
    6108          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    6109             : }
    6110             : 
                       /*
                        * Return blob->active.num_clusters. blob must not be NULL (checked with
                        * assert only).
                        */
    6111             : uint64_t
    6112         569 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6113             : {
    6114         569 :         assert(blob != NULL);
    6115             : 
    6116         569 :         return blob->active.num_clusters;
    6117             : }
    6118             : 
                       /*
                        * Return blob->active.num_allocated_clusters. blob must not be NULL (checked
                        * with assert only).
                        */
    6119             : uint64_t
    6120         330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6121             : {
    6122         330 :         assert(blob != NULL);
    6123             : 
    6124         330 :         return blob->active.num_allocated_clusters;
    6125             : }
    6126             : 
                       /*
                        * Search forward from offset for an I/O unit whose allocation state, as
                        * reported by bs_io_unit_is_allocated(), equals is_allocated.
                        *
                        * Returns the offset of the first match, or UINT64_MAX when the search
                        * reaches spdk_blob_get_num_io_units(blob) without one (including when the
                        * starting offset is already at or past that count).
                        */
    6127             : static uint64_t
    6128          24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6129             : {
    6130          24 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6131             : 
    6132          44 :         while (offset < blob_io_unit_num) {
    6133          40 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6134          20 :                         return offset;
    6135             :                 }
    6136             : 
                                       /*
                                        * Advance by whatever bs_num_io_units_to_cluster_boundary()
                                        * reports; judging by its name this jumps to the next cluster
                                        * boundary rather than stepping one unit at a time - confirm.
                                        */
    6137          20 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    6138             :         }
    6139             : 
    6140           4 :         return UINT64_MAX;
    6141             : }
    6142             : 
                       /*
                        * Return the first allocated I/O unit at or after offset, or UINT64_MAX if
                        * blob_find_io_unit() finds none.
                        */
    6143             : uint64_t
    6144          12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6145             : {
    6146          12 :         return blob_find_io_unit(blob, offset, true);
    6147             : }
    6148             : 
                       /*
                        * Return the first unallocated I/O unit at or after offset, or UINT64_MAX if
                        * blob_find_io_unit() finds none.
                        */
    6149             : uint64_t
    6150          12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6151             : {
    6152          12 :         return blob_find_io_unit(blob, offset, false);
    6153             : }
    6154             : 
    6155             : /* START spdk_bs_create_blob */
    6156             : 
                       /*
                        * Completion callback for the blob_persist() call made by bs_create_blob();
                        * cb_arg is the blob being created. If persisting failed, the blob id bit
                        * and metadata page claimed in bs_create_blob() are given back under
                        * bs->used_lock. In both cases the in-memory blob is freed and the sequence
                        * is finished with bserrno.
                        */
    6157             : static void
    6158        1878 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6159             : {
    6160        1878 :         struct spdk_blob *blob = cb_arg;
    6161        1878 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6162             : 
    6163        1878 :         if (bserrno != 0) {
    6164           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6165           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6166           0 :                 bs_release_md_page(blob->bs, page_idx);
    6167           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6168             :         }
    6169             : 
    6170        1878 :         blob_free(blob);
    6171             : 
    6172        1878 :         bs_sequence_finish(seq, bserrno);
    6173        1878 : }
    6174             : 
                       /*
                        * Apply every xattr described by xattrs to blob.
                        *
                        * blob     - blob receiving the xattrs.
                        * xattrs   - names[0..count-1] plus a get_value callback that supplies each
                        *            value and its length.
                        * internal - forwarded unchanged to blob_set_xattr().
                        *
                        * Returns 0 on success, -EINVAL when count > 0 but get_value is NULL or when
                        * a value comes back NULL or zero-length, or the negative code returned by
                        * blob_set_xattr(). Xattrs already applied before a failure are not undone
                        * here.
                        *
                        * NOTE(review): value and value_len are initialised once, before the loop,
                        * and are not reset per iteration. If get_value leaves them untouched for a
                        * later name, the previous iteration's value would be reused - confirm the
                        * callback contract guarantees both outputs are always written.
                        */
    6175             : static int
    6176        3776 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6177             :                 bool internal)
    6178             : {
    6179             :         uint64_t i;
    6180        3776 :         size_t value_len = 0;
    6181             :         int rc;
    6182        3776 :         const void *value = NULL;
    6183        3776 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6184           8 :                 return -EINVAL;
    6185             :         }
    6186        4084 :         for (i = 0; i < xattrs->count; i++) {
    6187         320 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6188         320 :                 if (value == NULL || value_len == 0) {
    6189           4 :                         return -EINVAL;
    6190             :                 }
    6191         316 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6192         316 :                 if (rc < 0) {
    6193           0 :                         return rc;
    6194             :                 }
    6195             :         }
    6196        3764 :         return 0;
    6197             : }
    6198             : 
                       /*
                        * Copy caller-supplied blob options into dst, honouring src->opts_size.
                        *
                        * A field is copied only when it lies entirely within the first
                        * src->opts_size bytes of struct spdk_blob_opts (FIELD_OK); fields beyond
                        * that keep whatever dst already holds. dst->opts_size is overwritten with
                        * src->opts_size.
                        */
    6199             : static void
    6200        1862 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6201             : {
                       /* True when `field` ends at or before src->opts_size. */
    6202             : #define FIELD_OK(field) \
    6203             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6204             : 
                       /* Assign dst->field from src->field when FIELD_OK(field) holds. */
    6205             : #define SET_FIELD(field) \
    6206             :         if (FIELD_OK(field)) { \
    6207             :                 dst->field = src->field; \
    6208             :         } \
    6209             : 
    6210        1862 :         SET_FIELD(num_clusters);
    6211        1862 :         SET_FIELD(thin_provision);
    6212        1862 :         SET_FIELD(clear_method);
    6213             : 
                               /* xattrs is copied with memcpy() rather than through SET_FIELD. */
    6214        1862 :         if (FIELD_OK(xattrs)) {
    6215        1862 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6216             :         }
    6217             : 
    6218        1862 :         SET_FIELD(use_extent_table);
    6219        1862 :         SET_FIELD(esnap_id);
    6220        1862 :         SET_FIELD(esnap_id_len);
    6221             : 
    6222        1862 :         dst->opts_size = src->opts_size;
    6223             : 
    6224             :         /* Do not remove this statement. If you add a new field, update the expected size
    6225             :          * in the assert and add a corresponding SET_FIELD statement above. */
    6226             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6227             : 
    6228             : #undef FIELD_OK
    6229             : #undef SET_FIELD
    6230        1862 : }
    6231             : 
                       /*
                        * Create a new blob: claim a metadata page and blob id, build the in-memory
                        * blob from the supplied options and xattrs, resize it and persist it.
                        *
                        * bs              - blobstore; the caller must be on bs->md_thread (asserted).
                        * opts            - optional creation options. When NULL, the defaults set by
                        *                   spdk_blob_opts_init() are used.
                        * internal_xattrs - optional xattrs applied with internal=true. When NULL, the
                        *                   defaults set by blob_xattrs_init() are used.
                        * cb_fn, cb_arg   - completion callback. Called directly with (0, -ENOMEM)
                        *                   when no metadata page is free and with (0, rc) on the
                        *                   error path; otherwise it is attached, together with the
                        *                   new blob id, to the sequence passed to blob_persist().
                        *
                        * On the error path the blob (if allocated) is freed and the claimed blob id
                        * bit and metadata page are released under bs->used_lock.
                        */
    6232             : static void
    6233        1894 : bs_create_blob(struct spdk_blob_store *bs,
    6234             :                const struct spdk_blob_opts *opts,
    6235             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6236             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6237             : {
    6238             :         struct spdk_blob        *blob;
    6239             :         uint32_t                page_idx;
    6240        1894 :         struct spdk_bs_cpl      cpl;
    6241        1894 :         struct spdk_blob_opts   opts_local;
    6242        1894 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6243             :         spdk_bs_sequence_t      *seq;
    6244             :         spdk_blob_id            id;
    6245             :         int rc;
    6246             : 
    6247        1894 :         assert(spdk_get_thread() == bs->md_thread);
    6248             : 
                               /* Claim the first clear metadata page; its index also selects the blob id. */
    6249        1894 :         spdk_spin_lock(&bs->used_lock);
    6250        1894 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6251        1894 :         if (page_idx == UINT32_MAX) {
    6252           0 :                 spdk_spin_unlock(&bs->used_lock);
    6253           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6254           0 :                 return;
    6255             :         }
    6256        1894 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6257        1894 :         bs_claim_md_page(bs, page_idx);
    6258        1894 :         spdk_spin_unlock(&bs->used_lock);
    6259             : 
    6260        1894 :         id = bs_page_to_blobid(page_idx);
    6261             : 
    6262        1894 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6263             : 
                               /* Start from defaults, then overlay whatever the caller's opts_size covers. */
    6264        1894 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6265        1894 :         if (opts) {
    6266        1862 :                 blob_opts_copy(opts, &opts_local);
    6267             :         }
    6268             : 
    6269        1894 :         blob = blob_alloc(bs, id);
    6270        1894 :         if (!blob) {
    6271           0 :                 rc = -ENOMEM;
    6272           0 :                 goto error;
    6273             :         }
    6274             : 
    6275        1894 :         blob->use_extent_table = opts_local.use_extent_table;
    6276        1894 :         if (blob->use_extent_table) {
    6277         968 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6278             :         }
    6279             : 
    6280        1894 :         if (!internal_xattrs) {
    6281        1622 :                 blob_xattrs_init(&internal_xattrs_default);
    6282        1622 :                 internal_xattrs = &internal_xattrs_default;
    6283             :         }
    6284             : 
    6285        1894 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6286        1894 :         if (rc < 0) {
    6287          12 :                 goto error;
    6288             :         }
    6289             : 
    6290        1882 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6291        1882 :         if (rc < 0) {
    6292           0 :                 goto error;
    6293             :         }
    6294             : 
    6295        1882 :         if (opts_local.thin_provision) {
    6296         356 :                 blob_set_thin_provision(blob);
    6297             :         }
    6298             : 
    6299        1882 :         blob_set_clear_method(blob, opts_local.clear_method);
    6300             : 
                               /*
                                * An esnap id makes the blob thin provisioned, flags it as an external
                                * snapshot clone and stores the id as an internal xattr.
                                */
    6301        1882 :         if (opts_local.esnap_id != NULL) {
    6302          60 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6303           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
    6304             :                                     opts_local.esnap_id_len);
    6305           0 :                         rc = -EINVAL;
    6306           0 :                         goto error;
    6307             : 
    6308             :                 }
    6309          60 :                 blob_set_thin_provision(blob);
    6310          60 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6311          60 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6312          60 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6313          60 :                 if (rc != 0) {
    6314           0 :                         goto error;
    6315             :                 }
    6316             :         }
    6317             : 
    6318        1882 :         rc = blob_resize(blob, opts_local.num_clusters);
    6319        1882 :         if (rc < 0) {
    6320           4 :                 goto error;
    6321             :         }
    6322        1878 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6323        1878 :         cpl.u.blobid.cb_fn = cb_fn;
    6324        1878 :         cpl.u.blobid.cb_arg = cb_arg;
    6325        1878 :         cpl.u.blobid.blobid = blob->id;
    6326             : 
    6327        1878 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6328        1878 :         if (!seq) {
    6329           0 :                 rc = -ENOMEM;
    6330           0 :                 goto error;
    6331             :         }
    6332             : 
                               /* bs_create_blob_cpl() frees the blob and finishes the sequence. */
    6333        1878 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6334        1878 :         return;
    6335             : 
    6336          16 : error:
                               /*
                                * rc holds a negative errno here, so it is negated for spdk_strerror().
                                * num_clusters is a 64-bit count and is printed with PRIu64 rather than
                                * %lu so the format stays correct where long is 32 bits wide.
                                */
    6337          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %" PRIu64 " (clusters)\n",
    6338             :                     spdk_strerror(-rc), opts_local.num_clusters);
    6339          16 :         if (blob != NULL) {
    6340          16 :                 blob_free(blob);
    6341             :         }
    6342          16 :         spdk_spin_lock(&bs->used_lock);
    6343          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6344          16 :         bs_release_md_page(bs, page_idx);
    6345          16 :         spdk_spin_unlock(&bs->used_lock);
    6346          16 :         cb_fn(cb_arg, 0, rc);
    6347             : }
    6348             : 
                       /*
                        * Create a blob with default options: forwards to bs_create_blob() with NULL
                        * for both the options and the internal xattrs.
                        */
    6349             : void
    6350          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6351             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6352             : {
    6353          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6354          16 : }
    6355             : 
                       /*
                        * Create a blob with caller-supplied options: forwards to bs_create_blob()
                        * with opts and no internal xattrs. opts may be NULL, in which case
                        * bs_create_blob() keeps its defaults.
                        */
    6356             : void
    6357        1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6358             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6359             : {
    6360        1598 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6361        1598 : }
    6362             : 
    6363             : /* END spdk_bs_create_blob */
    6364             : 
    6365             : /* START blob_cleanup */
    6366             : 
                       /*
                        * Context shared by the snapshot/clone/inflate operations and by the cleanup
                        * chain below. It is freed in bs_clone_snapshot_cleanup_finish().
                        */
    6367             : struct spdk_clone_snapshot_ctx {
                               /* User completion; dispatched by type in bs_clone_snapshot_cleanup_finish(). */
    6368             :         struct spdk_bs_cpl      cpl;
                               /* First error recorded by the cleanup chain; later errors are only logged. */
    6369             :         int bserrno;
                               /* When set, the cleanup chain calls blob_unfreeze_io() on the original blob. */
    6370             :         bool frozen;
    6371             : 
    6372             :         struct spdk_io_channel *channel;
    6373             : 
    6374             :         /* Current cluster for inflate operation */
    6375             :         uint64_t cluster;
    6376             : 
    6377             :         /* For inflation force allocation of all unallocated clusters and remove
    6378             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6379             :         bool allocate_all;
    6380             : 
    6381             :         struct {
    6382             :                 spdk_blob_id id;
    6383             :                 struct spdk_blob *blob;
                                       /* Saved md_ro value, written back to the blob in bs_snapshot_unfreeze_cpl(). */
    6384             :                 bool md_ro;
    6385             :         } original;
    6386             :         struct {
    6387             :                 spdk_blob_id id;
    6388             :                 struct spdk_blob *blob;
    6389             :         } new;
    6390             : 
    6391             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6392             :          * the original blobs xattrs. */
    6393             :         const struct spdk_blob_xattr_opts *xattrs;
    6394             : };
    6395             : 
                       /*
                        * Last step of the cleanup chain. cb_arg is the struct spdk_clone_snapshot_ctx.
                        *
                        * A non-zero bserrno is stored in ctx->bserrno if no error was recorded yet,
                        * otherwise it is only logged. The user callback held in ctx->cpl is then
                        * invoked with ctx->bserrno (and, for SPDK_BS_CPL_TYPE_BLOBID, the blob id
                        * stored in the completion), and the context is freed.
                        */
    6396             : static void
    6397         342 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6398             : {
    6399         342 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6400         342 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6401             : 
    6402         342 :         if (bserrno != 0) {
    6403           6 :                 if (ctx->bserrno != 0) {
    6404           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6405             :                 } else {
    6406           6 :                         ctx->bserrno = bserrno;
    6407             :                 }
    6408             :         }
    6409             : 
    6410         342 :         switch (cpl->type) {
    6411         282 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6412         282 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6413         282 :                 break;
    6414          60 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6415          60 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6416          60 :                 break;
    6417           0 :         default:
    6418           0 :                 SPDK_UNREACHABLE();
    6419             :                 break;
    6420             :         }
    6421             : 
    6422         342 :         free(ctx);
    6423         342 : }
    6424             : 
                       /*
                        * Runs once I/O on the original blob has been unfrozen, or directly from
                        * bs_clone_snapshot_origblob_cleanup() when the blob was never frozen.
                        *
                        * Records the first error in ctx->bserrno (later ones are only logged),
                        * clears locked_operation_in_progress, restores the saved md_ro value and
                        * closes the original blob; bs_clone_snapshot_cleanup_finish() runs when the
                        * close completes.
                        */
    6425             : static void
    6426         328 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6427             : {
    6428         328 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6429         328 :         struct spdk_blob *origblob = ctx->original.blob;
    6430             : 
    6431         328 :         if (bserrno != 0) {
    6432           0 :                 if (ctx->bserrno != 0) {
    6433           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6434             :                 } else {
    6435           0 :                         ctx->bserrno = bserrno;
    6436             :                 }
    6437             :         }
    6438             : 
    6439         328 :         ctx->original.id = origblob->id;
    6440         328 :         origblob->locked_operation_in_progress = false;
    6441             : 
    6442             :         /* Revert md_ro to original state */
    6443         328 :         origblob->md_ro = ctx->original.md_ro;
    6444             : 
    6445         328 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6446         328 : }
    6447             : 
                       /*
                        * Cleanup step for the original blob. cb_arg is the
                        * struct spdk_clone_snapshot_ctx.
                        *
                        * Records the first error in ctx->bserrno (later ones are only logged). If
                        * ctx->frozen is set, I/O on the original blob is unfrozen and
                        * bs_snapshot_unfreeze_cpl() runs on completion; otherwise that function is
                        * called directly with a zero status.
                        */
    6448             : static void
    6449         328 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6450             : {
    6451         328 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6452         328 :         struct spdk_blob *origblob = ctx->original.blob;
    6453             : 
    6454         328 :         if (bserrno != 0) {
    6455          24 :                 if (ctx->bserrno != 0) {
    6456           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6457             :                 } else {
    6458          20 :                         ctx->bserrno = bserrno;
    6459             :                 }
    6460             :         }
    6461             : 
    6462         328 :         if (ctx->frozen) {
    6463             :                 /* Unfreeze any outstanding I/O */
    6464         212 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6465             :         } else {
    6466         116 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6467             :         }
    6468             : 
    6469         328 : }
    6470             : 
                       /*
                        * Tears down the new-blob side of the operation, then continues with the
                        * original blob.
                        *
                        * ctx     - context of the operation; ctx->new.blob is dereferenced, so it
                        *           must already be set.
                        * bserrno - status to record.
                        *
                        * The first non-zero status is stored in ctx->bserrno; a later failure is
                        * only logged.  The new blob's id is saved in ctx->new.id and the blob is
                        * closed with bs_clone_snapshot_origblob_cleanup() as completion.
                        */
    6471             : static void
    6472           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6473             : {
    6474           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6475             : 
    6476           4 :         if (bserrno != 0) {
    6477           4 :                 if (ctx->bserrno != 0) {
    6478           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6479             :                 } else {
    6480           4 :                         ctx->bserrno = bserrno;
    6481             :                 }
    6482             :         }
    6483             : 
    6484           4 :         ctx->new.id = newblob->id;
    6485           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6486           4 : }
    6487             : 
    6488             : /* END blob_cleanup */
    6489             : 
    6490             : /* START spdk_bs_create_snapshot */
    6491             : 
                       /*
                        * Exchanges the active cluster bookkeeping of two blobs.
                        *
                        * Three fields of ->active are swapped between blob1 and blob2: the
                        * clusters array pointer, num_allocated_clusters and the extent_pages array
                        * pointer.  Only pointers and the counter change hands; nothing is copied
                        * or freed, and no other field of either blob is touched here.
                        */
    6492             : static void
    6493         220 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6494             : {
    6495             :         uint64_t *cluster_temp;
    6496             :         uint64_t num_allocated_clusters_temp;
    6497             :         uint32_t *extent_page_temp;
    6498             : 
    6499         220 :         cluster_temp = blob1->active.clusters;
    6500         220 :         blob1->active.clusters = blob2->active.clusters;
    6501         220 :         blob2->active.clusters = cluster_temp;
    6502             : 
    6503         220 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6504         220 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6505         220 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6506             : 
    6507         220 :         extent_page_temp = blob1->active.extent_pages;
    6508         220 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6509         220 :         blob2->active.extent_pages = extent_page_temp;
    6510         220 : }
    6511             : 
    6512             : /* Copies an internal xattr */
                       /*
                        * toblob   - blob that receives the xattr.
                        * fromblob - blob the xattr is read from.
                        * name     - name of the xattr to copy.
                        *
                        * The value is fetched from fromblob with blob_get_xattr_value() and set on
                        * toblob with blob_set_xattr(); both calls pass `true` as their last
                        * argument.  Returns 0 on success.  If either call fails, the failure is
                        * logged and that call's error code is returned.
                        */
    6513             : static int
    6514          20 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6515             : {
    6516          20 :         const void      *val = NULL;
    6517          20 :         size_t          len;
    6518             :         int             bserrno;
    6519             : 
    6520          20 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6521          20 :         if (bserrno != 0) {
    6522           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6523           0 :                 return bserrno;
    6524             :         }
    6525             : 
    6526          20 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6527          20 :         if (bserrno != 0) {
    6528           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6529             :                             name, toblob->id);
    6530           0 :                 return bserrno;
    6531             :         }
    6532          20 :         return 0;
    6533             : }
    6534             : 
                       /*
                        * Completion of the metadata sync of the original blob (now the clone)
                        * during snapshot creation.
                        *
                        * On failure the cluster maps are swapped back; if the new blob is an esnap
                        * clone, the external snapshot id xattr and the SPDK_BLOB_EXTERNAL_SNAPSHOT
                        * flag are put back on the original blob; then cleanup starts.
                        *
                        * On success the SNAPSHOT_IN_PROGRESS xattr is removed from the new blob,
                        * the original blob is added to the blob list, the new blob is marked
                        * read-only and its metadata is synced, with
                        * bs_clone_snapshot_origblob_cleanup() as completion.
                        */
    6535             : static void
    6536         208 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6537             : {
    6538         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6539         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6540         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6541             : 
    6542         208 :         if (bserrno != 0) {
                                       /* Roll back: give the clusters back to the original blob. */
    6543           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6544           4 :                 if (blob_is_esnap_clone(newblob)) {
                                               /* NOTE(review): the return value of bs_snapshot_copy_xattr() is
                                                * ignored here; the flag below is set even if the copy failed. */
    6545           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6546           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6547             :                 }
    6548           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6549           4 :                 return;
    6550             :         }
    6551             : 
    6552             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6553         204 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6554         204 :         if (bserrno != 0) {
    6555           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6556           0 :                 return;
    6557             :         }
    6558             : 
    6559         204 :         bs_blob_list_add(ctx->original.blob);
    6560             : 
    6561         204 :         spdk_blob_set_read_only(newblob);
    6562             : 
    6563             :         /* sync snapshot metadata */
    6564         204 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6565             : }
    6566             : 
                       /*
                        * Completion of the metadata sync of the new blob (the snapshot) after it
                        * received the original blob's cluster map.
                        *
                        * On success the original blob is turned into a thin-provisioned clone of
                        * the new blob: the BLOB_SNAPSHOT xattr is set to the new blob's id, a
                        * back_bs_dev backed by the new blob is created, any external-snapshot
                        * reference is dropped, parent_id is updated and the original blob's
                        * metadata is synced with bs_snapshot_origblob_sync_cpl() as completion.
                        *
                        * Each failure path swaps the cluster maps back, marks the new blob thin
                        * provisioned again and goes through bs_clone_snapshot_newblob_cleanup().
                        */
    6567             : static void
    6568         212 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6569             : {
    6570         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6571         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6572         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6573             : 
    6574         212 :         if (bserrno != 0) {
    6575             :                 /* return cluster map back to original */
    6576           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6577             : 
    6578             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6579             :                  * Since I/O is frozen on origblob, no changes to zeroed out cluster map should have occurred.
    6580             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6581           4 :                 blob_set_thin_provision(newblob);
    6582           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6583             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6584           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6585             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6586             : 
    6587           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6588           4 :                 return;
    6589             :         }
    6590             : 
    6591             :         /* Set internal xattr for snapshot id */
    6592         208 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6593         208 :         if (bserrno != 0) {
    6594             :                 /* return cluster map back to original */
    6595           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6596           0 :                 blob_set_thin_provision(newblob);
    6597           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6598           0 :                 return;
    6599             :         }
    6600             : 
    6601             :         /* Create new back_bs_dev for snapshot */
    6602         208 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6603         208 :         if (origblob->back_bs_dev == NULL) {
    6604             :                 /* return cluster map back to original */
    6605           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6606           0 :                 blob_set_thin_provision(newblob);
    6607           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6608           0 :                 return;
    6609             :         }
    6610             : 
    6611             :         /* Remove the xattr that references an external snapshot */
    6612         208 :         if (blob_is_esnap_clone(origblob)) {
    6613          12 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6614          12 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6615          12 :                 if (bserrno != 0) {
    6616           0 :                         if (bserrno == -ENOENT) {
    6617           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6618             :                                             " xattr to remove\n", origblob->id);
                                                       /* This branch has no return: when assert() is compiled
                                                        * out, execution continues as if the removal succeeded. */
    6619           0 :                                 assert(false);
    6620             :                         } else {
    6621             :                                 /* return cluster map back to original */
    6622           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6623           0 :                                 blob_set_thin_provision(newblob);
    6624           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6625           0 :                                 return;
    6626             :                         }
    6627             :                 }
    6628             :         }
    6629             : 
    6630         208 :         bs_blob_list_remove(origblob);
    6631         208 :         origblob->parent_id = newblob->id;
    6632             :         /* set clone blob as thin provisioned */
    6633         208 :         blob_set_thin_provision(origblob);
    6634             : 
    6635         208 :         bs_blob_list_add(newblob);
    6636             : 
    6637             :         /* sync clone metadata */
    6638         208 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6639             : }
    6640             : 
                       /*
                        * Completion of blob_freeze_io() on the original blob during snapshot
                        * creation.
                        *
                        * cb_arg - struct spdk_clone_snapshot_ctx of the operation.
                        * rc     - status of the freeze; non-zero aborts via
                        *          bs_clone_snapshot_newblob_cleanup().
                        *
                        * With I/O frozen, the new blob takes over the original blob's back_bs_dev,
                        * invalid_flags and parent_id.  The xattr that names the parent is copied
                        * or set to match, the cluster maps are swapped so the new blob holds the
                        * original's clusters, the clear method is copied, and the new blob's
                        * metadata is synced with bs_snapshot_newblob_sync_cpl() as completion.
                        */
    6641             : static void
    6642         212 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6643             : {
    6644         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6645         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6646         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6647             :         int bserrno;
    6648             : 
    6649         212 :         if (rc != 0) {
    6650           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6651           0 :                 return;
    6652             :         }
    6653             : 
                               /* From here on, cleanup has to unfreeze I/O on the original blob. */
    6654         212 :         ctx->frozen = true;
    6655             : 
    6656         212 :         if (blob_is_esnap_clone(origblob)) {
    6657             :                 /* Clean up any channels associated with the original blob id because future IO will
    6658             :                  * perform IO using the snapshot blob_id.
    6659             :                  */
    6660          12 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6661             :         }
    6662         212 :         if (newblob->back_bs_dev) {
    6663         212 :                 blob_back_bs_destroy(newblob);
    6664             :         }
    6665             :         /* set new back_bs_dev for snapshot */
    6666         212 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6667             :         /* Set invalid flags from origblob */
    6668         212 :         newblob->invalid_flags = origblob->invalid_flags;
    6669             : 
    6670             :         /* inherit parent from original blob if set */
    6671         212 :         newblob->parent_id = origblob->parent_id;
    6672         212 :         switch (origblob->parent_id) {
    6673          12 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
                                       /* Parent is an external snapshot: carry its id xattr over. */
    6674          12 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6675          12 :                 if (bserrno != 0) {
    6676           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6677           0 :                         return;
    6678             :                 }
    6679          12 :                 break;
    6680         148 :         case SPDK_BLOBID_INVALID:
                                       /* No parent: nothing to record. */
    6681         148 :                 break;
    6682          52 :         default:
    6683             :                 /* Set internal xattr for snapshot id */
    6684          52 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6685          52 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6686          52 :                 if (bserrno != 0) {
    6687           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6688           0 :                         return;
    6689             :                 }
    6690             :         }
    6691             : 
    6692             :         /* swap cluster maps */
    6693         212 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6694             : 
    6695             :         /* Set the clear method on the new blob to match the original. */
    6696         212 :         blob_set_clear_method(newblob, origblob->clear_method);
    6697             : 
    6698             :         /* sync snapshot metadata */
    6699         212 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6700             : }
    6701             : 
                       /*
                        * Completion of opening the freshly created snapshot blob.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * _blob   - the opened new blob; only used when bserrno is 0.
                        * bserrno - status of the open; non-zero goes straight to
                        *           bs_clone_snapshot_origblob_cleanup().
                        *
                        * On success the blob is stored in ctx->new.blob and I/O on the original
                        * blob is frozen, with bs_snapshot_freeze_cpl() as completion.  The asserts
                        * check that the new blob is thin provisioned and that its cluster and
                        * extent-page arrays are still all zero.
                        */
    6702             : static void
    6703         216 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6704             : {
    6705         216 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6706         216 :         struct spdk_blob *origblob = ctx->original.blob;
    6707         216 :         struct spdk_blob *newblob = _blob;
    6708             : 
    6709         216 :         if (bserrno != 0) {
    6710           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6711           4 :                 return;
    6712             :         }
    6713             : 
    6714         212 :         ctx->new.blob = newblob;
    6715         212 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6716         212 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6717             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6718         212 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6719             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6720             : 
    6721         212 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6722             : }
    6723             : 
                       /*
                        * Completion of creating the snapshot blob.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * blobid  - id of the created blob; only used when bserrno is 0.
                        * bserrno - status of the create; non-zero goes straight to
                        *           bs_clone_snapshot_origblob_cleanup().
                        *
                        * On success the id is stored both in ctx->new.id and in the completion
                        * (ctx->cpl.u.blobid.blobid), and the new blob is opened with
                        * bs_snapshot_newblob_open_cpl() as completion.
                        */
    6724             : static void
    6725         220 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6726             : {
    6727         220 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6728         220 :         struct spdk_blob *origblob = ctx->original.blob;
    6729             : 
    6730         220 :         if (bserrno != 0) {
    6731           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6732           4 :                 return;
    6733             :         }
    6734             : 
    6735         216 :         ctx->new.id = blobid;
    6736         216 :         ctx->cpl.u.blobid.blobid = blobid;
    6737             : 
    6738         216 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6739             : }
    6740             : 
    6741             : 
                       /*
                        * xattr value provider for the SNAPSHOT_IN_PROGRESS internal xattr.
                        *
                        * arg       - the struct spdk_blob whose id becomes the xattr value.
                        * name      - requested xattr name; asserted to be SNAPSHOT_IN_PROGRESS.
                        * value     - out: set to point at arg's id field (no copy is made).
                        * value_len - out: set to the size of that id field.
                        */
    6742             : static void
    6743         220 : bs_xattr_snapshot(void *arg, const char *name,
    6744             :                   const void **value, size_t *value_len)
    6745             : {
    6746         220 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6747             : 
    6748         220 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6749         220 :         *value = &blob->id;
    6750         220 :         *value_len = sizeof(blob->id);
    6751         220 : }
    6752             : 
                       /*
                        * Completion of opening the blob that is to be snapshotted.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * _blob   - the opened original blob; only used when bserrno is 0.
                        * bserrno - status of the open; non-zero finishes the operation through
                        *           bs_clone_snapshot_cleanup_finish().
                        *
                        * The snapshot is refused with -EINVAL if the blob is data or metadata
                        * read-only, and with -EBUSY if another locked operation is in progress on
                        * it; in both cases the blob is closed again.  Otherwise the blob is marked
                        * as having a locked operation in progress and a thin-provisioned blob with
                        * the same number of clusters and the same use_extent_table setting is
                        * created, carrying the SNAPSHOT_IN_PROGRESS internal xattr.
                        */
    6753             : static void
    6754         230 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6755             : {
    6756         230 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6757         230 :         struct spdk_blob_opts opts;
    6758         230 :         struct spdk_blob_xattr_opts internal_xattrs;
    6759         230 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6760             : 
    6761         230 :         if (bserrno != 0) {
    6762           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6763           6 :                 return;
    6764             :         }
    6765             : 
    6766         224 :         ctx->original.blob = _blob;
    6767             : 
    6768         224 :         if (_blob->data_ro || _blob->md_ro) {
    6769           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6770             :                               PRIx64 "\n", _blob->id);
    6771           4 :                 ctx->bserrno = -EINVAL;
    6772           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6773           4 :                 return;
    6774             :         }
    6775             : 
    6776         220 :         if (_blob->locked_operation_in_progress) {
    6777           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6778           0 :                 ctx->bserrno = -EBUSY;
    6779           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6780           0 :                 return;
    6781             :         }
    6782             : 
    6783         220 :         _blob->locked_operation_in_progress = true;
    6784             : 
    6785         220 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6786         220 :         blob_xattrs_init(&internal_xattrs);
    6787             : 
    6788             :         /* Change the size of new blob to the same as in original blob,
    6789             :          * but do not allocate clusters */
    6790         220 :         opts.thin_provision = true;
    6791         220 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6792         220 :         opts.use_extent_table = _blob->use_extent_table;
    6793             : 
    6794             :         /* If there are any xattrs specified for snapshot, set them now */
    6795         220 :         if (ctx->xattrs) {
    6796           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6797             :         }
    6798             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6799         220 :         internal_xattrs.count = 1;
    6800         220 :         internal_xattrs.ctx = _blob;
    6801         220 :         internal_xattrs.names = xattrs_names;
    6802         220 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6803             : 
                               /* NOTE(review): opts, internal_xattrs and xattrs_names live on this
                                * stack frame; assumes bs_create_blob() is done reading them before it
                                * returns - confirm. */
    6804         220 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6805             :                        bs_snapshot_newblob_create_cpl, ctx);
    6806             : }
    6807             : 
                       /*
                        * Starts creation of a snapshot of blob `blobid` in blobstore `bs`.
                        *
                        * bs              - blobstore that holds the blob.
                        * blobid          - id of the blob to snapshot.
                        * snapshot_xattrs - optional xattrs for the snapshot, may be NULL.  Only the
                        *                   pointer is stored; it is read later, in
                        *                   bs_snapshot_origblob_open_cpl().
                        * cb_fn, cb_arg   - completion callback and its argument.
                        *
                        * A zeroed context is allocated and the original blob is opened; the rest
                        * of the work happens in bs_snapshot_origblob_open_cpl().  If the
                        * allocation fails, cb_fn is called right away with SPDK_BLOBID_INVALID and
                        * -ENOMEM.
                        */
    6808             : void
    6809         230 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6810             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6811             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6812             : {
    6813         230 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6814             : 
    6815         230 :         if (!ctx) {
    6816           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6817           0 :                 return;
    6818             :         }
    6819         230 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6820         230 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6821         230 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6822         230 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6823         230 :         ctx->bserrno = 0;
    6824         230 :         ctx->frozen = false;
    6825         230 :         ctx->original.id = blobid;
    6826         230 :         ctx->xattrs = snapshot_xattrs;
    6827             : 
    6828         230 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6829             : }
    6830             : /* END spdk_bs_create_snapshot */
    6831             : 
    6832             : /* START spdk_bs_create_clone */
    6833             : 
                       /*
                        * xattr value provider for the BLOB_SNAPSHOT internal xattr of a clone.
                        *
                        * arg       - the struct spdk_blob whose id becomes the xattr value.
                        * name      - requested xattr name; asserted to be BLOB_SNAPSHOT.
                        * value     - out: set to point at arg's id field (no copy is made).
                        * value_len - out: set to the size of that id field.
                        */
    6834             : static void
    6835          48 : bs_xattr_clone(void *arg, const char *name,
    6836             :                const void **value, size_t *value_len)
    6837             : {
    6838          48 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6839             : 
    6840          48 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6841          48 :         *value = &blob->id;
    6842          48 :         *value_len = sizeof(blob->id);
    6843          48 : }
    6844             : 
                       /*
                        * Completion of opening the freshly created clone.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * _blob   - the opened clone; only used when bserrno is 0.
                        * bserrno - status of the open.
                        *
                        * A failed open is handed to bs_clone_snapshot_origblob_cleanup(), which
                        * records the error and releases the original blob; _blob is not touched
                        * in that case.  This is the same handling bs_snapshot_newblob_open_cpl()
                        * uses.  On success the clone is stored in ctx->new.blob, added to the
                        * blob list and closed again, with bs_clone_snapshot_origblob_cleanup() as
                        * completion.
                        */
    6845             : static void
    6846          48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6847             : {
    6848          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6849          48 :         struct spdk_blob *clone = _blob;
    6850             : 
                               /* Do not use the blob of a failed open. */
                               if (bserrno != 0) {
                                       bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
                                       return;
                               }

    6851          48 :         ctx->new.blob = clone;
    6852          48 :         bs_blob_list_add(clone);
    6853             : 
    6854          48 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6855          48 : }
    6856             : 
                       /*
                        * Completion of creating the clone blob.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * blobid  - id of the created clone; only used when bserrno is 0.
                        * bserrno - status of the create.
                        *
                        * A failed create is handed to bs_clone_snapshot_origblob_cleanup(), which
                        * records the error and releases the original blob; blobid is then neither
                        * stored in the completion nor opened.  This is the same handling
                        * bs_snapshot_newblob_create_cpl() uses.  On success the id is stored in
                        * the completion and the clone is opened with bs_clone_newblob_open_cpl()
                        * as completion.
                        */
    6857             : static void
    6858          48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6859             : {
    6860          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6861             : 
                               /* Do not open a blob that was never created. */
                               if (bserrno != 0) {
                                       bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
                                       return;
                               }

    6862          48 :         ctx->cpl.u.blobid.blobid = blobid;
    6863          48 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6864          48 : }
    6865             : 
                       /*
                        * Completion of opening the blob that is to be cloned.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * _blob   - the opened source blob; only used when bserrno is 0.
                        * bserrno - status of the open; non-zero finishes the operation through
                        *           bs_clone_snapshot_cleanup_finish().
                        *
                        * The blob and its current md_ro value are saved in ctx->original.  The
                        * clone is refused with -EINVAL unless the blob is both data and metadata
                        * read-only, and with -EBUSY if another locked operation is in progress on
                        * it; in both cases the blob is closed again.  Otherwise the blob is marked
                        * as having a locked operation in progress and a thin-provisioned blob with
                        * the same number of clusters and the same use_extent_table setting is
                        * created, carrying the BLOB_SNAPSHOT internal xattr.
                        */
    6866             : static void
    6867          52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6868             : {
    6869          52 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6870          52 :         struct spdk_blob_opts           opts;
    6871          52 :         struct spdk_blob_xattr_opts internal_xattrs;
    6872          52 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6873             : 
    6874          52 :         if (bserrno != 0) {
    6875           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6876           0 :                 return;
    6877             :         }
    6878             : 
    6879          52 :         ctx->original.blob = _blob;
    6880          52 :         ctx->original.md_ro = _blob->md_ro;
    6881             : 
                               /* Both read-only flags must be set for the source of a clone. */
    6882          52 :         if (!_blob->data_ro || !_blob->md_ro) {
    6883           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6884           4 :                 ctx->bserrno = -EINVAL;
    6885           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6886           4 :                 return;
    6887             :         }
    6888             : 
    6889          48 :         if (_blob->locked_operation_in_progress) {
    6890           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6891           0 :                 ctx->bserrno = -EBUSY;
    6892           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6893           0 :                 return;
    6894             :         }
    6895             : 
    6896          48 :         _blob->locked_operation_in_progress = true;
    6897             : 
    6898          48 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6899          48 :         blob_xattrs_init(&internal_xattrs);
    6900             : 
    6901          48 :         opts.thin_provision = true;
    6902          48 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6903          48 :         opts.use_extent_table = _blob->use_extent_table;
    6904          48 :         if (ctx->xattrs) {
    6905           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6906             :         }
    6907             : 
    6908             :         /* Set internal xattr BLOB_SNAPSHOT */
    6909          48 :         internal_xattrs.count = 1;
    6910          48 :         internal_xattrs.ctx = _blob;
    6911          48 :         internal_xattrs.names = xattr_names;
    6912          48 :         internal_xattrs.get_value = bs_xattr_clone;
    6913             : 
                               /* NOTE(review): opts, internal_xattrs and xattr_names live on this
                                * stack frame; assumes bs_create_blob() is done reading them before it
                                * returns - confirm. */
    6914          48 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6915             :                        bs_clone_newblob_create_cpl, ctx);
    6916             : }
    6917             : 
                       /*
                        * Starts creation of a clone of blob `blobid` in blobstore `bs`.
                        *
                        * bs            - blobstore that holds the blob.
                        * blobid        - id of the blob to clone.
                        * clone_xattrs  - optional xattrs for the clone, may be NULL.  Only the
                        *                 pointer is stored; it is read later, in
                        *                 bs_clone_origblob_open_cpl().
                        * cb_fn, cb_arg - completion callback and its argument.
                        *
                        * A zeroed context is allocated and the source blob is opened; the rest of
                        * the work happens in bs_clone_origblob_open_cpl().  If the allocation
                        * fails, cb_fn is called right away with SPDK_BLOBID_INVALID and -ENOMEM.
                        */
    6918             : void
    6919          52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6920             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6921             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6922             : {
    6923          52 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6924             : 
    6925          52 :         if (!ctx) {
    6926           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6927           0 :                 return;
    6928             :         }
    6929             : 
    6930          52 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6931          52 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6932          52 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6933          52 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6934          52 :         ctx->bserrno = 0;
    6935          52 :         ctx->xattrs = clone_xattrs;
    6936          52 :         ctx->original.id = blobid;
    6937             : 
    6938          52 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6939             : }
    6940             : 
    6941             : /* END spdk_bs_create_clone */
    6942             : 
    6943             : /* START spdk_bs_inflate_blob */
    6944             : 
                       /*
                        * Completion of opening the blob that becomes the new parent of the blob
                        * in ctx->original.blob.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the operation.
                        * _parent - the opened new parent; only used when bserrno is 0.
                        * bserrno - status of the open; non-zero goes straight to
                        *           bs_clone_snapshot_origblob_cleanup().
                        *
                        * The blob's BLOB_SNAPSHOT xattr and parent_id are pointed at _parent, its
                        * back_bs_dev is replaced by one created from _parent, its blob-list entry
                        * is removed and re-added around the change, and its metadata is synced
                        * with bs_clone_snapshot_origblob_cleanup() as completion.
                        */
    6945             : static void
    6946          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6947             : {
    6948          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6949          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6950             : 
    6951          12 :         if (bserrno != 0) {
    6952           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6953           0 :                 return;
    6954             :         }
    6955             : 
    6956             :         /* Temporarily override md_ro flag for MD modification */
    6957          12 :         _blob->md_ro = false;
    6958             : 
                               /* NOTE(review): _parent is dereferenced here, before the assert
                                * further down checks it for NULL. */
    6959          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6960          12 :         if (bserrno != 0) {
    6961           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6962           0 :                 return;
    6963             :         }
    6964             : 
    6965          12 :         assert(_parent != NULL);
    6966             : 
    6967          12 :         bs_blob_list_remove(_blob);
    6968          12 :         _blob->parent_id = _parent->id;
    6969             : 
    6970          12 :         blob_back_bs_destroy(_blob);
                               /* NOTE(review): the result of bs_create_blob_bs_dev() is not checked
                                * for NULL here, although bs_snapshot_newblob_sync_cpl() does check
                                * it - confirm whether it can fail on this path. */
    6971          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6972          12 :         bs_blob_list_add(_blob);
    6973             : 
    6974          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6975             : }
    6976             : 
                       /*
                        * Final step of inflating the blob in ctx->original.blob: detach it from
                        * its current parent and sync its metadata.
                        *
                        * If ctx->allocate_all is set, the parent reference is dropped altogether:
                        * the parent xattr is removed, the SPDK_BLOB_THIN_PROV flag is cleared, the
                        * back_bs_dev is destroyed and parent_id becomes SPDK_BLOBID_INVALID.
                        *
                        * Otherwise only the direct parent is dropped.  If that parent has a parent
                        * of its own, it is opened and bs_inflate_blob_set_parent_cpl() finishes
                        * the job.  If not, the blob gets the zeroes device as back_bs_dev.
                        *
                        * Both paths that do not return early end with a metadata sync, with
                        * bs_clone_snapshot_origblob_cleanup() as completion.
                        */
    6977             : static void
    6978          56 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    6979             : {
    6980          56 :         struct spdk_blob *_blob = ctx->original.blob;
    6981             :         struct spdk_blob *_parent;
    6982             : 
    6983          56 :         if (ctx->allocate_all) {
    6984             :                 /* remove thin provisioning */
    6985          32 :                 bs_blob_list_remove(_blob);
                                       /* NOTE(review): the return values of the blob_remove_xattr() calls
                                        * in this function are ignored. */
    6986          32 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    6987           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6988           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6989             :                 } else {
    6990          24 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    6991             :                 }
    6992          32 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    6993          32 :                 blob_back_bs_destroy(_blob);
    6994          32 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6995             :         } else {
    6996             :                 /* For now, esnap clones always have allocate_all set. */
    6997          24 :                 assert(!blob_is_esnap_clone(_blob));
    6998             : 
                                       /* The back_bs_dev is treated as a struct spdk_blob_bs_dev to get at
                                        * the parent blob behind it. */
    6999          24 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    7000          24 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    7001             :                         /* We must change the parent of the inflated blob */
    7002          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    7003             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    7004          12 :                         return;
    7005             :                 }
    7006             : 
    7007          12 :                 bs_blob_list_remove(_blob);
    7008          12 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7009          12 :                 blob_back_bs_destroy(_blob);
    7010          12 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    7011             :         }
    7012             : 
    7013             :         /* Temporarily override md_ro flag for MD modification */
    7014          44 :         _blob->md_ro = false;
                               /* NOTE(review): on the allocate_all path without an external snapshot,
                                * BLOB_SNAPSHOT removal was already attempted above, before md_ro was
                                * cleared; this is the second attempt. */
    7015          44 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7016          44 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7017             : 
    7018          44 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7019             : }
    7020             : 
    7021             : /* Check if cluster needs allocation */
    7022             : static inline bool
    7023        1200 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7024             : {
    7025             :         struct spdk_blob_bs_dev *b;
    7026             : 
    7027        1200 :         assert(blob != NULL);
    7028             : 
    7029        1200 :         if (blob->active.clusters[cluster] != 0) {
    7030             :                 /* Cluster is already allocated */
    7031          32 :                 return false;
    7032             :         }
    7033             : 
    7034        1168 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7035             :                 /* Blob has no parent blob */
    7036          80 :                 return allocate_all;
    7037             :         }
    7038             : 
    7039        1088 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7040          64 :                 return true;
    7041             :         }
    7042             : 
    7043        1024 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7044        1024 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7045             : }
    7046             : 
    7047             : static void
    7048         508 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7049             : {
    7050         508 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7051         508 :         struct spdk_blob *_blob = ctx->original.blob;
    7052         508 :         struct spdk_bs_cpl cpl;
    7053             :         spdk_bs_user_op_t *op;
    7054             :         uint64_t offset;
    7055             : 
    7056         508 :         if (bserrno != 0) {
    7057           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7058           0 :                 return;
    7059             :         }
    7060             : 
    7061         656 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7062         600 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7063         452 :                         break;
    7064             :                 }
    7065             :         }
    7066             : 
    7067         508 :         if (ctx->cluster < _blob->active.num_clusters) {
    7068         452 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7069             : 
    7070             :                 /* We may safely advance the cluster index before starting the copy */
    7071         452 :                 ctx->cluster++;
    7072             : 
    7073             :                 /* Use a dummy zero-length read as the context for the cluster copy */
    7074         452 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7075         452 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7076         452 :                 cpl.u.blob_basic.cb_arg = ctx;
    7077             : 
    7078         452 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7079             :                                       NULL, 0, offset, 0);
    7080         452 :                 if (!op) {
    7081           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7082           0 :                         return;
    7083             :                 }
    7084             : 
    7085         452 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7086             :         } else {
    7087          56 :                 bs_inflate_blob_done(ctx);
    7088             :         }
    7089             : }
    7090             : 
    7091             : static void
    7092          60 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7093             : {
    7094          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7095             :         uint64_t clusters_needed;
    7096             :         uint64_t i;
    7097             : 
    7098          60 :         if (bserrno != 0) {
    7099           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7100           0 :                 return;
    7101             :         }
    7102             : 
    7103          60 :         ctx->original.blob = _blob;
    7104          60 :         ctx->original.md_ro = _blob->md_ro;
    7105             : 
    7106          60 :         if (_blob->locked_operation_in_progress) {
    7107           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7108           0 :                 ctx->bserrno = -EBUSY;
    7109           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7110           0 :                 return;
    7111             :         }
    7112             : 
    7113          60 :         _blob->locked_operation_in_progress = true;
    7114             : 
    7115          60 :         switch (_blob->parent_id) {
    7116           8 :         case SPDK_BLOBID_INVALID:
    7117           8 :                 if (!ctx->allocate_all) {
    7118             :                         /* This blob has no parent, so we cannot decouple it. */
    7119           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7120           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7121           4 :                         return;
    7122             :                 }
    7123           4 :                 break;
    7124           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7125             :                 /*
    7126             :                  * It would be better to rely on back_bs_dev->is_zeroes() to determine which
    7127             :                  * clusters require allocation. Until there is a blobstore consumer that
    7128             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes(), it is
    7129             :                  * not worth the effort.
    7130             :                  */
    7131           8 :                 ctx->allocate_all = true;
    7132           8 :                 break;
    7133          44 :         default:
    7134          44 :                 break;
    7135             :         }
    7136             : 
    7137          56 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7138             :                 /* This is not a thin-provisioned blob. No need to inflate. */
    7139           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7140           0 :                 return;
    7141             :         }
    7142             : 
    7143             :         /* Do two passes - one to verify that we can obtain enough clusters
    7144             :          * and another to actually claim them.
    7145             :          */
    7146          56 :         clusters_needed = 0;
    7147         656 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7148         600 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7149         452 :                         clusters_needed++;
    7150             :                 }
    7151             :         }
    7152             : 
    7153          56 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7154             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7155           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7156           0 :                 return;
    7157             :         }
    7158             : 
    7159          56 :         ctx->cluster = 0;
    7160          56 :         bs_inflate_blob_touch_next(ctx, 0);
    7161             : }
    7162             : 
    7163             : static void
    7164          60 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7165             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7166             : {
    7167          60 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7168             : 
    7169          60 :         if (!ctx) {
    7170           0 :                 cb_fn(cb_arg, -ENOMEM);
    7171           0 :                 return;
    7172             :         }
    7173          60 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7174          60 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7175          60 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7176          60 :         ctx->bserrno = 0;
    7177          60 :         ctx->original.id = blobid;
    7178          60 :         ctx->channel = channel;
    7179          60 :         ctx->allocate_all = allocate_all;
    7180             : 
    7181          60 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7182             : }
    7183             : 
    7184             : void
    7185          28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7186             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7187             : {
    7188          28 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7189          28 : }
    7190             : 
    7191             : void
    7192          32 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7193             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7194             : {
    7195          32 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7196          32 : }
    7197             : /* END spdk_bs_inflate_blob */
    7198             : 
    7199             : /* START spdk_bs_blob_shallow_copy */
    7200             : 
    7201             : struct shallow_copy_ctx {
    7202             :         struct spdk_bs_cpl cpl;
    7203             :         int bserrno;
    7204             : 
    7205             :         /* Blob source for copy */
    7206             :         struct spdk_blob_store *bs;
    7207             :         spdk_blob_id blobid;
    7208             :         struct spdk_blob *blob;
    7209             :         struct spdk_io_channel *blob_channel;
    7210             : 
    7211             :         /* Destination device for copy */
    7212             :         struct spdk_bs_dev *ext_dev;
    7213             :         struct spdk_io_channel *ext_channel;
    7214             : 
    7215             :         /* Current cluster for copy operation */
    7216             :         uint64_t cluster;
    7217             : 
    7218             :         /* Buffer for blob reading */
    7219             :         uint8_t *read_buff;
    7220             : 
    7221             :         /* Struct for external device writing */
    7222             :         struct spdk_bs_dev_cb_args ext_args;
    7223             : 
    7224             :         /* Actual number of copied clusters */
    7225             :         uint64_t copied_clusters_count;
    7226             : 
    7227             :         /* Status callback for updates about the ongoing operation */
    7228             :         spdk_blob_shallow_copy_status status_cb;
    7229             : 
    7230             :         /* Argument passed to function status_cb */
    7231             :         void *status_cb_arg;
    7232             : };
    7233             : 
    7234             : static void
    7235          16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7236             : {
    7237          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7238          16 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7239             : 
    7240          16 :         if (bserrno != 0) {
    7241           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
    7242           0 :                 ctx->bserrno = bserrno;
    7243             :         }
    7244             : 
    7245          16 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7246          16 :         spdk_free(ctx->read_buff);
    7247             : 
    7248          16 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7249             : 
    7250          16 :         free(ctx);
    7251          16 : }
    7252             : 
    7253             : static void
    7254           8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7255             : {
    7256           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7257           8 :         struct spdk_blob *_blob = ctx->blob;
    7258             : 
    7259           8 :         if (bserrno != 0) {
    7260           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7261           0 :                 ctx->bserrno = bserrno;
    7262           0 :                 _blob->locked_operation_in_progress = false;
    7263           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7264           0 :                 return;
    7265             :         }
    7266             : 
    7267           8 :         ctx->cluster++;
    7268           8 :         if (ctx->status_cb) {
    7269           8 :                 ctx->copied_clusters_count++;
    7270           8 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7271             :         }
    7272             : 
    7273           8 :         bs_shallow_copy_cluster_find_next(ctx);
    7274             : }
    7275             : 
    7276             : static void
    7277           8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7278             : {
    7279           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7280           8 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7281           8 :         struct spdk_blob *_blob = ctx->blob;
    7282             : 
    7283           8 :         if (bserrno != 0) {
    7284           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7285           0 :                 ctx->bserrno = bserrno;
    7286           0 :                 _blob->locked_operation_in_progress = false;
    7287           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7288           0 :                 return;
    7289             :         }
    7290             : 
    7291           8 :         ctx->ext_args.channel = ctx->ext_channel;
    7292           8 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7293           8 :         ctx->ext_args.cb_arg = ctx;
    7294             : 
    7295          16 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7296           8 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7297           8 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7298             :                        &ctx->ext_args);
    7299             : }
    7300             : 
    7301             : static void
    7302          12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7303             : {
    7304          12 :         struct shallow_copy_ctx *ctx = cb_arg;
    7305          12 :         struct spdk_blob *_blob = ctx->blob;
    7306             : 
    7307          20 :         while (ctx->cluster < _blob->active.num_clusters) {
    7308          16 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7309           8 :                         break;
    7310             :                 }
    7311             : 
    7312           8 :                 ctx->cluster++;
    7313             :         }
    7314             : 
    7315          12 :         if (ctx->cluster < _blob->active.num_clusters) {
    7316          16 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7317           8 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7318           8 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7319             :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7320             :         } else {
    7321           4 :                 _blob->locked_operation_in_progress = false;
    7322           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7323             :         }
    7324          12 : }
    7325             : 
    7326             : static void
    7327          16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7328             : {
    7329          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7330          16 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7331             :         uint32_t blob_block_size;
    7332             :         uint64_t blob_total_size;
    7333             : 
    7334          16 :         if (bserrno != 0) {
    7335           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7336           0 :                 ctx->bserrno = bserrno;
    7337           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7338           0 :                 return;
    7339             :         }
    7340             : 
    7341          16 :         if (!spdk_blob_is_read_only(_blob)) {
    7342           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7343           4 :                 ctx->bserrno = -EPERM;
    7344           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7345           4 :                 return;
    7346             :         }
    7347             : 
    7348          12 :         blob_block_size = _blob->bs->dev->blocklen;
    7349          12 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7350             : 
    7351          12 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7352           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7353             :                             _blob->id);
    7354           4 :                 ctx->bserrno = -EINVAL;
    7355           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7356           4 :                 return;
    7357             :         }
    7358             : 
    7359           8 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7360           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7361             : blobstore block size\n", _blob->id);
    7362           4 :                 ctx->bserrno = -EINVAL;
    7363           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7364           4 :                 return;
    7365             :         }
    7366             : 
    7367           4 :         ctx->blob = _blob;
    7368             : 
    7369           4 :         if (_blob->locked_operation_in_progress) {
    7370           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7371           0 :                 ctx->bserrno = -EBUSY;
    7372           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7373           0 :                 return;
    7374             :         }
    7375             : 
    7376           4 :         _blob->locked_operation_in_progress = true;
    7377             : 
    7378           4 :         ctx->cluster = 0;
    7379           4 :         bs_shallow_copy_cluster_find_next(ctx);
    7380             : }
    7381             : 
    7382             : int
    7383          16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7384             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7385             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7386             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7387             : {
    7388             :         struct shallow_copy_ctx *ctx;
    7389             :         struct spdk_io_channel *ext_channel;
    7390             : 
    7391          16 :         ctx = calloc(1, sizeof(*ctx));
    7392          16 :         if (!ctx) {
    7393           0 :                 return -ENOMEM;
    7394             :         }
    7395             : 
    7396          16 :         ctx->bs = bs;
    7397          16 :         ctx->blobid = blobid;
    7398          16 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7399          16 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7400          16 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7401          16 :         ctx->bserrno = 0;
    7402          16 :         ctx->blob_channel = channel;
    7403          16 :         ctx->status_cb = status_cb_fn;
    7404          16 :         ctx->status_cb_arg = status_cb_arg;
    7405          16 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7406             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7407          16 :         if (!ctx->read_buff) {
    7408           0 :                 free(ctx);
    7409           0 :                 return -ENOMEM;
    7410             :         }
    7411             : 
    7412          16 :         ext_channel = ext_dev->create_channel(ext_dev);
    7413          16 :         if (!ext_channel) {
    7414           0 :                 spdk_free(ctx->read_buff);
    7415           0 :                 free(ctx);
    7416           0 :                 return -ENOMEM;
    7417             :         }
    7418          16 :         ctx->ext_dev = ext_dev;
    7419          16 :         ctx->ext_channel = ext_channel;
    7420             : 
    7421          16 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7422             : 
    7423          16 :         return 0;
    7424             : }
    7425             : /* END spdk_bs_blob_shallow_copy */
    7426             : 
    7427             : /* START spdk_bs_blob_set_parent */
    7428             : 
    7429             : struct set_parent_ctx {
    7430             :         struct spdk_blob_store *bs;
    7431             :         int                     bserrno;
    7432             :         spdk_bs_op_complete     cb_fn;
    7433             :         void                    *cb_arg;
    7434             : 
    7435             :         struct spdk_blob        *blob;
    7436             :         bool                    blob_md_ro;
    7437             : 
    7438             :         struct blob_parent      parent;
    7439             : };
    7440             : 
    7441             : static void
    7442          24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7443             : {
    7444          24 :         struct set_parent_ctx *ctx = cb_arg;
    7445             : 
    7446          24 :         assert(ctx != NULL);
    7447             : 
    7448          24 :         if (bserrno != 0) {
    7449           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7450           0 :                 if (ctx->bserrno == 0) {
    7451           0 :                         ctx->bserrno = bserrno;
    7452             :                 }
    7453             :         }
    7454             : 
    7455          24 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7456             : 
    7457          24 :         free(ctx);
    7458          24 : }
    7459             : 
    7460             : static void
    7461          20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7462             : {
    7463          20 :         struct set_parent_ctx *ctx = cb_arg;
    7464             : 
    7465          20 :         if (ctx->bserrno != 0) {
    7466           8 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7467           8 :                 return;
    7468             :         }
    7469             : 
    7470          12 :         if (bserrno != 0) {
    7471           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7472           0 :                 ctx->bserrno = bserrno;
    7473             :         }
    7474             : 
    7475          12 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7476             : }
    7477             : 
    7478             : static void
    7479          12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7480             : {
    7481          12 :         struct set_parent_ctx *ctx = cb_arg;
    7482          12 :         struct spdk_blob *blob = ctx->blob;
    7483          12 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7484             : 
    7485          12 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7486           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7487           0 :                 ctx->bserrno = bserrno;
    7488             :         }
    7489             : 
    7490             :         /* Revert md_ro to original state */
    7491          12 :         blob->md_ro = ctx->blob_md_ro;
    7492             : 
    7493          12 :         blob->locked_operation_in_progress = false;
    7494          12 :         snapshot->locked_operation_in_progress = false;
    7495             : 
    7496          12 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7497          12 : }
    7498             : 
    7499             : static void
    7500          12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7501             : {
    7502          12 :         struct set_parent_ctx *ctx = cb_arg;
    7503          12 :         struct spdk_blob *blob = ctx->blob;
    7504             : 
    7505          12 :         if (bserrno != 0) {
    7506           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7507           0 :                 ctx->bserrno = bserrno;
    7508           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7509           0 :                 return;
    7510             :         }
    7511             : 
    7512          12 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7513             : }
    7514             : 
    7515             : static int
    7516          12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7517             : {
    7518             :         int rc;
    7519             : 
    7520          12 :         bs_blob_list_remove(blob);
    7521             : 
    7522          12 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7523          12 :         if (rc != 0) {
    7524           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7525           0 :                 return rc;
    7526             :         }
    7527          12 :         blob->parent_id = parent->u.snapshot.id;
    7528             : 
    7529          12 :         if (blob_is_esnap_clone(blob)) {
    7530             :                 /* Remove the xattr that references the external snapshot */
    7531           4 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7532           4 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7533             :         }
    7534             : 
    7535          12 :         bs_blob_list_add(blob);
    7536             : 
    7537          12 :         return 0;
    7538             : }
    7539             : /* Second step of spdk_bs_blob_set_parent: runs when the requested parent snapshot has been opened. */
    7540             : static void
    7541          20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7542             : {
    7543          20 :         struct set_parent_ctx *ctx = cb_arg;
    7544          20 :         struct spdk_blob *blob = ctx->blob;
    7545             :         struct spdk_bs_dev *back_bs_dev;
    7546             :         /* Snapshot open failed: only the child blob is open, so close it and finish. */
    7547          20 :         if (bserrno != 0) {
    7548           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7549           0 :                 ctx->bserrno = bserrno;
    7550           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7551           0 :                 return;
    7552             :         }
    7553             :         /* Record the opened snapshot handle and its id in the context. */
    7554          20 :         ctx->parent.u.snapshot.blob = snapshot;
    7555          20 :         ctx->parent.u.snapshot.id = snapshot->id;
    7556             :         /* The parent must be a snapshot; on every rejection below the child is closed first, */
    7557          20 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7558           4 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7559           4 :                 ctx->bserrno = -EINVAL;
    7560           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7561           4 :                 return;
    7562             :         }
    7563             :         /* and bs_set_parent_close_snapshot (defined outside this view) runs as the close completion. */
    7564          16 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7565           4 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7566           4 :                 ctx->bserrno = -EINVAL;
    7567           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7568           4 :                 return;
    7569             :         }
    7570             :         /* Refuse with -EBUSY if either blob already has a locked operation in progress. */
    7571          12 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7572           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7573           0 :                 ctx->bserrno = -EBUSY;
    7574           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7575           0 :                 return;
    7576             :         }
    7577             :         /* Mark both blobs as having a locked operation in progress. */
    7578          12 :         blob->locked_operation_in_progress = true;
    7579          12 :         snapshot->locked_operation_in_progress = true;
    7580             : 
    7581             :         /* Temporarily override md_ro flag for MD modification */
    7582          12 :         blob->md_ro = false;
    7583             :         /* NOTE(review): the result of bs_create_blob_bs_dev() is passed on without a NULL check - confirm it cannot fail. */
    7584          12 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7585             :         /* bs_set_parent_refs is handed over as the refs callback with &ctx->parent as its argument. */
    7586          12 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7587             :                              bs_set_parent_set_back_bs_dev_done,
    7588             :                              ctx);
    7589             : }
    7590             : /* First step of spdk_bs_blob_set_parent: runs when the child blob has been opened. */
    7591             : static void
    7592          24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7593             : {
    7594          24 :         struct set_parent_ctx *ctx = cb_arg;
    7595             :         /* Open failed: there is no blob handle to close, so the finish routine is called directly. */
    7596          24 :         if (bserrno != 0) {
    7597           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7598           0 :                 ctx->bserrno = bserrno;
    7599           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7600           0 :                 return;
    7601             :         }
    7602             :         /* Only thin-provisioned blobs are accepted; otherwise close the blob and report -EINVAL. */
    7603          24 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7604           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7605           4 :                 ctx->bserrno = -EINVAL;
    7606           4 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7607           4 :                 return;
    7608             :         }
    7609             :         /* Keep the blob handle and its current md_ro value in the context. */
    7610          20 :         ctx->blob = blob;
    7611          20 :         ctx->blob_md_ro = blob->md_ro;
    7612             :         /* Next, open the requested parent snapshot by the id stored in the context. */
    7613          20 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7614             : }
    7615             : /* Start making snapshot_id the parent of blob_id; the result is delivered through cb_fn(cb_arg, bserrno). */
    7616             : void
    7617          36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7618             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7619             : {
    7620             :         struct set_parent_ctx *ctx;
    7621             :         /* The checks below call cb_fn directly, before any context is allocated. */
    7622          36 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7623           4 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7624           4 :                 cb_fn(cb_arg, -EINVAL);
    7625           4 :                 return;
    7626             :         }
    7627             :         /* A blob cannot be its own parent. */
    7628          32 :         if (blob_id == snapshot_id) {
    7629           4 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7630           4 :                 cb_fn(cb_arg, -EINVAL);
    7631           4 :                 return;
    7632             :         }
    7633             :         /* Requesting the parent the blob already has is reported as -EEXIST. */
    7634          28 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7635           4 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7636           4 :                 cb_fn(cb_arg, -EEXIST);
    7637           4 :                 return;
    7638             :         }
    7639             :         /* Zero-initialized context carried through the callback chain. */
    7640          24 :         ctx = calloc(1, sizeof(*ctx));
    7641          24 :         if (!ctx) {
    7642           0 :                 cb_fn(cb_arg, -ENOMEM);
    7643           0 :                 return;
    7644             :         }
    7645             : 
    7646          24 :         ctx->bs = bs;
    7647          24 :         ctx->parent.u.snapshot.id = snapshot_id;
    7648          24 :         ctx->cb_fn = cb_fn;
    7649          24 :         ctx->cb_arg = cb_arg;
    7650          24 :         ctx->bserrno = 0;
    7651             :         /* Open the child blob first; bs_set_parent_blob_open_cpl continues from there. */
    7652          24 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7653             : }
    7654             : /* END spdk_bs_blob_set_parent */
    7655             : 
    7656             : /* START spdk_bs_blob_set_external_parent */
    7657             : /* Last step of the set-external-parent flow: reports ctx->bserrno to the caller and frees the context. */
    7658             : static void
    7659          16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7660             : {
    7661          16 :         struct set_parent_ctx *ctx = cb_arg;
    7662             :         /* An error from this step is stored only when no earlier error was recorded. */
    7663          16 :         if (bserrno != 0) {
    7664           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7665           0 :                 if (ctx->bserrno == 0) {
    7666           0 :                         ctx->bserrno = bserrno;
    7667             :                 }
    7668             :         }
    7669             :         /* Invoke the caller's completion callback with the recorded result. */
    7670          16 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7671             :         /* Free the esnap id copy made in spdk_bs_blob_set_external_parent, then the context. */
    7672          16 :         free(ctx->parent.u.esnap.id);
    7673          16 :         free(ctx);
    7674          16 : }
    7675             : /* Restores the blob's md_ro flag, clears its locked-operation flag and closes the blob. */
    7676             : static void
    7677           8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7678             : {
    7679           8 :         struct set_parent_ctx *ctx = cb_arg;
    7680           8 :         struct spdk_blob *blob = ctx->blob;
    7681             :         /* Used as the spdk_blob_sync_md completion; an error is stored only if none was recorded earlier. */
    7682           8 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7683           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7684           0 :                 ctx->bserrno = bserrno;
    7685             :         }
    7686             : 
    7687             :         /* Revert md_ro to original state */
    7688           8 :         blob->md_ro = ctx->blob_md_ro;
    7689             : 
    7690           8 :         blob->locked_operation_in_progress = false;
    7691             :         /* The flow ends in bs_set_external_parent_cleanup_finish once the close completes. */
    7692           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7693           8 : }
    7694             : /* Completion passed to blob_set_back_bs_dev: on success syncs the blob's metadata, then the blob is closed. */
    7695             : static void
    7696           8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7697             : {
    7698           8 :         struct set_parent_ctx *ctx = cb_arg;
    7699           8 :         struct spdk_blob *blob = ctx->blob;
    7700             :         /* On failure the metadata sync is skipped and the close step is called directly. */
    7701           8 :         if (bserrno != 0) {
    7702           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7703           0 :                 ctx->bserrno = bserrno;
    7704           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7705           0 :                 return;
    7706             :         }
    7707             :         /* bs_set_external_parent_close_blob runs as the sync completion. */
    7708           8 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7709             : }
    7710             : /* Refs callback passed to blob_set_back_bs_dev: points the blob's parent bookkeeping at the external snapshot. Returns 0 or the blob_set_xattr error. */
    7711             : static int
    7712           8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7713             : {
    7714             :         int rc;
    7715             :         /* bs_blob_list_remove is called before the parent changes; bs_blob_list_add is called again only on success. */
    7716           8 :         bs_blob_list_remove(blob);
    7717             :         /* A blob that is currently a clone drops its snapshot reference first. */
    7718           8 :         if (spdk_blob_is_clone(blob)) {
    7719             :                 /* Remove the xattr that references the snapshot */
    7720           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7721           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7722             :         }
    7723             :         /* Store the external snapshot id in the BLOB_EXTERNAL_SNAPSHOT_ID xattr. */
    7724           8 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7725           8 :                             parent->u.esnap.id_len, true);
    7726           8 :         if (rc != 0) {
    7727           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7728           0 :                 return rc;
    7729             :         }
    7730           8 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7731           8 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7732             : 
    7733           8 :         bs_blob_list_add(blob);
    7734             : 
    7735           8 :         return 0;
    7736             : }
    7737             : /* First step of spdk_bs_blob_set_external_parent: runs when the blob has been opened. */
    7738             : static void
    7739          16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7740             : {
    7741          16 :         struct set_parent_ctx *ctx = cb_arg;
    7742          16 :         const void *esnap_id;
    7743          16 :         size_t esnap_id_len;
    7744             :         int rc;
    7745             :         /* Open failed: no blob to close. Use this flow's own finisher so the esnap id copy in ctx is freed too. */
    7746          16 :         if (bserrno != 0) {
    7747           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7748           0 :                 ctx->bserrno = bserrno;
    7749           0 :                 bs_set_external_parent_cleanup_finish(ctx, 0);
    7750           0 :                 return;
    7751             :         }
    7752             :         /* Keep the blob handle and its current md_ro value in the context. */
    7753          16 :         ctx->blob = blob;
    7754          16 :         ctx->blob_md_ro = blob->md_ro;
    7755             :         /* If the blob already has an esnap id with the same length and bytes, report -EEXIST. */
    7756          16 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7757          16 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7758           4 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7759           4 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7760           4 :                 ctx->bserrno = -EEXIST;
    7761           4 :                 goto error;
    7762             :         }
    7763             :         /* Only thin-provisioned blobs are accepted. */
    7764          12 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7765           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7766           4 :                 ctx->bserrno = -EINVAL;
    7767           4 :                 goto error;
    7768             :         }
    7769             :         /* Refuse with -EBUSY if the blob already has a locked operation in progress. */
    7770           8 :         if (blob->locked_operation_in_progress) {
    7771           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7772           0 :                 ctx->bserrno = -EBUSY;
    7773           0 :                 goto error;
    7774             :         }
    7775             : 
    7776           8 :         blob->locked_operation_in_progress = true;
    7777             : 
    7778             :         /* Temporarily override md_ro flag for MD modification */
    7779           8 :         blob->md_ro = false;
    7780             :         /* bs_set_external_parent_refs is the refs callback; bs_set_external_parent_unfrozen is the completion. */
    7781           8 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7782             :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7783           8 :         return;
    7784             : 
    7785           8 : error:
    7786           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7787             : }
    7788             : /* Start making esnap_bs_dev (identified by esnap_id/esnap_id_len) the external parent of blob_id; the result is delivered through cb_fn. */
    7789             : void
    7790          24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7791             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7792             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7793             : {
    7794             :         struct set_parent_ctx *ctx;
    7795             :         uint64_t esnap_dev_size, cluster_sz;
    7796             :         /* Reject an esnap id that has the size of a blob id and the same bytes as blob_id. */
    7797          24 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7798           4 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7799           4 :                 cb_fn(cb_arg, -EINVAL);
    7800           4 :                 return;
    7801             :         }
    7802             :         /* blockcnt * blocklen of the esnap device must be a whole multiple of the blobstore cluster size. */
    7803          20 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7804          20 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7805          20 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7806           4 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7807             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7808           4 :                 cb_fn(cb_arg, -EINVAL);
    7809           4 :                 return;
    7810             :         }
    7811             :         /* Zero-initialized context carried through the callback chain. */
    7812          16 :         ctx = calloc(1, sizeof(*ctx));
    7813          16 :         if (!ctx) {
    7814           0 :                 cb_fn(cb_arg, -ENOMEM);
    7815           0 :                 return;
    7816             :         }
    7817             :         /* Buffer for a private copy of the esnap id; it is freed in bs_set_external_parent_cleanup_finish. */
    7818          16 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7819          16 :         if (!ctx->parent.u.esnap.id) {
    7820           0 :                 free(ctx);
    7821           0 :                 cb_fn(cb_arg, -ENOMEM);
    7822           0 :                 return;
    7823             :         }
    7824             : 
    7825          16 :         ctx->bs = bs;
    7826          16 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7827          16 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7828          16 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7829          16 :         ctx->cb_fn = cb_fn;
    7830          16 :         ctx->cb_arg = cb_arg;
    7831          16 :         ctx->bserrno = 0;
    7832             :         /* Open the blob; bs_set_external_parent_blob_open_cpl continues from there. */
    7833          16 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7834             : }
    7835             : /* END spdk_bs_blob_set_external_parent */
    7836             : 
    7837             : /* START spdk_blob_resize */
    7838             : struct spdk_bs_resize_ctx {     /* state carried through the freeze/resize/unfreeze steps of spdk_blob_resize */
    7839             :         spdk_blob_op_complete cb_fn;    /* caller's completion callback */
    7840             :         void *cb_arg;   /* argument handed back to cb_fn */
    7841             :         struct spdk_blob *blob; /* blob being resized */
    7842             :         uint64_t sz;    /* requested size, passed to blob_resize() */
    7843             :         int rc; /* return value of blob_resize() */
    7844             : };
    7845             : /* Completion passed to blob_unfreeze_io: final step of spdk_blob_resize, reports the result and frees the context. */
    7846             : static void
    7847         206 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7848             : {
    7849         206 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7850             :         /* rc is the unfreeze result; ctx->rc is the blob_resize() result stored by bs_resize_freeze_cpl. */
    7851         206 :         if (rc != 0) {
    7852           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7853             :         }
    7854             :         /* A resize error takes precedence over the unfreeze result when reporting to the caller. */
    7855         206 :         if (ctx->rc != 0) {
    7856           4 :                 SPDK_ERRLOG("Resize failed, rc=%d\n", ctx->rc);
    7857           4 :                 rc = ctx->rc;
    7858             :         }
    7859             : 
    7860         206 :         ctx->blob->locked_operation_in_progress = false;
    7861             : 
    7862         206 :         ctx->cb_fn(ctx->cb_arg, rc);
    7863         206 :         free(ctx);
    7864         206 : }
    7865             : /* Completion passed to blob_freeze_io by spdk_blob_resize: performs the resize, then unfreezes I/O. */
    7866             : static void
    7867         206 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7868             : {
    7869         206 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7870             :         /* Freeze failed: clear the lock flag, report the error and free the context without resizing. */
    7871         206 :         if (rc != 0) {
    7872           0 :                 ctx->blob->locked_operation_in_progress = false;
    7873           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7874           0 :                 free(ctx);
    7875           0 :                 return;
    7876             :         }
    7877             :         /* The resize result is kept in ctx->rc and reported from bs_resize_unfreeze_cpl. */
    7878         206 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7879             :         /* The unfreeze is issued whatever blob_resize() returned. */
    7880         206 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7881             : }
    7882             : /* Resize blob to sz clusters; the result is delivered through cb_fn(cb_arg, rc). */
    7883             : void
    7884         220 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7885             : {
    7886             :         struct spdk_bs_resize_ctx *ctx;
    7887             : 
    7888         220 :         blob_verify_md_op(blob);
    7889             : 
    7890         220 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7891             :         /* A blob whose metadata is read-only is rejected with -EPERM. */
    7892         220 :         if (blob->md_ro) {
    7893           4 :                 cb_fn(cb_arg, -EPERM);
    7894           4 :                 return;
    7895             :         }
    7896             :         /* Already the requested size: complete immediately with success. */
    7897         216 :         if (sz == blob->active.num_clusters) {
    7898          10 :                 cb_fn(cb_arg, 0);
    7899          10 :                 return;
    7900             :         }
    7901             :         /* Refuse with -EBUSY while another locked operation is in progress on this blob. */
    7902         206 :         if (blob->locked_operation_in_progress) {
    7903           0 :                 cb_fn(cb_arg, -EBUSY);
    7904           0 :                 return;
    7905             :         }
    7906             : 
    7907         206 :         ctx = calloc(1, sizeof(*ctx));
    7908         206 :         if (!ctx) {
    7909           0 :                 cb_fn(cb_arg, -ENOMEM);
    7910           0 :                 return;
    7911             :         }
    7912             :         /* Set the lock flag, then freeze I/O; bs_resize_freeze_cpl continues from there. */
    7913         206 :         blob->locked_operation_in_progress = true;
    7914         206 :         ctx->cb_fn = cb_fn;
    7915         206 :         ctx->cb_arg = cb_arg;
    7916         206 :         ctx->blob = blob;
    7917         206 :         ctx->sz = sz;
    7918         206 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7919             : }
    7920             : 
    7921             : /* END spdk_blob_resize */
    7922             : 
    7923             : 
    7924             : /* START spdk_bs_delete_blob */
    7925             : /* Close completion on the delete path: finishes the sequence passed as cb_arg with bserrno. */
    7926             : static void
    7927        1492 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7928             : {
    7929        1492 :         spdk_bs_sequence_t *seq = cb_arg;
    7930             : 
    7931        1492 :         bs_sequence_finish(seq, bserrno);
    7932        1492 : }
    7933             : /* Sequence completion with the blob as cb_arg: frees the blob on error, otherwise closes it; both paths end in bs_delete_close_cpl. */
    7934             : static void
    7935        1492 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7936             : {
    7937        1492 :         struct spdk_blob *blob = cb_arg;
    7938             : 
    7939        1492 :         if (bserrno != 0) {
    7940             :                 /*
    7941             :                  * We already removed this blob from the blobstore tailq, so
    7942             :                  *  we need to free it here since this is the last reference
    7943             :                  *  to it.
    7944             :                  */
    7945           0 :                 blob_free(blob);
    7946           0 :                 bs_delete_close_cpl(seq, bserrno);
    7947           0 :                 return;
    7948             :         }
    7949             : 
    7950             :         /*
    7951             :          * This will immediately decrement the ref_count and call
    7952             :          *  the completion routine since the metadata state is clean.
    7953             :          *  By calling spdk_blob_close, we reduce the number of call
    7954             :          *  points into code that touches the blob->open_ref count
    7955             :          *  and the blobstore's blob list.
    7956             :          */
    7957        1492 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7958             : }
    7959             : /* Context shared by the delete_snapshot_* callbacks and delete_blob_cleanup_finish. */
    7960             : struct delete_snapshot_ctx {
    7961             :         struct spdk_blob_list *parent_snapshot_entry;   /* tested against NULL before the snapshot's back_bs_dev is cleared */
    7962             :         struct spdk_blob *snapshot;     /* snapshot being deleted; also handed to cb_fn */
    7963             :         struct spdk_blob_md_page *page; /* released with spdk_free() in delete_blob_cleanup_finish */
    7964             :         bool snapshot_md_ro;    /* md_ro value written back to the snapshot during cleanup */
    7965             :         struct spdk_blob *clone;        /* the clone involved in the deletion */
    7966             :         bool clone_md_ro;       /* md_ro value written back to the clone during cleanup */
    7967             :         spdk_blob_op_with_handle_complete cb_fn;        /* caller's completion callback */
    7968             :         void *cb_arg;   /* argument handed back to cb_fn */
    7969             :         int bserrno;    /* result reported to cb_fn */
    7970             :         uint32_t next_extent_page;      /* NOTE(review): not referenced by the callbacks visible here */
    7971             : };
    7972             : /* Last step of snapshot deletion: reports ctx->snapshot and ctx->bserrno to the caller, then frees ctx->page and ctx. */
    7973             : static void
    7974         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    7975             : {
    7976         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7977             : 
    7978         110 :         if (bserrno != 0) {
    7979           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    7980             :         }
    7981             : 
    7982         110 :         assert(ctx != NULL);
    7983             :         /* An error from this step is stored only when no earlier error was recorded. */
    7984         110 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7985           0 :                 ctx->bserrno = bserrno;
    7986             :         }
    7987             :         /* The callback is invoked before the page buffer and the context are freed. */
    7988         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    7989         110 :         spdk_free(ctx->page);
    7990         110 :         free(ctx);
    7991         110 : }
    7992             : /* Cleanup step used as the clone's close completion: restores the snapshot's flags and closes the snapshot. */
    7993             : static void
    7994          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    7995             : {
    7996          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7997             :         /* Unlike the other steps, an error here overwrites whatever ctx->bserrno held. */
    7998          22 :         if (bserrno != 0) {
    7999           0 :                 ctx->bserrno = bserrno;
    8000           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    8001             :         }
    8002             :         /* On failure the snapshot is inserted back into open_blobs and its id set again in open_blobids. */
    8003          22 :         if (ctx->bserrno != 0) {
    8004          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    8005          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    8006          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    8007             :         }
    8008             :         /* Clear the lock flag and write back the saved md_ro value before closing. */
    8009          22 :         ctx->snapshot->locked_operation_in_progress = false;
    8010          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8011             : 
    8012          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8013          22 : }
    8014             : /* Cleanup step: restores the clone's flags and closes it, continuing in delete_snapshot_cleanup_snapshot. The bserrno argument is not used. */
    8015             : static void
    8016          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8017             : {
    8018          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8019             :         /* Clear the lock flag and write back the saved md_ro value before closing. */
    8020          12 :         ctx->clone->locked_operation_in_progress = false;
    8021          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8022             : 
    8023          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8024          12 : }
    8025             : /* Completion passed to blob_unfreeze_io: on success unlocks and closes the clone, finishing in delete_blob_cleanup_finish. */
    8026             : static void
    8027          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8028             : {
    8029          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8030             :         /* Unfreeze failed: record the error and run the clone/snapshot cleanup chain instead. */
    8031          48 :         if (bserrno) {
    8032           0 :                 ctx->bserrno = bserrno;
    8033           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8034           0 :                 return;
    8035             :         }
    8036             :         /* Success path: only the clone is closed here. */
    8037          48 :         ctx->clone->locked_operation_in_progress = false;
    8038          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8039             : }
    8040             : /* Completion of the snapshot metadata sync: updates the in-memory snapshot/clone lists, restores md_ro flags and unfreezes the clone. */
    8041             : static void
    8042          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8043             : {
    8044          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8045          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8046          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    8047          52 :         struct spdk_blob_list *clone_entry = NULL;
    8048          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8049             :         /* Sync failed: record the error and run the cleanup chain. */
    8050          52 :         if (bserrno) {
    8051           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8052           4 :                 ctx->bserrno = bserrno;
    8053           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8054           4 :                 return;
    8055             :         }
    8056             : 
    8057             :         /* Get snapshot entry for the snapshot we want to remove */
    8058          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8059             : 
    8060          48 :         assert(snapshot_entry != NULL);
    8061             : 
    8062             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8063          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8064          48 :         assert(clone_entry != NULL);
    8065          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8066          48 :         snapshot_entry->clone_count--;
    8067          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8068             :         /* The removed clone entry is either freed or re-linked under the snapshot's own parent. */
    8069          48 :         switch (ctx->snapshot->parent_id) {
    8070          40 :         case SPDK_BLOBID_INVALID:
    8071             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8072             :                 /* No parent snapshot - just remove clone entry */
    8073          40 :                 free(clone_entry);
    8074          40 :                 break;
    8075           8 :         default:
    8076             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8077             :                  * update parent snapshot (remove current clone, add new one inherited from
    8078             :                  * the snapshot that is being removed) */
    8079             : 
    8080             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8081             :                  * snapshot that we are removing */
    8082           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8083             :                                                     &snapshot_clone_entry);
    8084             : 
    8085             :                 /* Switch clone entry in parent snapshot */
    8086           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8087           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8088           8 :                 free(snapshot_clone_entry);
    8089             :         }
    8090             : 
    8091             :         /* Restore md_ro flags */
    8092          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8093          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8094             : 
    8095          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8096             : }
    8097             : /* Completion of the clone metadata sync: zeroes snapshot cluster and extent-page entries that equal the clone's, then syncs the snapshot. */
    8098             : static void
    8099          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8100             : {
    8101          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8102             :         uint64_t i;
    8103             :         /* md_ro is cleared on the snapshot for both the error path and the success path below. */
    8104          56 :         ctx->snapshot->md_ro = false;
    8105             : 
    8106          56 :         if (bserrno) {
    8107           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8108           4 :                 ctx->bserrno = bserrno;
    8109             : 
    8110             :                 /* Restore snapshot to previous state */
    8111           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8112           4 :                 if (bserrno != 0) {
    8113           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8114           0 :                         return;
    8115             :                 }
    8116             :                 /* Sync the snapshot after the xattr removal, then run the cleanup chain. */
    8117           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8118           4 :                 return;
    8119             :         }
    8120             : 
    8121             :         /* Clear cluster map entries for snapshot */
    8122         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8123         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8124         492 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8125         328 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8126             :                         }
    8127         492 :                         ctx->snapshot->active.clusters[i] = 0;
    8128             :                 }
    8129             :         }
    8130          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8131          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    8132          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8133          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8134             :                 }
    8135             :         }
    8136             :         /* blob_set_thin_provision() is applied to the snapshot and its state set to dirty before the sync. */
    8137          52 :         blob_set_thin_provision(ctx->snapshot);
    8138          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8139             :         /* With a parent snapshot entry recorded, the snapshot's back_bs_dev pointer is set to NULL (not destroyed here). */
    8140          52 :         if (ctx->parent_snapshot_entry != NULL) {
    8141           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8142             :         }
    8143             : 
    8144          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8145             : }
    8146             : 
    8147             : static void
    8148          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8149             : {
    8150             :         int bserrno;
    8151             : 
    8152             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8153          56 :         blob_back_bs_destroy(ctx->clone);
    8154             : 
    8155             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8156          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8157           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8158             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8159           8 :                 if (bserrno != 0) {
    8160           0 :                         ctx->bserrno = bserrno;
    8161             : 
    8162             :                         /* Restore snapshot to previous state */
    8163           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8164           0 :                         if (bserrno != 0) {
    8165           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8166           0 :                                 return;
    8167             :                         }
    8168             : 
    8169           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8170           0 :                         return;
    8171             :                 }
    8172           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8173           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8174             :                 /* Do not delete the external snapshot along with this snapshot */
    8175           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8176           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8177          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8178             :                 /* ...to parent snapshot */
    8179           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8180           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8181           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8182             :                                sizeof(spdk_blob_id),
    8183             :                                true);
    8184             :         } else {
    8185             :                 /* ...to blobid invalid and zeroes dev */
    8186          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8187          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8188          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8189             :         }
    8190             : 
    8191          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8192             : }
    8193             : 
    8194             : static void
    8195          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8196             : {
    8197          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8198             :         uint32_t *extent_page;
    8199             :         uint64_t i;
    8200             : 
    8201          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8202          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    8203          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8204             :                         /* No extent page to use from snapshot */
    8205           8 :                         continue;
    8206             :                 }
    8207             : 
    8208          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8209          20 :                 if (*extent_page == 0) {
    8210             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8211          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8212          18 :                         continue;
    8213             :                 }
    8214             : 
    8215             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8216             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8217           2 :                 ctx->next_extent_page = i + 1;
    8218           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8219             : 
    8220           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8221             :                                        delete_snapshot_update_extent_pages, ctx);
    8222           2 :                 return;
    8223             :         }
    8224          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8225             : }
    8226             : 
    8227             : static void
    8228          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8229             : {
    8230          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8231             :         uint64_t i;
    8232             : 
    8233             :         /* Temporarily override md_ro flag for clone for MD modification */
    8234          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8235          60 :         ctx->clone->md_ro = false;
    8236             : 
    8237          60 :         if (bserrno) {
    8238           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8239           4 :                 ctx->bserrno = bserrno;
    8240           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8241           4 :                 return;
    8242             :         }
    8243             : 
    8244             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    8245         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8246         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    8247         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8248         532 :                         if (ctx->clone->active.clusters[i] != 0) {
    8249         368 :                                 ctx->clone->active.num_allocated_clusters++;
    8250             :                         }
    8251             :                 }
    8252             :         }
    8253          56 :         ctx->next_extent_page = 0;
    8254          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    8255             : }
    8256             : 
    8257             : static void
    8258           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8259             : {
    8260           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8261             : 
    8262           8 :         if (bserrno != 0) {
    8263           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8264             :                             blob->id, bserrno);
    8265             :                 /* That error should not stop us from syncing metadata. */
    8266             :         }
    8267             : 
    8268           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8269           8 : }
    8270             : 
    8271             : static void
    8272          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8273             : {
    8274          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8275             : 
    8276          60 :         if (bserrno) {
    8277           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8278           0 :                 ctx->bserrno = bserrno;
    8279           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8280           0 :                 return;
    8281             :         }
    8282             : 
    8283             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8284          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8285          60 :         ctx->snapshot->md_ro = false;
    8286             : 
    8287             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8288          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8289             :                                       sizeof(spdk_blob_id), true);
    8290          60 :         if (ctx->bserrno != 0) {
    8291           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8292           0 :                 return;
    8293             :         }
    8294             : 
    8295          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8296           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8297             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8298             :                                                    ctx);
    8299           8 :                 return;
    8300             :         }
    8301             : 
    8302          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8303             : }
    8304             : 
    8305             : static void
    8306          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8307             : {
    8308          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8309             : 
    8310          70 :         if (bserrno) {
    8311          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    8312          10 :                 ctx->bserrno = bserrno;
    8313          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8314          10 :                 return;
    8315             :         }
    8316             : 
    8317          60 :         ctx->clone = clone;
    8318             : 
    8319          60 :         if (clone->locked_operation_in_progress) {
    8320           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8321           0 :                 ctx->bserrno = -EBUSY;
    8322           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8323           0 :                 return;
    8324             :         }
    8325             : 
    8326          60 :         clone->locked_operation_in_progress = true;
    8327             : 
    8328          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8329             : }
    8330             : 
    8331             : static void
    8332          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8333             : {
    8334          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    8335          70 :         struct spdk_blob_list *clone_entry = NULL;
    8336          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8337             : 
    8338             :         /* Get snapshot entry for the snapshot we want to remove */
    8339          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8340             : 
    8341          70 :         assert(snapshot_entry != NULL);
    8342             : 
    8343             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8344          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8345          70 :         assert(snapshot_entry->clone_count == 1);
    8346          70 :         assert(clone_entry != NULL);
    8347             : 
    8348             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8349             :          * snapshot that we are removing */
    8350          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8351             :                                             &snapshot_clone_entry);
    8352             : 
    8353          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8354          70 : }
    8355             : 
    8356             : static void
    8357        1554 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8358             : {
    8359        1554 :         spdk_bs_sequence_t *seq = cb_arg;
    8360        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8361             :         uint32_t page_num;
    8362             : 
    8363        1554 :         if (bserrno) {
    8364          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8365          62 :                 bs_sequence_finish(seq, bserrno);
    8366          62 :                 return;
    8367             :         }
    8368             : 
    8369             :         /* Remove snapshot from the list */
    8370        1492 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8371        1492 :         if (snapshot_entry != NULL) {
    8372         144 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8373         144 :                 free(snapshot_entry);
    8374             :         }
    8375             : 
    8376        1492 :         page_num = bs_blobid_to_page(blob->id);
    8377        1492 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8378        1492 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8379        1492 :         blob->active.num_pages = 0;
    8380        1492 :         blob_resize(blob, 0);
    8381             : 
    8382        1492 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8383             : }
    8384             : 
    8385             : static int
    8386        1554 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8387             : {
    8388        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8389        1554 :         struct spdk_blob_list *clone_entry = NULL;
    8390        1554 :         struct spdk_blob *clone = NULL;
    8391        1554 :         bool has_one_clone = false;
    8392             : 
    8393             :         /* Check if this is a snapshot with clones */
    8394        1554 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8395        1554 :         if (snapshot_entry != NULL) {
    8396         194 :                 if (snapshot_entry->clone_count > 1) {
    8397          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8398          24 :                         return -EBUSY;
    8399         170 :                 } else if (snapshot_entry->clone_count == 1) {
    8400          70 :                         has_one_clone = true;
    8401             :                 }
    8402             :         }
    8403             : 
    8404             :         /* Check if someone has this blob open (besides this delete context):
    8405             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8406             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8407             :          *      and that is ok, because we will update it accordingly */
    8408        1530 :         if (blob->open_ref <= 2 && has_one_clone) {
    8409          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8410          70 :                 assert(clone_entry != NULL);
    8411          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8412             : 
    8413          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    8414             :                         /* Clone is closed and someone else opened this blob */
    8415           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8416           0 :                         return -EBUSY;
    8417             :                 }
    8418             : 
    8419          70 :                 *update_clone = true;
    8420          70 :                 return 0;
    8421             :         }
    8422             : 
    8423        1460 :         if (blob->open_ref > 1) {
    8424          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8425          16 :                 return -EBUSY;
    8426             :         }
    8427             : 
    8428        1444 :         assert(has_one_clone == false);
    8429        1444 :         *update_clone = false;
    8430        1444 :         return 0;
    8431             : }
    8432             : 
    8433             : static void
    8434           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8435             : {
    8436           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8437             : 
    8438           0 :         bs_sequence_finish(seq, -ENOMEM);
    8439           0 : }
    8440             : 
    8441             : static void
    8442        1564 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8443             : {
    8444        1564 :         spdk_bs_sequence_t *seq = cb_arg;
    8445             :         struct delete_snapshot_ctx *ctx;
    8446        1564 :         bool update_clone = false;
    8447             : 
    8448        1564 :         if (bserrno != 0) {
    8449          10 :                 bs_sequence_finish(seq, bserrno);
    8450          10 :                 return;
    8451             :         }
    8452             : 
    8453        1554 :         blob_verify_md_op(blob);
    8454             : 
    8455        1554 :         ctx = calloc(1, sizeof(*ctx));
    8456        1554 :         if (ctx == NULL) {
    8457           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8458           0 :                 return;
    8459             :         }
    8460             : 
    8461        1554 :         ctx->snapshot = blob;
    8462        1554 :         ctx->cb_fn = bs_delete_blob_finish;
    8463        1554 :         ctx->cb_arg = seq;
    8464             : 
    8465             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8466        1554 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8467        1554 :         if (ctx->bserrno) {
    8468          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8469          40 :                 return;
    8470             :         }
    8471             : 
    8472        1514 :         if (blob->locked_operation_in_progress) {
    8473           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8474           0 :                 ctx->bserrno = -EBUSY;
    8475           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8476           0 :                 return;
    8477             :         }
    8478             : 
    8479        1514 :         blob->locked_operation_in_progress = true;
    8480             : 
    8481             :         /*
    8482             :          * Remove the blob from the blob_store list now, to ensure it does not
    8483             :          *  get returned after this point by blob_lookup().
    8484             :          */
    8485        1514 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8486        1514 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8487             : 
    8488        1514 :         if (update_clone) {
    8489          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    8490          70 :                 if (!ctx->page) {
    8491           0 :                         ctx->bserrno = -ENOMEM;
    8492           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8493           0 :                         return;
    8494             :                 }
    8495             :                 /* This blob is a snapshot with active clone - update clone first */
    8496          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8497             :         } else {
    8498             :                 /* This blob does not have any clones - just remove it */
    8499        1444 :                 bs_blob_list_remove(blob);
    8500        1444 :                 bs_delete_blob_finish(seq, blob, 0);
    8501        1444 :                 free(ctx);
    8502             :         }
    8503             : }
    8504             : 
    8505             : void
    8506        1564 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8507             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8508             : {
    8509        1564 :         struct spdk_bs_cpl      cpl;
    8510             :         spdk_bs_sequence_t      *seq;
    8511             : 
    8512        1564 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8513             : 
    8514        1564 :         assert(spdk_get_thread() == bs->md_thread);
    8515             : 
    8516        1564 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8517        1564 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8518        1564 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8519             : 
    8520        1564 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8521        1564 :         if (!seq) {
    8522           0 :                 cb_fn(cb_arg, -ENOMEM);
    8523           0 :                 return;
    8524             :         }
    8525             : 
    8526        1564 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8527             : }
    8528             : 
    8529             : /* END spdk_bs_delete_blob */
    8530             : 
    8531             : /* START spdk_bs_open_blob */
    8532             : 
    8533             : static void
    8534        3474 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8535             : {
    8536        3474 :         struct spdk_blob *blob = cb_arg;
    8537             :         struct spdk_blob *existing;
    8538             : 
    8539        3474 :         if (bserrno != 0) {
    8540          64 :                 blob_free(blob);
    8541          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    8542          64 :                 bs_sequence_finish(seq, bserrno);
    8543          64 :                 return;
    8544             :         }
    8545             : 
    8546        3410 :         existing = blob_lookup(blob->bs, blob->id);
    8547        3410 :         if (existing) {
    8548           4 :                 blob_free(blob);
    8549           4 :                 existing->open_ref++;
    8550           4 :                 seq->cpl.u.blob_handle.blob = existing;
    8551           4 :                 bs_sequence_finish(seq, 0);
    8552           4 :                 return;
    8553             :         }
    8554             : 
    8555        3406 :         blob->open_ref++;
    8556             : 
    8557        3406 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8558        3406 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8559             : 
    8560        3406 :         bs_sequence_finish(seq, bserrno);
    8561             : }
    8562             : 
    8563             : static inline void
    8564           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8565             : {
    8566             : #define FIELD_OK(field) \
    8567             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8568             : 
    8569             : #define SET_FIELD(field) \
    8570             :         if (FIELD_OK(field)) { \
    8571             :                 dst->field = src->field; \
    8572             :         } \
    8573             : 
    8574           4 :         SET_FIELD(clear_method);
    8575           4 :         SET_FIELD(esnap_ctx);
    8576             : 
    8577           4 :         dst->opts_size = src->opts_size;
    8578             : 
    8579             :         /* You should not remove this statement, but need to update the assert statement
    8580             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8581             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8582             : 
    8583             : #undef FIELD_OK
    8584             : #undef SET_FIELD
    8585           4 : }
    8586             : 
    8587             : static void
    8588        4283 : bs_open_blob(struct spdk_blob_store *bs,
    8589             :              spdk_blob_id blobid,
    8590             :              struct spdk_blob_open_opts *opts,
    8591             :              spdk_blob_op_with_handle_complete cb_fn,
    8592             :              void *cb_arg)
    8593             : {
    8594             :         struct spdk_blob                *blob;
    8595        4283 :         struct spdk_bs_cpl              cpl;
    8596        4283 :         struct spdk_blob_open_opts      opts_local;
    8597             :         spdk_bs_sequence_t              *seq;
    8598             :         uint32_t                        page_num;
    8599             : 
    8600        4283 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8601        4283 :         assert(spdk_get_thread() == bs->md_thread);
    8602             : 
    8603        4283 :         page_num = bs_blobid_to_page(blobid);
    8604        4283 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8605             :                 /* Invalid blobid */
    8606          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8607          48 :                 return;
    8608             :         }
    8609             : 
    8610        4235 :         blob = blob_lookup(bs, blobid);
    8611        4235 :         if (blob) {
    8612         761 :                 blob->open_ref++;
    8613         761 :                 cb_fn(cb_arg, blob, 0);
    8614         761 :                 return;
    8615             :         }
    8616             : 
    8617        3474 :         blob = blob_alloc(bs, blobid);
    8618        3474 :         if (!blob) {
    8619           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8620           0 :                 return;
    8621             :         }
    8622             : 
    8623        3474 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8624        3474 :         if (opts) {
    8625           4 :                 blob_open_opts_copy(opts, &opts_local);
    8626             :         }
    8627             : 
    8628        3474 :         blob->clear_method = opts_local.clear_method;
    8629             : 
    8630        3474 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8631        3474 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8632        3474 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8633        3474 :         cpl.u.blob_handle.blob = blob;
    8634        3474 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8635             : 
    8636        3474 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8637        3474 :         if (!seq) {
    8638           0 :                 blob_free(blob);
    8639           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8640           0 :                 return;
    8641             :         }
    8642             : 
    8643        3474 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8644             : }
    8645             : 
    8646             : void
    8647        4279 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8648             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8649             : {
    8650        4279 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8651        4279 : }
    8652             : 
    8653             : void
    8654           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8655             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8656             : {
    8657           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8658           4 : }
    8659             : 
    8660             : /* END spdk_bs_open_blob */
    8661             : 
    8662             : /* START spdk_blob_set_read_only */
    8663             : int
    8664         236 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8665             : {
    8666         236 :         blob_verify_md_op(blob);
    8667             : 
    8668         236 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8669             : 
    8670         236 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8671         236 :         return 0;
    8672             : }
    8673             : /* END spdk_blob_set_read_only */
    8674             : 
    8675             : /* START spdk_blob_sync_md */
    8676             : 
    8677             : static void
    8678        1613 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8679             : {
    8680        1613 :         struct spdk_blob *blob = cb_arg;
    8681             : 
    8682        1613 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8683         404 :                 blob->data_ro = true;
    8684         404 :                 blob->md_ro = true;
    8685             :         }
    8686             : 
    8687        1613 :         bs_sequence_finish(seq, bserrno);
    8688        1613 : }
    8689             : 
    8690             : static void
    8691        1613 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8692             : {
    8693        1613 :         struct spdk_bs_cpl      cpl;
    8694             :         spdk_bs_sequence_t      *seq;
    8695             : 
    8696        1613 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8697        1613 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8698        1613 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8699             : 
    8700        1613 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8701        1613 :         if (!seq) {
    8702           0 :                 cb_fn(cb_arg, -ENOMEM);
    8703           0 :                 return;
    8704             :         }
    8705             : 
    8706        1613 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8707             : }
    8708             : 
    8709             : void
    8710        1097 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8711             : {
    8712        1097 :         blob_verify_md_op(blob);
    8713             : 
    8714        1097 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8715             : 
    8716        1097 :         if (blob->md_ro) {
    8717           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8718           4 :                 cb_fn(cb_arg, 0);
    8719           4 :                 return;
    8720             :         }
    8721             : 
    8722        1093 :         blob_sync_md(blob, cb_fn, cb_arg);
    8723             : }
    8724             : 
    8725             : /* END spdk_blob_sync_md */
    8726             : 
    8727             : struct spdk_blob_cluster_op_ctx {
    8728             :         struct spdk_thread      *thread;
    8729             :         struct spdk_blob        *blob;
    8730             :         uint32_t                cluster_num;    /* cluster index in blob */
    8731             :         uint32_t                cluster;        /* cluster on disk */
    8732             :         uint32_t                extent_page;    /* extent page on disk */
    8733             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    8734             :         int                     rc;
    8735             :         spdk_blob_op_complete   cb_fn;
    8736             :         void                    *cb_arg;
    8737             : };
    8738             : 
    8739             : static void
    8740         884 : blob_op_cluster_msg_cpl(void *arg)
    8741             : {
    8742         884 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8743             : 
    8744         884 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8745         884 :         free(ctx);
    8746         884 : }
    8747             : 
    8748             : static void
    8749         854 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8750             : {
    8751         854 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8752             : 
    8753         854 :         ctx->rc = bserrno;
    8754         854 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8755         854 : }
    8756             : 
    8757             : static void
    8758          84 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8759             : {
    8760          84 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8761             :         uint32_t *extent_page;
    8762             : 
    8763          84 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8764          84 :         *extent_page = ctx->extent_page;
    8765          84 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8766          84 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8767          84 : }
    8768             : 
    /*
     * Per-write state for blob_write_extent_page(); freed in
     * blob_persist_extent_page_cpl().
     */
    struct spdk_blob_write_extent_page_ctx {
            struct spdk_blob_store          *bs;    /* blobstore owning the page */

            uint32_t                        extent; /* md page number the extent page is written to */
            struct spdk_blob_md_page        *page;  /* buffer holding the serialized extent page */
    };
    8775             : 
    8776             : static void
    8777          26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8778             : {
    8779          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8780             : 
    8781          26 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8782          26 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8783          26 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8784             : 
    8785          26 :         ctx->rc = bserrno;
    8786          26 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8787          26 : }
    8788             : 
    8789             : static void
    8790          26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8791             : {
    8792          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8793             : 
    8794          26 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8795          26 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8796          26 :                 return;
    8797             :         }
    8798             : 
    8799           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8800           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8801             : }
    8802             : 
    8803             : static void
    8804           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8805             : {
    8806           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8807             : 
    8808           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8809           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8810           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8811           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8812           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8813           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8814           0 : }
    8815             : 
    8816             : static void
    8817         438 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8818             : {
    8819         438 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8820             : 
    8821         438 :         free(ctx);
    8822         438 :         bs_sequence_finish(seq, bserrno);
    8823         438 : }
    8824             : 
    8825             : static void
    8826         438 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8827             : {
    8828         438 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8829             : 
    8830         438 :         if (bserrno != 0) {
    8831           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8832           0 :                 return;
    8833             :         }
    8834         438 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8835         438 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    8836             :                               blob_persist_extent_page_cpl, ctx);
    8837             : }
    8838             : 
    8839             : static void
    8840         438 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8841             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8842             : {
    8843             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8844             :         spdk_bs_sequence_t                      *seq;
    8845         438 :         struct spdk_bs_cpl                      cpl;
    8846             : 
    8847         438 :         ctx = calloc(1, sizeof(*ctx));
    8848         438 :         if (!ctx) {
    8849           0 :                 cb_fn(cb_arg, -ENOMEM);
    8850           0 :                 return;
    8851             :         }
    8852         438 :         ctx->bs = blob->bs;
    8853         438 :         ctx->extent = extent;
    8854         438 :         ctx->page = page;
    8855             : 
    8856         438 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8857         438 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8858         438 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8859             : 
    8860         438 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8861         438 :         if (!seq) {
    8862           0 :                 free(ctx);
    8863           0 :                 cb_fn(cb_arg, -ENOMEM);
    8864           0 :                 return;
    8865             :         }
    8866             : 
    8867         438 :         assert(page);
    8868         438 :         page->next = SPDK_INVALID_MD_PAGE;
    8869         438 :         page->id = blob->id;
    8870         438 :         page->sequence_num = 0;
    8871             : 
    8872         438 :         blob_serialize_extent_page(blob, cluster_num, page);
    8873             : 
    8874         438 :         page->crc = blob_md_page_calc_crc(page);
    8875             : 
    8876         438 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8877             : 
    8878         438 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8879             : }
    8880             : 
    8881             : static void
    8882         824 : blob_insert_cluster_msg(void *arg)
    8883             : {
    8884         824 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8885             :         uint32_t *extent_page;
    8886             : 
    8887         824 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8888         824 :         if (ctx->rc != 0) {
    8889           4 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8890           4 :                 return;
    8891             :         }
    8892             : 
    8893         820 :         if (ctx->blob->use_extent_table == false) {
    8894             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8895         410 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8896         410 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8897         410 :                 return;
    8898             :         }
    8899             : 
    8900         410 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8901         410 :         if (*extent_page == 0) {
    8902             :                 /* Extent page requires allocation.
    8903             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8904          84 :                 assert(ctx->extent_page != 0);
    8905          84 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8906          84 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8907             :                                        blob_insert_new_ep_cb, ctx);
    8908             :         } else {
    8909             :                 /* It is possible for original thread to allocate extent page for
    8910             :                  * different cluster in the same extent page. In such case proceed with
    8911             :                  * updating the existing extent page, but release the additional one. */
    8912         326 :                 if (ctx->extent_page != 0) {
    8913           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8914           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8915           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8916           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8917           0 :                         ctx->extent_page = 0;
    8918             :                 }
    8919             :                 /* Extent page already allocated.
    8920             :                  * Every cluster allocation, requires just an update of single extent page. */
    8921         326 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8922             :                                        blob_op_cluster_msg_cb, ctx);
    8923             :         }
    8924             : }
    8925             : 
    8926             : static void
    8927         824 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8928             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8929             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8930             : {
    8931             :         struct spdk_blob_cluster_op_ctx *ctx;
    8932             : 
    8933         824 :         ctx = calloc(1, sizeof(*ctx));
    8934         824 :         if (ctx == NULL) {
    8935           0 :                 cb_fn(cb_arg, -ENOMEM);
    8936           0 :                 return;
    8937             :         }
    8938             : 
    8939         824 :         ctx->thread = spdk_get_thread();
    8940         824 :         ctx->blob = blob;
    8941         824 :         ctx->cluster_num = cluster_num;
    8942         824 :         ctx->cluster = cluster;
    8943         824 :         ctx->extent_page = extent_page;
    8944         824 :         ctx->page = page;
    8945         824 :         ctx->cb_fn = cb_fn;
    8946         824 :         ctx->cb_arg = cb_arg;
    8947             : 
    8948         824 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8949             : }
    8950             : 
    8951             : static void
    8952          60 : blob_free_cluster_msg(void *arg)
    8953             : {
    8954          60 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8955             :         uint32_t *extent_page;
    8956             :         uint32_t start_cluster_idx;
    8957          60 :         bool free_extent_page = true;
    8958             :         size_t i;
    8959             : 
    8960          60 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    8961             : 
    8962             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    8963          60 :         if (ctx->cluster == 0) {
    8964           8 :                 blob_op_cluster_msg_cb(ctx, 0);
    8965           8 :                 return;
    8966             :         }
    8967             : 
    8968          52 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
    8969          52 :         if (ctx->cluster != 0) {
    8970          52 :                 ctx->blob->active.num_allocated_clusters--;
    8971             :         }
    8972             : 
    8973          52 :         if (ctx->blob->use_extent_table == false) {
    8974             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8975          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    8976          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8977          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8978          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8979          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8980          26 :                 return;
    8981             :         }
    8982             : 
    8983          26 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8984             : 
    8985             :         /* There shouldn't be parallel release operations on same cluster */
    8986          26 :         assert(*extent_page == ctx->extent_page);
    8987             : 
    8988          26 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    8989          48 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    8990          48 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    8991          26 :                         free_extent_page = false;
    8992          26 :                         break;
    8993             :                 }
    8994             :         }
    8995             : 
    8996          26 :         if (free_extent_page) {
    8997           0 :                 assert(ctx->extent_page != 0);
    8998           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8999           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    9000           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    9001             :                                        blob_free_cluster_free_ep_cb, ctx);
    9002             :         } else {
    9003          26 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    9004             :                                        blob_free_cluster_update_ep_cb, ctx);
    9005             :         }
    9006             : }
    9007             : 
    9008             : 
    9009             : static void
    9010          60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9011             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9012             : {
    9013             :         struct spdk_blob_cluster_op_ctx *ctx;
    9014             : 
    9015          60 :         ctx = calloc(1, sizeof(*ctx));
    9016          60 :         if (ctx == NULL) {
    9017           0 :                 cb_fn(cb_arg, -ENOMEM);
    9018           0 :                 return;
    9019             :         }
    9020             : 
    9021          60 :         ctx->thread = spdk_get_thread();
    9022          60 :         ctx->blob = blob;
    9023          60 :         ctx->cluster_num = cluster_num;
    9024          60 :         ctx->extent_page = extent_page;
    9025          60 :         ctx->page = page;
    9026          60 :         ctx->cb_fn = cb_fn;
    9027          60 :         ctx->cb_arg = cb_arg;
    9028             : 
    9029          60 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9030             : }
    9031             : 
    9032             : /* START spdk_blob_close */
    9033             : 
    9034             : static void
    9035        4171 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9036             : {
    9037        4171 :         struct spdk_blob *blob = cb_arg;
    9038             : 
    9039        4171 :         if (bserrno == 0) {
    9040        4171 :                 blob->open_ref--;
    9041        4171 :                 if (blob->open_ref == 0) {
    9042             :                         /*
    9043             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9044             :                          *  these blobs are removed from the blob_store list
    9045             :                          *  when the deletion process starts - so don't try to
    9046             :                          *  remove them again.
    9047             :                          */
    9048        3406 :                         if (blob->active.num_pages > 0) {
    9049        1914 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9050        1914 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9051             :                         }
    9052        3406 :                         blob_free(blob);
    9053             :                 }
    9054             :         }
    9055             : 
    9056        4171 :         bs_sequence_finish(seq, bserrno);
    9057        4171 : }
    9058             : 
    9059             : static void
    9060         112 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9061             : {
    9062         112 :         spdk_bs_sequence_t      *seq = cb_arg;
    9063             : 
    9064         112 :         if (bserrno != 0) {
    9065           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9066             :                               blob->id, bserrno);
    9067           0 :                 bs_sequence_finish(seq, bserrno);
    9068           0 :                 return;
    9069             :         }
    9070             : 
    9071         112 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9072             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9073             : 
    9074             :         /* Sync metadata */
    9075         112 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9076             : }
    9077             : 
    9078             : void
    9079        4171 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9080             : {
    9081        4171 :         struct spdk_bs_cpl      cpl;
    9082             :         spdk_bs_sequence_t      *seq;
    9083             : 
    9084        4171 :         blob_verify_md_op(blob);
    9085             : 
    9086        4171 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9087             : 
    9088        4171 :         if (blob->open_ref == 0) {
    9089           0 :                 cb_fn(cb_arg, -EBADF);
    9090           0 :                 return;
    9091             :         }
    9092             : 
    9093        4171 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9094        4171 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9095        4171 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9096             : 
    9097        4171 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9098        4171 :         if (!seq) {
    9099           0 :                 cb_fn(cb_arg, -ENOMEM);
    9100           0 :                 return;
    9101             :         }
    9102             : 
    9103        4171 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9104         112 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9105         112 :                 return;
    9106             :         }
    9107             : 
    9108             :         /* Sync metadata */
    9109        4059 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9110             : }
    9111             : 
    9112             : /* END spdk_blob_close */
    9113             : 
    /*
     * Obtain an I/O channel for the blobstore by calling spdk_get_io_channel()
     * with the blobstore pointer. Returns whatever that call returns.
     */
    struct spdk_io_channel *
    spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    {
            return spdk_get_io_channel(bs);
    }
    9118             : 
    /*
     * Release a blobstore I/O channel: blob_esnap_destroy_bs_channel() is called
     * on the channel's context first, then the channel reference is put.
     */
    void
    spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    {
            void *channel_ctx = spdk_io_channel_get_ctx(channel);

            blob_esnap_destroy_bs_channel(channel_ctx);
            spdk_put_io_channel(channel);
    }
    9125             : 
    9126             : void
    9127         112 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9128             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9129             : {
    9130         112 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9131             :                                SPDK_BLOB_UNMAP);
    9132         112 : }
    9133             : 
    9134             : void
    9135          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9136             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9137             : {
    9138          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9139             :                                SPDK_BLOB_WRITE_ZEROES);
    9140          48 : }
    9141             : 
    9142             : void
    9143       20884 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9144             :                    void *payload, uint64_t offset, uint64_t length,
    9145             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9146             : {
    9147       20884 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9148             :                                SPDK_BLOB_WRITE);
    9149       20884 : }
    9150             : 
    9151             : void
    9152       17506 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9153             :                   void *payload, uint64_t offset, uint64_t length,
    9154             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9155             : {
    9156       17506 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9157             :                                SPDK_BLOB_READ);
    9158       17506 : }
    9159             : 
    9160             : void
    9161         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9162             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9163             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9164             : {
    9165         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9166         140 : }
    9167             : 
    9168             : void
    9169         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9170             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9171             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9172             : {
    9173         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9174         940 : }
    9175             : 
    9176             : void
    9177         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9178             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9179             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9180             : {
    9181         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9182             :                                    io_opts);
    9183         208 : }
    9184             : 
    9185             : void
    9186        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9187             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9188             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9189             : {
    9190        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9191             :                                    io_opts);
    9192        1300 : }
    9193             : 
    9194             : struct spdk_bs_iter_ctx {
    9195             :         int64_t page_num;
    9196             :         struct spdk_blob_store *bs;
    9197             : 
    9198             :         spdk_blob_op_with_handle_complete cb_fn;
    9199             :         void *cb_arg;
    9200             : };
    9201             : 
    9202             : static void
    9203        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9204             : {
    9205        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9206        1164 :         struct spdk_blob_store *bs = ctx->bs;
    9207             :         spdk_blob_id id;
    9208             : 
    9209        1164 :         if (bserrno == 0) {
    9210         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9211         444 :                 free(ctx);
    9212         444 :                 return;
    9213             :         }
    9214             : 
    9215         720 :         ctx->page_num++;
    9216         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9217         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9218         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9219         268 :                 free(ctx);
    9220         268 :                 return;
    9221             :         }
    9222             : 
    9223         452 :         id = bs_page_to_blobid(ctx->page_num);
    9224             : 
    9225         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9226             : }
    9227             : 
    9228             : void
    9229         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9230             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9231             : {
    9232             :         struct spdk_bs_iter_ctx *ctx;
    9233             : 
    9234         292 :         ctx = calloc(1, sizeof(*ctx));
    9235         292 :         if (!ctx) {
    9236           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9237           0 :                 return;
    9238             :         }
    9239             : 
    9240         292 :         ctx->page_num = -1;
    9241         292 :         ctx->bs = bs;
    9242         292 :         ctx->cb_fn = cb_fn;
    9243         292 :         ctx->cb_arg = cb_arg;
    9244             : 
    9245         292 :         bs_iter_cpl(ctx, NULL, -1);
    9246             : }
    9247             : 
    /*
     * Completion of the close issued by spdk_bs_iter_next().
     *
     * The close status is not inspected; bs_iter_cpl() is always entered with -1
     * so that it advances to the next blob id.
     */
    static void
    bs_iter_close_cpl(void *cb_arg, int bserrno)
    {
            struct spdk_bs_iter_ctx *iter = cb_arg;

            bs_iter_cpl(iter, NULL, -1);
    }
    9255             : 
    9256             : void
    9257         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9258             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9259             : {
    9260             :         struct spdk_bs_iter_ctx *ctx;
    9261             : 
    9262         420 :         assert(blob != NULL);
    9263             : 
    9264         420 :         ctx = calloc(1, sizeof(*ctx));
    9265         420 :         if (!ctx) {
    9266           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9267           0 :                 return;
    9268             :         }
    9269             : 
    9270         420 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9271         420 :         ctx->bs = bs;
    9272         420 :         ctx->cb_fn = cb_fn;
    9273         420 :         ctx->cb_arg = cb_arg;
    9274             : 
    9275             :         /* Close the existing blob */
    9276         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9277             : }
    9278             : 
    9279             : static int
    9280         951 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9281             :                uint16_t value_len, bool internal)
    9282             : {
    9283             :         struct spdk_xattr_tailq *xattrs;
    9284             :         struct spdk_xattr       *xattr;
    9285             :         size_t                  desc_size;
    9286             :         void                    *tmp;
    9287             : 
    9288         951 :         blob_verify_md_op(blob);
    9289             : 
    9290         951 :         if (blob->md_ro) {
    9291           4 :                 return -EPERM;
    9292             :         }
    9293             : 
    9294         947 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9295         947 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9296           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9297             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9298           4 :                 return -ENOMEM;
    9299             :         }
    9300             : 
    9301         943 :         if (internal) {
    9302         732 :                 xattrs = &blob->xattrs_internal;
    9303         732 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9304             :         } else {
    9305         211 :                 xattrs = &blob->xattrs;
    9306             :         }
    9307             : 
    9308        1166 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9309         332 :                 if (!strcmp(name, xattr->name)) {
    9310         109 :                         tmp = malloc(value_len);
    9311         109 :                         if (!tmp) {
    9312           0 :                                 return -ENOMEM;
    9313             :                         }
    9314             : 
    9315         109 :                         free(xattr->value);
    9316         109 :                         xattr->value_len = value_len;
    9317         109 :                         xattr->value = tmp;
    9318         109 :                         memcpy(xattr->value, value, value_len);
    9319             : 
    9320         109 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9321             : 
    9322         109 :                         return 0;
    9323             :                 }
    9324             :         }
    9325             : 
    9326         834 :         xattr = calloc(1, sizeof(*xattr));
    9327         834 :         if (!xattr) {
    9328           0 :                 return -ENOMEM;
    9329             :         }
    9330             : 
    9331         834 :         xattr->name = strdup(name);
    9332         834 :         if (!xattr->name) {
    9333           0 :                 free(xattr);
    9334           0 :                 return -ENOMEM;
    9335             :         }
    9336             : 
    9337         834 :         xattr->value_len = value_len;
    9338         834 :         xattr->value = malloc(value_len);
    9339         834 :         if (!xattr->value) {
    9340           0 :                 free(xattr->name);
    9341           0 :                 free(xattr);
    9342           0 :                 return -ENOMEM;
    9343             :         }
    9344         834 :         memcpy(xattr->value, value, value_len);
    9345         834 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9346             : 
    9347         834 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9348             : 
    9349         834 :         return 0;
    9350             : }
    9351             : 
    /*
     * Public entry point for setting a (non-internal) extended attribute.
     * Forwards to blob_set_xattr() with internal == false and returns its result.
     */
    int
    spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
                        uint16_t value_len)
    {
        const bool internal = false;

        return blob_set_xattr(blob, name, value, value_len, internal);
    }
    9358             : 
    9359             : static int
    9360         408 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9361             : {
    9362             :         struct spdk_xattr_tailq *xattrs;
    9363             :         struct spdk_xattr       *xattr;
    9364             : 
    9365         408 :         blob_verify_md_op(blob);
    9366             : 
    9367         408 :         if (blob->md_ro) {
    9368           4 :                 return -EPERM;
    9369             :         }
    9370         404 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9371             : 
    9372         416 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9373         364 :                 if (!strcmp(name, xattr->name)) {
    9374         352 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9375         352 :                         free(xattr->value);
    9376         352 :                         free(xattr->name);
    9377         352 :                         free(xattr);
    9378             : 
    9379         352 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9380         244 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9381             :                         }
    9382         352 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9383             : 
    9384         352 :                         return 0;
    9385             :                 }
    9386             :         }
    9387             : 
    9388          52 :         return -ENOENT;
    9389             : }
    9390             : 
    /*
     * Public entry point for removing a (non-internal) extended attribute.
     * Forwards to blob_remove_xattr() with internal == false and returns its result.
     */
    int
    spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    {
        const bool internal = false;

        return blob_remove_xattr(blob, name, internal);
    }
    9396             : 
    9397             : static int
    9398        2276 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9399             :                      const void **value, size_t *value_len, bool internal)
    9400             : {
    9401             :         struct spdk_xattr       *xattr;
    9402             :         struct spdk_xattr_tailq *xattrs;
    9403             : 
    9404        2276 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9405             : 
    9406        2902 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9407        1380 :                 if (!strcmp(name, xattr->name)) {
    9408         754 :                         *value = xattr->value;
    9409         754 :                         *value_len = xattr->value_len;
    9410         754 :                         return 0;
    9411             :                 }
    9412             :         }
    9413        1522 :         return -ENOENT;
    9414             : }
    9415             : 
    /*
     * Public entry point for reading a (non-internal) extended attribute.
     * Runs blob_verify_md_op() on the blob, then forwards to
     * blob_get_xattr_value() with internal == false and returns its result.
     */
    int
    spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
                              const void **value, size_t *value_len)
    {
        const bool internal = false;

        blob_verify_md_op(blob);

        return blob_get_xattr_value(blob, name, value, value_len, internal);
    }
    9424             : 
    /*
     * Snapshot of the xattr names of a blob, allocated as a single chunk:
     * the header is followed directly by 'count' name pointers.
     */
    struct spdk_xattr_names {
        /* Number of valid entries in names[]. */
        uint32_t        count;
        /* C99 flexible array member (replaces the non-standard names[0]). */
        const char      *names[];
    };
    9429             : 
    9430             : static int
    9431           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9432             : {
    9433             :         struct spdk_xattr       *xattr;
    9434           4 :         int                     count = 0;
    9435             : 
    9436          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9437           8 :                 count++;
    9438             :         }
    9439             : 
    9440           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9441           4 :         if (*names == NULL) {
    9442           0 :                 return -ENOMEM;
    9443             :         }
    9444             : 
    9445          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9446           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    9447             :         }
    9448             : 
    9449           4 :         return 0;
    9450             : }
    9451             : 
    9452             : int
    9453           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9454             : {
    9455           4 :         blob_verify_md_op(blob);
    9456             : 
    9457           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    9458             : }
    9459             : 
    9460             : uint32_t
    9461           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9462             : {
    9463           4 :         assert(names != NULL);
    9464             : 
    9465           4 :         return names->count;
    9466             : }
    9467             : 
    9468             : const char *
    9469           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9470             : {
    9471           8 :         if (index >= names->count) {
    9472           0 :                 return NULL;
    9473             :         }
    9474             : 
    9475           8 :         return names->names[index];
    9476             : }
    9477             : 
    /*
     * Release a spdk_xattr_names object. The object is a single allocation
     * (see blob_get_xattr_names), so one free() suffices; the name strings
     * themselves are not freed here.
     */
    void
    spdk_xattr_names_free(struct spdk_xattr_names *names)
    {
        free(names);
    }
    9483             : 
    9484             : struct spdk_bs_type
    9485           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9486             : {
    9487           2 :         return bs->bstype;
    9488             : }
    9489             : 
    9490             : void
    9491           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9492             : {
    9493           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9494           0 : }
    9495             : 
    9496             : bool
    9497          48 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9498             : {
    9499          48 :         assert(blob != NULL);
    9500          48 :         return (blob->data_ro || blob->md_ro);
    9501             : }
    9502             : 
    9503             : bool
    9504          52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9505             : {
    9506             :         struct spdk_blob_list *snapshot_entry;
    9507             : 
    9508          52 :         assert(blob != NULL);
    9509             : 
    9510          52 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9511          52 :         if (snapshot_entry == NULL) {
    9512          28 :                 return false;
    9513             :         }
    9514             : 
    9515          24 :         return true;
    9516             : }
    9517             : 
    9518             : bool
    9519          68 : spdk_blob_is_clone(struct spdk_blob *blob)
    9520             : {
    9521          68 :         assert(blob != NULL);
    9522             : 
    9523          68 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9524          52 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9525          40 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9526          40 :                 return true;
    9527             :         }
    9528             : 
    9529          28 :         return false;
    9530             : }
    9531             : 
    9532             : bool
    9533       36506 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9534             : {
    9535       36506 :         assert(blob != NULL);
    9536       36506 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9537             : }
    9538             : 
    /*
     * Public wrapper around blob_is_esnap_clone(); returns its result unchanged.
     */
    bool
    spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    {
        bool is_esnap_clone = blob_is_esnap_clone(blob);

        return is_esnap_clone;
    }
    9544             : 
    9545             : static void
    9546        3434 : blob_update_clear_method(struct spdk_blob *blob)
    9547             : {
    9548             :         enum blob_clear_method stored_cm;
    9549             : 
    9550        3434 :         assert(blob != NULL);
    9551             : 
    9552             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9553             :          * in metadata previously.  If something other than the default was
    9554             :          * specified, ignore stored value and used what was passed in.
    9555             :          */
    9556        3434 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9557             : 
    9558        3434 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9559        3434 :                 blob->clear_method = stored_cm;
    9560           0 :         } else if (blob->clear_method != stored_cm) {
    9561           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9562             :                              blob->clear_method, stored_cm);
    9563             :         }
    9564        3434 : }
    9565             : 
    9566             : spdk_blob_id
    9567         258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9568             : {
    9569         258 :         struct spdk_blob_list *snapshot_entry = NULL;
    9570         258 :         struct spdk_blob_list *clone_entry = NULL;
    9571             : 
    9572         494 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9573         732 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9574         496 :                         if (clone_entry->id == blob_id) {
    9575         168 :                                 return snapshot_entry->id;
    9576             :                         }
    9577             :                 }
    9578             :         }
    9579             : 
    9580          90 :         return SPDK_BLOBID_INVALID;
    9581             : }
    9582             : 
    9583             : int
    9584         196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9585             :                      size_t *count)
    9586             : {
    9587             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9588             :         size_t n;
    9589             : 
    9590         196 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9591         196 :         if (snapshot_entry == NULL) {
    9592          28 :                 *count = 0;
    9593          28 :                 return 0;
    9594             :         }
    9595             : 
    9596         168 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9597           8 :                 *count = snapshot_entry->clone_count;
    9598           8 :                 return -ENOMEM;
    9599             :         }
    9600         160 :         *count = snapshot_entry->clone_count;
    9601             : 
    9602         160 :         n = 0;
    9603         340 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9604         180 :                 ids[n++] = clone_entry->id;
    9605             :         }
    9606             : 
    9607         160 :         return 0;
    9608             : }
    9609             : 
    9610             : static void
    9611           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9612             : {
    9613             :         int rc;
    9614             : 
    9615           4 :         if (ctx->super->size == 0) {
    9616           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9617             :         }
    9618             : 
    9619           4 :         if (ctx->super->io_unit_size == 0) {
    9620           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9621             :         }
    9622             : 
    9623             :         /* Parse the super block */
    9624           4 :         ctx->bs->clean = 1;
    9625           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9626           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9627           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    9628           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    9629           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    9630             :         }
    9631           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9632           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9633           4 :         if (rc < 0) {
    9634           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9635           0 :                 return;
    9636             :         }
    9637           4 :         ctx->bs->md_start = ctx->super->md_start;
    9638           4 :         ctx->bs->md_len = ctx->super->md_len;
    9639           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9640           4 :         if (rc < 0) {
    9641           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9642           0 :                 return;
    9643             :         }
    9644             : 
    9645          12 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9646           8 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9647           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    9648           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9649             : 
    9650           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9651           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9652           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9653           0 :                 return;
    9654             :         } else {
    9655           4 :                 bs_load_read_used_pages(ctx);
    9656             :         }
    9657             : }
    9658             : 
    9659             : static void
    9660           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9661             : {
    9662           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9663             : 
    9664           4 :         if (bserrno != 0) {
    9665           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9666           0 :                 return;
    9667             :         }
    9668           4 :         bs_load_grow_continue(ctx);
    9669             : }
    9670             : 
    9671             : static void
    9672           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9673             : {
    9674           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9675             : 
    9676           4 :         if (bserrno != 0) {
    9677           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9678           0 :                 return;
    9679             :         }
    9680             : 
    9681           4 :         spdk_free(ctx->mask);
    9682             : 
    9683           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9684           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9685             :                               bs_load_grow_super_write_cpl, ctx);
    9686             : }
    9687             : 
    9688             : static void
    9689           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9690             : {
    9691           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9692             :         uint64_t                lba, lba_count;
    9693             :         uint64_t                dev_size;
    9694             :         uint64_t                total_clusters;
    9695             : 
    9696           4 :         if (bserrno != 0) {
    9697           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9698           0 :                 return;
    9699             :         }
    9700             : 
    9701             :         /* The type must be correct */
    9702           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9703             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9704           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9705             :                                              struct spdk_blob_md_page) * 8));
    9706           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9707           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9708           4 :         ctx->mask->length = total_clusters;
    9709             : 
    9710           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9711           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9712           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9713             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9714             : }
    9715             : 
    9716             : static void
    9717           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9718             : {
    9719             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9720             :         uint64_t lba, lba_count, mask_size;
    9721             : 
    9722           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9723           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9724           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9725           4 :                                 spdk_divide_round_up(total_clusters, 8),
    9726             :                                 SPDK_BS_PAGE_SIZE);
    9727           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9728             :         /* No necessary to grow or no space to grow */
    9729           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9730           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9731           0 :                 bs_load_grow_continue(ctx);
    9732           0 :                 return;
    9733             :         }
    9734             : 
    9735           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9736             : 
    9737           4 :         ctx->super->size = dev_size;
    9738           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9739           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9740             : 
    9741           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    9742           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    9743             :                                  SPDK_MALLOC_DMA);
    9744           4 :         if (!ctx->mask) {
    9745           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9746           0 :                 return;
    9747             :         }
    9748           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9749           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9750           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9751             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9752             : }
    9753             : 
    9754             : static void
    9755           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9756             : {
    9757           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9758             :         int rc;
    9759             : 
    9760           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9761           4 :         if (rc != 0) {
    9762           0 :                 bs_load_ctx_fail(ctx, rc);
    9763           0 :                 return;
    9764             :         }
    9765             : 
    9766           4 :         bs_load_try_to_grow(ctx);
    9767             : }
    9768             : 
    9769             : struct spdk_bs_grow_ctx {
    9770             :         struct spdk_blob_store          *bs;
    9771             :         struct spdk_bs_super_block      *super;
    9772             : 
    9773             :         struct spdk_bit_pool            *new_used_clusters;
    9774             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9775             : 
    9776             :         spdk_bs_sequence_t              *seq;
    9777             : };
    9778             : 
    9779             : static void
    9780          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9781             : {
    9782          32 :         if (bserrno != 0) {
    9783           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9784             :         }
    9785             : 
    9786          32 :         bs_sequence_finish(ctx->seq, bserrno);
    9787          32 :         free(ctx->new_used_clusters_mask);
    9788          32 :         spdk_free(ctx->super);
    9789          32 :         free(ctx);
    9790          32 : }
    9791             : 
    9792             : static void
    9793           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9794             : {
    9795           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9796           8 :         struct spdk_blob_store *bs = ctx->bs;
    9797             :         uint64_t total_clusters;
    9798             : 
    9799           8 :         if (bserrno != 0) {
    9800           0 :                 bs_grow_live_done(ctx, bserrno);
    9801           0 :                 return;
    9802             :         }
    9803             : 
    9804             :         /*
    9805             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9806             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9807             :          * yet execute.
    9808             :          * When cleanly unloaded, the used md pages will be written out.
    9809             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9810             :          * filling out the used_clusters with new size and writing it out.
    9811             :          */
    9812           8 :         bs->clean = 0;
    9813             : 
    9814             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9815             :          * that require to do so. */
    9816           8 :         spdk_spin_lock(&bs->used_lock);
    9817             : 
    9818           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9819             : 
    9820           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9821           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9822             : 
    9823           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9824           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9825             : 
    9826           8 :         spdk_bit_pool_free(&bs->used_clusters);
    9827           8 :         bs->used_clusters = ctx->new_used_clusters;
    9828             : 
    9829           8 :         bs->total_clusters = total_clusters;
    9830          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9831           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9832             : 
    9833           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9834           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9835           8 :         spdk_spin_unlock(&bs->used_lock);
    9836             : 
    9837           8 :         bs_grow_live_done(ctx, 0);
    9838             : }
    9839             : 
    9840             : static void
    9841          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9842             : {
    9843          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9844             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9845             :         int rc;
    9846             : 
    9847          32 :         if (bserrno != 0) {
    9848           0 :                 bs_grow_live_done(ctx, bserrno);
    9849           0 :                 return;
    9850             :         }
    9851             : 
    9852          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9853          32 :         if (rc != 0) {
    9854           4 :                 bs_grow_live_done(ctx, rc);
    9855           4 :                 return;
    9856             :         }
    9857             : 
    9858          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9859          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    9860          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9861          28 :                                 spdk_divide_round_up(total_clusters, 8),
    9862             :                                 SPDK_BS_PAGE_SIZE);
    9863          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9864             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9865          28 :         if (dev_size == ctx->super->size) {
    9866          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9867          16 :                 bs_grow_live_done(ctx, 0);
    9868          16 :                 return;
    9869             :         }
    9870             :         /*
    9871             :          * Blobstore cannot be shrunk, so check before if:
    9872             :          * - new size of the device is smaller than size in super_block
    9873             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9874             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9875             :          */
    9876          12 :         if (dev_size < ctx->super->size ||
    9877          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9878             :             used_cluster_mask_len > max_used_cluster_mask) {
    9879           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9880           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    9881           4 :                 return;
    9882             :         }
    9883             : 
    9884           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9885             : 
    9886           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9887           8 :         if (!ctx->new_used_clusters_mask) {
    9888           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9889           0 :                 return;
    9890             :         }
    9891           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9892           8 :         if (!ctx->new_used_clusters) {
    9893           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9894           0 :                 return;
    9895             :         }
    9896             : 
    9897           8 :         ctx->super->clean = 0;
    9898           8 :         ctx->super->size = dev_size;
    9899           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9900           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9901             : }
    9902             : 
    9903             : void
    9904          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9905             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9906             : {
    9907          32 :         struct spdk_bs_cpl      cpl;
    9908             :         struct spdk_bs_grow_ctx *ctx;
    9909             : 
    9910          32 :         assert(spdk_get_thread() == bs->md_thread);
    9911             : 
    9912          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9913             : 
    9914          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9915          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9916          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9917             : 
    9918          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9919          32 :         if (!ctx) {
    9920           0 :                 cb_fn(cb_arg, -ENOMEM);
    9921           0 :                 return;
    9922             :         }
    9923          32 :         ctx->bs = bs;
    9924             : 
    9925          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9926             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    9927          32 :         if (!ctx->super) {
    9928           0 :                 free(ctx);
    9929           0 :                 cb_fn(cb_arg, -ENOMEM);
    9930           0 :                 return;
    9931             :         }
    9932             : 
    9933          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9934          32 :         if (!ctx->seq) {
    9935           0 :                 spdk_free(ctx->super);
    9936           0 :                 free(ctx);
    9937           0 :                 cb_fn(cb_arg, -ENOMEM);
    9938           0 :                 return;
    9939             :         }
    9940             : 
    9941             :         /* Read the super block */
    9942          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9943          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9944             :                              bs_grow_live_load_super_cpl, ctx);
    9945             : }
    9946             : 
    9947             : void
    9948           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    9949             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    9950             : {
    9951           4 :         struct spdk_blob_store  *bs;
    9952           4 :         struct spdk_bs_cpl      cpl;
    9953           4 :         struct spdk_bs_load_ctx *ctx;
    9954           4 :         struct spdk_bs_opts     opts = {};
    9955             :         int err;
    9956             : 
    9957           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    9958             : 
    9959           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    9960           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    9961           0 :                 dev->destroy(dev);
    9962           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9963           0 :                 return;
    9964             :         }
    9965             : 
    9966           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
    9967           4 :         if (o) {
    9968           4 :                 if (bs_opts_copy(o, &opts)) {
    9969           0 :                         return;
    9970             :                 }
    9971             :         }
    9972             : 
    9973           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    9974           0 :                 dev->destroy(dev);
    9975           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9976           0 :                 return;
    9977             :         }
    9978             : 
    9979           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    9980           4 :         if (err) {
    9981           0 :                 dev->destroy(dev);
    9982           0 :                 cb_fn(cb_arg, NULL, err);
    9983           0 :                 return;
    9984             :         }
    9985             : 
    9986           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    9987           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
    9988           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
    9989           4 :         cpl.u.bs_handle.bs = bs;
    9990             : 
    9991           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9992           4 :         if (!ctx->seq) {
    9993           0 :                 spdk_free(ctx->super);
    9994           0 :                 free(ctx);
    9995           0 :                 bs_free(bs);
    9996           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9997           0 :                 return;
    9998             :         }
    9999             : 
   10000             :         /* Read the super block */
   10001           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
   10002           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
   10003             :                              bs_grow_load_super_cpl, ctx);
   10004             : }
   10005             : 
   10006             : int
   10007          24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
   10008             : {
   10009          24 :         if (!blob_is_esnap_clone(blob)) {
   10010          12 :                 return -EINVAL;
   10011             :         }
   10012             : 
   10013          12 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10014             : }
   10015             : 
   10016             : struct spdk_io_channel *
   10017        8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10018             : {
   10019        8840 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10020        8840 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10021        8840 :         struct blob_esnap_channel       find = {};
   10022             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10023             : 
   10024        8840 :         find.blob_id = blob->id;
   10025        8840 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10026        8840 :         if (spdk_likely(esnap_channel != NULL)) {
   10027        8796 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10028             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10029        8796 :                 return esnap_channel->channel;
   10030             :         }
   10031             : 
   10032          44 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10033             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10034             : 
   10035          44 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10036          44 :         if (esnap_channel == NULL) {
   10037           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10038             :                                find.blob_id);
   10039           0 :                 return NULL;
   10040             :         }
   10041          44 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10042          44 :         if (esnap_channel->channel == NULL) {
   10043           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10044           0 :                 free(esnap_channel);
   10045           0 :                 return NULL;
   10046             :         }
   10047          44 :         esnap_channel->blob_id = find.blob_id;
   10048          44 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10049          44 :         if (spdk_unlikely(existing != NULL)) {
   10050             :                 /*
   10051             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10052             :                  */
   10053           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10054           0 :                 assert(false);
   10055             : 
   10056             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10057             :                 free(esnap_channel);
   10058             : 
   10059             :                 return existing->channel;
   10060             :         }
   10061             : 
   10062          44 :         return esnap_channel->channel;
   10063             : }
   10064             : 
   10065             : static int
   10066        8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10067             : {
   10068        8816 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10069             : }
   10070             : 
   10071             : struct blob_esnap_destroy_ctx {
   10072             :         spdk_blob_op_with_handle_complete       cb_fn;
   10073             :         void                                    *cb_arg;
   10074             :         struct spdk_blob                        *blob;
   10075             :         struct spdk_bs_dev                      *back_bs_dev;
   10076             :         bool                                    abort_io;
   10077             : };
   10078             : 
   10079             : static void
   10080         136 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10081             : {
   10082         136 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10083         136 :         struct spdk_blob                *blob = ctx->blob;
   10084         136 :         struct spdk_blob_store          *bs = blob->bs;
   10085             : 
   10086         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10087             :                       blob->id);
   10088             : 
   10089         136 :         if (ctx->cb_fn != NULL) {
   10090         124 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10091             :         }
   10092         136 :         free(ctx);
   10093             : 
   10094         136 :         bs->esnap_channels_unloading--;
   10095         136 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10096           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10097             :         }
   10098         136 : }
   10099             : 
   10100             : static void
   10101         144 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10102             : {
   10103         144 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10104         144 :         struct spdk_blob                *blob = ctx->blob;
   10105         144 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10106         144 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10107         144 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10108             :         struct blob_esnap_channel       *esnap_channel;
   10109         144 :         struct blob_esnap_channel       find = {};
   10110             : 
   10111         144 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10112             : 
   10113         144 :         find.blob_id = blob->id;
   10114         144 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10115         144 :         if (esnap_channel != NULL) {
   10116          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10117             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10118          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10119             : 
   10120          12 :                 if (ctx->abort_io) {
   10121             :                         spdk_bs_user_op_t *op, *tmp;
   10122             : 
   10123           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10124           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10125           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10126           0 :                                         bs_user_op_abort(op, -EIO);
   10127             :                                 }
   10128             :                         }
   10129             :                 }
   10130             : 
   10131          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10132          12 :                 free(esnap_channel);
   10133             :         }
   10134             : 
   10135         144 :         spdk_for_each_channel_continue(i, 0);
   10136         144 : }
   10137             : 
   10138             : /*
   10139             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10140             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10141             :  */
   10142             : static void
   10143         484 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10144             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10145             : {
   10146             :         struct blob_esnap_destroy_ctx   *ctx;
   10147             : 
   10148         484 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10149         348 :                 if (cb_fn != NULL) {
   10150         348 :                         cb_fn(cb_arg, blob, 0);
   10151             :                 }
   10152         348 :                 return;
   10153             :         }
   10154             : 
   10155         136 :         ctx = calloc(1, sizeof(*ctx));
   10156         136 :         if (ctx == NULL) {
   10157           0 :                 if (cb_fn != NULL) {
   10158           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10159             :                 }
   10160           0 :                 return;
   10161             :         }
   10162         136 :         ctx->cb_fn = cb_fn;
   10163         136 :         ctx->cb_arg = cb_arg;
   10164         136 :         ctx->blob = blob;
   10165         136 :         ctx->back_bs_dev = blob->back_bs_dev;
   10166         136 :         ctx->abort_io = abort_io;
   10167             : 
   10168         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10169             :                       blob->id);
   10170             : 
   10171         136 :         blob->bs->esnap_channels_unloading++;
   10172         136 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10173             :                               blob_esnap_destroy_channels_done);
   10174             : }
   10175             : 
   10176             : /*
   10177             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10178             :  * bs_channel is destroyed.
   10179             :  */
   10180             : static void
   10181        1025 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10182             : {
   10183             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10184             : 
   10185        1025 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10186             : 
   10187        1025 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10188             :                       spdk_thread_get_name(spdk_get_thread()));
   10189        1057 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10190             :                         esnap_channel_tmp) {
   10191          32 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10192             :                               ": destroying one channel in thread %s\n",
   10193             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10194          32 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10195          32 :                 spdk_put_io_channel(esnap_channel->channel);
   10196          32 :                 free(esnap_channel);
   10197             :         }
   10198        1025 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10199             :                       spdk_thread_get_name(spdk_get_thread()));
   10200        1025 : }
   10201             : 
   10202             : static void
   10203          28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10204             : {
   10205          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10206             : 
   10207          28 :         if (bserrno != 0) {
   10208             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10209           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10210             :                             bserrno);
   10211             :         }
   10212          28 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10213          28 :         free(ctx);
   10214          28 : }
   10215             : 
   10216             : static void
   10217          28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10218             : {
   10219          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10220             :         int rc;
   10221             : 
   10222          28 :         if (bserrno != 0) {
   10223           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10224             :                             blob->id, bserrno);
   10225           0 :                 ctx->bserrno = bserrno;
   10226           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10227           0 :                 return;
   10228             :         }
   10229             : 
   10230          28 :         if (blob->back_bs_dev != NULL) {
   10231          28 :                 blob_unref_back_bs_dev(blob);
   10232             :         }
   10233             : 
   10234          28 :         if (ctx->parent_refs_cb_fn) {
   10235          20 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10236          20 :                 if (rc != 0) {
   10237           0 :                         ctx->bserrno = rc;
   10238           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10239           0 :                         return;
   10240             :                 }
   10241             :         }
   10242             : 
   10243          28 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10244          28 :         blob->back_bs_dev = ctx->back_bs_dev;
   10245          28 :         ctx->bserrno = 0;
   10246             : 
   10247          28 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10248             : }
   10249             : 
   10250             : static void
   10251          28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10252             : {
   10253          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10254          28 :         struct spdk_blob        *blob = ctx->blob;
   10255             : 
   10256          28 :         if (bserrno != 0) {
   10257           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10258             :                             bserrno);
   10259           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10260           0 :                 free(ctx);
   10261           0 :                 return;
   10262             :         }
   10263             : 
   10264             :         /*
   10265             :          * This does not prevent future reads from the esnap device because any future IO will
   10266             :          * lazily create a new esnap IO channel.
   10267             :          */
   10268          28 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10269             : }
   10270             : 
   10271             : void
   10272           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10273             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10274             : {
   10275           8 :         if (!blob_is_esnap_clone(blob)) {
   10276           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10277           0 :                 cb_fn(cb_arg, -EINVAL);
   10278           0 :                 return;
   10279             :         }
   10280             : 
   10281           8 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10282             : }
   10283             : 
   10284             : struct spdk_bs_dev *
   10285           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10286             : {
   10287           4 :         if (!blob_is_esnap_clone(blob)) {
   10288           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10289           0 :                 return NULL;
   10290             :         }
   10291             : 
   10292           4 :         return blob->back_bs_dev;
   10293             : }
   10294             : 
   10295             : bool
   10296          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10297             : {
   10298          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10299           4 :                 return true;
   10300             :         }
   10301          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10302          12 :                 return false;
   10303             :         }
   10304             : 
   10305          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10306             : }
   10307             : 
   10308           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
   10309           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
   10310             : 
   10311             : static void
   10312           0 : blob_trace(void)
   10313             : {
   10314           0 :         struct spdk_trace_tpoint_opts opts[] = {
   10315             :                 {
   10316             :                         "BLOB_REQ_SET_START", TRACE_BLOB_REQ_SET_START,
   10317             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 1,
   10318             :                         {
   10319             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10320             :                         }
   10321             :                 },
   10322             :                 {
   10323             :                         "BLOB_REQ_SET_COMPLETE", TRACE_BLOB_REQ_SET_COMPLETE,
   10324             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 0,
   10325             :                         {
   10326             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10327             :                         }
   10328             :                 },
   10329             :         };
   10330             : 
   10331           0 :         spdk_trace_register_object(OBJECT_BLOB_CB_ARG, 'a');
   10332           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
   10333           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_BLOB_CB_ARG, 1);
   10334           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_BLOB_CB_ARG, 0);
   10335           0 : }
   10336           3 : SPDK_TRACE_REGISTER_FN(blob_trace, "blob", TRACE_GROUP_BLOB)

Generated by: LCOV version 1.15