LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4116 5144 80.0 %
Date: 2024-07-15 19:28:51 Functions: 339 360 94.2 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : 
      20             : #include "spdk_internal/assert.h"
      21             : #include "spdk/log.h"
      22             : 
      23             : #include "blobstore.h"
      24             : 
      25             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      26             : 
                       /*
                        * Forward declarations of file-local (static) helpers so that they can be
                        * referenced ahead of their definitions (e.g. blob_freeze_io() is used by
                        * blob_set_back_bs_dev() before it is defined further down).
                        */
      27             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      28             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      29             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      30             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      31             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      32             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      33             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      34             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : 
                       /* Extended-attribute helpers; "internal" selects between two xattr namespaces (see blob_alloc()'s xattrs / xattrs_internal lists). */
      36             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      37             :                           uint16_t value_len, bool internal);
      38             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      39             :                                 const void **value, size_t *value_len, bool internal);
      40             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      41             : 
      42             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      43             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      44             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      45             : 
      46             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      47             : 
      48             : /*
      49             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      50             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      51             :  * channel is destroyed, all the channels in the tree are destroyed.
      52             :  */
      53             : 
                       /* Element type of the blob_esnap_channel_tree red-black tree generated below. */
      54             : struct blob_esnap_channel {
                               /* Red-black tree linkage (used by RB_GENERATE_STATIC below). */
      55             :         RB_ENTRY(blob_esnap_channel)    node;
                               /* Blob this channel belongs to; presumably the key compared by blob_esnap_channel_compare() - TODO confirm. */
      56             :         spdk_blob_id                    blob_id;
                               /* I/O channel associated with this entry. */
      57             :         struct spdk_io_channel          *channel;
      58             : };
      59             : 
                       /*
                        * Forward declarations for the esnap channel helpers, followed by the
                        * generated static red-black tree functions for blob_esnap_channel_tree
                        * (element type struct blob_esnap_channel, linkage field "node", ordered by
                        * blob_esnap_channel_compare()).
                        */
      60             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      61             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      62             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      63             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      64             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      65       10245 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      66             : 
                       /*
                        * Report whether the SPDK_BLOB_EXTERNAL_SNAPSHOT bit is set in
                        * blob->invalid_flags.
                        *
                        * \param blob Blob to test; must not be NULL (asserted).
                        * \return true if the flag is set, false otherwise.
                        */
      67             : static inline bool
      68       49582 : blob_is_esnap_clone(const struct spdk_blob *blob)
      69             : {
      70       49582 :         assert(blob != NULL);
                               /* "!!" collapses the masked value to exactly 0 or 1 before conversion to bool. */
      71       49582 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      72             : }
      73             : 
                       /*
                        * Three-way comparison of two blobs by their id.
                        *
                        * \param blob1 First blob; must not be NULL (asserted).
                        * \param blob2 Second blob; must not be NULL (asserted).
                        * \return -1 if blob1->id < blob2->id, 1 if greater, 0 if equal.
                        */
      74             : static int
      75        2289 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      76             : {
      77        2289 :         assert(blob1 != NULL && blob2 != NULL);
                               /* The second operand of ?: is a boolean comparison, yielding 1 or 0. */
      78        2289 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      79             : }
      80             : 
                       /* Generate the static red-black tree functions for spdk_blob_tree: elements are struct spdk_blob, linked through "link", ordered by blob_id_cmp(). */
      81       14741 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      82             : 
                       /*
                        * Sanity checks for a metadata operation on a blob. Consists solely of
                        * assert()s, so it has no effect when assertions are compiled out.
                        *
                        * Checks that blob is non-NULL, that the calling thread is the blobstore's
                        * md_thread, and that the blob is not in SPDK_BLOB_STATE_LOADING.
                        */
      83             : static void
      84       37031 : blob_verify_md_op(struct spdk_blob *blob)
      85             : {
      86       37031 :         assert(blob != NULL);
      87       37031 :         assert(spdk_get_thread() == blob->bs->md_thread);
      88       37031 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      89       37031 : }
      90             : 
                       /*
                        * Linear search of bs->snapshots for the entry whose id equals blobid.
                        *
                        * \param bs Blobstore whose snapshot list is searched.
                        * \param blobid Id to look for.
                        * \return The matching list entry, or NULL when no entry matches.
                        */
      91             : static struct spdk_blob_list *
      92        3828 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      93             : {
      94        3828 :         struct spdk_blob_list *snapshot_entry = NULL;
      95             : 
                               /* TAILQ_FOREACH leaves snapshot_entry NULL if the list is exhausted without a break. */
      96        4808 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      97        1756 :                 if (snapshot_entry->id == blobid) {
      98         776 :                         break;
      99             :                 }
     100             :         }
     101             : 
     102        3828 :         return snapshot_entry;
     103             : }
     104             : 
                       /*
                        * Mark metadata page "page" as used in bs->used_md_pages.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, page is within the
                        * bit array's capacity, and the page is not already marked used.
                        */
     105             : static void
     106        2904 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     107             : {
     108        2904 :         assert(spdk_spin_held(&bs->used_lock));
     109        2904 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     110        2904 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     111             : 
     112        2904 :         spdk_bit_array_set(bs->used_md_pages, page);
     113        2904 : }
     114             : 
                       /*
                        * Mark metadata page "page" as free in bs->used_md_pages.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, page is within the
                        * bit array's capacity, and the page is currently marked used.
                        */
     115             : static void
     116        2200 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     117             : {
     118        2200 :         assert(spdk_spin_held(&bs->used_lock));
     119        2200 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     120        2200 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     121             : 
     122        2200 :         spdk_bit_array_clear(bs->used_md_pages, page);
     123        2200 : }
     124             : 
                       /*
                        * Allocate one cluster from the bs->used_clusters bit pool and decrement
                        * bs->num_free_clusters.
                        *
                        * Precondition (asserted only): bs->used_lock is held.
                        *
                        * \return Index of the claimed cluster, or UINT32_MAX if the pool returned
                        * UINT32_MAX (in which case num_free_clusters is left unchanged).
                        */
     125             : static uint32_t
     126        8220 : bs_claim_cluster(struct spdk_blob_store *bs)
     127             : {
     128             :         uint32_t cluster_num;
     129             : 
     130        8220 :         assert(spdk_spin_held(&bs->used_lock));
     131             : 
     132        8220 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     133        8220 :         if (cluster_num == UINT32_MAX) {
     134           0 :                 return UINT32_MAX;
     135             :         }
     136             : 
     137        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     138        8220 :         bs->num_free_clusters--;
     139             : 
     140        8220 :         return cluster_num;
     141             : }
     142             : 
                       /*
                        * Return cluster "cluster_num" to the bs->used_clusters bit pool and
                        * increment bs->num_free_clusters.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, cluster_num is within
                        * the pool's capacity and currently allocated, and num_free_clusters is
                        * below total_clusters.
                        */
     143             : static void
     144        2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     145             : {
     146        2399 :         assert(spdk_spin_held(&bs->used_lock));
     147        2399 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     148        2399 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     149        2399 :         assert(bs->num_free_clusters < bs->total_clusters);
     150             : 
     151        2399 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     152             : 
     153        2399 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     154        2399 :         bs->num_free_clusters++;
     155        2399 : }
     156             : 
                       /*
                        * Record a physical cluster in the blob's active cluster map.
                        *
                        * \param blob Blob to update; subject to the blob_verify_md_op() checks.
                        * \param cluster_num Index into blob->active.clusters. Not bounds-checked here.
                        * \param cluster Cluster to store; converted to an LBA via bs_cluster_to_lba().
                        * \return 0 on success (num_allocated_clusters is incremented), or -EEXIST
                        * if the map slot already holds a non-zero LBA (the slot is left untouched).
                        */
     157             : static int
     158        8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     159             : {
     160        8220 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     161             : 
     162        8220 :         blob_verify_md_op(blob);
     163             : 
                               /* A zero LBA is treated as "slot not yet allocated". */
     164        8220 :         if (*cluster_lba != 0) {
     165           4 :                 return -EEXIST;
     166             :         }
     167             : 
     168        8216 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     169        8216 :         blob->active.num_allocated_clusters++;
     170             : 
     171        8216 :         return 0;
     172             : }
     173             : 
                       /*
                        * Claim a free cluster for logical cluster "cluster_num" of a blob and, when
                        * the blob uses an extent table and the covering extent page is still
                        * unallocated (0), also claim a metadata page for it.
                        *
                        * Precondition (asserted only): blob->bs->used_lock is held.
                        *
                        * \param blob Blob receiving the cluster.
                        * \param cluster_num Logical cluster index within the blob.
                        * \param cluster Out: the claimed cluster (UINT32_MAX on -ENOSPC from the pool).
                        * \param lowest_free_md_page In: index to start the free md page search from
                        * (0 is bumped to 1). Out: the md page that was claimed, if one was needed.
                        * \param update_map If true, the blob's cluster map (and extent page slot) is
                        * updated here; if false only the claims are made, presumably so the caller
                        * can insert them later - TODO confirm against callers.
                        * \return 0 on success, -ENOSPC if no cluster or no md page is available
                        * (a cluster claimed before the md page search failed is released again).
                        */
     174             : static int
     175        8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     176             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     177             : {
                               /* NOTE(review): pointer initialised with 0; NULL would be the idiomatic spelling. */
     178        8220 :         uint32_t *extent_page = 0;
     179             : 
     180        8220 :         assert(spdk_spin_held(&blob->bs->used_lock));
     181             : 
     182        8220 :         *cluster = bs_claim_cluster(blob->bs);
     183        8220 :         if (*cluster == UINT32_MAX) {
     184             :                 /* No more free clusters. Cannot satisfy the request */
     185           0 :                 return -ENOSPC;
     186             :         }
     187             : 
     188        8220 :         if (blob->use_extent_table) {
     189        4168 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     190        4168 :                 if (*extent_page == 0) {
     191             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     192         728 :                         if (*lowest_free_md_page == 0) {
     193         726 :                                 *lowest_free_md_page = 1;
     194             :                         }
     195             :                         /* No extent_page is allocated for the cluster */
     196         728 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     197             :                                                *lowest_free_md_page);
     198         728 :                         if (*lowest_free_md_page == UINT32_MAX) {
     199             :                                 /* No more free md pages. Cannot satisfy the request */
     200           0 :                                 bs_release_cluster(blob->bs, *cluster);
     201           0 :                                 return -ENOSPC;
     202             :                         }
     203         728 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     204             :                 }
     205             :         }
     206             : 
     207        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     208             :                       blob->id);
     209             : 
     210        8220 :         if (update_map) {
                                       /*
                                        * NOTE(review): the return value of blob_insert_cluster() is ignored.
                                        * If it returns -EEXIST the cluster claimed above stays claimed and
                                        * this function still returns 0 - confirm this cannot happen here.
                                        */
     211        7404 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     212        7404 :                 if (blob->use_extent_table && *extent_page == 0) {
     213         644 :                         *extent_page = *lowest_free_md_page;
     214             :                 }
     215             :         }
     216             : 
     217        8220 :         return 0;
     218             : }
     219             : 
                       /* Reset an xattr options structure to "no xattrs": count 0 and all pointers NULL. */
     220             : static void
     221        5582 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     222             : {
     223        5582 :         xattrs->count = 0;
     224        5582 :         xattrs->names = NULL;
     225        5582 :         xattrs->ctx = NULL;
     226        5582 :         xattrs->get_value = NULL;
     227        5582 : }
     228             : 
                       /*
                        * Initialise a blob options structure with default values.
                        *
                        * The first opts_size bytes are zeroed, opts->opts_size is recorded, and each
                        * default is then written only if the whole field lies within opts_size
                        * bytes of the structure (presumably so callers built against a smaller
                        * version of the structure are not written past - TODO confirm).
                        *
                        * Defaults: num_clusters 0, thin_provision false, clear_method
                        * BLOB_CLEAR_WITH_DEFAULT, xattrs empty, use_extent_table true.
                        *
                        * \param opts Structure to initialise. If NULL, an error is logged and the
                        * function returns without doing anything.
                        * \param opts_size Size in bytes of the caller's structure. If 0, an error is
                        * logged and opts is left untouched.
                        */
     229             : void
     230        3688 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     231             : {
     232        3688 :         if (!opts) {
     233           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     234           0 :                 return;
     235             :         }
     236             : 
     237        3688 :         if (!opts_size) {
     238           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     239           0 :                 return;
     240             :         }
     241             : 
                               /* NOTE(review): opts->opts_size is written unconditionally, so opts_size is assumed to cover at least that field. */
     242        3688 :         memset(opts, 0, opts_size);
     243        3688 :         opts->opts_size = opts_size;
     244             : 
                               /* FIELD_OK: true when "field" ends at or before opts_size bytes into the structure. */
     245             : #define FIELD_OK(field) \
     246             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     247             : 
                               /* SET_FIELD: assign "value" to "field" only when FIELD_OK(field) holds. */
     248             : #define SET_FIELD(field, value) \
     249             :         if (FIELD_OK(field)) { \
     250             :                 opts->field = value; \
     251             :         } \
     252             : 
     253        3688 :         SET_FIELD(num_clusters, 0);
     254        3688 :         SET_FIELD(thin_provision, false);
     255        3688 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     256             : 
     257        3688 :         if (FIELD_OK(xattrs)) {
     258        3688 :                 blob_xattrs_init(&opts->xattrs);
     259             :         }
     260             : 
     261        3688 :         SET_FIELD(use_extent_table, true);
     262             : 
     263             : #undef FIELD_OK
     264             : #undef SET_FIELD
     265             : }
     266             : 
                       /*
                        * Initialise a blob open-options structure with default values.
                        *
                        * The first opts_size bytes are zeroed, opts->opts_size is recorded, and
                        * clear_method is set to BLOB_CLEAR_WITH_DEFAULT if that field lies entirely
                        * within opts_size bytes of the structure.
                        *
                        * \param opts Structure to initialise. If NULL, an error is logged and the
                        * function returns without doing anything.
                        * \param opts_size Size in bytes of the caller's structure. If 0, an error is
                        * logged and opts is left untouched.
                        */
     267             : void
     268        3478 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     269             : {
     270        3478 :         if (!opts) {
     271           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     272           0 :                 return;
     273             :         }
     274             : 
     275        3478 :         if (!opts_size) {
     276           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     277           0 :                 return;
     278             :         }
     279             : 
     280        3478 :         memset(opts, 0, opts_size);
     281        3478 :         opts->opts_size = opts_size;
     282             : 
                               /* FIELD_OK: true when "field" ends at or before opts_size bytes into the structure. */
     283             : #define FIELD_OK(field) \
     284             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     285             : 
                               /* SET_FIELD: assign "value" to "field" only when FIELD_OK(field) holds. */
     286             : #define SET_FIELD(field, value) \
     287             :         if (FIELD_OK(field)) { \
     288             :                 opts->field = value; \
     289             :         } \
     290             : 
     291        3478 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     292             : 
     293             : #undef FIELD_OK
                               /*
                                * NOTE(review): "SET_FILED" is a misspelling of SET_FIELD. Undefining an
                                * unknown macro is silently accepted, so SET_FIELD remains defined for
                                * the rest of the translation unit. Should read "#undef SET_FIELD".
                                */
     294             : #undef SET_FILED
     295             : }
     296             : 
                       /*
                        * Allocate and initialise an in-memory blob object.
                        *
                        * The blob is zero-allocated, then given the supplied id and blobstore, an
                        * invalid parent id, state SPDK_BLOB_STATE_DIRTY, and a one-element active
                        * page array holding bs_blobid_to_page(id). Its xattr and persist lists are
                        * initialised empty.
                        *
                        * \return The new blob, or NULL if either allocation fails (nothing is
                        * leaked in that case). blob_free() releases what is allocated here.
                        */
     297             : static struct spdk_blob *
     298        5368 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     299             : {
     300             :         struct spdk_blob *blob;
     301             : 
     302        5368 :         blob = calloc(1, sizeof(*blob));
     303        5368 :         if (!blob) {
     304           0 :                 return NULL;
     305             :         }
     306             : 
     307        5368 :         blob->id = id;
     308        5368 :         blob->bs = bs;
     309             : 
     310        5368 :         blob->parent_id = SPDK_BLOBID_INVALID;
     311             : 
     312        5368 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     313        5368 :         blob->extent_rle_found = false;
     314        5368 :         blob->extent_table_found = false;
     315        5368 :         blob->active.num_pages = 1;
     316        5368 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     317        5368 :         if (!blob->active.pages) {
     318           0 :                 free(blob);
     319           0 :                 return NULL;
     320             :         }
     321             : 
                               /* The first (and so far only) metadata page is derived from the blob id. */
     322        5368 :         blob->active.pages[0] = bs_blobid_to_page(id);
     323             : 
     324        5368 :         TAILQ_INIT(&blob->xattrs);
     325        5368 :         TAILQ_INIT(&blob->xattrs_internal);
     326        5368 :         TAILQ_INIT(&blob->pending_persists);
     327        5368 :         TAILQ_INIT(&blob->persists_to_complete);
     328             : 
     329        5368 :         return blob;
     330             : }
     331             : 
                       /*
                        * Remove every entry from an xattr list and free its name, its value and the
                        * entry itself. The list head is not freed and ends up empty.
                        */
     332             : static void
     333       10736 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     334             : {
     335             :         struct spdk_xattr       *xattr, *xattr_tmp;
     336             : 
                               /* The _SAFE variant is required because the current entry is freed inside the loop. */
     337       12502 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     338        1766 :                 TAILQ_REMOVE(xattrs, xattr, link);
     339        1766 :                 free(xattr->name);
     340        1766 :                 free(xattr->value);
     341        1766 :                 free(xattr);
     342             :         }
     343       10736 : }
     344             : 
                       /*
                        * Drop this blob's reference to its back_bs_dev.
                        *
                        * If the blob's back_bs_dev_link has neither a previous nor a next element,
                        * the blob is treated as the last user and back_bs_dev->destroy() is called.
                        * Otherwise the blob is unlinked from its neighbours by patching their link
                        * pointers directly. In both cases blob->back_bs_dev is set to NULL.
                        *
                        * blob->back_bs_dev must be non-NULL on entry (it is dereferenced on the
                        * destroy path); blob_free() checks this before calling.
                        */
     345             : static void
     346        1116 : blob_back_bs_dev_unref(struct spdk_blob *blob)
     347             : {
     348        1116 :         struct spdk_blob **le_prev = blob->back_bs_dev_link.le_prev;
     349        1116 :         struct spdk_blob *le_next = blob->back_bs_dev_link.le_next;
     350             : 
     351        1116 :         if (!le_next && !le_prev) {
     352             :                 /* If this is the last reference to the back_bs_dev, destroy it. */
     353        1112 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
     354             :         } else {
     355             :                 /* Remove the reference to back_bs_dev. */
     356           4 :                 if (le_prev) {
     357           0 :                         *le_prev = le_next;
     358             :                 }
     359             : 
     360           4 :                 if (le_next) {
     361           4 :                         le_next->back_bs_dev_link.le_prev = le_prev;
     362             :                 }
     363             :         }
     364             : 
     365        1116 :         blob->back_bs_dev = NULL;
     366        1116 : }
     367             : 
                       /*
                        * Release an in-memory blob object and everything it owns: the active and
                        * clean extent page, cluster and page arrays, both xattr lists, its
                        * back_bs_dev reference (if any), and finally the blob itself.
                        *
                        * Preconditions (asserted only): blob is non-NULL and both persist queues
                        * are empty.
                        */
     368             : static void
     369        5368 : blob_free(struct spdk_blob *blob)
     370             : {
     371        5368 :         assert(blob != NULL);
     372        5368 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     373        5368 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     374             : 
                               /* free(NULL) is a no-op, so arrays that were never allocated need no check. */
     375        5368 :         free(blob->active.extent_pages);
     376        5368 :         free(blob->clean.extent_pages);
     377        5368 :         free(blob->active.clusters);
     378        5368 :         free(blob->clean.clusters);
     379        5368 :         free(blob->active.pages);
     380        5368 :         free(blob->clean.pages);
     381             : 
     382        5368 :         xattrs_free(&blob->xattrs);
     383        5368 :         xattrs_free(&blob->xattrs_internal);
     384             : 
     385        5368 :         if (blob->back_bs_dev) {
     386        1088 :                 blob_back_bs_dev_unref(blob);
     387             :         }
     388             : 
     389        5368 :         free(blob);
     390        5368 : }
     391             : 
                       /*
                        * Completion callback passed to blob_esnap_destroy_bs_dev_channels() by
                        * blob_back_bs_destroy(). Destroys the bs_dev handed over in ctx.
                        *
                        * \param ctx The struct spdk_bs_dev to destroy (blob_back_bs_destroy() passes
                        * the blob's former back_bs_dev).
                        * \param blob Blob the channels belonged to; only its id is read, for logging.
                        * \param bserrno 0 on success; non-zero is logged and trips an assert.
                        */
     392             : static void
     393         328 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     394             : {
     395         328 :         struct spdk_bs_dev      *bs_dev = ctx;
     396             : 
     397         328 :         if (bserrno != 0) {
     398             :                 /*
     399             :                  * This is probably due to a memory allocation failure when creating the
     400             :                  * blob_esnap_destroy_ctx before iterating threads.
     401             :                  */
     402           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     403             :                             blob->id, bserrno);
                                       /* NOTE(review): with asserts compiled out, execution continues and bs_dev is still destroyed below. */
     404           0 :                 assert(false);
     405             :         }
     406             : 
     407         328 :         if (bs_dev == NULL) {
     408             :                 /*
     409             :                  * This check exists to make scanbuild happy.
     410             :                  *
     411             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     412             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     413             :                  * opening the esnap device. In each of these cases, no channels could have been
     414             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     415             :                  * deref.
     416             :                  */
     417           0 :                 assert(false);
     418             :                 return;
     419             :         }
     420             : 
     421         328 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     422         328 :         bs_dev->destroy(bs_dev);
     423             : }
     424             : 
                       /*
                        * Begin tearing down a blob's back_bs_dev.
                        *
                        * Requests destruction of the blob's esnap channels (abort_io = false) with
                        * blob_back_bs_destroy_esnap_done() as the completion and the current
                        * back_bs_dev as its context, then clears blob->back_bs_dev. From that point
                        * the completion callback is the only holder of the device pointer and is
                        * responsible for calling destroy() on it.
                        */
     425             : static void
     426         328 : blob_back_bs_destroy(struct spdk_blob *blob)
     427             : {
     428         328 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     429             :                       blob->id);
     430             : 
     431         328 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     432         328 :                                            blob->back_bs_dev);
     433         328 :         blob->back_bs_dev = NULL;
     434         328 : }
     435             : 
                       /*
                        * Description of a blob's parent, passed to a set_parent_refs_cb. The union
                        * holds one of two alternatives; nothing in this structure records which one
                        * is active, so that is presumably implied by the callback in use - TODO
                        * confirm.
                        */
     436             : struct blob_parent {
     437             :         union {
                                       /* Parent is a snapshot blob, given by id and blob pointer. */
     438             :                 struct {
     439             :                         spdk_blob_id id;
     440             :                         struct spdk_blob *blob;
     441             :                 } snapshot;
     442             : 
                                       /* Parent is an external snapshot: an opaque id of id_len bytes plus its bs_dev. */
     443             :                 struct {
     444             :                         void *id;
     445             :                         uint32_t id_len;
     446             :                         struct spdk_bs_dev *back_bs_dev;
     447             :                 } esnap;
     448             :         } u;
     449             : };
     450             : 
                       /* Callback type that updates a blob's parent references from "parent"; returns an int status (presumably 0 or a negative errno - TODO confirm). */
     451             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     452             : 
                       /*
                        * Context allocated by blob_set_back_bs_dev() and handed to
                        * blob_set_back_bs_dev_frozen() once I/O on the blob has been frozen.
                        */
     453             : struct set_bs_dev_ctx {
                               /* Blob whose back_bs_dev is being replaced. */
     454             :         struct spdk_blob        *blob;
                               /* The new back_bs_dev to install. */
     455             :         struct spdk_bs_dev      *back_bs_dev;
     456             : 
     457             :         /*
     458             :          * This callback is used during a set parent operation to change the references
     459             :          * to the parent of the blob.
     460             :          */
     461             :         set_parent_refs_cb      parent_refs_cb_fn;
     462             :         struct blob_parent      *parent_refs_cb_arg;
     463             : 
                               /* Caller's completion callback and its argument. */
     464             :         spdk_blob_op_complete   cb_fn;
     465             :         void                    *cb_arg;
                               /* Not written by blob_set_back_bs_dev(); presumably records a status for later stages - TODO confirm. */
     466             :         int                     bserrno;
     467             : };
     468             : 
                       /*
                        * Start replacing a blob's back_bs_dev.
                        *
                        * Allocates a set_bs_dev_ctx, stores all arguments in it and freezes I/O on
                        * the blob; the remaining work happens in blob_set_back_bs_dev_frozen(),
                        * which receives the context. The context is not freed here.
                        *
                        * \param blob Blob to modify.
                        * \param back_bs_dev New backing device.
                        * \param parent_refs_cb_fn Callback that updates the parent references.
                        * \param parent_refs_cb_arg Argument for parent_refs_cb_fn.
                        * \param cb_fn Completion callback; invoked directly with -ENOMEM if the
                        * context cannot be allocated.
                        * \param cb_arg Argument for cb_fn.
                        */
     469             : static void
     470          28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     471             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     472             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     473             : {
     474             :         struct set_bs_dev_ctx   *ctx;
     475             : 
     476          28 :         ctx = calloc(1, sizeof(*ctx));
     477          28 :         if (ctx == NULL) {
     478           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     479             :                             blob->id);
     480           0 :                 cb_fn(cb_arg, -ENOMEM);
     481           0 :                 return;
     482             :         }
     483             : 
     484          28 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     485          28 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     486          28 :         ctx->cb_fn = cb_fn;
     487          28 :         ctx->cb_arg = cb_arg;
     488          28 :         ctx->back_bs_dev = back_bs_dev;
     489          28 :         ctx->blob = blob;
     490             : 
     491          28 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     492             : }
     493             : 
                       /* Context shared by blob_freeze_io() and blob_unfreeze_io() while iterating over channels. */
     494             : struct freeze_io_ctx {
                               /* Holds the caller's completion callback and argument (u.blob_basic). */
     495             :         struct spdk_bs_cpl cpl;
                               /* Blob being frozen or unfrozen. */
     496             :         struct spdk_blob *blob;
     497             : };
     498             : 
                       /*
                        * Per-channel step used by blob_freeze_io(). Does no work on the channel and
                        * immediately continues the iteration with status 0; presumably visiting
                        * every channel is itself the synchronisation point - TODO confirm.
                        */
     499             : static void
     500         530 : blob_io_sync(struct spdk_io_channel_iter *i)
     501             : {
     502         530 :         spdk_for_each_channel_continue(i, 0);
     503         530 : }
     504             : 
                       /*
                        * Per-channel step used by blob_unfreeze_io(). Walks the channel's queued_io
                        * list and, for every queued user operation that targets ctx->blob, removes
                        * it from the queue and executes it. Operations for other blobs stay queued.
                        * The iteration is then continued with status 0.
                        */
     505             : static void
     506         518 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     507             : {
     508         518 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     509         518 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     510         518 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     511             :         struct spdk_bs_request_set      *set;
     512             :         struct spdk_bs_user_op_args     *args;
     513             :         spdk_bs_user_op_t *op, *tmp;
     514             : 
                               /* The _SAFE variant is required because entries are removed while iterating. */
     515         522 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
                                       /* A user op is viewed as a request set to reach its user_op arguments. */
     516           4 :                 set = (struct spdk_bs_request_set *)op;
     517           4 :                 args = &set->u.user_op;
     518             : 
     519           4 :                 if (args->blob == ctx->blob) {
     520           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     521           4 :                         bs_user_op_execute(op);
     522             :                 }
     523             :         }
     524             : 
     525         518 :         spdk_for_each_channel_continue(i, 0);
     526         518 : }
     527             : 
                       /*
                        * Completion of the channel iteration started by blob_freeze_io() or
                        * blob_unfreeze_io(). Invokes the caller's callback stored in the context
                        * and frees the context.
                        *
                        * NOTE(review): "status" is ignored and the callback is always invoked with
                        * 0. Both per-channel steps in this file continue with status 0, so nothing
                        * is lost today, but a future non-zero status would be dropped.
                        */
     528             : static void
     529        1016 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     530             : {
     531        1016 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     532             : 
     533        1016 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     534             : 
     535        1016 :         free(ctx);
     536        1016 : }
     537             : 
                       /*
                        * Freeze I/O on a blob: increment blob->frozen_refcnt, then visit every
                        * channel of the blobstore (blob_io_sync) and call cb_fn when the iteration
                        * completes (via blob_io_cpl, which also frees the context).
                        *
                        * Subject to the blob_verify_md_op() checks.
                        *
                        * \param blob Blob to freeze.
                        * \param cb_fn Completion callback. Called directly with -ENOMEM if the
                        * context cannot be allocated, in which case frozen_refcnt is unchanged.
                        * \param cb_arg Argument for cb_fn.
                        */
     538             : static void
     539         514 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     540             : {
     541             :         struct freeze_io_ctx *ctx;
     542             : 
     543         514 :         blob_verify_md_op(blob);
     544             : 
     545         514 :         ctx = calloc(1, sizeof(*ctx));
     546         514 :         if (!ctx) {
     547           0 :                 cb_fn(cb_arg, -ENOMEM);
     548           0 :                 return;
     549             :         }
     550             : 
                               /*
                                * NOTE(review): the type is set to BS_BASIC while the blob_basic union
                                * member is filled in. blob_io_cpl() reads u.blob_basic directly and
                                * never looks at the type, so this is harmless here - confirm intent.
                                */
     551         514 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     552         514 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     553         514 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     554         514 :         ctx->blob = blob;
     555             : 
     556             :         /* Freeze I/O on blob */
     557         514 :         blob->frozen_refcnt++;
     558             : 
     559         514 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     560             : }
     561             : 
                       /*
                        * Undo one blob_freeze_io(): decrement blob->frozen_refcnt, then visit every
                        * channel of the blobstore to execute the I/O queued for this blob
                        * (blob_execute_queued_io) and call cb_fn when the iteration completes (via
                        * blob_io_cpl, which also frees the context).
                        *
                        * Subject to the blob_verify_md_op() checks; frozen_refcnt must be greater
                        * than zero (asserted only).
                        *
                        * \param blob Blob to unfreeze.
                        * \param cb_fn Completion callback. Called directly with -ENOMEM if the
                        * context cannot be allocated, in which case frozen_refcnt is unchanged.
                        * \param cb_arg Argument for cb_fn.
                        */
     562             : static void
     563         502 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     564             : {
     565             :         struct freeze_io_ctx *ctx;
     566             : 
     567         502 :         blob_verify_md_op(blob);
     568             : 
     569         502 :         ctx = calloc(1, sizeof(*ctx));
     570         502 :         if (!ctx) {
     571           0 :                 cb_fn(cb_arg, -ENOMEM);
     572           0 :                 return;
     573             :         }
     574             : 
                               /* NOTE(review): type is BS_BASIC but the blob_basic union member is filled; blob_io_cpl() never reads the type. */
     575         502 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     576         502 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     577         502 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     578         502 :         ctx->blob = blob;
     579             : 
     580         502 :         assert(blob->frozen_refcnt > 0);
     581             : 
     582         502 :         blob->frozen_refcnt--;
     583             : 
     584         502 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     585             : }
     586             : 
                       /*
                        * Promote the blob's 'active' metadata arrays to be its 'clean' copy.
                        *
                        * The extent_pages, clusters and pages arrays of blob->active are duplicated
                        * first (an array whose count is zero is left as NULL). Only once every copy
                        * has succeeded are the old blob->clean arrays freed; blob->clean then takes
                        * over the previous active arrays and blob->active receives the duplicates.
                        *
                        * Returns 0 on success, or -ENOMEM if a duplicate cannot be allocated; in that
                        * case any duplicates already made are freed and the blob is not modified.
                        */
     587             : static int
     588        8474 : blob_mark_clean(struct spdk_blob *blob)
     589             : {
     590        8474 :         uint32_t *extent_pages = NULL;
     591        8474 :         uint64_t *clusters = NULL;
     592        8474 :         uint32_t *pages = NULL;
     593             : 
     594        8474 :         assert(blob != NULL);
     595             : 
     596        8474 :         if (blob->active.num_extent_pages) {
     597        2859 :                 assert(blob->active.extent_pages);
     598        2859 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     599        2859 :                 if (!extent_pages) {
     600           0 :                         return -ENOMEM;
     601             :                 }
     602        2859 :                 memcpy(extent_pages, blob->active.extent_pages,
     603        2859 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     604             :         }
     605             : 
     606        8474 :         if (blob->active.num_clusters) {
     607        5946 :                 assert(blob->active.clusters);
     608        5946 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     609        5946 :                 if (!clusters) {
     610           0 :                         free(extent_pages);
     611           0 :                         return -ENOMEM;
     612             :                 }
     613        5946 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     614             :         }
     615             : 
     616        8474 :         if (blob->active.num_pages) {
     617        6986 :                 assert(blob->active.pages);
     618        6986 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     619        6986 :                 if (!pages) {
     620           0 :                         free(extent_pages);
     621           0 :                         free(clusters);
     622           0 :                         return -ENOMEM;
     623             :                 }
     624        6986 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     625             :         }
     626             : 
                               /* No failure is possible past this point, so the old clean copy can go. */
     627        8474 :         free(blob->clean.extent_pages);
     628        8474 :         free(blob->clean.clusters);
     629        8474 :         free(blob->clean.pages);
     630             : 
                               /* The clean side takes ownership of the arrays that were active until now. */
     631        8474 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     632        8474 :         blob->clean.extent_pages = blob->active.extent_pages;
     633        8474 :         blob->clean.num_clusters = blob->active.num_clusters;
     634        8474 :         blob->clean.clusters = blob->active.clusters;
     635        8474 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     636        8474 :         blob->clean.num_pages = blob->active.num_pages;
     637        8474 :         blob->clean.pages = blob->active.pages;
     638             : 
                               /* The active side continues with the freshly made duplicates (counts are unchanged). */
     639        8474 :         blob->active.extent_pages = extent_pages;
     640        8474 :         blob->active.clusters = clusters;
     641        8474 :         blob->active.pages = pages;
     642             : 
     643             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     644             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     645             :          */
                               /* Only a blob that is still LOADING is switched to CLEAN; other states are kept. */
     646        8474 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     647        3410 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     648             :         }
     649             : 
     650        8474 :         return 0;
     651             : }
     652             : 
                       /*
                        * Build an in-memory spdk_xattr from an on-disk xattr descriptor and append it
                        * to the blob.
                        *
                        * blob       - blob that receives the new xattr.
                        * desc_xattr - descriptor to decode; the name bytes start at desc_xattr->name
                        *              and the value bytes follow directly after the name.
                        * internal   - true appends to blob->xattrs_internal, false to blob->xattrs.
                        *
                        * Returns 0 on success, -EINVAL if desc_xattr->length does not equal the size
                        * of the two length fields plus name_length and value_length, or -ENOMEM if an
                        * allocation fails (nothing is leaked and the blob is not modified).
                        *
                        * NOTE(review): when value_length is 0 the malloc() of the value may
                        * legitimately return NULL, which would be reported here as -ENOMEM. Confirm
                        * whether zero-length values can reach this function.
                        */
     653             : static int
     654        1284 : blob_deserialize_xattr(struct spdk_blob *blob,
     655             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     656             : {
     657             :         struct spdk_xattr                       *xattr;
     658             : 
     659        1284 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     660             :             sizeof(desc_xattr->value_length) +
     661        1284 :             desc_xattr->name_length + desc_xattr->value_length) {
     662           0 :                 return -EINVAL;
     663             :         }
     664             : 
     665        1284 :         xattr = calloc(1, sizeof(*xattr));
     666        1284 :         if (xattr == NULL) {
     667           0 :                 return -ENOMEM;
     668             :         }
     669             : 
                               /* One extra byte so the in-memory name can be NUL-terminated below. */
     670        1284 :         xattr->name = malloc(desc_xattr->name_length + 1);
     671        1284 :         if (xattr->name == NULL) {
     672           0 :                 free(xattr);
     673           0 :                 return -ENOMEM;
     674             :         }
     675             : 
     676        1284 :         xattr->value = malloc(desc_xattr->value_length);
     677        1284 :         if (xattr->value == NULL) {
     678           0 :                 free(xattr->name);
     679           0 :                 free(xattr);
     680           0 :                 return -ENOMEM;
     681             :         }
     682             : 
     683        1284 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     684        1284 :         xattr->name[desc_xattr->name_length] = '\0';
     685        1284 :         xattr->value_len = desc_xattr->value_length;
                               /* The value is read from name_length bytes past the start of the name. */
     686        1284 :         memcpy(xattr->value,
     687        1284 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     688        1284 :                desc_xattr->value_length);
     689             : 
     690        1284 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     691             : 
     692        1284 :         return 0;
     693             : }
     694             : 
     695             : 
                       /*
                        * Parse the descriptors stored in one metadata page and apply them to 'blob'.
                        *
                        * Descriptors are visited in order, starting at page->descriptors:
                        *   PADDING                - a zero length ends parsing; otherwise it is skipped.
                        *   FLAGS                  - validates the three flag words, copies them into
                        *                            the blob and derives blob->data_ro / blob->md_ro.
                        *   EXTENT_RLE             - appends run-length-encoded clusters to
                        *                            blob->active.clusters.
                        *   EXTENT_TABLE           - appends extent page indexes to
                        *                            blob->active.extent_pages.
                        *   EXTENT_PAGE            - appends explicit cluster indexes to
                        *                            blob->active.clusters.
                        *   XATTR / XATTR_INTERNAL - passed to blob_deserialize_xattr().
                        * Any other type is skipped.
                        *
                        * Returns 0 on success, -EINVAL when a descriptor fails validation, -ENOMEM
                        * when an array cannot be grown, or the error from blob_deserialize_xattr().
                        * On error the blob may already have been updated by earlier descriptors.
                        *
                        * NOTE(review): desc->length is not compared against the bytes remaining in
                        * the page before a descriptor body is read; only the header of the next
                        * descriptor is bounds-checked at the bottom of the loop. Confirm that callers
                        * validate the page contents before calling this.
                        */
     696             : static int
     697        4588 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     698             : {
     699             :         struct spdk_blob_md_descriptor *desc;
     700        4588 :         size_t  cur_desc = 0;
     701             :         void *tmp;
     702             : 
     703        4588 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     704       13476 :         while (cur_desc < sizeof(page->descriptors)) {
     705       13476 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     706        4540 :                         if (desc->length == 0) {
     707             :                                 /* If padding and length are 0, this terminates the page */
     708        4540 :                                 break;
     709             :                         }
     710        8936 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     711             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     712             : 
     713        3442 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     714             : 
     715        3442 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     716           0 :                                 return -EINVAL;
     717             :                         }
     718             : 
                                               /* (flags | MASK) != MASK is true exactly when a bit outside MASK is set. */
     719        3442 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     720             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     721           8 :                                 return -EINVAL;
     722             :                         }
     723             : 
                                               /* Unknown data_ro bits make both data and metadata read-only. */
     724        3434 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     725             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     726          12 :                                 blob->data_ro = true;
     727          12 :                                 blob->md_ro = true;
     728             :                         }
     729             : 
                                               /* Unknown md_ro bits make only the metadata read-only. */
     730        3434 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     731             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     732          12 :                                 blob->md_ro = true;
     733             :                         }
     734             : 
     735        3434 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     736         566 :                                 blob->data_ro = true;
     737         566 :                                 blob->md_ro = true;
     738             :                         }
     739             : 
     740        3434 :                         blob->invalid_flags = desc_flags->invalid_flags;
     741        3434 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     742        3434 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     743             : 
     744        5494 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     745             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     746             :                         unsigned int                            i, j;
                                               /* Start from the clusters already parsed so the array is sized for the total. */
     747        1396 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     748             : 
     749        1396 :                         if (blob->extent_table_found) {
     750             :                                 /* Extent Table already present in the md,
     751             :                                  * both descriptors should never be at the same time. */
     752           0 :                                 return -EINVAL;
     753             :                         }
     754        1396 :                         blob->extent_rle_found = true;
     755             : 
     756        1396 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     757             : 
     758        1396 :                         if (desc_extent_rle->length == 0 ||
     759        1396 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     760           0 :                                 return -EINVAL;
     761             :                         }
     762             : 
                                               /* Pass 1: verify that every non-zero cluster is marked allocated and count the entries. */
     763        2970 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     764       21282 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     765       19708 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     766        6692 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     767        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     768           0 :                                                         return -EINVAL;
     769             :                                                 }
     770             :                                         }
     771       19708 :                                         cluster_count++;
     772             :                                 }
     773             :                         }
     774             : 
     775        1396 :                         if (cluster_count == 0) {
     776           0 :                                 return -EINVAL;
     777             :                         }
     778        1396 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     779        1396 :                         if (tmp == NULL) {
     780           0 :                                 return -ENOMEM;
     781             :                         }
     782        1396 :                         blob->active.clusters = tmp;
     783        1396 :                         blob->active.cluster_array_size = cluster_count;
     784             : 
                                               /* Pass 2: store an LBA per cluster; a zero index is stored as 0 and is
                                                * accepted only for thin-provisioned blobs. */
     785        2970 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     786       21282 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     787       19708 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     788       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     789        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     790        6692 :                                                 blob->active.num_allocated_clusters++;
     791       13016 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     792       13016 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     793             :                                         } else {
     794           0 :                                                 return -EINVAL;
     795             :                                         }
     796             :                                 }
     797             :                         }
     798        4098 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     799             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     800        1768 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     801             :                         uint32_t i, j;
     802             :                         size_t extent_pages_length;
     803             : 
     804        1768 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
                                               /* NOTE(review): computed before length is validated; if length is smaller than
                                                * sizeof(num_clusters) this unsigned subtraction wraps. Presumably the modulo
                                                * check below rejects such values - confirm against the struct layout. */
     805        1768 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     806             : 
     807        1768 :                         if (blob->extent_rle_found) {
     808             :                                 /* This means that Extent RLE is present in MD,
     809             :                                  * both should never be at the same time. */
     810           0 :                                 return -EINVAL;
     811        1768 :                         } else if (blob->extent_table_found &&
     812           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     813             :                                 /* Number of clusters in this ET does not match number
     814             :                                  * from previously read EXTENT_TABLE. */
     815           0 :                                 return -EINVAL;
     816             :                         }
     817             : 
     818        1768 :                         if (desc_extent_table->length == 0 ||
     819        1768 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     820           0 :                                 return -EINVAL;
     821             :                         }
     822             : 
     823        1768 :                         blob->extent_table_found = true;
     824             : 
                                               /* Sum the run lengths to size the extent_pages array. */
     825        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     826        1478 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     827             :                         }
     828             : 
     829        1768 :                         if (num_extent_pages > 0) {
     830        1462 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     831        1462 :                                 if (tmp == NULL) {
     832           0 :                                         return -ENOMEM;
     833             :                                 }
     834        1462 :                                 blob->active.extent_pages = tmp;
     835             :                         }
     836        1768 :                         blob->active.extent_pages_array_size = num_extent_pages;
     837             : 
     838        1768 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     839             : 
     840             :                         /* Extent table entries contain md page numbers for extent pages.
     841             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     842             :                          */
     843        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     844        1478 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
                                                               /* NOTE(review): num_pages == 1 is enforced only by this assert, while the
                                                                * array above was sized from num_pages. Confirm that an entry with a
                                                                * non-zero page_idx and num_pages == 0 cannot reach this point when
                                                                * asserts are compiled out. */
     845        1052 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     846        1052 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     847        1052 :                                                 desc_extent_table->extent_page[i].page_idx;
     848         426 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     849         852 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     850         426 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     851             :                                         }
     852             :                                 } else {
     853           0 :                                         return -EINVAL;
     854             :                                 }
     855             :                         }
     856        2330 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     857             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     858             :                         unsigned int                                    i;
     859        1046 :                         unsigned int                                    cluster_count = 0;
     860             :                         size_t                                          cluster_idx_length;
     861             : 
     862        1046 :                         if (blob->extent_rle_found) {
     863             :                                 /* This means that Extent RLE is present in MD,
     864             :                                  * both should never be at the same time. */
     865           0 :                                 return -EINVAL;
     866             :                         }
     867             : 
     868        1046 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
                                               /* May wrap for a too-short length; the check right below rejects that case
                                                * before cluster_idx_length is used as a loop bound. */
     869        1046 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     870             : 
     871        1046 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     872        1046 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     873           0 :                                 return -EINVAL;
     874             :                         }
     875             : 
                                               /* Pass 1: verify that every non-zero cluster is marked allocated and count the entries. */
     876       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     877       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     878        6962 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     879           0 :                                                 return -EINVAL;
     880             :                                         }
     881             :                                 }
     882       15298 :                                 cluster_count++;
     883             :                         }
     884             : 
     885        1046 :                         if (cluster_count == 0) {
     886           0 :                                 return -EINVAL;
     887             :                         }
     888             : 
     889             :                         /* When reading extent pages sequentially starting cluster idx should match
     890             :                          * current size of a blob.
     891             :                          * If changed to batch reading, this check shall be removed. */
     892        1046 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     893           0 :                                 return -EINVAL;
     894             :                         }
     895             : 
     896        1046 :                         tmp = realloc(blob->active.clusters,
     897        1046 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     898        1046 :                         if (tmp == NULL) {
     899           0 :                                 return -ENOMEM;
     900             :                         }
     901        1046 :                         blob->active.clusters = tmp;
     902        1046 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     903             : 
                                               /* Pass 2: store an LBA per cluster; a zero index is stored as 0 and is
                                                * accepted only for thin-provisioned blobs. */
     904       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     905       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     906        6962 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     907             :                                                         desc_extent->cluster_idx[i]);
     908        6962 :                                         blob->active.num_allocated_clusters++;
     909        8336 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     910        8336 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     911             :                                 } else {
     912           0 :                                         return -EINVAL;
     913             :                                 }
     914             :                         }
     915        1046 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
                                               /* NOTE(review): guarded only by an assert; with asserts compiled out the
                                                * subtraction below could wrap if this page holds more clusters than the
                                                * extent table announced - confirm. */
     916        1046 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     917        1046 :                         blob->remaining_clusters_in_et -= cluster_count;
     918        1284 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     919             :                         int rc;
     920             : 
     921         394 :                         rc = blob_deserialize_xattr(blob,
     922             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     923         394 :                         if (rc != 0) {
     924           0 :                                 return rc;
     925             :                         }
     926         890 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     927             :                         int rc;
     928             : 
     929         890 :                         rc = blob_deserialize_xattr(blob,
     930             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     931         890 :                         if (rc != 0) {
     932           0 :                                 return rc;
     933             :                         }
     934             :                 } else {
     935             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     936             :                          *  next descriptor.  If this descriptor is associated with some feature
     937             :                          *  defined in a newer version of blobstore, that version of blobstore
     938             :                          *  should create and set an associated feature flag to specify if this
     939             :                          *  blob can be loaded or not.
     940             :                          */
     941             :                 }
     942             : 
     943             :                 /* Advance to the next descriptor */
     944        8928 :                 cur_desc += sizeof(*desc) + desc->length;
                                       /* Stop when a complete descriptor header no longer fits in the page. */
     945        8928 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     946          40 :                         break;
     947             :                 }
     948        8888 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     949             :         }
     950             : 
     951        4580 :         return 0;
     952             : }
     953             : 
     954             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     955             : 
                       /*
                        * Validate one extent page and, if it is valid, parse its descriptors into
                        * 'blob'.
                        *
                        * The blob must be non-NULL and in SPDK_BLOB_STATE_LOADING (both asserted).
                        *
                        * Returns -ENOENT when bs_load_cur_extent_page_valid() rejects the page,
                        * otherwise whatever blob_parse_page() returns.
                        */
     956             : static int
     957        1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     958             : {
     959        1046 :         assert(blob != NULL);
     960        1046 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     961             : 
     962        1046 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     963           0 :                 return -ENOENT;
     964             :         }
     965             : 
     966        1046 :         return blob_parse_page(extent_page, blob);
     967             : }
     968             : 
                       /*
                        * Rebuild the in-memory state of 'blob' from its chain of metadata pages.
                        *
                        * pages      - array of page_count metadata pages; pages[0].sequence_num must
                        *              be 0 (asserted).
                        * page_count - number of entries in 'pages'; must be > 0 (asserted).
                        * blob       - blob in SPDK_BLOB_STATE_LOADING whose active.clusters is still
                        *              NULL (both asserted).
                        *
                        * Returns 0 on success, -ENOENT if blob->id differs from pages[0].id, -ENOMEM
                        * if the page list cannot be allocated, or the first non-zero result of
                        * blob_parse_page(). blob->active.pages and blob->active.num_pages are filled
                        * in before the pages are parsed, so they stay set when parsing fails.
                        */
     969             : static int
     970        3446 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     971             :            struct spdk_blob *blob)
     972             : {
     973             :         const struct spdk_blob_md_page *page;
     974             :         uint32_t i;
     975             :         int rc;
     976             :         void *tmp;
     977             : 
     978        3446 :         assert(page_count > 0);
     979        3446 :         assert(pages[0].sequence_num == 0);
     980        3446 :         assert(blob != NULL);
     981        3446 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     982        3446 :         assert(blob->active.clusters == NULL);
     983             : 
     984             :         /* The blobid provided doesn't match what's in the MD, this can
     985             :          * happen for example if a bogus blobid is passed in through open.
     986             :          */
     987        3446 :         if (blob->id != pages[0].id) {
     988           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     989             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     990           4 :                 return -ENOENT;
     991             :         }
     992             : 
     993        3442 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     994        3442 :         if (!tmp) {
     995           0 :                 return -ENOMEM;
     996             :         }
     997        3442 :         blob->active.pages = tmp;
     998             : 
     999        3442 :         blob->active.pages[0] = pages[0].id;
    1000             : 
                               /* Entry i is the 'next' link recorded in page i - 1. */
    1001        3542 :         for (i = 1; i < page_count; i++) {
    1002         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
    1003         100 :                 blob->active.pages[i] = pages[i - 1].next;
    1004             :         }
    1005        3442 :         blob->active.num_pages = page_count;
    1006             : 
                               /* Parse the pages in sequence order and stop at the first failure. */
    1007        6976 :         for (i = 0; i < page_count; i++) {
    1008        3542 :                 page = &pages[i];
    1009             : 
    1010        3542 :                 assert(page->id == blob->id);
    1011        3542 :                 assert(page->sequence_num == i);
    1012             : 
    1013        3542 :                 rc = blob_parse_page(page, blob);
    1014        3542 :                 if (rc != 0) {
    1015           8 :                         return rc;
    1016             :                 }
    1017             :         }
    1018             : 
    1019        3434 :         return 0;
    1020             : }
    1021             : 
                       /*
                        * Append one zero-initialized metadata page to the array at *pages.
                        *
                        * blob       - supplies the id written into the new page.
                        * pages      - in/out: page array; must be NULL when *page_count is 0 and
                        *              non-NULL otherwise (asserted). The first page comes from
                        *              spdk_malloc(); later pages grow the array with spdk_realloc().
                        * page_count - in/out: number of pages in the array, incremented on success.
                        * last_page  - out: points at the new page on success, NULL on failure.
                        *
                        * Returns 0 on success or -ENOMEM on allocation failure, in which case *pages
                        * and *page_count keep their previous values.
                        *
                        * NOTE(review): presumably spdk_realloc(), like realloc(), may move the array,
                        * so pointers obtained from an earlier call should not be reused - confirm.
                        */
    1022             : static int
    1023        4370 : blob_serialize_add_page(const struct spdk_blob *blob,
    1024             :                         struct spdk_blob_md_page **pages,
    1025             :                         uint32_t *page_count,
    1026             :                         struct spdk_blob_md_page **last_page)
    1027             : {
    1028             :         struct spdk_blob_md_page *page, *tmp_pages;
    1029             : 
    1030        4370 :         assert(pages != NULL);
    1031        4370 :         assert(page_count != NULL);
    1032             : 
    1033        4370 :         *last_page = NULL;
    1034        4370 :         if (*page_count == 0) {
    1035        4282 :                 assert(*pages == NULL);
    1036        4282 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
    1037             :                                      NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1038        4282 :                 if (*pages == NULL) {
    1039           0 :                         return -ENOMEM;
    1040             :                 }
    1041        4282 :                 *page_count = 1;
    1042             :         } else {
    1043          88 :                 assert(*pages != NULL);
                                       /* Keep *pages untouched until the larger buffer is known to exist. */
    1044          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
    1045          88 :                 if (tmp_pages == NULL) {
    1046           0 :                         return -ENOMEM;
    1047             :                 }
    1048          88 :                 (*page_count)++;
    1049          88 :                 *pages = tmp_pages;
    1050             :         }
    1051             : 
                               /* Initialize the page that was just added at the end of the array. */
    1052        4370 :         page = &(*pages)[*page_count - 1];
    1053        4370 :         memset(page, 0, sizeof(*page));
    1054        4370 :         page->id = blob->id;
    1055        4370 :         page->sequence_num = *page_count - 1;
    1056        4370 :         page->next = SPDK_INVALID_MD_PAGE;
    1057        4370 :         *last_page = page;
    1058             : 
    1059        4370 :         return 0;
    1060             : }
    1061             : 
    1062             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
    1063             :  * Update required_sz on both success and failure.
    1064             :  *
    1065             :  */
    1066             : static int
    1067        1795 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1068             :                      uint8_t *buf, size_t buf_sz,
    1069             :                      size_t *required_sz, bool internal)
    1070             : {
    1071             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1072             : 
    1073        1795 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1074        1795 :                        strlen(xattr->name) +
    1075        1795 :                        xattr->value_len;
    1076             : 
    1077        1795 :         if (buf_sz < *required_sz) {
    1078          48 :                 return -1;
    1079             :         }
    1080             : 
    1081        1747 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1082             : 
    1083        1747 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
    1084        1747 :         desc->length = sizeof(desc->name_length) +
    1085             :                        sizeof(desc->value_length) +
    1086        1747 :                        strlen(xattr->name) +
    1087        1747 :                        xattr->value_len;
    1088        1747 :         desc->name_length = strlen(xattr->name);
    1089        1747 :         desc->value_length = xattr->value_len;
    1090             : 
    1091        1747 :         memcpy(desc->name, xattr->name, desc->name_length);
    1092        1747 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1093        1747 :                xattr->value,
    1094        1747 :                desc->value_length);
    1095             : 
    1096        1747 :         return 0;
    1097             : }
    1098             : 
    1099             : static void
    1100        1695 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1101             :                                   uint64_t start_ep, uint64_t *next_ep,
    1102             :                                   uint8_t **buf, size_t *remaining_sz)
    1103             : {
    1104             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1105             :         size_t cur_sz;
    1106             :         uint64_t i, et_idx;
    1107             :         uint32_t extent_page, ep_len;
    1108             : 
    1109             :         /* The buffer must have room for at least num_clusters entry */
    1110        1695 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1111        1695 :         if (*remaining_sz < cur_sz) {
    1112          20 :                 *next_ep = start_ep;
    1113          20 :                 return;
    1114             :         }
    1115             : 
    1116        1675 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1117        1675 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1118             : 
    1119        1675 :         desc->num_clusters = blob->active.num_clusters;
    1120             : 
    1121        1675 :         ep_len = 1;
    1122        1675 :         et_idx = 0;
    1123        4256 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1124        2581 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1125             :                         /* If we ran out of buffer space, return */
    1126           0 :                         break;
    1127             :                 }
    1128             : 
    1129        2581 :                 extent_page = blob->active.extent_pages[i];
    1130             :                 /* Verify that next extent_page is unallocated */
    1131        2581 :                 if (extent_page == 0 &&
    1132        1528 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1133        1078 :                         ep_len++;
    1134        1078 :                         continue;
    1135             :                 }
    1136        1503 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1137        1503 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1138        1503 :                 et_idx++;
    1139             : 
    1140        1503 :                 ep_len = 1;
    1141        1503 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1142             :         }
    1143        1675 :         *next_ep = i;
    1144             : 
    1145        1675 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1146        1675 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1147        1675 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1148             : }
    1149             : 
    1150             : static int
    1151        1677 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1152             :                             struct spdk_blob_md_page **pages,
    1153             :                             struct spdk_blob_md_page *cur_page,
    1154             :                             uint32_t *page_count, uint8_t **buf,
    1155             :                             size_t *remaining_sz)
    1156             : {
    1157        1677 :         uint64_t                                last_extent_page;
    1158             :         int                                     rc;
    1159             : 
    1160        1677 :         last_extent_page = 0;
    1161             :         /* At least single extent table entry has to be always persisted.
    1162             :          * Such case occurs with num_extent_pages == 0. */
    1163        1695 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1164        1695 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1165             :                                                   remaining_sz);
    1166             : 
    1167        1695 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1168        1677 :                         break;
    1169             :                 }
    1170             : 
    1171          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1172          18 :                 if (rc < 0) {
    1173           0 :                         return rc;
    1174             :                 }
    1175             : 
    1176          18 :                 *buf = (uint8_t *)cur_page->descriptors;
    1177          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1178             :         }
    1179             : 
    1180        1677 :         return 0;
    1181             : }
    1182             : 
    1183             : static void
    1184        1747 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1185             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1186             :                           uint8_t **buf, size_t *buf_sz)
    1187             : {
    1188             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1189             :         size_t cur_sz;
    1190             :         uint64_t i, extent_idx;
    1191             :         uint64_t lba, lba_per_cluster, lba_count;
    1192             : 
    1193             :         /* The buffer must have room for at least one extent */
    1194        1747 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1195        1747 :         if (*buf_sz < cur_sz) {
    1196          18 :                 *next_cluster = start_cluster;
    1197          18 :                 return;
    1198             :         }
    1199             : 
    1200        1729 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1201        1729 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1202             : 
    1203        1729 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1204             :         /* Assert for scan-build false positive */
    1205        1729 :         assert(lba_per_cluster > 0);
    1206             : 
    1207        1729 :         lba = blob->active.clusters[start_cluster];
    1208        1729 :         lba_count = lba_per_cluster;
    1209        1729 :         extent_idx = 0;
    1210      810550 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1211      808825 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1212             :                         /* Run-length encode sequential non-zero LBA */
    1213        7276 :                         lba_count += lba_per_cluster;
    1214        7276 :                         continue;
    1215      801549 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1216             :                         /* Run-length encode unallocated clusters */
    1217      800356 :                         lba_count += lba_per_cluster;
    1218      800356 :                         continue;
    1219             :                 }
    1220        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1221        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1222        1193 :                 extent_idx++;
    1223             : 
    1224        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1225             : 
    1226        1193 :                 if (*buf_sz < cur_sz) {
    1227             :                         /* If we ran out of buffer space, return */
    1228           4 :                         *next_cluster = i;
    1229           4 :                         break;
    1230             :                 }
    1231             : 
    1232        1189 :                 lba = blob->active.clusters[i];
    1233        1189 :                 lba_count = lba_per_cluster;
    1234             :         }
    1235             : 
    1236        1729 :         if (*buf_sz >= cur_sz) {
    1237        1725 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1238        1725 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1239        1725 :                 extent_idx++;
    1240             : 
    1241        1725 :                 *next_cluster = blob->active.num_clusters;
    1242             :         }
    1243             : 
    1244        1729 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1245        1729 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1246        1729 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1247             : }
    1248             : 
    1249             : static int
    1250        1939 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1251             :                            struct spdk_blob_md_page **pages,
    1252             :                            struct spdk_blob_md_page *cur_page,
    1253             :                            uint32_t *page_count, uint8_t **buf,
    1254             :                            size_t *remaining_sz)
    1255             : {
    1256        1939 :         uint64_t                                last_cluster;
    1257             :         int                                     rc;
    1258             : 
    1259        1939 :         last_cluster = 0;
    1260        1961 :         while (last_cluster < blob->active.num_clusters) {
    1261        1747 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1262             : 
    1263        1747 :                 if (last_cluster == blob->active.num_clusters) {
    1264        1725 :                         break;
    1265             :                 }
    1266             : 
    1267          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1268          22 :                 if (rc < 0) {
    1269           0 :                         return rc;
    1270             :                 }
    1271             : 
    1272          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1273          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1274             :         }
    1275             : 
    1276        1939 :         return 0;
    1277             : }
    1278             : 
    1279             : static void
    1280        1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1281             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1282             : {
    1283             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1284             :         uint64_t i, extent_idx;
    1285             :         uint64_t lba, lba_per_cluster;
    1286        1100 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1287             : 
    1288        1100 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1289        1100 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1290             : 
    1291        1100 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1292             : 
    1293        1100 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1294        1100 :         extent_idx = 0;
    1295       42406 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1296       41372 :                 lba = blob->active.clusters[i];
    1297       41372 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1298       41372 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1299          66 :                         break;
    1300             :                 }
    1301             :         }
    1302        1100 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1303             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1304        1100 : }
    1305             : 
    1306             : static void
    1307        3616 : blob_serialize_flags(const struct spdk_blob *blob,
    1308             :                      uint8_t *buf, size_t *buf_sz)
    1309             : {
    1310             :         struct spdk_blob_md_descriptor_flags *desc;
    1311             : 
    1312             :         /*
    1313             :          * Flags get serialized first, so we should always have room for the flags
    1314             :          *  descriptor.
    1315             :          */
    1316        3616 :         assert(*buf_sz >= sizeof(*desc));
    1317             : 
    1318        3616 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1319        3616 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1320        3616 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
    1321        3616 :         desc->invalid_flags = blob->invalid_flags;
    1322        3616 :         desc->data_ro_flags = blob->data_ro_flags;
    1323        3616 :         desc->md_ro_flags = blob->md_ro_flags;
    1324             : 
    1325        3616 :         *buf_sz -= sizeof(*desc);
    1326        3616 : }
    1327             : 
    1328             : static int
    1329        7232 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1330             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1331             :                       struct spdk_blob_md_page **pages,
    1332             :                       struct spdk_blob_md_page *cur_page,
    1333             :                       uint32_t *page_count, uint8_t **buf,
    1334             :                       size_t *remaining_sz)
    1335             : {
    1336             :         const struct spdk_xattr *xattr;
    1337             :         int     rc;
    1338             : 
    1339        8979 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1340        1747 :                 size_t required_sz = 0;
    1341             : 
    1342        1747 :                 rc = blob_serialize_xattr(xattr,
    1343             :                                           *buf, *remaining_sz,
    1344             :                                           &required_sz, internal);
    1345        1747 :                 if (rc < 0) {
    1346             :                         /* Need to add a new page to the chain */
    1347          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1348             :                                                      &cur_page);
    1349          48 :                         if (rc < 0) {
    1350           0 :                                 spdk_free(*pages);
    1351           0 :                                 *pages = NULL;
    1352           0 :                                 *page_count = 0;
    1353           0 :                                 return rc;
    1354             :                         }
    1355             : 
    1356          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1357          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1358             : 
    1359             :                         /* Try again */
    1360          48 :                         required_sz = 0;
    1361          48 :                         rc = blob_serialize_xattr(xattr,
    1362             :                                                   *buf, *remaining_sz,
    1363             :                                                   &required_sz, internal);
    1364             : 
    1365          48 :                         if (rc < 0) {
    1366           0 :                                 spdk_free(*pages);
    1367           0 :                                 *pages = NULL;
    1368           0 :                                 *page_count = 0;
    1369           0 :                                 return rc;
    1370             :                         }
    1371             :                 }
    1372             : 
    1373        1747 :                 *remaining_sz -= required_sz;
    1374        1747 :                 *buf += required_sz;
    1375             :         }
    1376             : 
    1377        7232 :         return 0;
    1378             : }
    1379             : 
    1380             : static int
    1381        3616 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1382             :                uint32_t *page_count)
    1383             : {
    1384        3616 :         struct spdk_blob_md_page                *cur_page;
    1385             :         int                                     rc;
    1386        3616 :         uint8_t                                 *buf;
    1387        3616 :         size_t                                  remaining_sz;
    1388             : 
    1389        3616 :         assert(pages != NULL);
    1390        3616 :         assert(page_count != NULL);
    1391        3616 :         assert(blob != NULL);
    1392        3616 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1393             : 
    1394        3616 :         *pages = NULL;
    1395        3616 :         *page_count = 0;
    1396             : 
    1397             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1398        3616 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1399        3616 :         if (rc < 0) {
    1400           0 :                 return rc;
    1401             :         }
    1402             : 
    1403        3616 :         buf = (uint8_t *)cur_page->descriptors;
    1404        3616 :         remaining_sz = sizeof(cur_page->descriptors);
    1405             : 
    1406             :         /* Serialize flags */
    1407        3616 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1408        3616 :         buf += sizeof(struct spdk_blob_md_descriptor_flags);
    1409             : 
    1410             :         /* Serialize xattrs */
    1411        3616 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1412             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1413        3616 :         if (rc < 0) {
    1414           0 :                 return rc;
    1415             :         }
    1416             : 
    1417             :         /* Serialize internal xattrs */
    1418        3616 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1419             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1420        3616 :         if (rc < 0) {
    1421           0 :                 return rc;
    1422             :         }
    1423             : 
    1424        3616 :         if (blob->use_extent_table) {
    1425             :                 /* Serialize extent table */
    1426        1677 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1427             :         } else {
    1428             :                 /* Serialize extents */
    1429        1939 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1430             :         }
    1431             : 
    1432        3616 :         return rc;
    1433             : }
    1434             : 
    1435             : struct spdk_blob_load_ctx {
    1436             :         struct spdk_blob                *blob;
    1437             : 
    1438             :         struct spdk_blob_md_page        *pages;
    1439             :         uint32_t                        num_pages;
    1440             :         uint32_t                        next_extent_page;
    1441             :         spdk_bs_sequence_t              *seq;
    1442             : 
    1443             :         spdk_bs_sequence_cpl            cb_fn;
    1444             :         void                            *cb_arg;
    1445             : };
    1446             : 
    1447             : static uint32_t
    1448       19958 : blob_md_page_calc_crc(void *page)
    1449             : {
    1450             :         uint32_t                crc;
    1451             : 
    1452       19958 :         crc = BLOB_CRC32C_INITIAL;
    1453       19958 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1454       19958 :         crc ^= BLOB_CRC32C_INITIAL;
    1455             : 
    1456       19958 :         return crc;
    1457             : 
    1458             : }
    1459             : 
    1460             : static void
    1461        3474 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1462             : {
    1463        3474 :         struct spdk_blob                *blob = ctx->blob;
    1464             : 
    1465        3474 :         if (bserrno == 0) {
    1466        3410 :                 blob_mark_clean(blob);
    1467             :         }
    1468             : 
    1469        3474 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1470             : 
    1471             :         /* Free the memory */
    1472        3474 :         spdk_free(ctx->pages);
    1473        3474 :         free(ctx);
    1474        3474 : }
    1475             : 
    1476             : static void
    1477         454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1478             : {
    1479         454 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1480         454 :         struct spdk_blob                *blob = ctx->blob;
    1481             : 
    1482         454 :         if (bserrno == 0) {
    1483         448 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1484         448 :                 if (blob->back_bs_dev == NULL) {
    1485           0 :                         bserrno = -ENOMEM;
    1486             :                 }
    1487             :         }
    1488         454 :         if (bserrno != 0) {
    1489           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1490             :         }
    1491             : 
    1492         454 :         blob_load_final(ctx, bserrno);
    1493         454 : }
    1494             : 
    1495             : static void blob_update_clear_method(struct spdk_blob *blob);
    1496             : 
    1497             : static int
    1498         124 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1499             : {
    1500         124 :         struct spdk_blob_store *bs = blob->bs;
    1501         124 :         struct spdk_bs_dev *bs_dev = NULL;
    1502         124 :         const void *esnap_id = NULL;
    1503         124 :         size_t id_len = 0;
    1504             :         int rc;
    1505             : 
    1506         124 :         if (bs->esnap_bs_dev_create == NULL) {
    1507           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1508             :                                "without support for esnap clones\n", blob->id);
    1509           8 :                 return -ENOTSUP;
    1510             :         }
    1511         116 :         assert(blob->back_bs_dev == NULL);
    1512             : 
    1513         116 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1514         116 :         if (rc != 0) {
    1515           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1516           0 :                 return -EINVAL;
    1517             :         }
    1518         116 :         assert(id_len > 0 && id_len < UINT32_MAX);
    1519             : 
    1520         116 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1521             : 
    1522         116 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1523             :                                      &bs_dev);
    1524         116 :         if (rc != 0) {
    1525           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1526             :                               "with error %d\n", blob->id, rc);
    1527           0 :                 return rc;
    1528             :         }
    1529             : 
    1530             :         /*
    1531             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1532             :          * This especially might happen during spdk_bs_load() iteration.
    1533             :          */
    1534         116 :         if (bs_dev != NULL) {
    1535         116 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1536         116 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1537           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1538             :                                        "is not compatible with blobstore block size %u\n",
    1539             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1540           4 :                         bs_dev->destroy(bs_dev);
    1541           4 :                         return -EINVAL;
    1542             :                 }
    1543             :         }
    1544             : 
    1545         112 :         blob->back_bs_dev = bs_dev;
    1546         112 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1547             : 
    1548         112 :         return 0;
    1549             : }
    1550             : 
    1551             : static void
    1552        3428 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1553             : {
    1554        3428 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1555        3428 :         struct spdk_blob                *blob = ctx->blob;
    1556        3428 :         const void                      *value;
    1557        3428 :         size_t                          len;
    1558             :         int                             rc;
    1559             : 
    1560        3428 :         if (blob_is_esnap_clone(blob)) {
    1561         124 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1562         124 :                 blob_load_final(ctx, rc);
    1563         124 :                 return;
    1564             :         }
    1565             : 
    1566        3304 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1567        1034 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1568        1034 :                 if (rc == 0) {
    1569         454 :                         if (len != sizeof(spdk_blob_id)) {
    1570           0 :                                 blob_load_final(ctx, -EINVAL);
    1571           0 :                                 return;
    1572             :                         }
    1573             :                         /* open snapshot blob and continue in the callback function */
    1574         454 :                         blob->parent_id = *(spdk_blob_id *)value;
    1575         454 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1576             :                                           blob_load_snapshot_cpl, ctx);
    1577         454 :                         return;
    1578             :                 } else {
    1579             :                         /* add zeroes_dev for thin provisioned blob */
    1580         580 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1581             :                 }
    1582             :         } else {
    1583             :                 /* standard blob */
    1584        2270 :                 blob->back_bs_dev = NULL;
    1585             :         }
    1586        2850 :         blob_load_final(ctx, 0);
    1587             : }
    1588             : 
    1589             : static void
    1590        2820 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1591             : {
    1592        2820 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1593        2820 :         struct spdk_blob                *blob = ctx->blob;
    1594             :         struct spdk_blob_md_page        *page;
    1595             :         uint64_t                        i;
    1596             :         uint32_t                        crc;
    1597             :         uint64_t                        lba;
    1598             :         void                            *tmp;
    1599             :         uint64_t                        sz;
    1600             : 
    1601        2820 :         if (bserrno) {
    1602           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1603           6 :                 blob_load_final(ctx, bserrno);
    1604           6 :                 return;
    1605             :         }
    1606             : 
    1607        2814 :         if (ctx->pages == NULL) {
    1608             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1609        1768 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1610             :                                           NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1611        1768 :                 if (!ctx->pages) {
    1612           0 :                         blob_load_final(ctx, -ENOMEM);
    1613           0 :                         return;
    1614             :                 }
    1615        1768 :                 ctx->num_pages = 1;
    1616        1768 :                 ctx->next_extent_page = 0;
    1617             :         } else {
    1618        1046 :                 page = &ctx->pages[0];
    1619        1046 :                 crc = blob_md_page_calc_crc(page);
    1620        1046 :                 if (crc != page->crc) {
    1621           0 :                         blob_load_final(ctx, -EINVAL);
    1622           0 :                         return;
    1623             :                 }
    1624             : 
    1625        1046 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1626           0 :                         blob_load_final(ctx, -EINVAL);
    1627           0 :                         return;
    1628             :                 }
    1629             : 
    1630        1046 :                 bserrno = blob_parse_extent_page(page, blob);
    1631        1046 :                 if (bserrno) {
    1632           0 :                         blob_load_final(ctx, bserrno);
    1633           0 :                         return;
    1634             :                 }
    1635             :         }
    1636             : 
    1637        3240 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1638        1478 :                 if (blob->active.extent_pages[i] != 0) {
    1639             :                         /* Extent page was allocated, read and parse it. */
    1640        1052 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1641        1052 :                         ctx->next_extent_page = i + 1;
    1642             : 
    1643        1052 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1644        1052 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1645             :                                              blob_load_cpl_extents_cpl, ctx);
    1646        1052 :                         return;
    1647             :                 } else {
    1648             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1649             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1650             : 
    1651         426 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1652         426 :                         blob->active.num_clusters += sz;
    1653         426 :                         blob->remaining_clusters_in_et -= sz;
    1654             : 
    1655         426 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1656         426 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1657             : 
    1658         426 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1659         426 :                         if (tmp == NULL) {
    1660           0 :                                 blob_load_final(ctx, -ENOMEM);
    1661           0 :                                 return;
    1662             :                         }
    1663         426 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1664         426 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1665         426 :                         blob->active.clusters = tmp;
    1666         426 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1667             :                 }
    1668             :         }
    1669             : 
    1670        1762 :         blob_load_backing_dev(seq, ctx);
    1671             : }
    1672             : 
    1673             : static void
    1674        3574 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1675             : {
    1676        3574 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1677        3574 :         struct spdk_blob                *blob = ctx->blob;
    1678             :         struct spdk_blob_md_page        *page;
    1679             :         int                             rc;
    1680             :         uint32_t                        crc;
    1681             :         uint32_t                        current_page;
    1682             : 
    1683        3574 :         if (ctx->num_pages == 1) {
    1684        3474 :                 current_page = bs_blobid_to_page(blob->id);
    1685             :         } else {
    1686         100 :                 assert(ctx->num_pages != 0);
    1687         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1688         100 :                 current_page = page->next;
    1689             :         }
    1690             : 
    1691        3574 :         if (bserrno) {
    1692          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1693             :                             current_page, blob->id, bserrno);
    1694          20 :                 blob_load_final(ctx, bserrno);
    1695          20 :                 return;
    1696             :         }
    1697             : 
    1698        3554 :         page = &ctx->pages[ctx->num_pages - 1];
    1699        3554 :         crc = blob_md_page_calc_crc(page);
    1700        3554 :         if (crc != page->crc) {
    1701           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1702             :                             current_page, blob->id);
    1703           8 :                 blob_load_final(ctx, -EINVAL);
    1704           8 :                 return;
    1705             :         }
    1706             : 
    1707        3546 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1708             :                 struct spdk_blob_md_page *tmp_pages;
    1709         100 :                 uint32_t next_page = page->next;
    1710         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1711             : 
    1712             :                 /* Read the next page */
    1713         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1714         100 :                 if (tmp_pages == NULL) {
    1715           0 :                         blob_load_final(ctx, -ENOMEM);
    1716           0 :                         return;
    1717             :                 }
    1718         100 :                 ctx->num_pages++;
    1719         100 :                 ctx->pages = tmp_pages;
    1720             : 
    1721         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1722             :                                      next_lba,
    1723         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1724             :                                      blob_load_cpl, ctx);
    1725         100 :                 return;
    1726             :         }
    1727             : 
    1728             :         /* Parse the pages */
    1729        3446 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1730        3446 :         if (rc) {
    1731          12 :                 blob_load_final(ctx, rc);
    1732          12 :                 return;
    1733             :         }
    1734             : 
    1735        3434 :         if (blob->extent_table_found == true) {
    1736             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1737        1768 :                 assert(blob->extent_rle_found == false);
    1738        1768 :                 blob->use_extent_table = true;
    1739             :         } else {
    1740             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1741             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1742             :                  * and no extent_rle descriptors were persisted for it.
    1743             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1744        1666 :                 blob->use_extent_table = false;
    1745             :         }
    1746             : 
    1747             :         /* Check the clear_method stored in metadata vs what may have been passed
    1748             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1749             :          */
    1750        3434 :         blob_update_clear_method(blob);
    1751             : 
    1752        3434 :         spdk_free(ctx->pages);
    1753        3434 :         ctx->pages = NULL;
    1754             : 
    1755        3434 :         if (blob->extent_table_found) {
    1756        1768 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1757             :         } else {
    1758        1666 :                 blob_load_backing_dev(seq, ctx);
    1759             :         }
    1760             : }
    1761             : 
    1762             : /* Load a blob from disk given a blobid */
    1763             : static void
    1764        3474 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1765             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1766             : {
    1767             :         struct spdk_blob_load_ctx *ctx;
    1768             :         struct spdk_blob_store *bs;
    1769             :         uint32_t page_num;
    1770             :         uint64_t lba;
    1771             : 
    1772        3474 :         blob_verify_md_op(blob);
    1773             : 
    1774        3474 :         bs = blob->bs;
    1775             : 
    1776        3474 :         ctx = calloc(1, sizeof(*ctx));
    1777        3474 :         if (!ctx) {
    1778           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1779           0 :                 return;
    1780             :         }
    1781             : 
    1782        3474 :         ctx->blob = blob;
    1783        3474 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1784        3474 :         if (!ctx->pages) {
    1785           0 :                 free(ctx);
    1786           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1787           0 :                 return;
    1788             :         }
    1789        3474 :         ctx->num_pages = 1;
    1790        3474 :         ctx->cb_fn = cb_fn;
    1791        3474 :         ctx->cb_arg = cb_arg;
    1792        3474 :         ctx->seq = seq;
    1793             : 
    1794        3474 :         page_num = bs_blobid_to_page(blob->id);
    1795        3474 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1796             : 
    1797        3474 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1798             : 
    1799        3474 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1800        3474 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1801             :                              blob_load_cpl, ctx);
    1802             : }
    1803             : 
    1804             : struct spdk_blob_persist_ctx { /* State carried as cb_arg through the blob_persist_* callbacks */
    1805             :         struct spdk_blob                *blob; /* Blob whose metadata is being persisted */
    1806             : 
    1807             :         struct spdk_blob_md_page        *pages; /* Released with spdk_free() in blob_persist_complete_cb() */
    1808             :         uint32_t                        next_extent_page; /* NOTE(review): presumably a resume index for extent page writes - confirm */
    1809             :         struct spdk_blob_md_page        *extent_page; /* NOTE(review): presumably a buffer for one extent page - confirm */
    1810             : 
    1811             :         spdk_bs_sequence_t              *seq; /* Sequence handed back to cb_fn on completion */
    1812             :         spdk_bs_sequence_cpl            cb_fn; /* User completion callback */
    1813             :         void                            *cb_arg; /* Argument passed to cb_fn */
    1814             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link; /* Entry in the blob's pending_persists / persists_to_complete lists */
    1815             : };
    1816             : 
    1817             : static void
    1818        1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1819             :                    uint64_t lba_count)
    1820             : {
    1821        1262 :         switch (blob->clear_method) {
    1822        1262 :         case BLOB_CLEAR_WITH_DEFAULT:
    1823             :         case BLOB_CLEAR_WITH_UNMAP:
    1824        1262 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1825        1262 :                 break;
    1826           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1827           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1828           0 :                 break;
    1829           0 :         case BLOB_CLEAR_WITH_NONE:
    1830             :         default:
    1831           0 :                 break;
    1832             :         }
    1833        1262 : }
    1834             : 
    1835             : static int
    1836        1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1837             : {
    1838             :         uint32_t        crc;
    1839             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1840             : 
    1841        1152 :         if (super->version > SPDK_BS_VERSION ||
    1842        1148 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1843           8 :                 return -EILSEQ;
    1844             :         }
    1845             : 
    1846        1144 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1847             :                    sizeof(super->signature)) != 0) {
    1848           0 :                 return -EILSEQ;
    1849             :         }
    1850             : 
    1851        1144 :         crc = blob_md_page_calc_crc(super);
    1852        1144 :         if (crc != super->crc) {
    1853           4 :                 return -EILSEQ;
    1854             :         }
    1855             : 
    1856        1140 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1857        1126 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1858          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1859           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1860             :         } else {
    1861           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1862           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1863           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1864           8 :                 return -ENXIO;
    1865             :         }
    1866             : 
    1867        1132 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1868           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1869             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1870           8 :                 return -EILSEQ;
    1871             :         }
    1872             : 
    1873        1124 :         return 0;
    1874             : }
    1875             : 
    1876             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1877             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1878             : 
    1879             : static void
    1880        5116 : blob_persist_complete_cb(void *arg)
    1881             : {
    1882        5116 :         struct spdk_blob_persist_ctx *ctx = arg;
    1883             : 
    1884             :         /* Call user callback */
    1885        5116 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1886             : 
    1887             :         /* Free the memory */
    1888        5116 :         spdk_free(ctx->pages);
    1889        5116 :         free(ctx);
    1890        5116 : }
    1891             : 
    1892             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1893             : 
    1894             : static void
    1895        5116 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1896             : {
    1897             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1898        5116 :         struct spdk_blob                *blob = ctx->blob;
    1899             : 
    1900        5116 :         if (bserrno == 0) {
    1901        5064 :                 blob_mark_clean(blob);
    1902             :         }
    1903             : 
    1904        5116 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1905             : 
    1906             :         /* Complete all persists that were pending when the current persist started */
    1907       10232 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1908        5116 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1909        5116 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1910             :         }
    1911             : 
    1912        5116 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1913        5093 :                 return;
    1914             :         }
    1915             : 
    1916             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1917          23 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1918          23 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1919             : 
    1920          23 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1921          23 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1922             : }
    1923             : 
    1924             : static void
    1925        5064 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1926             : {
    1927        5064 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1928        5064 :         struct spdk_blob                *blob = ctx->blob;
    1929        5064 :         struct spdk_blob_store          *bs = blob->bs;
    1930             :         size_t                          i;
    1931             : 
    1932        5064 :         if (bserrno != 0) {
    1933           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1934           0 :                 return;
    1935             :         }
    1936             : 
    1937        5064 :         spdk_spin_lock(&bs->used_lock);
    1938             : 
    1939             :         /* Release all extent_pages that were truncated */
    1940        6800 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1941             :                 /* Nothing to release if it was not allocated */
    1942        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1943         626 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1944             :                 }
    1945             :         }
    1946             : 
    1947        5064 :         spdk_spin_unlock(&bs->used_lock);
    1948             : 
    1949        5064 :         if (blob->active.num_extent_pages == 0) {
    1950        3651 :                 free(blob->active.extent_pages);
    1951        3651 :                 blob->active.extent_pages = NULL;
    1952        3651 :                 blob->active.extent_pages_array_size = 0;
    1953        1413 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1954             : #ifndef __clang_analyzer__
    1955             :                 void *tmp;
    1956             : 
    1957             :                 /* scan-build really can't figure reallocs, workaround it */
    1958           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1959           2 :                 assert(tmp != NULL);
    1960           2 :                 blob->active.extent_pages = tmp;
    1961             : #endif
    1962           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1963             :         }
    1964             : 
    1965        5064 :         blob_persist_complete(seq, ctx, bserrno);
    1966             : }
    1967             : 
    1968             : static void
    1969        5064 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1970             : {
    1971        5064 :         struct spdk_blob                *blob = ctx->blob;
    1972        5064 :         struct spdk_blob_store          *bs = blob->bs;
    1973             :         size_t                          i;
    1974             :         uint64_t                        lba;
    1975             :         uint64_t                        lba_count;
    1976             :         spdk_bs_batch_t                 *batch;
    1977             : 
    1978        5064 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1979        5064 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1980             : 
    1981             :         /* Clear all extent_pages that were truncated */
    1982        6800 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1983             :                 /* Nothing to clear if it was not allocated */
    1984        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1985         626 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1986         626 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1987             :                 }
    1988             :         }
    1989             : 
    1990        5064 :         bs_batch_close(batch);
    1991        5064 : }
    1992             : 
    1993             : static void
    1994        5064 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1995             : {
    1996        5064 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1997        5064 :         struct spdk_blob                *blob = ctx->blob;
    1998        5064 :         struct spdk_blob_store          *bs = blob->bs;
    1999             :         size_t                          i;
    2000             : 
    2001        5064 :         if (bserrno != 0) {
    2002           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2003           0 :                 return;
    2004             :         }
    2005             : 
    2006        5064 :         spdk_spin_lock(&bs->used_lock);
    2007             :         /* Release all clusters that were truncated */
    2008     1074111 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2009     1069047 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    2010             : 
    2011             :                 /* Nothing to release if it was not allocated */
    2012     1069047 :                 if (blob->active.clusters[i] != 0) {
    2013        2343 :                         bs_release_cluster(bs, cluster_num);
    2014             :                 }
    2015             :         }
    2016        5064 :         spdk_spin_unlock(&bs->used_lock);
    2017             : 
    2018        5064 :         if (blob->active.num_clusters == 0) {
    2019        1944 :                 free(blob->active.clusters);
    2020        1944 :                 blob->active.clusters = NULL;
    2021        1944 :                 blob->active.cluster_array_size = 0;
    2022        3120 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2023             : #ifndef __clang_analyzer__
    2024             :                 void *tmp;
    2025             : 
    2026             :                 /* scan-build really can't figure reallocs, workaround it */
    2027          14 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2028          14 :                 assert(tmp != NULL);
    2029          14 :                 blob->active.clusters = tmp;
    2030             : 
    2031             : #endif
    2032          14 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2033             :         }
    2034             : 
    2035             :         /* Move on to clearing extent pages */
    2036        5064 :         blob_persist_clear_extents(seq, ctx);
    2037             : }
    2038             : 
    2039             : static void
    2040        5064 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2041             : {
    2042        5064 :         struct spdk_blob                *blob = ctx->blob;
    2043        5064 :         struct spdk_blob_store          *bs = blob->bs;
    2044             :         spdk_bs_batch_t                 *batch;
    2045             :         size_t                          i;
    2046             :         uint64_t                        lba;
    2047             :         uint64_t                        lba_count;
    2048             : 
    2049             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2050             :          * at the end, but no changes ever occur in the middle of the list.
    2051             :          */
    2052             : 
    2053        5064 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2054             : 
    2055             :         /* Clear all clusters that were truncated */
    2056        5064 :         lba = 0;
    2057        5064 :         lba_count = 0;
    2058     1074111 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2059     1069047 :                 uint64_t next_lba = blob->active.clusters[i];
    2060     1069047 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2061             : 
    2062     1069047 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2063             :                         /* This cluster is contiguous with the previous one. */
    2064        1085 :                         lba_count += next_lba_count;
    2065        1085 :                         continue;
    2066     1067962 :                 } else if (next_lba == 0) {
    2067     1066704 :                         continue;
    2068             :                 }
    2069             : 
    2070             :                 /* This cluster is not contiguous with the previous one. */
    2071             : 
    2072             :                 /* If a run of LBAs previously existing, clear them now */
    2073        1258 :                 if (lba_count > 0) {
    2074          36 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2075             :                 }
    2076             : 
    2077             :                 /* Start building the next batch */
    2078        1258 :                 lba = next_lba;
    2079        1258 :                 if (next_lba > 0) {
    2080        1258 :                         lba_count = next_lba_count;
    2081             :                 } else {
    2082           0 :                         lba_count = 0;
    2083             :                 }
    2084             :         }
    2085             : 
    2086             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2087        5064 :         if (lba_count > 0) {
    2088        1222 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2089             :         }
    2090             : 
    2091        5064 :         bs_batch_close(batch);
    2092        5064 : }
    2093             : 
    2094             : static void
    2095        5068 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2096             : {
    2097        5068 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2098        5068 :         struct spdk_blob                *blob = ctx->blob;
    2099        5068 :         struct spdk_blob_store          *bs = blob->bs;
    2100             :         size_t                          i;
    2101             : 
    2102        5068 :         if (bserrno != 0) {
    2103           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2104           4 :                 return;
    2105             :         }
    2106             : 
    2107        5064 :         spdk_spin_lock(&bs->used_lock);
    2108             : 
    2109             :         /* This loop starts at 1 because the first page is special and handled
    2110             :          * below. The pages (except the first) are never written in place,
    2111             :          * so any pages in the clean list must be zeroed.
    2112             :          */
    2113        5132 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2114          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2115             :         }
    2116             : 
    2117        5064 :         if (blob->active.num_pages == 0) {
    2118             :                 uint32_t page_num;
    2119             : 
    2120        1488 :                 page_num = bs_blobid_to_page(blob->id);
    2121        1488 :                 bs_release_md_page(bs, page_num);
    2122             :         }
    2123             : 
    2124        5064 :         spdk_spin_unlock(&bs->used_lock);
    2125             : 
    2126             :         /* Move on to clearing clusters */
    2127        5064 :         blob_persist_clear_clusters(seq, ctx);
    2128             : }
    2129             : 
    2130             : static void
    2131        5108 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2132             : {
    2133        5108 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2134        5108 :         struct spdk_blob                *blob = ctx->blob;
    2135        5108 :         struct spdk_blob_store          *bs = blob->bs;
    2136             :         uint64_t                        lba;
    2137             :         uint64_t                        lba_count;
    2138             :         spdk_bs_batch_t                 *batch;
    2139             :         size_t                          i;
    2140             : 
    2141        5108 :         if (bserrno != 0) {
    2142          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2143          40 :                 return;
    2144             :         }
    2145             : 
    2146        5068 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2147             : 
    2148        5068 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2149             : 
    2150             :         /* This loop starts at 1 because the first page is special and handled
    2151             :          * below. The pages (except the first) are never written in place,
    2152             :          * so any pages in the clean list must be zeroed.
    2153             :          */
    2154        5136 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2155          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2156             : 
    2157          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2158             :         }
    2159             : 
    2160             :         /* The first page will only be zeroed if this is a delete. */
    2161        5068 :         if (blob->active.num_pages == 0) {
    2162             :                 uint32_t page_num;
    2163             : 
    2164             :                 /* The first page in the metadata goes where the blobid indicates */
    2165        1492 :                 page_num = bs_blobid_to_page(blob->id);
    2166        1492 :                 lba = bs_md_page_to_lba(bs, page_num);
    2167             : 
    2168        1492 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2169             :         }
    2170             : 
    2171        5068 :         bs_batch_close(batch);
    2172             : }
    2173             : 
    2174             : static void
    2175        3616 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2176             : {
    2177        3616 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2178        3616 :         struct spdk_blob                *blob = ctx->blob;
    2179        3616 :         struct spdk_blob_store          *bs = blob->bs;
    2180             :         uint64_t                        lba;
    2181             :         uint32_t                        lba_count;
    2182             :         struct spdk_blob_md_page        *page;
    2183             : 
    2184        3616 :         if (bserrno != 0) {
    2185           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2186           0 :                 return;
    2187             :         }
    2188             : 
    2189        3616 :         if (blob->active.num_pages == 0) {
    2190             :                 /* Move on to the next step */
    2191           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2192           0 :                 return;
    2193             :         }
    2194             : 
    2195        3616 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2196             : 
    2197        3616 :         page = &ctx->pages[0];
    2198             :         /* The first page in the metadata goes where the blobid indicates */
    2199        3616 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2200             : 
    2201        3616 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2202             :                               blob_persist_zero_pages, ctx);
    2203             : }
    2204             : 
    2205             : static void
    2206        3616 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2207             : {
    2208        3616 :         struct spdk_blob                *blob = ctx->blob;
    2209        3616 :         struct spdk_blob_store          *bs = blob->bs;
    2210             :         uint64_t                        lba;
    2211             :         uint32_t                        lba_count;
    2212             :         struct spdk_blob_md_page        *page;
    2213             :         spdk_bs_batch_t                 *batch;
    2214             :         size_t                          i;
    2215             : 
    2216             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2217             :          * at the end, but no changes ever occur in the middle of the list.
    2218             :          */
    2219             : 
    2220        3616 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2221             : 
    2222        3616 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2223             : 
    2224             :         /* This starts at 1. The root page is not written until
    2225             :          * all of the others are finished
    2226             :          */
    2227        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2228          88 :                 page = &ctx->pages[i];
    2229          88 :                 assert(page->sequence_num == i);
    2230             : 
    2231          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2232             : 
    2233          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2234             :         }
    2235             : 
    2236        3616 :         bs_batch_close(batch);
    2237        3616 : }
    2238             : 
    2239             : static int
    2240        3576 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2241             : {
    2242             :         uint64_t        i;
    2243             :         uint64_t        *tmp;
    2244        3576 :         uint64_t        cluster;
    2245        3576 :         uint32_t        lfmd; /*  lowest free md page */
    2246             :         uint64_t        num_clusters;
    2247             :         uint32_t        *ep_tmp;
    2248        3576 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2249             :         struct spdk_blob_store *bs;
    2250             :         int             rc;
    2251             : 
    2252        3576 :         bs = blob->bs;
    2253             : 
    2254        3576 :         blob_verify_md_op(blob);
    2255             : 
    2256        3576 :         if (blob->active.num_clusters == sz) {
    2257         456 :                 return 0;
    2258             :         }
    2259             : 
    2260        3120 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2261             :                 /* If this blob was resized to be larger, then smaller, then
    2262             :                  * larger without syncing, then the cluster array already
    2263             :                  * contains spare assigned clusters we can use.
    2264             :                  */
    2265           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2266             :                                         sz);
    2267             :         } else {
    2268        3120 :                 num_clusters = blob->active.num_clusters;
    2269             :         }
    2270             : 
    2271        3120 :         if (blob->use_extent_table) {
    2272             :                 /* Round up since every cluster beyond current Extent Table size,
    2273             :                  * requires new extent page. */
    2274        1582 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2275        1582 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2276             :         }
    2277             : 
    2278        3120 :         assert(!spdk_spin_held(&bs->used_lock));
    2279             : 
    2280             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2281             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2282             :          * to claim them later in this function.
    2283             :          */
    2284        3120 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2285        1302 :                 spdk_spin_lock(&bs->used_lock);
    2286        1302 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2287           8 :                         rc = -ENOSPC;
    2288           8 :                         goto out;
    2289             :                 }
    2290        1294 :                 lfmd = 0;
    2291        1938 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2292         644 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2293         644 :                         if (lfmd == UINT32_MAX) {
    2294             :                                 /* No more free md pages. Cannot satisfy the request */
    2295           0 :                                 rc = -ENOSPC;
    2296           0 :                                 goto out;
    2297             :                         }
    2298             :                 }
    2299             :         }
    2300             : 
    2301        3112 :         if (sz > num_clusters) {
    2302             :                 /* Expand the cluster array if necessary.
    2303             :                  * We only shrink the array when persisting.
    2304             :                  */
    2305        1706 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2306        1706 :                 if (sz > 0 && tmp == NULL) {
    2307           0 :                         rc = -ENOMEM;
    2308           0 :                         goto out;
    2309             :                 }
    2310        1706 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2311        1706 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2312        1706 :                 blob->active.clusters = tmp;
    2313        1706 :                 blob->active.cluster_array_size = sz;
    2314             : 
    2315             :                 /* Expand the extents table, only if enough clusters were added */
    2316        1706 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2317         842 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2318         842 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2319           0 :                                 rc = -ENOMEM;
    2320           0 :                                 goto out;
    2321             :                         }
    2322         842 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2323         842 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2324         842 :                         blob->active.extent_pages = ep_tmp;
    2325         842 :                         blob->active.extent_pages_array_size = new_num_ep;
    2326             :                 }
    2327             :         }
    2328             : 
    2329        3112 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2330             : 
    2331        3112 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2332        2428 :                 cluster = 0;
    2333        2428 :                 lfmd = 0;
    2334        9832 :                 for (i = num_clusters; i < sz; i++) {
    2335        7404 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2336             :                         /* Do not increment lfmd here.  lfmd will get updated
    2337             :                          * to the md_page allocated (if any) when a new extent
    2338             :                          * page is needed.  Just pass that value again,
    2339             :                          * bs_allocate_cluster will just start at that index
    2340             :                          * to find the next free md_page when needed.
    2341             :                          */
    2342             :                 }
    2343             :         }
    2344             : 
    2345             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2346     1072199 :         for (i = sz; i < num_clusters; i++) {
    2347     1069087 :                 if (blob->active.clusters[i] != 0) {
    2348        2343 :                         blob->active.num_allocated_clusters--;
    2349             :                 }
    2350             :         }
    2351             : 
    2352        3112 :         blob->active.num_clusters = sz;
    2353        3112 :         blob->active.num_extent_pages = new_num_ep;
    2354             : 
    2355        3112 :         rc = 0;
    2356        3120 : out:
    2357        3120 :         if (spdk_spin_held(&bs->used_lock)) {
    2358        1302 :                 spdk_spin_unlock(&bs->used_lock);
    2359             :         }
    2360             : 
    2361        3120 :         return rc;
    2362             : }
    2363             : 
    2364             : static void
    2365        3616 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2366             : {
    2367        3616 :         spdk_bs_sequence_t *seq = ctx->seq;
    2368        3616 :         struct spdk_blob *blob = ctx->blob;
    2369        3616 :         struct spdk_blob_store *bs = blob->bs;
    2370             :         uint64_t i;
    2371             :         uint32_t page_num;
    2372             :         void *tmp;
    2373             :         int rc;
    2374             : 
    2375             :         /* Generate the new metadata */
    2376        3616 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2377        3616 :         if (rc < 0) {
    2378           0 :                 blob_persist_complete(seq, ctx, rc);
    2379           0 :                 return;
    2380             :         }
    2381             : 
    2382        3616 :         assert(blob->active.num_pages >= 1);
    2383             : 
    2384             :         /* Resize the cache of page indices */
    2385        3616 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2386        3616 :         if (!tmp) {
    2387           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2388           0 :                 return;
    2389             :         }
    2390        3616 :         blob->active.pages = tmp;
    2391             : 
    2392             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2393             :          * enough pages and a second to actually claim them. The used_lock is held across
    2394             :          * both passes to ensure things don't change in the middle.
    2395             :          */
    2396        3616 :         spdk_spin_lock(&bs->used_lock);
    2397        3616 :         page_num = 0;
    2398             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2399        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2400          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2401          88 :                 if (page_num == UINT32_MAX) {
    2402           0 :                         spdk_spin_unlock(&bs->used_lock);
    2403           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2404           0 :                         return;
    2405             :                 }
    2406          88 :                 page_num++;
    2407             :         }
    2408             : 
    2409        3616 :         page_num = 0;
    2410        3616 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2411        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2412          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2413          88 :                 ctx->pages[i - 1].next = page_num;
    2414             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2415          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2416          88 :                 blob->active.pages[i] = page_num;
    2417          88 :                 bs_claim_md_page(bs, page_num);
    2418          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2419             :                               blob->id);
    2420          88 :                 page_num++;
    2421             :         }
    2422        3616 :         spdk_spin_unlock(&bs->used_lock);
    2423        3616 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2424             :         /* Start writing the metadata from last page to first */
    2425        3616 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2426        3616 :         blob_persist_write_page_chain(seq, ctx);
    2427             : }
    2428             : 
    2429             : static void
    2430        2358 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2431             : {
    2432        2358 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2433        2358 :         struct spdk_blob                *blob = ctx->blob;
    2434             :         size_t                          i;
    2435             :         uint32_t                        extent_page_id;
    2436        2358 :         uint32_t                        page_count = 0;
    2437             :         int                             rc;
    2438             : 
    2439        2358 :         if (ctx->extent_page != NULL) {
    2440         666 :                 spdk_free(ctx->extent_page);
    2441         666 :                 ctx->extent_page = NULL;
    2442             :         }
    2443             : 
    2444        2358 :         if (bserrno != 0) {
    2445           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2446           0 :                 return;
    2447             :         }
    2448             : 
    2449             :         /* Only write out Extent Pages when blob was resized. */
    2450        4614 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2451        2922 :                 extent_page_id = blob->active.extent_pages[i];
    2452        2922 :                 if (extent_page_id == 0) {
    2453             :                         /* No Extent Page to persist */
    2454        2256 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2455        2256 :                         continue;
    2456             :                 }
    2457         666 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
    2458         666 :                 ctx->next_extent_page = i + 1;
    2459         666 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2460         666 :                 if (rc < 0) {
    2461           0 :                         blob_persist_complete(seq, ctx, rc);
    2462           0 :                         return;
    2463             :                 }
    2464             : 
    2465         666 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2466         666 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2467             : 
    2468         666 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2469             : 
    2470         666 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2471         666 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2472             :                                       blob_persist_write_extent_pages, ctx);
    2473         666 :                 return;
    2474             :         }
    2475             : 
    2476        1692 :         blob_persist_generate_new_md(ctx);
    2477             : }
    2478             : 
    2479             : static void
    2480        5116 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2481             : {
    2482        5116 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2483        5116 :         struct spdk_blob *blob = ctx->blob;
    2484             : 
    2485        5116 :         if (bserrno != 0) {
    2486           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2487           8 :                 return;
    2488             :         }
    2489             : 
    2490        5108 :         if (blob->active.num_pages == 0) {
    2491             :                 /* This is the signal that the blob should be deleted.
    2492             :                  * Immediately jump to the clean up routine. */
    2493        1492 :                 assert(blob->clean.num_pages > 0);
    2494        1492 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2495        1492 :                 blob_persist_zero_pages(seq, ctx, 0);
    2496        1492 :                 return;
    2497             : 
    2498             :         }
    2499             : 
    2500        3616 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2501             :                 /* Blob was resized up */
    2502        1678 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2503        1678 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2504        1938 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2505             :                 /* Blob was resized down */
    2506          14 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2507          14 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2508             :         } else {
    2509             :                 /* No change in size occurred */
    2510        1924 :                 blob_persist_generate_new_md(ctx);
    2511        1924 :                 return;
    2512             :         }
    2513             : 
    2514        1692 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2515             : }
    2516             : 
    2517             : struct spdk_bs_mark_dirty {
    2518             :         struct spdk_blob_store          *bs;
    2519             :         struct spdk_bs_super_block      *super;
    2520             :         spdk_bs_sequence_cpl            cb_fn;
    2521             :         void                            *cb_arg;
    2522             : };
    2523             : 
    2524             : static void
    2525         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2526             : {
    2527         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2528             : 
    2529         158 :         if (bserrno == 0) {
    2530         150 :                 ctx->bs->clean = 0;
    2531             :         }
    2532             : 
    2533         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2534             : 
    2535         158 :         spdk_free(ctx->super);
    2536         158 :         free(ctx);
    2537         158 : }
    2538             : 
    2539             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2540             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2541             : 
    2542             : 
    2543             : static void
    2544         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2545             : {
    2546         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2547             :         int rc;
    2548             : 
    2549         158 :         if (bserrno != 0) {
    2550           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2551           4 :                 return;
    2552             :         }
    2553             : 
    2554         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2555         154 :         if (rc != 0) {
    2556           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2557           0 :                 return;
    2558             :         }
    2559             : 
    2560         154 :         ctx->super->clean = 0;
    2561         154 :         if (ctx->super->size == 0) {
    2562           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2563             :         }
    2564             : 
    2565         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2566             : }
    2567             : 
    2568             : static void
    2569        5550 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2570             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2571             : {
    2572             :         struct spdk_bs_mark_dirty *ctx;
    2573             : 
    2574             :         /* Blobstore is already marked dirty */
    2575        5550 :         if (bs->clean == 0) {
    2576        5392 :                 cb_fn(seq, cb_arg, 0);
    2577        5392 :                 return;
    2578             :         }
    2579             : 
    2580         158 :         ctx = calloc(1, sizeof(*ctx));
    2581         158 :         if (!ctx) {
    2582           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2583           0 :                 return;
    2584             :         }
    2585         158 :         ctx->bs = bs;
    2586         158 :         ctx->cb_fn = cb_fn;
    2587         158 :         ctx->cb_arg = cb_arg;
    2588             : 
    2589         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2590             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2591         158 :         if (!ctx->super) {
    2592           0 :                 free(ctx);
    2593           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2594           0 :                 return;
    2595             :         }
    2596             : 
    2597         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2598         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2599             :                              bs_mark_dirty_write, ctx);
    2600             : }
    2601             : 
    2602             : /* Write a blob to disk */
    2603             : static void
    2604        9144 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2605             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2606             : {
    2607             :         struct spdk_blob_persist_ctx *ctx;
    2608             : 
    2609        9144 :         blob_verify_md_op(blob);
    2610             : 
    2611        9144 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2612        4028 :                 cb_fn(seq, cb_arg, 0);
    2613        4028 :                 return;
    2614             :         }
    2615             : 
    2616        5116 :         ctx = calloc(1, sizeof(*ctx));
    2617        5116 :         if (!ctx) {
    2618           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2619           0 :                 return;
    2620             :         }
    2621        5116 :         ctx->blob = blob;
    2622        5116 :         ctx->seq = seq;
    2623        5116 :         ctx->cb_fn = cb_fn;
    2624        5116 :         ctx->cb_arg = cb_arg;
    2625             : 
    2626             :         /* Multiple blob persists can affect one another, via blob->state or
    2627             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2628        5116 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2629          23 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2630          23 :                 return;
    2631             :         }
    2632        5093 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2633             : 
    2634        5093 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2635             : }
    2636             : 
    2637             : struct spdk_blob_copy_cluster_ctx {
    2638             :         struct spdk_blob *blob;
    2639             :         uint8_t *buf;
    2640             :         uint64_t page;
    2641             :         uint64_t new_cluster;
    2642             :         uint32_t new_extent_page;
    2643             :         spdk_bs_sequence_t *seq;
    2644             :         struct spdk_blob_md_page *new_cluster_page;
    2645             : };
    2646             : 
    2647             : struct spdk_blob_free_cluster_ctx { /* State carried from the unmap completion through blob_free_cluster_on_md_thread() to the final sequence finish. */
    2648             :         struct spdk_blob *blob; /* blob the cluster is being freed from */
    2649             :         uint64_t page; /* not read by the callbacks in this part of the file; presumably the cluster's first page - TODO confirm */
    2650             :         struct spdk_blob_md_page *md_page; /* forwarded to blob_free_cluster_on_md_thread() */
    2651             :         uint64_t cluster_num; /* forwarded to blob_free_cluster_on_md_thread() */
    2652             :         uint32_t extent_page; /* forwarded to blob_free_cluster_on_md_thread() */
    2653             :         spdk_bs_sequence_t *seq; /* finished by blob_free_cluster_cpl(), or directly when the unmap fails */
    2654             : };
    2655             : 
    2656             : static void /* Completion callback installed by bs_allocate_and_copy_cluster(): replays or aborts the queued user ops, then frees ctx. */
    2657         812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2658             : {
    2659         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2660         812 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq; /* the cast is used to reach the owning channel */
    2661         812 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2662             :         spdk_bs_user_op_t *op;
    2663             : 
    2664         812 :         TAILQ_INIT(&requests);
    2665         812 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link); /* move every waiting op onto a private list, leaving the channel queue empty */
    2666             :         /* bserrno == 0: re-run each queued op; otherwise fail each one with the same error. */
    2667        1624 :         while (!TAILQ_EMPTY(&requests)) {
    2668         812 :                 op = TAILQ_FIRST(&requests);
    2669         812 :                 TAILQ_REMOVE(&requests, op, link);
    2670         812 :                 if (bserrno == 0) {
    2671         812 :                         bs_user_op_execute(op);
    2672             :                 } else {
    2673           0 :                         bs_user_op_abort(op, bserrno);
    2674             :                 }
    2675             :         }
    2676             :         /* ctx->buf is NULL when no bounce buffer was allocated (ctx comes from calloc). */
    2677         812 :         spdk_free(ctx->buf);
    2678         812 :         free(ctx);
    2679         812 : }
    2680             : 
    2681             : static void /* Final step of the free-cluster chain: finish the sequence with bserrno and free the context. */
    2682          60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2683             : {
    2684          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2685          60 :         spdk_bs_sequence_t *seq = ctx->seq;
    2686             : 
    2687          60 :         bs_sequence_finish(seq, bserrno);
    2688             : 
    2689          60 :         free(ctx);
    2690          60 : }
    2691             : 
    2692             : static void /* Undo the allocation recorded in ctx: release new_cluster and, if one was recorded, new_extent_page. */
    2693           4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2694             : {
    2695           4 :         spdk_spin_lock(&ctx->blob->bs->used_lock); /* both releases happen under the blobstore's used_lock */
    2696           4 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2697           4 :         if (ctx->new_extent_page != 0) { /* 0 means no extent page to release */
    2698           2 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2699             :         }
    2700           4 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2701           4 : }
    2702             : 
    2703             : static void /* Completion of the cluster clear (also called directly with -ENOMEM when no batch could be opened). */
    2704           4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2705             : {
    2706           4 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2707             : 
    2708           4 :         if (bserrno) {
    2709           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2710             :         }
    2711             :         /* The allocation is reverted whether or not the clear succeeded; the clear's status becomes the sequence status. */
    2712           4 :         blob_insert_cluster_revert(ctx);
    2713           4 :         bs_sequence_finish(ctx->seq, bserrno);
    2714           4 : }
    2715             : 
    2716             : static void /* Clear the one cluster recorded in ctx->new_cluster, then continue in blob_insert_cluster_clear_cpl(). */
    2717           4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2718             : {
    2719           4 :         struct spdk_bs_cpl cpl;
    2720             :         spdk_bs_batch_t *batch;
    2721           4 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2722             : 
    2723             :         /*
    2724             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2725             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2726             :          * cluster.
    2727             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2728             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2729             :          */
    2730             : 
    2731           4 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2732           4 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2733           4 :         cpl.u.blob_basic.cb_arg = ctx;
    2734             : 
    2735           4 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2736           4 :         if (!batch) {
    2737           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM); /* still reverts the allocation and finishes the sequence */
    2738           0 :                 return;
    2739             :         }
    2740             :         /* Start LBA is the new cluster's LBA; the length is the LBA span of a single cluster. */
    2741           4 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2742           4 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2743           4 :         bs_batch_close(batch);
    2744             : }
    2745             : 
    2746             : static void /* Completion of blob_insert_cluster_on_md_thread(): clean up on failure, then finish the sequence. */
    2747         812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2748             : {
    2749         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2750             : 
    2751         812 :         if (bserrno) {
    2752           4 :                 if (bserrno == -EEXIST) {
    2753             :                         /* The metadata insert failed because another thread
    2754             :                          * allocated the cluster first. Clear and free our cluster
    2755             :                          * but continue without error. */
    2756           4 :                         blob_insert_cluster_clear(ctx); /* the sequence is finished later, from blob_insert_cluster_clear_cpl() */
    2757           4 :                         return;
    2758             :                 }
    2759             :                 /* Any other error: release the allocation without clearing, then report the error below. */
    2760           0 :                 blob_insert_cluster_revert(ctx);
    2761             :         }
    2762             : 
    2763         808 :         bs_sequence_finish(ctx->seq, bserrno);
    2764             : }
    2765             : 
    2766             : static void /* Runs after the cluster data was written or copied; on success inserts the new cluster into the blob's metadata. */
    2767         408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2768             : {
    2769         408 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2770             :         uint32_t cluster_number;
    2771             : 
    2772         408 :         if (bserrno) { /* NOTE(review): no blob_insert_cluster_revert() on this path, so new_cluster/new_extent_page appear to stay claimed - confirm */
    2773             :                 /* The write failed, so jump to the final completion handler */
    2774           0 :                 bs_sequence_finish(seq, bserrno);
    2775           0 :                 return;
    2776             :         }
    2777             : 
    2778         408 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2779             :         /* blob_insert_cluster_cpl() finishes the sequence once the insert completes. */
    2780         408 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2781             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2782             : }
    2783             : 
    2784             : static void /* Runs after the backing-device read into ctx->buf; writes the buffer to the newly allocated cluster. */
    2785         280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2786             : {
    2787         280 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2788             : 
    2789         280 :         if (bserrno != 0) { /* NOTE(review): as in blob_write_copy_cpl(), the allocation is not reverted on this path - confirm */
    2790             :                 /* The read failed, so jump to the final completion handler */
    2791           0 :                 bs_sequence_finish(seq, bserrno);
    2792           0 :                 return;
    2793             :         }
    2794             : 
    2795             :         /* Write whole cluster */
    2796         280 :         bs_sequence_write_dev(seq, ctx->buf,
    2797         280 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2798         280 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2799             :                               blob_write_copy_cpl, ctx);
    2800             : }
    2801             : 
    2802             : static bool /* Report whether the cluster starting at cluster_start_page can be filled with a device copy instead of read + write. */
    2803         796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
    2804             : {
    2805         796 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2806             :         /* True only if: not an esnap clone, the blobstore dev has a copy op, and translate_lba() (given base_lba as its output argument) succeeds. */
    2807        1146 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2808         350 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2809             : }
    2810             : 
    2811             : static void /* Copy one cluster's worth of LBAs from src_lba into ctx->new_cluster; `op` is not used in the body. */
    2812         128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2813             : {
    2814         128 :         struct spdk_blob *blob = ctx->blob;
    2815         128 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2816             :         /* Completion goes to blob_write_copy_cpl(), the same handler used after the read + write path. */
    2817         128 :         bs_sequence_copy_dev(ctx->seq,
    2818         128 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2819             :                              src_lba,
    2820             :                              lba_count,
    2821             :                              blob_write_copy_cpl, ctx);
    2822         128 : }
    2823             : 
    2824             : static void /* Allocate the cluster containing io_unit, populate it from the backing device if needed, insert it into the blob, then replay `op`. */
    2825         812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2826             :                              struct spdk_io_channel *_ch,
    2827             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2828             : {
    2829         812 :         struct spdk_bs_cpl cpl;
    2830             :         struct spdk_bs_channel *ch;
    2831             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2832             :         uint32_t cluster_start_page;
    2833             :         uint32_t cluster_number;
    2834             :         bool is_zeroes;
    2835             :         bool can_copy;
    2836             :         bool is_valid_range;
    2837         812 :         uint64_t copy_src_lba; /* written only via blob_can_copy(); read only when can_copy is true */
    2838             :         int rc;
    2839             : 
    2840         812 :         ch = spdk_io_channel_get_ctx(_ch);
    2841             : 
    2842         812 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2843             :                 /* There are already operations pending. Queue this user op
    2844             :                  * and return because it will be re-executed when the outstanding
    2845             :                  * cluster allocation completes. */
    2846           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2847           0 :                 return;
    2848             :         }
    2849             : 
    2850             :         /* Round the io_unit offset down to the first page in the cluster */
    2851         812 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2852             : 
    2853             :         /* Calculate which index in the metadata cluster array the corresponding
    2854             :          * cluster is supposed to be at. */
    2855         812 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2856             : 
    2857         812 :         ctx = calloc(1, sizeof(*ctx)); /* zeroed, so ctx->buf starts as NULL and new_extent_page as 0 */
    2858         812 :         if (!ctx) {
    2859           0 :                 bs_user_op_abort(op, -ENOMEM);
    2860           0 :                 return;
    2861             :         }
    2862             : 
    2863         812 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2864             : 
    2865         812 :         ctx->blob = blob;
    2866         812 :         ctx->page = cluster_start_page;
    2867         812 :         ctx->new_cluster_page = ch->new_cluster_page; /* per-channel page; the need_cluster_alloc check above lets only one allocation use it at a time */
    2868         812 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2869             : 
    2870             :         /* Check if the cluster that we intend to do CoW for is valid for
    2871             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2872             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2873             :          * the blob has been resized after snapshot was taken. */
    2874         812 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2875             :                          bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2876         812 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2877             : 
    2878         812 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2879             :         /* An invalid range makes both can_copy and is_zeroes false, so a blob with a parent takes the read + write path below. */
    2880        1608 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2881             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2882         796 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2883         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) { /* a bounce buffer is needed only for the read + write path */
    2884         280 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2885             :                                        NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2886         280 :                 if (!ctx->buf) {
    2887           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2888             :                                     blob->bs->cluster_sz);
    2889           0 :                         free(ctx);
    2890           0 :                         bs_user_op_abort(op, -ENOMEM);
    2891           0 :                         return;
    2892             :                 }
    2893             :         }
    2894             : 
    2895         812 :         spdk_spin_lock(&blob->bs->used_lock);
    2896         812 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2897             :                                  false);
    2898         812 :         spdk_spin_unlock(&blob->bs->used_lock);
    2899         812 :         if (rc != 0) {
    2900           0 :                 spdk_free(ctx->buf);
    2901           0 :                 free(ctx);
    2902           0 :                 bs_user_op_abort(op, rc);
    2903           0 :                 return;
    2904             :         }
    2905             : 
    2906         812 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2907         812 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2908         812 :         cpl.u.blob_basic.cb_arg = ctx;
    2909             : 
    2910         812 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2911         812 :         if (!ctx->seq) { /* NOTE(review): only new_cluster is released below, while blob_insert_cluster_revert() also releases a non-zero new_extent_page - confirm whether an md page can be leaked here */
    2912           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2913           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2914           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2915           0 :                 spdk_free(ctx->buf);
    2916           0 :                 free(ctx);
    2917           0 :                 bs_user_op_abort(op, -ENOMEM);
    2918           0 :                 return;
    2919             :         }
    2920             : 
    2921             :         /* Queue the user op to block other incoming operations */
    2922         812 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2923             : 
    2924         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) { /* the blob has a parent and the backing range is not reported as zeroes, so data must be brought over */
    2925         408 :                 if (can_copy) {
    2926         128 :                         blob_copy(ctx, op, copy_src_lba);
    2927             :                 } else {
    2928             :                         /* Read cluster from backing device */
    2929         280 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2930             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2931         280 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2932             :                                                 blob_write_copy, ctx);
    2933             :                 }
    2934             : 
    2935             :         } else { /* nothing to copy: go straight to the metadata insert */
    2936         404 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2937             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2938             :         }
    2939             : }
    2940             : 
    2941             : static inline bool /* Translate (io_unit, length) into *lba / *lba_count; returns true when io_unit is allocated in the blob, false when it must be served by the backing device. */
    2942       40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2943             :                                  uint64_t *lba, uint64_t *lba_count)
    2944             : {
    2945       40206 :         *lba_count = length; /* kept as-is for the allocated case; converted below for the backing device */
    2946             : 
    2947       40206 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2948        2992 :                 assert(blob->back_bs_dev != NULL);
    2949        2992 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2950        2992 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count); /* the same conversion helper is applied to the length */
    2951        2992 :                 return false;
    2952             :         } else {
    2953       37214 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2954       37214 :                 return true;
    2955             :         }
    2956             : }
    2957             : 
    2958             : struct op_split_ctx { /* Progress of one I/O that blob_request_submit_op_split() issues as a series of smaller operations. */
    2959             :         struct spdk_blob *blob; /* blob the I/O targets */
    2960             :         struct spdk_io_channel *channel; /* channel every piece is submitted on */
    2961             :         uint64_t io_unit_offset; /* offset, in io units, of the next piece to submit */
    2962             :         uint64_t io_units_remaining; /* io units not yet submitted */
    2963             :         void *curr_payload; /* payload position for the next piece; advanced only for READ and WRITE */
    2964             :         enum spdk_blob_op_type op_type; /* operation applied to every piece */
    2965             :         spdk_bs_sequence_t *seq; /* finished once, when the whole split completes or a piece fails */
    2966             :         bool in_submit_ctx; /* true while a piece is being submitted from the split loop */
    2967             :         bool completed_in_submit_ctx; /* set when a piece completed inside its own submit call, so the loop submits the next one */
    2968             :         bool done; /* set when the split finished inside a submit call; the loop then frees ctx */
    2969             : };
    2970             : 
    2971             : static void /* Submit the next piece of a split I/O; this function is also the completion callback of every piece (cb_arg is the op_split_ctx). */
    2972         774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2973             : {
    2974         774 :         struct op_split_ctx     *ctx = cb_arg;
    2975         774 :         struct spdk_blob        *blob = ctx->blob;
    2976         774 :         struct spdk_io_channel  *ch = ctx->channel;
    2977         774 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2978             :         uint8_t                 *buf;
    2979             :         uint64_t                offset;
    2980             :         uint64_t                length;
    2981             :         uint64_t                op_length;
    2982             : 
    2983         774 :         if (bserrno != 0 || ctx->io_units_remaining == 0) { /* a piece failed, or nothing is left to submit */
    2984         178 :                 bs_sequence_finish(ctx->seq, bserrno);
    2985         178 :                 if (ctx->in_submit_ctx) {
    2986             :                         /* Defer freeing of the ctx object, since it will be
    2987             :                          * accessed when this unwinds back to the submission
    2988             :                          * context.
    2989             :                          */
    2990          40 :                         ctx->done = true;
    2991             :                 } else {
    2992         138 :                         free(ctx);
    2993             :                 }
    2994         178 :                 return;
    2995             :         }
    2996             : 
    2997         596 :         if (ctx->in_submit_ctx) {
    2998             :                 /* If this split operation completed in the context
    2999             :                  * of its submission, mark the flag and return immediately
    3000             :                  * to avoid recursion.
    3001             :                  */
    3002          68 :                 ctx->completed_in_submit_ctx = true;
    3003          68 :                 return;
    3004             :         }
    3005             : 
    3006             :         while (true) {
    3007         596 :                 ctx->completed_in_submit_ctx = false;
    3008             : 
    3009         596 :                 offset = ctx->io_unit_offset;
    3010         596 :                 length = ctx->io_units_remaining;
    3011         596 :                 buf = ctx->curr_payload;
    3012         596 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    3013             :                                      offset)); /* each piece stops at the next cluster boundary */
    3014             : 
    3015             :                 /* Update length and payload for next operation */
    3016         596 :                 ctx->io_units_remaining -= op_length;
    3017         596 :                 ctx->io_unit_offset += op_length;
    3018         596 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    3019         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size; /* arithmetic on void * is a GNU C extension (byte-sized steps) */
    3020             :                 }
    3021             : 
    3022         596 :                 assert(!ctx->in_submit_ctx);
    3023         596 :                 ctx->in_submit_ctx = true;
    3024             :                 /* Each call below may invoke this function again before it returns; in_submit_ctx marks that window. */
    3025         596 :                 switch (op_type) {
    3026         418 :                 case SPDK_BLOB_READ:
    3027         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3028             :                                           blob_request_submit_op_split_next, ctx);
    3029         418 :                         break;
    3030         110 :                 case SPDK_BLOB_WRITE:
    3031         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3032             :                                            blob_request_submit_op_split_next, ctx);
    3033         110 :                         break;
    3034          36 :                 case SPDK_BLOB_UNMAP:
    3035          36 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3036             :                                            blob_request_submit_op_split_next, ctx);
    3037          36 :                         break;
    3038          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    3039          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3040             :                                                   blob_request_submit_op_split_next, ctx);
    3041          32 :                         break;
    3042           0 :                 case SPDK_BLOB_READV: /* vectored ops are rejected here: the whole split fails with -EINVAL */
    3043             :                 case SPDK_BLOB_WRITEV:
    3044           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3045           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3046           0 :                         free(ctx);
    3047           0 :                         return;
    3048             :                 }
    3049             : 
    3050             : #ifndef __clang_analyzer__
    3051             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3052             :                  * forms a path that recursively calls this function, but then says
    3053             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3054             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3055             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3056             :                  * works around the scan-build bug.
    3057             :                  */
    3058         596 :                 assert(ctx->in_submit_ctx);
    3059         596 :                 ctx->in_submit_ctx = false;
    3060             : 
    3061             :                 /* If the operation completed immediately, loop back and submit the
    3062             :                  * next operation.  Otherwise we can return and the next split
    3063             :                  * operation will get submitted when this current operation is
    3064             :                  * later completed asynchronously.
    3065             :                  */
    3066         596 :                 if (ctx->completed_in_submit_ctx) {
    3067          68 :                         continue;
    3068         528 :                 } else if (ctx->done) { /* the split finished inside the submit call above, which left the free to this frame */
    3069          40 :                         free(ctx);
    3070             :                 }
    3071             : #endif
    3072         528 :                 break;
    3073             :         }
    3074             : }
    3075             : 
    3076             : static void /* Start a split I/O: one sequence reports to cb_fn/cb_arg while the pieces are issued by blob_request_submit_op_split_next(). */
    3077         178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3078             :                              void *payload, uint64_t offset, uint64_t length,
    3079             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3080             : {
    3081             :         struct op_split_ctx *ctx;
    3082             :         spdk_bs_sequence_t *seq;
    3083         178 :         struct spdk_bs_cpl cpl;
    3084             : 
    3085         178 :         assert(blob != NULL);
    3086             : 
    3087         178 :         ctx = calloc(1, sizeof(struct op_split_ctx)); /* zeroed, so the three bool flags start out false */
    3088         178 :         if (ctx == NULL) {
    3089           0 :                 cb_fn(cb_arg, -ENOMEM);
    3090           0 :                 return;
    3091             :         }
    3092             : 
    3093         178 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3094         178 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3095         178 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3096             : 
    3097         178 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3098         178 :         if (!seq) {
    3099           0 :                 free(ctx);
    3100           0 :                 cb_fn(cb_arg, -ENOMEM);
    3101           0 :                 return;
    3102             :         }
    3103             : 
    3104         178 :         ctx->blob = blob;
    3105         178 :         ctx->channel = ch;
    3106         178 :         ctx->curr_payload = payload;
    3107         178 :         ctx->io_unit_offset = offset;
    3108         178 :         ctx->io_units_remaining = length;
    3109         178 :         ctx->op_type = op_type;
    3110         178 :         ctx->seq = seq;
    3111             :         /* bserrno 0 here means "no previous piece failed"; this call submits the first piece. */
    3112         178 :         blob_request_submit_op_split_next(ctx, 0);
    3113             : }
    3114             : 
    3115             : static void /* Completion callback (named for the cluster unmap): on success hand the cluster to blob_free_cluster_on_md_thread(); on failure finish the sequence. */
    3116          60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3117             : {
    3118          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3119             : 
    3120          60 :         if (bserrno) { /* failure: report the error and free ctx here, since blob_free_cluster_cpl() will not run */
    3121           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3122           0 :                 free(ctx);
    3123           0 :                 return;
    3124             :         }
    3125             :         /* blob_free_cluster_cpl() finishes the sequence and frees ctx afterwards. */
    3126          60 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3127             :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3128             : }
    3129             : 
    3130             : static void
    3131       37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3132             :                               void *payload, uint64_t offset, uint64_t length,
    3133             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3134             : {
    3135       37834 :         struct spdk_bs_cpl cpl;
    3136       37834 :         uint64_t lba;
    3137       37834 :         uint64_t lba_count;
    3138             :         bool is_allocated;
    3139             : 
    3140       37834 :         assert(blob != NULL);
    3141             : 
    3142       37834 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3143       37834 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3144       37834 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3145             : 
    3146       37834 :         if (blob->frozen_refcnt) {
    3147             :                 /* This blob I/O is frozen */
    3148             :                 spdk_bs_user_op_t *op;
    3149           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3150             : 
    3151           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3152           4 :                 if (!op) {
    3153           0 :                         cb_fn(cb_arg, -ENOMEM);
    3154           0 :                         return;
    3155             :                 }
    3156             : 
    3157           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3158             : 
    3159           4 :                 return;
    3160             :         }
    3161             : 
    3162       37830 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3163             : 
    3164       37830 :         switch (op_type) {
    3165       16887 :         case SPDK_BLOB_READ: {
    3166             :                 spdk_bs_batch_t *batch;
    3167             : 
    3168       16887 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3169       16887 :                 if (!batch) {
    3170           0 :                         cb_fn(cb_arg, -ENOMEM);
    3171           0 :                         return;
    3172             :                 }
    3173             : 
    3174       16887 :                 if (is_allocated) {
    3175             :                         /* Read from the blob */
    3176       15799 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3177             :                 } else {
    3178             :                         /* Read from the backing block device */
    3179        1088 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3180             :                 }
    3181             : 
    3182       16887 :                 bs_batch_close(batch);
    3183       16887 :                 break;
    3184             :         }
    3185       20851 :         case SPDK_BLOB_WRITE:
    3186             :         case SPDK_BLOB_WRITE_ZEROES: {
    3187       20851 :                 if (is_allocated) {
    3188             :                         /* Write to the blob */
    3189             :                         spdk_bs_batch_t *batch;
    3190             : 
    3191       20507 :                         if (lba_count == 0) {
    3192           0 :                                 cb_fn(cb_arg, 0);
    3193           0 :                                 return;
    3194             :                         }
    3195             : 
    3196       20507 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3197       20507 :                         if (!batch) {
    3198           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3199           0 :                                 return;
    3200             :                         }
    3201             : 
    3202       20507 :                         if (op_type == SPDK_BLOB_WRITE) {
    3203       20475 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3204             :                         } else {
    3205          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3206             :                         }
    3207             : 
    3208       20507 :                         bs_batch_close(batch);
    3209             :                 } else {
    3210             :                         /* Queue this operation and allocate the cluster */
    3211             :                         spdk_bs_user_op_t *op;
    3212             : 
    3213         344 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3214         344 :                         if (!op) {
    3215           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3216           0 :                                 return;
    3217             :                         }
    3218             : 
    3219         344 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3220             :                 }
    3221       20851 :                 break;
    3222             :         }
    3223          92 :         case SPDK_BLOB_UNMAP: {
    3224          92 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3225             :                 spdk_bs_batch_t *batch;
    3226             : 
    3227             :                 /* if aligned with cluster release cluster */
    3228         160 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3229          68 :                     bs_io_units_per_cluster(blob) == length) {
    3230          60 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3231             :                         uint32_t cluster_start_page;
    3232             :                         uint32_t cluster_number;
    3233             : 
    3234          60 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3235             : 
    3236             :                         /* Round the io_unit offset down to the first page in the cluster */
    3237          60 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3238             : 
    3239             :                         /* Calculate which index in the metadata cluster array the corresponding
    3240             :                          * cluster is supposed to be at. */
    3241          60 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3242             : 
    3243          60 :                         ctx = calloc(1, sizeof(*ctx));
    3244          60 :                         if (!ctx) {
    3245           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3246           0 :                                 return;
    3247             :                         }
    3248             :                         /* When freeing a cluster the flow should be (in order):
    3249             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3250             :                          * old data)
    3251             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3252             :                          * cluster), update and sync metadata freeing the cluster
    3253             :                          * 3. Once metadata update is done, complete the user unmap request
    3254             :                          */
    3255          60 :                         ctx->blob = blob;
    3256          60 :                         ctx->page = cluster_start_page;
    3257          60 :                         ctx->cluster_num = cluster_number;
    3258          60 :                         ctx->md_page = bs_channel->new_cluster_page;
    3259          60 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3260          60 :                         if (!ctx->seq) {
    3261           0 :                                 free(ctx);
    3262           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3263           0 :                                 return;
    3264             :                         }
    3265             : 
    3266          60 :                         if (blob->use_extent_table) {
    3267          30 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3268             :                         }
    3269             : 
    3270          60 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3271          60 :                         cpl.u.blob_basic.cb_arg = ctx;
    3272             :                 }
    3273             : 
    3274          92 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3275          92 :                 if (!batch) {
    3276           0 :                         free(ctx);
    3277           0 :                         cb_fn(cb_arg, -ENOMEM);
    3278           0 :                         return;
    3279             :                 }
    3280             : 
    3281          92 :                 if (is_allocated) {
    3282          92 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3283             :                 }
    3284             : 
    3285          92 :                 bs_batch_close(batch);
    3286          92 :                 break;
    3287             :         }
    3288           0 :         case SPDK_BLOB_READV:
    3289             :         case SPDK_BLOB_WRITEV:
    3290           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3291           0 :                 cb_fn(cb_arg, -EINVAL);
    3292           0 :                 break;
    3293             :         }
    3294             : }
    3295             : 
    3296             : static void
    3297       38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3298             :                        void *payload, uint64_t offset, uint64_t length,
    3299             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3300             : {
    3301       38524 :         assert(blob != NULL);
    3302             : 
    3303       38524 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3304           4 :                 cb_fn(cb_arg, -EPERM);
    3305           4 :                 return;
    3306             :         }
    3307             : 
    3308       38520 :         if (length == 0) {
    3309         492 :                 cb_fn(cb_arg, 0);
    3310         492 :                 return;
    3311             :         }
    3312             : 
    3313       38028 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3314          24 :                 cb_fn(cb_arg, -EINVAL);
    3315          24 :                 return;
    3316             :         }
    3317       38004 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3318       37826 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3319             :                                               cb_fn, cb_arg, op_type);
    3320             :         } else {
    3321         178 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3322             :                                              cb_fn, cb_arg, op_type);
    3323             :         }
    3324             : }
    3325             : 
    3326             : struct rw_iov_ctx {
    3327             :         struct spdk_blob *blob;
    3328             :         struct spdk_io_channel *channel;
    3329             :         spdk_blob_op_complete cb_fn;
    3330             :         void *cb_arg;
    3331             :         bool read;
    3332             :         int iovcnt;
    3333             :         struct iovec *orig_iov;
    3334             :         uint64_t io_unit_offset;
    3335             :         uint64_t io_units_remaining;
    3336             :         uint64_t io_units_done;
    3337             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3338             :         struct iovec iov[0];
    3339             : };
    3340             : 
    3341             : static void
    3342        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3343             : {
    3344        2360 :         assert(cb_arg == NULL);
    3345        2360 :         bs_sequence_finish(seq, bserrno);
    3346        2360 : }
    3347             : 
    3348             : static void
    3349         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3350             : {
    3351         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3352         744 :         struct spdk_blob *blob = ctx->blob;
    3353             :         struct iovec *iov, *orig_iov;
    3354             :         int iovcnt;
    3355             :         size_t orig_iovoff;
    3356             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3357             :         uint64_t byte_count;
    3358             : 
    3359         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3360         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3361         204 :                 free(ctx);
    3362         204 :                 return;
    3363             :         }
    3364             : 
    3365         540 :         io_unit_offset = ctx->io_unit_offset;
    3366         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3367         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3368             :         /*
    3369             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3370             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3371             :          *  point to the current position in the I/O sequence.
    3372             :          */
    3373         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3374         540 :         orig_iov = &ctx->orig_iov[0];
    3375         540 :         orig_iovoff = 0;
    3376        1148 :         while (byte_count > 0) {
    3377         608 :                 if (byte_count >= orig_iov->iov_len) {
    3378         352 :                         byte_count -= orig_iov->iov_len;
    3379         352 :                         orig_iov++;
    3380             :                 } else {
    3381         256 :                         orig_iovoff = byte_count;
    3382         256 :                         byte_count = 0;
    3383             :                 }
    3384             :         }
    3385             : 
    3386             :         /*
    3387             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3388             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3389             :          */
    3390         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3391         540 :         iov = &ctx->iov[0];
    3392         540 :         iovcnt = 0;
    3393        1380 :         while (byte_count > 0) {
    3394         840 :                 assert(iovcnt < ctx->iovcnt);
    3395         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3396         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3397         840 :                 byte_count -= iov->iov_len;
    3398         840 :                 orig_iovoff = 0;
    3399         840 :                 orig_iov++;
    3400         840 :                 iov++;
    3401         840 :                 iovcnt++;
    3402             :         }
    3403             : 
    3404         540 :         ctx->io_unit_offset += io_units_count;
    3405         540 :         ctx->io_units_remaining -= io_units_count;
    3406         540 :         ctx->io_units_done += io_units_count;
    3407         540 :         iov = &ctx->iov[0];
    3408             : 
    3409         540 :         if (ctx->read) {
    3410         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3411             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3412             :         } else {
    3413         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3414             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3415             :         }
    3416             : }
    3417             : 
    3418             : static void
    3419        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3420             :                            struct iovec *iov, int iovcnt,
    3421             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3422             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3423             : {
    3424        2588 :         struct spdk_bs_cpl      cpl;
    3425             : 
    3426        2588 :         assert(blob != NULL);
    3427             : 
    3428        2588 :         if (!read && blob->data_ro) {
    3429           4 :                 cb_fn(cb_arg, -EPERM);
    3430           4 :                 return;
    3431             :         }
    3432             : 
    3433        2584 :         if (length == 0) {
    3434           0 :                 cb_fn(cb_arg, 0);
    3435           0 :                 return;
    3436             :         }
    3437             : 
    3438        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3439           0 :                 cb_fn(cb_arg, -EINVAL);
    3440           0 :                 return;
    3441             :         }
    3442             : 
    3443             :         /*
    3444             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3445             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3446             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3447             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3448             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3449             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3450             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3451             :          *
    3452             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3453             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3454             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3455             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3456             :          */
    3457        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3458        2376 :                 uint64_t lba_count;
    3459        2376 :                 uint64_t lba;
    3460             :                 bool is_allocated;
    3461             : 
    3462        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3463        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3464        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3465             : 
    3466        2376 :                 if (blob->frozen_refcnt) {
    3467             :                         /* This blob I/O is frozen */
    3468             :                         enum spdk_blob_op_type op_type;
    3469             :                         spdk_bs_user_op_t *op;
    3470           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3471             : 
    3472           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3473           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3474           0 :                         if (!op) {
    3475           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3476           0 :                                 return;
    3477             :                         }
    3478             : 
    3479           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3480             : 
    3481           0 :                         return;
    3482             :                 }
    3483             : 
    3484        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3485             : 
    3486        2376 :                 if (read) {
    3487             :                         spdk_bs_sequence_t *seq;
    3488             : 
    3489        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3490        2084 :                         if (!seq) {
    3491           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3492           0 :                                 return;
    3493             :                         }
    3494             : 
    3495        2084 :                         seq->ext_io_opts = ext_io_opts;
    3496             : 
    3497        2084 :                         if (is_allocated) {
    3498         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3499             :                         } else {
    3500        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3501             :                                                          rw_iov_done, NULL);
    3502             :                         }
    3503             :                 } else {
    3504         292 :                         if (is_allocated) {
    3505             :                                 spdk_bs_sequence_t *seq;
    3506             : 
    3507         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3508         276 :                                 if (!seq) {
    3509           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3510           0 :                                         return;
    3511             :                                 }
    3512             : 
    3513         276 :                                 seq->ext_io_opts = ext_io_opts;
    3514             : 
    3515         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3516             :                         } else {
    3517             :                                 /* Queue this operation and allocate the cluster */
    3518             :                                 spdk_bs_user_op_t *op;
    3519             : 
    3520          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3521             :                                                       length);
    3522          16 :                                 if (!op) {
    3523           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3524           0 :                                         return;
    3525             :                                 }
    3526             : 
    3527          16 :                                 op->ext_io_opts = ext_io_opts;
    3528             : 
    3529          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3530             :                         }
    3531             :                 }
    3532             :         } else {
    3533             :                 struct rw_iov_ctx *ctx;
    3534             : 
    3535         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3536         208 :                 if (ctx == NULL) {
    3537           4 :                         cb_fn(cb_arg, -ENOMEM);
    3538           4 :                         return;
    3539             :                 }
    3540             : 
    3541         204 :                 ctx->blob = blob;
    3542         204 :                 ctx->channel = _channel;
    3543         204 :                 ctx->cb_fn = cb_fn;
    3544         204 :                 ctx->cb_arg = cb_arg;
    3545         204 :                 ctx->read = read;
    3546         204 :                 ctx->orig_iov = iov;
    3547         204 :                 ctx->iovcnt = iovcnt;
    3548         204 :                 ctx->io_unit_offset = offset;
    3549         204 :                 ctx->io_units_remaining = length;
    3550         204 :                 ctx->io_units_done = 0;
    3551         204 :                 ctx->ext_io_opts = ext_io_opts;
    3552             : 
    3553         204 :                 rw_iov_split_next(ctx, 0);
    3554             :         }
    3555             : }
    3556             : 
    3557             : static struct spdk_blob *
    3558        7733 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3559             : {
    3560        7733 :         struct spdk_blob find;
    3561             : 
    3562        7733 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3563        6948 :                 return NULL;
    3564             :         }
    3565             : 
    3566         785 :         find.id = blobid;
    3567         785 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3568             : }
    3569             : 
    3570             : static void
    3571        1810 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3572             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3573             : {
    3574        1810 :         assert(blob != NULL);
    3575        1810 :         *snapshot_entry = NULL;
    3576        1810 :         *clone_entry = NULL;
    3577             : 
    3578        1810 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3579        1518 :                 return;
    3580             :         }
    3581             : 
    3582         440 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3583         380 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3584         232 :                         break;
    3585             :                 }
    3586             :         }
    3587             : 
    3588         292 :         if (*snapshot_entry != NULL) {
    3589         276 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3590         276 :                         if ((*clone_entry)->id == blob->id) {
    3591         232 :                                 break;
    3592             :                         }
    3593             :                 }
    3594             : 
    3595         232 :                 assert(*clone_entry != NULL);
    3596             :         }
    3597             : }
    3598             : 
    3599             : static int
    3600         796 : bs_channel_create(void *io_device, void *ctx_buf)
    3601             : {
    3602         796 :         struct spdk_blob_store          *bs = io_device;
    3603         796 :         struct spdk_bs_channel          *channel = ctx_buf;
    3604             :         struct spdk_bs_dev              *dev;
    3605         796 :         uint32_t                        max_ops = bs->max_channel_ops;
    3606             :         uint32_t                        i;
    3607             : 
    3608         796 :         dev = bs->dev;
    3609             : 
    3610         796 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3611         796 :         if (!channel->req_mem) {
    3612           0 :                 return -1;
    3613             :         }
    3614             : 
    3615         796 :         TAILQ_INIT(&channel->reqs);
    3616             : 
    3617      408348 :         for (i = 0; i < max_ops; i++) {
    3618      407552 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3619             :         }
    3620             : 
    3621         796 :         channel->bs = bs;
    3622         796 :         channel->dev = dev;
    3623         796 :         channel->dev_channel = dev->create_channel(dev);
    3624             : 
    3625         796 :         if (!channel->dev_channel) {
    3626           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3627           0 :                 free(channel->req_mem);
    3628           0 :                 return -1;
    3629             :         }
    3630             : 
    3631         796 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3632             :                                     SPDK_MALLOC_DMA);
    3633         796 :         if (!channel->new_cluster_page) {
    3634           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3635           0 :                 free(channel->req_mem);
    3636           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3637           0 :                 return -1;
    3638             :         }
    3639             : 
    3640         796 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3641         796 :         TAILQ_INIT(&channel->queued_io);
    3642         796 :         RB_INIT(&channel->esnap_channels);
    3643             : 
    3644         796 :         return 0;
    3645             : }
    3646             : 
    3647             : static void
    3648         796 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3649             : {
    3650         796 :         struct spdk_bs_channel *channel = ctx_buf;
    3651             :         spdk_bs_user_op_t *op;
    3652             : 
    3653         796 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3654           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3655           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3656           0 :                 bs_user_op_abort(op, -EIO);
    3657             :         }
    3658             : 
    3659         796 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3660           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3661           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3662           0 :                 bs_user_op_abort(op, -EIO);
    3663             :         }
    3664             : 
    3665         796 :         blob_esnap_destroy_bs_channel(channel);
    3666             : 
    3667         796 :         free(channel->req_mem);
    3668         796 :         spdk_free(channel->new_cluster_page);
    3669         796 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3670         796 : }
    3671             : 
    3672             : static void
    3673         780 : bs_dev_destroy(void *io_device)
    3674             : {
    3675         780 :         struct spdk_blob_store *bs = io_device;
    3676             :         struct spdk_blob        *blob, *blob_tmp;
    3677             : 
    3678         780 :         bs->dev->destroy(bs->dev);
    3679             : 
    3680         780 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3681           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3682           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3683           0 :                 blob_free(blob);
    3684             :         }
    3685             : 
    3686         780 :         spdk_spin_destroy(&bs->used_lock);
    3687             : 
    3688         780 :         spdk_bit_array_free(&bs->open_blobids);
    3689         780 :         spdk_bit_array_free(&bs->used_blobids);
    3690         780 :         spdk_bit_array_free(&bs->used_md_pages);
    3691         780 :         spdk_bit_pool_free(&bs->used_clusters);
    3692             :         /*
    3693             :          * If this function is called for any reason except a successful unload,
    3694             :          * the unload_cpl type will be NONE and this will be a nop.
    3695             :          */
    3696         780 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3697             : 
    3698         780 :         free(bs);
    3699         780 : }
    3700             : 
    3701             : static int
    3702         908 : bs_blob_list_add(struct spdk_blob *blob)
    3703             : {
                               /*
                                * Record blob as a clone of its parent snapshot in blob->bs->snapshots,
                                * creating the snapshot entry first if it does not exist yet.
                                *
                                * Returns 0 on success, including the case where the blob has no parent
                                * inside this blobstore (parent_id is SPDK_BLOBID_INVALID or
                                * SPDK_BLOBID_EXTERNAL_SNAPSHOT), and -ENOMEM if an entry cannot be
                                * allocated.
                                */
    3704             :         spdk_blob_id snapshot_id;
    3705         908 :         struct spdk_blob_list *snapshot_entry = NULL;
    3706         908 :         struct spdk_blob_list *clone_entry = NULL;
    3707             : 
    3708         908 :         assert(blob != NULL);
    3709             : 
    3710         908 :         snapshot_id = blob->parent_id;
    3711         908 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3712             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3713         492 :                 return 0;
    3714             :         }
    3715             : 
    3716         416 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3717         416 :         if (snapshot_entry == NULL) {
    3718             :                 /* Snapshot not found */
    3719         288 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3720         288 :                 if (snapshot_entry == NULL) {
    3721           0 :                         return -ENOMEM;
    3722             :                 }
    3723         288 :                 snapshot_entry->id = snapshot_id;
    3724         288 :                 TAILQ_INIT(&snapshot_entry->clones);
    3725         288 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3726             :         } else {
                                       /*
                                        * Look for an existing clone entry for this blob.  The check
                                        * below relies on clone_entry being NULL when the loop finishes
                                        * without a match.
                                        */
    3727         204 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3728          76 :                         if (clone_entry->id == blob->id) {
    3729           0 :                                 break;
    3730             :                         }
    3731             :                 }
    3732             :         }
    3733             : 
    3734         416 :         if (clone_entry == NULL) {
    3735             :                 /* Clone not found */
    3736         416 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3737         416 :                 if (clone_entry == NULL) {
                                               /*
                                                * NOTE(review): a snapshot entry created above stays
                                                * in bs->snapshots with no clones on this path.
                                                */
    3738           0 :                         return -ENOMEM;
    3739             :                 }
    3740         416 :                 clone_entry->id = blob->id;
    3741         416 :                 TAILQ_INIT(&clone_entry->clones);
    3742         416 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3743         416 :                 snapshot_entry->clone_count++;
    3744             :         }
    3745             : 
    3746         416 :         return 0;
    3747             : }
    3748             : 
    3749             : static void
    3750        1732 : bs_blob_list_remove(struct spdk_blob *blob)
    3751             : {
                               /*
                                * Detach blob from its parent snapshot's clone list: the clone entry is
                                * unlinked and freed, blob->parent_id is reset to SPDK_BLOBID_INVALID and
                                * the snapshot's clone_count is decremented.  Does nothing when no
                                * snapshot entry is found for the blob.
                                */
    3752        1732 :         struct spdk_blob_list *snapshot_entry = NULL;
    3753        1732 :         struct spdk_blob_list *clone_entry = NULL;
    3754             : 
    3755        1732 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3756             : 
    3757        1732 :         if (snapshot_entry == NULL) {
    3758        1516 :                 return;
    3759             :         }
    3760             : 
                               /*
                                * NOTE(review): clone_entry is used without a NULL check; presumably the
                                * lookup above always yields a clone entry when it yields a snapshot
                                * entry - confirm.  The snapshot entry itself is kept even when its
                                * clone_count drops to zero.
                                */
    3761         216 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3762         216 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3763         216 :         free(clone_entry);
    3764             : 
    3765         216 :         snapshot_entry->clone_count--;
    3766             : }
    3767             : 
    3768             : static int
    3769         780 : bs_blob_list_free(struct spdk_blob_store *bs)
    3770             : {
                               /*
                                * Free the whole snapshot/clone bookkeeping list of bs: every clone entry
                                * of every snapshot entry, then the snapshot entries themselves.
                                * Always returns 0.
                                */
    3771             :         struct spdk_blob_list *snapshot_entry;
    3772             :         struct spdk_blob_list *snapshot_entry_tmp;
    3773             :         struct spdk_blob_list *clone_entry;
    3774             :         struct spdk_blob_list *clone_entry_tmp;
    3775             : 
                               /* The _SAFE variants are required because entries are removed while iterating. */
    3776         924 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3777         296 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3778         152 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3779         152 :                         free(clone_entry);
    3780             :                 }
    3781         144 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3782         144 :                 free(snapshot_entry);
    3783             :         }
    3784             : 
    3785         780 :         return 0;
    3786             : }
    3787             : 
    3788             : static void
    3789         780 : bs_free(struct spdk_blob_store *bs)
    3790             : {
                               /*
                                * Start releasing a blobstore: free the snapshot list, unregister the
                                * metadata thread and unregister the io_device.  The remaining teardown,
                                * including free(bs), happens in bs_dev_destroy(), which is handed to
                                * spdk_io_device_unregister() as its callback.
                                * NOTE(review): the int results of bs_blob_list_free() and
                                * bs_unregister_md_thread() are ignored here.
                                */
    3791         780 :         bs_blob_list_free(bs);
    3792             : 
    3793         780 :         bs_unregister_md_thread(bs);
    3794         780 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3795         780 : }
    3796             : 
    3797             : void
    3798        1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3799             : {
                               /*
                                * Fill *opts with default blobstore options.
                                *
                                * opts      - structure to initialise; must not be NULL.
                                * opts_size - size in bytes of the caller's structure; must not be 0.
                                *
                                * On a NULL opts or a zero opts_size an error is logged and the function
                                * returns without writing anything.  Otherwise the first opts_size bytes
                                * are zeroed and each default is stored only if the field lies entirely
                                * within opts_size.
                                */
    3800             : 
    3801        1048 :         if (!opts) {
    3802           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3803           0 :                 return;
    3804             :         }
    3805             : 
    3806        1048 :         if (!opts_size) {
    3807           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3808           0 :                 return;
    3809             :         }
    3810             : 
                               /*
                                * NOTE(review): opts->opts_size is stored without a FIELD_OK() guard, so
                                * this assumes opts_size is large enough to contain that member.
                                */
    3811        1048 :         memset(opts, 0, opts_size);
    3812        1048 :         opts->opts_size = opts_size;
    3813             : 
                               /* FIELD_OK(field): true when the whole field fits inside opts_size bytes. */
    3814             : #define FIELD_OK(field) \
    3815             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3816             : 
                               /* SET_FIELD(field, value): assign value only when FIELD_OK(field) holds. */
    3817             : #define SET_FIELD(field, value) \
    3818             :         if (FIELD_OK(field)) { \
    3819             :                 opts->field = value; \
    3820             :         } \
    3821             : 
    3822        1048 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3823        1048 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
                               /*
                                * NOTE(review): max_md_ops gets the same constant as num_md_pages
                                * (SPDK_BLOB_OPTS_NUM_MD_PAGES); looks like a copy/paste - confirm
                                * whether a dedicated max-md-ops default was intended.
                                */
    3824        1048 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3825        1048 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3826        1048 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3827             : 
                               /* bstype is cleared with memset rather than assigned through SET_FIELD. */
    3828        1048 :         if (FIELD_OK(bstype)) {
    3829        1048 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3830             :         }
    3831             : 
    3832        1048 :         SET_FIELD(iter_cb_fn, NULL);
    3833        1048 :         SET_FIELD(iter_cb_arg, NULL);
    3834        1048 :         SET_FIELD(force_recover, false);
    3835        1048 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3836        1048 :         SET_FIELD(esnap_ctx, NULL);
    3837             : 
    3838             : #undef FIELD_OK
    3839             : #undef SET_FIELD
    3840             : }
    3841             : 
    3842             : static int
    3843         484 : bs_opts_verify(struct spdk_bs_opts *opts)
    3844             : {
                               /*
                                * Reject option sets in which cluster_sz, num_md_pages, max_md_ops or
                                * max_channel_ops is zero.  Returns 0 when all four are non-zero and
                                * -1 (not a negative errno) after logging an error otherwise.
                                */
    3845         484 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3846         480 :             opts->max_channel_ops == 0) {
    3847           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3848           4 :                 return -1;
    3849             :         }
    3850             : 
    3851         480 :         return 0;
    3852             : }
    3853             : 
    3854             : /* START spdk_bs_load */
    3855             : 
    3856             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3857             : 
    3858             : struct spdk_bs_load_ctx {
                               /* Blobstore being set up and the DMA buffer for its super block (both allocated in bs_alloc()). */
    3859             :         struct spdk_blob_store          *bs;
    3860             :         struct spdk_bs_super_block      *super;
    3861             : 
                               /* DMA buffer for the mask currently being written; allocated by the bs_write_used_*() helpers. */
    3862             :         struct spdk_bs_md_mask          *mask;
                               /*
                                * NOTE(review): the page and extent-page fields below are not referenced
                                * in this part of the file; presumably metadata replay state - confirm.
                                */
    3863             :         bool                            in_page_chain;
    3864             :         uint32_t                        page_index;
    3865             :         uint32_t                        cur_page;
    3866             :         struct spdk_blob_md_page        *page;
    3867             : 
    3868             :         uint64_t                        num_extent_pages;
    3869             :         uint32_t                        *extent_page_num;
    3870             :         struct spdk_blob_md_page        *extent_pages;
                               /*
                                * Cluster usage bit array created in bs_alloc(); bs_write_used_clusters()
                                * stores it when bs->used_clusters is not set.
                                */
    3871             :         struct spdk_bit_array           *used_clusters;
    3872             : 
                               /* Sequence that bs_load_ctx_fail() finishes with the error code. */
    3873             :         spdk_bs_sequence_t                      *seq;
                               /* Per-blob callback and its argument, copied from spdk_bs_opts in bs_alloc(). */
    3874             :         spdk_blob_op_with_handle_complete       iter_cb_fn;
    3875             :         void                                    *iter_cb_arg;
                               /* Blob under examination by the corrupted-blob handlers, and its id saved before it is closed. */
    3876             :         struct spdk_blob                        *blob;
    3877             :         spdk_blob_id                            blobid;
    3878             : 
                               /* Copied from spdk_bs_opts.force_recover in bs_alloc(). */
    3879             :         bool                                    force_recover;
    3880             : 
    3881             :         /* These fields are used in the spdk_bs_dump path. */
    3882             :         bool                                    dumping;
    3883             :         FILE                                    *fp;
    3884             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3885             :         char                                    xattr_name[4096];
    3886             : };
    3887             : 
    3888             : static int
    3889         784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3890             :          struct spdk_bs_load_ctx **_ctx)
    3891             : {
                               /*
                                * Allocate and initialise an in-memory blobstore for dev together with
                                * the load context used by the init/load paths.
                                *
                                * dev   - backing device; stored in bs->dev.
                                * opts  - options supplying cluster size, callbacks, bstype, etc.
                                * _bs   - receives the new blobstore on success.
                                * _ctx  - receives the new load context on success.
                                *
                                * Returns 0 on success, -ENOSPC if the device is smaller than one
                                * cluster, -EINVAL if the cluster size is below SPDK_BS_PAGE_SIZE, and
                                * -ENOMEM for allocation failures (also used when registering the
                                * metadata thread fails, see the FIXME below).  *_bs and *_ctx are only
                                * written on success.
                                */
    3892             :         struct spdk_blob_store  *bs;
    3893             :         struct spdk_bs_load_ctx *ctx;
    3894             :         uint64_t dev_size;
    3895             :         int rc;
    3896             : 
    3897         784 :         dev_size = dev->blocklen * dev->blockcnt;
    3898         784 :         if (dev_size < opts->cluster_sz) {
    3899             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3900           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3901             :                              dev_size, opts->cluster_sz);
    3902           0 :                 return -ENOSPC;
    3903             :         }
    3904         784 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3905             :                 /* Cluster size cannot be smaller than page size */
    3906           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3907             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3908           4 :                 return -EINVAL;
    3909             :         }
    3910         780 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3911         780 :         if (!bs) {
    3912           0 :                 return -ENOMEM;
    3913             :         }
    3914             : 
    3915         780 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3916         780 :         if (!ctx) {
    3917           0 :                 free(bs);
    3918           0 :                 return -ENOMEM;
    3919             :         }
    3920             : 
    3921         780 :         ctx->bs = bs;
    3922         780 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3923         780 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3924         780 :         ctx->force_recover = opts->force_recover;
    3925             : 
                               /* Zeroed, 0x1000-aligned DMA buffer for the super block. */
    3926         780 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3927             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3928         780 :         if (!ctx->super) {
    3929           0 :                 free(ctx);
    3930           0 :                 free(bs);
    3931           0 :                 return -ENOMEM;
    3932             :         }
    3933             : 
    3934         780 :         RB_INIT(&bs->open_blobs);
    3935         780 :         TAILQ_INIT(&bs->snapshots);
    3936         780 :         bs->dev = dev;
                               /* The calling thread becomes the blobstore's metadata thread. */
    3937         780 :         bs->md_thread = spdk_get_thread();
    3938         780 :         assert(bs->md_thread != NULL);
    3939             : 
    3940             :         /*
    3941             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3942             :          *  even multiple of the cluster size.
    3943             :          */
                               /*
                                * NOTE(review): the divisor (cluster_sz / blocklen) is zero if
                                * dev->blocklen exceeds cluster_sz; assumes blocklen <= cluster_sz -
                                * confirm this is guaranteed by callers.
                                */
    3944         780 :         bs->cluster_sz = opts->cluster_sz;
    3945         780 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3946         780 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3947         780 :         if (!ctx->used_clusters) {
    3948           0 :                 spdk_free(ctx->super);
    3949           0 :                 free(ctx);
    3950           0 :                 free(bs);
    3951           0 :                 return -ENOMEM;
    3952             :         }
    3953             : 
                               /*
                                * pages_per_cluster_shift is only set for power-of-two cluster sizes;
                                * otherwise it keeps the 0 it got from calloc().
                                */
    3954         780 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3955         780 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3956         780 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3957             :         }
    3958         780 :         bs->num_free_clusters = bs->total_clusters;
    3959         780 :         bs->io_unit_size = dev->blocklen;
    3960             : 
    3961         780 :         bs->max_channel_ops = opts->max_channel_ops;
    3962         780 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3963         780 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3964         780 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3965         780 :         bs->esnap_ctx = opts->esnap_ctx;
    3966             : 
                               /*
                                * NOTE(review): unlike ctx->used_clusters above, the results of these
                                * three spdk_bit_array_create() calls are not checked for NULL.
                                */
    3967             :         /* The metadata is assumed to be at least 1 page */
    3968         780 :         bs->used_md_pages = spdk_bit_array_create(1);
    3969         780 :         bs->used_blobids = spdk_bit_array_create(0);
    3970         780 :         bs->open_blobids = spdk_bit_array_create(0);
    3971             : 
    3972         780 :         spdk_spin_init(&bs->used_lock);
    3973             : 
    3974         780 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3975             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3976         780 :         rc = bs_register_md_thread(bs);
    3977         780 :         if (rc == -1) {
                                       /*
                                        * Undo everything set up above (0 hits in this coverage run).
                                        * NOTE(review): bs is freed right after unregistering with a
                                        * NULL callback; assumes the unregister needs no later access
                                        * to bs - confirm.
                                        */
    3978           0 :                 spdk_io_device_unregister(bs, NULL);
    3979           0 :                 spdk_spin_destroy(&bs->used_lock);
    3980           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3981           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3982           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3983           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3984           0 :                 spdk_free(ctx->super);
    3985           0 :                 free(ctx);
    3986           0 :                 free(bs);
    3987             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3988           0 :                 return -ENOMEM;
    3989             :         }
    3990             : 
    3991         780 :         *_ctx = ctx;
    3992         780 :         *_bs = bs;
    3993         780 :         return 0;
    3994             : }
    3995             : 
    3996             : static void
    3997          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    3998             : {
                               /*
                                * Abort a load/init operation: free the super block buffer, finish the
                                * sequence with bserrno (must be non-zero), release the blobstore via
                                * bs_free() and free the context together with its cluster bit array.
                                * ctx is invalid once this returns.
                                * NOTE(review): ctx->mask is not released here; the callers visible in
                                * this section only get here when the mask allocation itself failed.
                                */
    3999          24 :         assert(bserrno != 0);
    4000             : 
    4001          24 :         spdk_free(ctx->super);
    4002          24 :         bs_sequence_finish(ctx->seq, bserrno);
    4003          24 :         bs_free(ctx->bs);
    4004          24 :         spdk_bit_array_free(&ctx->used_clusters);
    4005          24 :         free(ctx);
    4006          24 : }
    4007             : 
    4008             : static void
    4009         824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    4010             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    4011             : {
                               /*
                                * Refresh super from bs (super_blob id and bstype), recompute its CRC
                                * and queue a write of the super block on seq.  The write starts at the
                                * LBA of page 0 and covers sizeof(*super) bytes converted to LBAs;
                                * cb_fn/cb_arg are handed to bs_sequence_write_dev().
                                */
    4012             :         /* Update the values in the super block */
    4013         824 :         super->super_blob = bs->super_blob;
    4014         824 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
                               /* The CRC is computed after the field updates above so that it covers them. */
    4015         824 :         super->crc = blob_md_page_calc_crc(super);
    4016         824 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    4017         824 :                               bs_byte_to_lba(bs, sizeof(*super)),
    4018             :                               cb_fn, cb_arg);
    4019         824 : }
    4020             : 
    4021             : static void
    4022         760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4023             : {
                               /*
                                * Build the used-clusters mask in a freshly allocated DMA buffer
                                * (ctx->mask) and queue its write on seq.  arg is the
                                * struct spdk_bs_load_ctx and is also passed on as the callback
                                * argument for cb_fn.  If the buffer cannot be allocated the whole
                                * operation is failed with -ENOMEM through bs_load_ctx_fail(), after
                                * which ctx is gone.
                                */
    4024         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4025             :         uint64_t        mask_size, lba, lba_count;
    4026             : 
    4027             :         /* Write out the used clusters mask */
    4028         760 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4029         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4030             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4031         760 :         if (!ctx->mask) {
    4032           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4033           0 :                 return;
    4034             :         }
    4035             : 
    4036         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4037         760 :         ctx->mask->length = ctx->bs->total_clusters;
    4038             :         /* We could get here through the normal unload path, or through dirty
    4039             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4040             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4041             :          * only the bit array from the load ctx.
    4042             :          */
    4043         760 :         if (ctx->bs->used_clusters) {
    4044         654 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4045         654 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4046             :         } else {
    4047         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4048         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4049             :         }
                               /* Start LBA and length of the on-disk mask region, both taken from the super block. */
    4050         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4051         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4052         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4053             : }
    4054             : 
    4055             : static void
    4056         760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4057             : {
                               /*
                                * Build the used-metadata-pages mask from bs->used_md_pages in a freshly
                                * allocated DMA buffer (ctx->mask) and queue its write on seq.  arg is
                                * the struct spdk_bs_load_ctx and is passed on as the callback argument
                                * for cb_fn.  An allocation failure fails the whole operation with
                                * -ENOMEM through bs_load_ctx_fail().
                                */
    4058         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4059             :         uint64_t        mask_size, lba, lba_count;
    4060             : 
    4061         760 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4062         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4063             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4064         760 :         if (!ctx->mask) {
    4065           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4066           0 :                 return;
    4067             :         }
    4068             : 
                               /* The mask has one bit per metadata page (md_len), which must match the bit array's capacity. */
    4069         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4070         760 :         ctx->mask->length = ctx->super->md_len;
    4071         760 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4072             : 
    4073         760 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4074         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4075         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4076         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4077             : }
    4078             : 
    4079             : static void
    4080         760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4081             : {
                               /*
                                * Build the used-blobids mask from bs->used_blobids in a freshly
                                * allocated DMA buffer (ctx->mask) and queue its write on seq.  arg is
                                * the struct spdk_bs_load_ctx and is passed on as the callback argument
                                * for cb_fn.  When the super block has no blobid mask region, cb_fn is
                                * called directly with status 0 and nothing is written.  An allocation
                                * failure fails the whole operation with -ENOMEM through
                                * bs_load_ctx_fail().
                                */
    4082         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4083             :         uint64_t        mask_size, lba, lba_count;
    4084             : 
    4085         760 :         if (ctx->super->used_blobid_mask_len == 0) {
    4086             :                 /*
    4087             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4088             :                  *  written to disk.
    4089             :                  */
    4090          24 :                 cb_fn(seq, arg, 0);
    4091          24 :                 return;
    4092             :         }
    4093             : 
    4094         736 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4095         736 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4096             :                                  SPDK_MALLOC_DMA);
    4097         736 :         if (!ctx->mask) {
    4098           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4099           0 :                 return;
    4100             :         }
    4101             : 
                               /* The mask length is md_len, which must match the capacity of used_blobids. */
    4102         736 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4103         736 :         ctx->mask->length = ctx->super->md_len;
    4104         736 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4105             : 
    4106         736 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4107         736 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4108         736 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4109         736 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4110             : }
    4111             : 
    4112             : static void
    4113         704 : blob_set_thin_provision(struct spdk_blob *blob)
    4114             : {
                               /*
                                * Mark blob as thin provisioned: set SPDK_BLOB_THIN_PROV in its
                                * invalid_flags and move the blob to the DIRTY state.  Nothing is
                                * written to disk here.
                                */
    4115         704 :         blob_verify_md_op(blob);
    4116         704 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4117         704 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4118         704 : }
    4119             : 
    4120             : static void
    4121        2094 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4122             : {
                               /*
                                * Record clear_method on blob, both in blob->clear_method and encoded
                                * into md_ro_flags at SPDK_BLOB_CLEAR_METHOD_SHIFT, and move the blob to
                                * the DIRTY state.
                                * NOTE(review): the flag bits are OR-ed in without clearing a previously
                                * stored method, so a second call with a different value would leave
                                * the union of both encodings - confirm callers set it only once.
                                */
    4123        2094 :         blob_verify_md_op(blob);
    4124        2094 :         blob->clear_method = clear_method;
    4125        2094 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4126        2094 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4127        2094 : }
    4128             : 
    4129             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4130             : 
    4131             : static void
    4132          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4133             : {
                               /*
                                * Completion callback for the spdk_bs_delete_blob() issued by
                                * bs_delete_corrupted_close_cb().  cb_arg is the load context.
                                * Continues the load iteration with the next blob id set in
                                * bs->used_blobids after ctx->blobid, or ends it by calling
                                * bs_load_iter() with -ENOENT when there is none.
                                * NOTE(review): bserrno is not examined, so a failed delete is
                                * treated like a successful one.
                                */
    4134          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4135             :         spdk_blob_id id;
    4136             :         int64_t page_num;
    4137             : 
    4138             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4139             :          * last blob has been removed). */
    4140          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    4141          24 :         page_num++;
    4142          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
                               /* A result at or beyond the array capacity is treated as "no more blobs". */
    4143          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4144          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4145          24 :                 return;
    4146             :         }
    4147             : 
                               /* Not reached in this coverage run (0 hits): open the next blob and keep iterating. */
    4148           0 :         id = bs_page_to_blobid(page_num);
    4149             : 
    4150           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4151             : }
    4152             : 
    4153             : static void
    4154          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4155             : {
                               /*
                                * Completion callback for the spdk_blob_close() issued by
                                * bs_delete_corrupted_blob().  cb_arg is the load context.  On success
                                * the blob identified by ctx->blobid is deleted; on failure an error is
                                * logged and iteration moves on to the next blob without deleting.
                                */
    4156          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4157             : 
    4158          24 :         if (bserrno != 0) {
    4159           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4160           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4161           0 :                 return;
    4162             :         }
    4163             : 
    4164          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4165             : }
    4166             : 
    4167             : static void
    4168          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4169             : {
                               /*
                                * Completion callback for closing the clone in bs_examine_clone() when
                                * the corrupted blob (ctx->blob) has to be removed.  cb_arg is the load
                                * context.  Clears the blob's in-memory cluster and extent-page maps,
                                * marks it thin provisioned, remembers its id in ctx->blobid and closes
                                * it; the deletion itself continues in bs_delete_corrupted_close_cb().
                                * If closing the clone failed, an error is logged and iteration moves on
                                * to the next blob.
                                */
    4170          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4171             :         uint64_t i;
    4172             : 
    4173          24 :         if (bserrno != 0) {
    4174           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4175           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4176           0 :                 return;
    4177             :         }
    4178             : 
    4179             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4180             :          * at this point. Let's clear both for snapshot now,
    4181             :          * so that it won't be cleared for clone later when we remove snapshot.
    4182             :          * Also set thin provision to pass data corruption check */
    4183         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4184         240 :                 ctx->blob->active.clusters[i] = 0;
    4185             :         }
    4186          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4187          12 :                 ctx->blob->active.extent_pages[i] = 0;
    4188             :         }
    4189             : 
    4190          24 :         ctx->blob->active.num_allocated_clusters = 0;
    4191             : 
                               /* Clear md_ro before blob_set_thin_provision() modifies the blob's metadata. */
    4192          24 :         ctx->blob->md_ro = false;
    4193             : 
    4194          24 :         blob_set_thin_provision(ctx->blob);
    4195             : 
                               /* Save the id now; it is what the delete is issued with once the blob is closed. */
    4196          24 :         ctx->blobid = ctx->blob->id;
    4197             : 
    4198          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4199             : }
    4200             : 
    4201             : static void
    4202          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4203             : {
                               /*
                                * Completion callback for closing the clone in bs_examine_clone() when
                                * the corrupted blob (ctx->blob) can be kept.  cb_arg is the load
                                * context.  Removes the SNAPSHOT_PENDING_REMOVAL and
                                * SNAPSHOT_IN_PROGRESS xattrs, marks the blob read-only, reports it
                                * through the optional iter callback, adds it to the snapshot list and
                                * continues with the next blob.  If closing the clone failed, an error
                                * is logged and iteration moves on without repairing the blob.
                                */
    4204          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4205             : 
    4206          12 :         if (bserrno != 0) {
    4207           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4208           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4209           0 :                 return;
    4210             :         }
    4211             : 
                               /* Clear md_ro before the xattr removals below change the blob's metadata. */
    4212          12 :         ctx->blob->md_ro = false;
    4213          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4214          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4215          12 :         spdk_blob_set_read_only(ctx->blob);
    4216             : 
    4217          12 :         if (ctx->iter_cb_fn) {
    4218           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4219             :         }
                               /* NOTE(review): the -ENOMEM result of bs_blob_list_add() is ignored here. */
    4220          12 :         bs_blob_list_add(ctx->blob);
    4221             : 
    4222          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4223             : }
    4224             : 
    4225             : static void
    4226          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4227             : {
                               /*
                                * Open-completion callback for the clone referenced by a corrupted
                                * blob; bs_load_iter() issues the open.  cb_arg is the load context,
                                * blob is the opened clone and ctx->blob is the corrupted blob being
                                * examined.  The clone is closed again in both cases; the close
                                * callback selects whether the corrupted blob is repaired or deleted.
                                * If the open failed, an error is logged and iteration moves on.
                                */
    4228          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4229             : 
    4230          36 :         if (bserrno != 0) {
    4231           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4232           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4233           0 :                 return;
    4234             :         }
    4235             : 
                               /* The clone still naming the corrupted blob as its parent decides between keep and remove. */
    4236          36 :         if (blob->parent_id == ctx->blob->id) {
    4237             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4238             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4239          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4240             :         } else {
    4241             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4242             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4243          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4244             :         }
    4245             : }
    4246             : 
    4247             : static void
    4248         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4249             : {
    4250         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4251         720 :         const void *value;
    4252         720 :         size_t len;
    4253         720 :         int rc = 0;
    4254             : 
    4255         720 :         if (bserrno == 0) {
    4256             :                 /* Examine blob if it is corrupted after power failure. Fix
    4257             :                  * the ones that can be fixed and remove any other corrupted
    4258             :                  * ones. If it is not corrupted just process it */
    4259         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4260         440 :                 if (rc != 0) {
    4261         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4262         420 :                         if (rc != 0) {
    4263             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4264         404 :                                 if (ctx->iter_cb_fn) {
    4265          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4266             :                                 }
    4267         404 :                                 bs_blob_list_add(blob);
    4268         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4269         404 :                                 return;
    4270             :                         }
    4271             : 
    4272             :                 }
    4273             : 
    4274          36 :                 assert(len == sizeof(spdk_blob_id));
    4275             : 
    4276          36 :                 ctx->blob = blob;
    4277             : 
    4278             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4279          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4280          36 :                 return;
    4281         280 :         } else if (bserrno == -ENOENT) {
    4282         280 :                 bserrno = 0;
    4283             :         } else {
    4284             :                 /*
    4285             :                  * This case needs to be looked at further.  Same problem
    4286             :                  *  exists with applications that rely on explicit blob
    4287             :                  *  iteration.  We should just skip the blob that failed
    4288             :                  *  to load and continue on to the next one.
    4289             :                  */
    4290           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4291             :         }
    4292             : 
    4293         280 :         ctx->iter_cb_fn = NULL;
    4294             : 
    4295         280 :         spdk_free(ctx->super);
    4296         280 :         spdk_free(ctx->mask);
    4297         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4298         280 :         free(ctx);
    4299             : }
    4300             : 
    4301             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4302             : 
    4303             : static void
    4304         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4305             : {
    4306         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4307         280 :         if (ctx->dumping) {
    4308           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4309           0 :                 return;
    4310             :         }
    4311         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4312             : }
    4313             : 
    4314             : static void
    4315         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4316             : {
    4317         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4318             :         int rc;
    4319             : 
    4320             :         /* The type must be correct */
    4321         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4322             : 
    4323             :         /* The length of the mask (in bits) must not be greater than
    4324             :          * the length of the buffer (converted to bits) */
    4325         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4326             : 
    4327             :         /* The length of the mask must be exactly equal to the size
    4328             :          * (in pages) of the metadata region */
    4329         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4330             : 
    4331         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4332         174 :         if (rc < 0) {
    4333           0 :                 spdk_free(ctx->mask);
    4334           0 :                 bs_load_ctx_fail(ctx, rc);
    4335           0 :                 return;
    4336             :         }
    4337             : 
    4338         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4339         174 :         bs_load_complete(ctx);
    4340             : }
    4341             : 
    4342             : static void
    4343         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4344             : {
    4345         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4346             :         uint64_t                lba, lba_count, mask_size;
    4347             :         int                     rc;
    4348             : 
    4349         174 :         if (bserrno != 0) {
    4350           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4351           0 :                 return;
    4352             :         }
    4353             : 
    4354             :         /* The type must be correct */
    4355         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4356             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4357         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4358             :                                              struct spdk_blob_md_page) * 8));
    4359             :         /*
    4360             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4361             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4362             :          */
    4363         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4364         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4365           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4366           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4367             :         }
    4368             : 
    4369         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4370         174 :         if (rc < 0) {
    4371           0 :                 spdk_free(ctx->mask);
    4372           0 :                 bs_load_ctx_fail(ctx, rc);
    4373           0 :                 return;
    4374             :         }
    4375             : 
    4376         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4377         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4378         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4379             : 
    4380         174 :         spdk_free(ctx->mask);
    4381             : 
    4382             :         /* Read the used blobids mask */
    4383         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4384         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4385             :                                  SPDK_MALLOC_DMA);
    4386         174 :         if (!ctx->mask) {
    4387           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4388           0 :                 return;
    4389             :         }
    4390         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4391         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4392         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4393             :                              bs_load_used_blobids_cpl, ctx);
    4394             : }
    4395             : 
    4396             : static void
    4397         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4398             : {
    4399         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4400             :         uint64_t                lba, lba_count, mask_size;
    4401             :         int                     rc;
    4402             : 
    4403         174 :         if (bserrno != 0) {
    4404           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4405           0 :                 return;
    4406             :         }
    4407             : 
    4408             :         /* The type must be correct */
    4409         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4410             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4411         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4412             :                                      8));
    4413             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4414         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4415           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4416             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4417             :                             ctx->mask->length, ctx->super->md_len);
    4418           0 :                 assert(false);
    4419             :         }
    4420             : 
    4421         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4422         174 :         if (rc < 0) {
    4423           0 :                 spdk_free(ctx->mask);
    4424           0 :                 bs_load_ctx_fail(ctx, rc);
    4425           0 :                 return;
    4426             :         }
    4427             : 
    4428         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4429         174 :         spdk_free(ctx->mask);
    4430             : 
    4431             :         /* Read the used clusters mask */
    4432         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4433         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4434             :                                  SPDK_MALLOC_DMA);
    4435         174 :         if (!ctx->mask) {
    4436           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4437           0 :                 return;
    4438             :         }
    4439         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4440         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4441         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4442             :                              bs_load_used_clusters_cpl, ctx);
    4443             : }
    4444             : 
    4445             : static void
    4446         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4447             : {
    4448             :         uint64_t lba, lba_count, mask_size;
    4449             : 
    4450             :         /* Read the used pages mask */
    4451         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4452         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4453             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4454         174 :         if (!ctx->mask) {
    4455           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4456           0 :                 return;
    4457             :         }
    4458             : 
    4459         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4460         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4461         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4462             :                              bs_load_used_pages_cpl, ctx);
    4463             : }
    4464             : 
    4465             : static int
    4466         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4467             : {
    4468         246 :         struct spdk_blob_store *bs = ctx->bs;
    4469             :         struct spdk_blob_md_descriptor *desc;
    4470         246 :         size_t  cur_desc = 0;
    4471             : 
    4472         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4473         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4474         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4475         226 :                         if (desc->length == 0) {
    4476             :                                 /* If padding and length are 0, this terminates the page */
    4477         226 :                                 break;
    4478             :                         }
    4479         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4480             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4481             :                         unsigned int                            i, j;
    4482          68 :                         unsigned int                            cluster_count = 0;
    4483             :                         uint32_t                                cluster_idx;
    4484             : 
    4485          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4486             : 
    4487         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4488         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4489         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4490             :                                         /*
    4491             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4492             :                                          * in the used cluster map.
    4493             :                                          */
    4494         760 :                                         if (cluster_idx != 0) {
    4495         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4496         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4497         540 :                                                 if (bs->num_free_clusters == 0) {
    4498           0 :                                                         return -ENOSPC;
    4499             :                                                 }
    4500         540 :                                                 bs->num_free_clusters--;
    4501             :                                         }
    4502         760 :                                         cluster_count++;
    4503             :                                 }
    4504             :                         }
    4505          68 :                         if (cluster_count == 0) {
    4506           0 :                                 return -EINVAL;
    4507             :                         }
    4508         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4509             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4510             :                         uint32_t                                        i;
    4511          52 :                         uint32_t                                        cluster_count = 0;
    4512             :                         uint32_t                                        cluster_idx;
    4513             :                         size_t                                          cluster_idx_length;
    4514             : 
    4515          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4516          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4517             : 
    4518          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4519          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4520           0 :                                 return -EINVAL;
    4521             :                         }
    4522             : 
    4523         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4524         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4525             :                                 /*
    4526             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4527             :                                  * in the used cluster map.
    4528             :                                  */
    4529         600 :                                 if (cluster_idx != 0) {
    4530         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4531           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4532           0 :                                                 return -EINVAL;
    4533             :                                         }
    4534         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4535         600 :                                         if (bs->num_free_clusters == 0) {
    4536           0 :                                                 return -ENOSPC;
    4537             :                                         }
    4538         600 :                                         bs->num_free_clusters--;
    4539             :                                 }
    4540         600 :                                 cluster_count++;
    4541             :                         }
    4542             : 
    4543          52 :                         if (cluster_count == 0) {
    4544           0 :                                 return -EINVAL;
    4545             :                         }
    4546         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4547             :                         /* Skip this item */
    4548         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4549             :                         /* Skip this item */
    4550         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4551             :                         /* Skip this item */
    4552          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4553             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4554          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4555             :                         uint32_t i;
    4556             :                         size_t extent_pages_length;
    4557             :                         void *tmp;
    4558             : 
    4559          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4560          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4561             : 
    4562          82 :                         if (desc_extent_table->length == 0 ||
    4563          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4564           0 :                                 return -EINVAL;
    4565             :                         }
    4566             : 
    4567         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4568          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4569          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4570           0 :                                                 return -EINVAL;
    4571             :                                         }
    4572          52 :                                         num_extent_pages += 1;
    4573             :                                 }
    4574             :                         }
    4575             : 
    4576          82 :                         if (num_extent_pages > 0) {
    4577          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4578          52 :                                 if (tmp == NULL) {
    4579           0 :                                         return -ENOMEM;
    4580             :                                 }
    4581          52 :                                 ctx->extent_page_num = tmp;
    4582             : 
    4583             :                                 /* Extent table entries contain md page numbers for extent pages.
    4584             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4585             :                                  */
    4586         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4587          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4588          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4589          52 :                                                 ctx->num_extent_pages += 1;
    4590             :                                         }
    4591             :                                 }
    4592             :                         }
    4593             :                 } else {
    4594             :                         /* Error */
    4595           0 :                         return -EINVAL;
    4596             :                 }
    4597             :                 /* Advance to the next descriptor */
    4598         492 :                 cur_desc += sizeof(*desc) + desc->length;
    4599         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4600          20 :                         break;
    4601             :                 }
    4602         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4603             :         }
    4604         246 :         return 0;
    4605             : }
    4606             : 
    4607             : static bool
    4608        1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4609             : {
    4610             :         uint32_t crc;
    4611        1296 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4612             :         size_t desc_len;
    4613             : 
    4614        1296 :         crc = blob_md_page_calc_crc(page);
    4615        1296 :         if (crc != page->crc) {
    4616           0 :                 return false;
    4617             :         }
    4618             : 
    4619             :         /* Extent page should always be of sequence num 0. */
    4620        1296 :         if (page->sequence_num != 0) {
    4621          44 :                 return false;
    4622             :         }
    4623             : 
    4624             :         /* Descriptor type must be EXTENT_PAGE. */
    4625        1252 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4626         154 :                 return false;
    4627             :         }
    4628             : 
    4629             :         /* Descriptor length cannot exceed the page. */
    4630        1098 :         desc_len = sizeof(*desc) + desc->length;
    4631        1098 :         if (desc_len > sizeof(page->descriptors)) {
    4632           0 :                 return false;
    4633             :         }
    4634             : 
    4635             :         /* It has to be the only descriptor in the page. */
    4636        1098 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4637        1098 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4638        1098 :                 if (desc->length != 0) {
    4639           0 :                         return false;
    4640             :                 }
    4641             :         }
    4642             : 
    4643        1098 :         return true;
    4644             : }
    4645             : 
    4646             : static bool
    4647        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4648             : {
    4649             :         uint32_t crc;
    4650        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4651             : 
    4652        6754 :         crc = blob_md_page_calc_crc(page);
    4653        6754 :         if (crc != page->crc) {
    4654        6538 :                 return false;
    4655             :         }
    4656             : 
    4657             :         /* First page of a sequence should match the blobid. */
    4658         216 :         if (page->sequence_num == 0 &&
    4659         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4660          18 :                 return false;
    4661             :         }
    4662         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4663             : 
    4664         198 :         return true;
    4665             : }
    4666             : 
    4667             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4668             : 
    4669             : static void
    4670         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4671             : {
    4672         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4673             : 
    4674         106 :         if (bserrno != 0) {
    4675           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4676           0 :                 return;
    4677             :         }
    4678             : 
    4679         106 :         bs_load_complete(ctx);
    4680             : }
    4681             : 
    4682             : static void
    4683         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4684             : {
    4685         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4686             : 
    4687         106 :         spdk_free(ctx->mask);
    4688         106 :         ctx->mask = NULL;
    4689             : 
    4690         106 :         if (bserrno != 0) {
    4691           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4692           0 :                 return;
    4693             :         }
    4694             : 
    4695         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4696             : }
    4697             : 
    4698             : static void
    4699         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4700             : {
    4701         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4702             : 
    4703         106 :         spdk_free(ctx->mask);
    4704         106 :         ctx->mask = NULL;
    4705             : 
    4706         106 :         if (bserrno != 0) {
    4707           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4708           0 :                 return;
    4709             :         }
    4710             : 
    4711         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4712             : }
    4713             : 
    4714             : static void
    4715         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4716             : {
    4717         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4718         106 : }
    4719             : 
    4720             : static void
    4721        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4722             : {
    4723             :         uint64_t num_md_clusters;
    4724             :         uint64_t i;
    4725             : 
    4726        6714 :         ctx->in_page_chain = false;
    4727             : 
    4728             :         do {
    4729        6784 :                 ctx->page_index++;
    4730        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4731             : 
    4732        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4733        6608 :                 ctx->cur_page = ctx->page_index;
    4734        6608 :                 bs_load_replay_cur_md_page(ctx);
    4735             :         } else {
    4736             :                 /* Claim all of the clusters used by the metadata */
    4737         106 :                 num_md_clusters = spdk_divide_round_up(
    4738         106 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4739         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4740         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4741             :                 }
    4742         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4743         106 :                 spdk_free(ctx->page);
    4744         106 :                 bs_load_write_used_md(ctx);
    4745             :         }
    4746        6714 : }
    4747             : 
    4748             : static void
    4749          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4750             : {
    4751          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4752             :         uint32_t page_num;
    4753             :         uint64_t i;
    4754             : 
    4755          52 :         if (bserrno != 0) {
    4756           0 :                 spdk_free(ctx->extent_pages);
    4757           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4758           0 :                 return;
    4759             :         }
    4760             : 
    4761         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4762             :                 /* Extent pages are only read when present within in chain md.
    4763             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4764          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4765           0 :                         spdk_free(ctx->extent_pages);
    4766           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4767           0 :                         return;
    4768             :                 }
    4769             : 
    4770          52 :                 page_num = ctx->extent_page_num[i];
    4771          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4772          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4773           0 :                         spdk_free(ctx->extent_pages);
    4774           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4775           0 :                         return;
    4776             :                 }
    4777             :         }
    4778             : 
    4779          52 :         spdk_free(ctx->extent_pages);
    4780          52 :         free(ctx->extent_page_num);
    4781          52 :         ctx->extent_page_num = NULL;
    4782          52 :         ctx->num_extent_pages = 0;
    4783             : 
    4784          52 :         bs_load_replay_md_chain_cpl(ctx);
    4785             : }
    4786             : 
    4787             : static void
    4788          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4789             : {
    4790             :         spdk_bs_batch_t *batch;
    4791             :         uint32_t page;
    4792             :         uint64_t lba;
    4793             :         uint64_t i;
    4794             : 
    4795          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4796             :                                          NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4797          52 :         if (!ctx->extent_pages) {
    4798           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4799           0 :                 return;
    4800             :         }
    4801             : 
    4802          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4803             : 
    4804         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4805          52 :                 page = ctx->extent_page_num[i];
    4806          52 :                 assert(page < ctx->super->md_len);
    4807          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4808          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4809          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4810             :         }
    4811             : 
    4812          52 :         bs_batch_close(batch);
    4813             : }
    4814             : 
    4815             : static void
    4816        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4817             : {
    4818        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4819             :         uint32_t page_num;
    4820             :         struct spdk_blob_md_page *page;
    4821             : 
    4822        6754 :         if (bserrno != 0) {
    4823           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4824           0 :                 return;
    4825             :         }
    4826             : 
    4827        6754 :         page_num = ctx->cur_page;
    4828        6754 :         page = ctx->page;
    4829        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4830         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4831         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4832         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4833         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4834         194 :                         if (page->sequence_num == 0) {
    4835         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4836         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4837             :                         }
    4838         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4839           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4840           0 :                                 return;
    4841             :                         }
    4842         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4843          40 :                                 ctx->in_page_chain = true;
    4844          40 :                                 ctx->cur_page = page->next;
    4845          40 :                                 bs_load_replay_cur_md_page(ctx);
    4846          40 :                                 return;
    4847             :                         }
    4848         154 :                         if (ctx->num_extent_pages != 0) {
    4849          52 :                                 bs_load_replay_extent_pages(ctx);
    4850          52 :                                 return;
    4851             :                         }
    4852             :                 }
    4853             :         }
    4854        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4855             : }
    4856             : 
    4857             : static void
    4858        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4859             : {
    4860             :         uint64_t lba;
    4861             : 
    4862        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4863        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4864        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4865        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4866             :                              bs_load_replay_md_cpl, ctx);
    4867        6754 : }
    4868             : 
    4869             : static void
    4870         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4871             : {
    4872         106 :         ctx->page_index = 0;
    4873         106 :         ctx->cur_page = 0;
    4874         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4875             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4876         106 :         if (!ctx->page) {
    4877           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4878           0 :                 return;
    4879             :         }
    4880         106 :         bs_load_replay_cur_md_page(ctx);
    4881             : }
    4882             : 
    4883             : static void
    4884         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4885             : {
    4886             :         int             rc;
    4887             : 
    4888         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4889         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4890         106 :         if (rc < 0) {
    4891           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4892           0 :                 return;
    4893             :         }
    4894             : 
    4895         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4896         106 :         if (rc < 0) {
    4897           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4898           0 :                 return;
    4899             :         }
    4900             : 
    4901         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4902         106 :         if (rc < 0) {
    4903           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4904           0 :                 return;
    4905             :         }
    4906             : 
    4907         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4908         106 :         if (rc < 0) {
    4909           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4910           0 :                 return;
    4911             :         }
    4912             : 
    4913         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4914         106 :         bs_load_replay_md(ctx);
    4915             : }
    4916             : 
    4917             : static int
    4918         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4919             : {
    4920             :         int rc;
    4921             : 
    4922         276 :         if (ctx->super->size == 0) {
    4923           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4924             :         }
    4925             : 
    4926         276 :         if (ctx->super->io_unit_size == 0) {
    4927           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4928             :         }
    4929             : 
    4930         276 :         ctx->bs->clean = 1;
    4931         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4932         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4933         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    4934         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4935         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4936             :         }
    4937         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4938         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4939         276 :         if (rc < 0) {
    4940           0 :                 return -ENOMEM;
    4941             :         }
    4942         276 :         ctx->bs->md_start = ctx->super->md_start;
    4943         276 :         ctx->bs->md_len = ctx->super->md_len;
    4944         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4945         276 :         if (rc < 0) {
    4946           0 :                 return -ENOMEM;
    4947             :         }
    4948             : 
    4949         552 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4950         276 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4951         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4952         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4953             : 
    4954         276 :         return 0;
    4955             : }
    4956             : 
    4957             : static void
    4958         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4959             : {
    4960         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4961             :         int rc;
    4962             : 
    4963         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4964         300 :         if (rc != 0) {
    4965          24 :                 bs_load_ctx_fail(ctx, rc);
    4966          24 :                 return;
    4967             :         }
    4968             : 
    4969         276 :         rc = bs_parse_super(ctx);
    4970         276 :         if (rc < 0) {
    4971           0 :                 bs_load_ctx_fail(ctx, rc);
    4972           0 :                 return;
    4973             :         }
    4974             : 
    4975         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4976         106 :                 bs_recover(ctx);
    4977             :         } else {
    4978         170 :                 bs_load_read_used_pages(ctx);
    4979             :         }
    4980             : }
    4981             : 
    4982             : static inline int
    4983         308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4984             : {
    4985             : 
    4986         308 :         if (!src->opts_size) {
    4987           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4988           0 :                 return -1;
    4989             :         }
    4990             : 
    4991             : #define FIELD_OK(field) \
    4992             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4993             : 
    4994             : #define SET_FIELD(field) \
    4995             :         if (FIELD_OK(field)) { \
    4996             :                 dst->field = src->field; \
    4997             :         } \
    4998             : 
    4999         308 :         SET_FIELD(cluster_sz);
    5000         308 :         SET_FIELD(num_md_pages);
    5001         308 :         SET_FIELD(max_md_ops);
    5002         308 :         SET_FIELD(max_channel_ops);
    5003         308 :         SET_FIELD(clear_method);
    5004             : 
    5005         308 :         if (FIELD_OK(bstype)) {
    5006         308 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    5007             :         }
    5008         308 :         SET_FIELD(iter_cb_fn);
    5009         308 :         SET_FIELD(iter_cb_arg);
    5010         308 :         SET_FIELD(force_recover);
    5011         308 :         SET_FIELD(esnap_bs_dev_create);
    5012         308 :         SET_FIELD(esnap_ctx);
    5013             : 
    5014         308 :         dst->opts_size = src->opts_size;
    5015             : 
    5016             :         /* You should not remove this statement, but need to update the assert statement
    5017             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    5018             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    5019             : 
    5020             : #undef FIELD_OK
    5021             : #undef SET_FIELD
    5022             : 
    5023         308 :         return 0;
    5024             : }
    5025             : 
    5026             : void
    5027         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5028             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5029             : {
    5030         312 :         struct spdk_blob_store  *bs;
    5031         312 :         struct spdk_bs_cpl      cpl;
    5032         312 :         struct spdk_bs_load_ctx *ctx;
    5033         312 :         struct spdk_bs_opts     opts = {};
    5034             :         int err;
    5035             : 
    5036         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5037             : 
    5038         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5039           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5040           4 :                 dev->destroy(dev);
    5041           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5042           4 :                 return;
    5043             :         }
    5044             : 
    5045         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5046         308 :         if (o) {
    5047         122 :                 if (bs_opts_copy(o, &opts)) {
    5048           0 :                         return;
    5049             :                 }
    5050             :         }
    5051             : 
    5052         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5053           8 :                 dev->destroy(dev);
    5054           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5055           8 :                 return;
    5056             :         }
    5057             : 
    5058         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5059         300 :         if (err) {
    5060           0 :                 dev->destroy(dev);
    5061           0 :                 cb_fn(cb_arg, NULL, err);
    5062           0 :                 return;
    5063             :         }
    5064             : 
    5065         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5066         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5067         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5068         300 :         cpl.u.bs_handle.bs = bs;
    5069             : 
    5070         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5071         300 :         if (!ctx->seq) {
    5072           0 :                 spdk_free(ctx->super);
    5073           0 :                 free(ctx);
    5074           0 :                 bs_free(bs);
    5075           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5076           0 :                 return;
    5077             :         }
    5078             : 
    5079             :         /* Read the super block */
    5080         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5081         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5082             :                              bs_load_super_cpl, ctx);
    5083             : }
    5084             : 
    5085             : /* END spdk_bs_load */
    5086             : 
    5087             : /* START spdk_bs_dump */
    5088             : 
    5089             : static void
    5090           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5091             : {
    5092           0 :         spdk_free(ctx->super);
    5093             : 
    5094             :         /*
    5095             :          * We need to defer calling bs_call_cpl() until after
    5096             :          * dev destruction, so tuck these away for later use.
    5097             :          */
    5098           0 :         ctx->bs->unload_err = bserrno;
    5099           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5100           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5101             : 
    5102           0 :         bs_sequence_finish(seq, 0);
    5103           0 :         bs_free(ctx->bs);
    5104           0 :         free(ctx);
    5105           0 : }
    5106             : 
    5107             : static void
    5108           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5109             : {
    5110             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5111             :         uint32_t i;
    5112             :         const char *type;
    5113             : 
    5114           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5115             : 
    5116           0 :         if (desc_xattr->length !=
    5117             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5118           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5119             :         }
    5120             : 
    5121           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5122           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5123           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5124           0 :                 type = "XATTR";
    5125           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5126           0 :                 type = "XATTR_INTERNAL";
    5127             :         } else {
    5128           0 :                 assert(false);
    5129             :                 type = "XATTR_?";
    5130             :         }
    5131           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5132           0 :         fprintf(ctx->fp, "       value = \"");
    5133           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5134           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5135           0 :                             desc_xattr->value_length);
    5136           0 :         fprintf(ctx->fp, "\"\n");
    5137           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5138           0 :                 if (i % 16 == 0) {
    5139           0 :                         fprintf(ctx->fp, "               ");
    5140             :                 }
    5141           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5142           0 :                 if ((i + 1) % 16 == 0) {
    5143           0 :                         fprintf(ctx->fp, "\n");
    5144             :                 }
    5145             :         }
    5146           0 :         if (i % 16 != 0) {
    5147           0 :                 fprintf(ctx->fp, "\n");
    5148             :         }
    5149           0 : }
    5150             : 
    5151             : struct type_flag_desc {
    5152             :         uint64_t mask;
    5153             :         uint64_t val;
    5154             :         const char *name;
    5155             : };
    5156             : 
    5157             : static void
    5158           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5159             :                         struct type_flag_desc *desc, size_t numflags)
    5160             : {
    5161           0 :         uint64_t covered = 0;
    5162             :         size_t i;
    5163             : 
    5164           0 :         for (i = 0; i < numflags; i++) {
    5165           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5166           0 :                         continue;
    5167             :                 }
    5168           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5169           0 :                 if (desc[i].mask != desc[i].val) {
    5170           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5171           0 :                                 desc[i].mask, desc[i].val);
    5172             :                 }
    5173           0 :                 fprintf(ctx->fp, "\n");
    5174           0 :                 covered |= desc[i].mask;
    5175             :         }
    5176           0 :         if ((flags & ~covered) != 0) {
    5177           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5178             :         }
    5179           0 : }
    5180             : 
    5181             : static void
    5182           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5183             : {
    5184             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5185             : #define ADD_FLAG(f) { f, f, #f }
    5186             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5187             :         static struct type_flag_desc invalid[] = {
    5188             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5189             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5190             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5191             :         };
    5192             :         static struct type_flag_desc data_ro[] = {
    5193             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5194             :         };
    5195             :         static struct type_flag_desc md_ro[] = {
    5196             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5197             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5198             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5199             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5200             :         };
    5201             : #undef ADD_FLAG
    5202             : #undef ADD_MASK_VAL
    5203             : 
    5204           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5205           0 :         fprintf(ctx->fp, "Flags:\n");
    5206           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5207           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5208             :                                 SPDK_COUNTOF(invalid));
    5209           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5210           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5211             :                                 SPDK_COUNTOF(data_ro));
    5212           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5213           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5214             :                                 SPDK_COUNTOF(md_ro));
    5215           0 : }
    5216             : 
    5217             : static void
    5218           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5219             : {
    5220             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5221             :         uint64_t num_extent_pages;
    5222             :         uint32_t et_idx;
    5223             : 
    5224           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5225           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5226             :                            sizeof(et_desc->extent_page[0]);
    5227             : 
    5228           0 :         fprintf(ctx->fp, "Extent table:\n");
    5229           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5230           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5231             :                         /* Zeroes represent unallocated extent pages. */
    5232           0 :                         continue;
    5233             :                 }
    5234           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5235             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5236             :                         et_desc->extent_page[et_idx].num_pages,
    5237             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5238             :         }
    5239           0 : }
    5240             : 
    5241             : static void
    5242           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5243             : {
    5244           0 :         uint32_t page_idx = ctx->cur_page;
    5245           0 :         struct spdk_blob_md_page *page = ctx->page;
    5246             :         struct spdk_blob_md_descriptor *desc;
    5247           0 :         size_t cur_desc = 0;
    5248             :         uint32_t crc;
    5249             : 
    5250           0 :         fprintf(ctx->fp, "=========\n");
    5251           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5252           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5253           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5254           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5255           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5256           0 :                 fprintf(ctx->fp, "Next: None\n");
    5257             :         } else {
    5258           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5259             :         }
    5260           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5261           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5262           0 :                 fprintf(ctx->fp, " md");
    5263             :         }
    5264           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5265           0 :                 fprintf(ctx->fp, " blob");
    5266             :         }
    5267           0 :         fprintf(ctx->fp, "\n");
    5268             : 
    5269           0 :         crc = blob_md_page_calc_crc(page);
    5270           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5271             : 
    5272           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5273           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5274           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5275           0 :                         if (desc->length == 0) {
    5276             :                                 /* If padding and length are 0, this terminates the page */
    5277           0 :                                 break;
    5278             :                         }
    5279           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5280             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5281             :                         unsigned int                            i;
    5282             : 
    5283           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5284             : 
    5285           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5286           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5287           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5288             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5289             :                                 } else {
    5290           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5291             :                                 }
    5292           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5293           0 :                                 fprintf(ctx->fp, "\n");
    5294             :                         }
    5295           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5296             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5297             :                         unsigned int                                    i;
    5298             : 
    5299           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5300             : 
    5301           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5302           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5303           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5304             :                                                 desc_extent->cluster_idx[i]);
    5305             :                                 } else {
    5306           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5307             :                                 }
    5308           0 :                                 fprintf(ctx->fp, "\n");
    5309             :                         }
    5310           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5311           0 :                         bs_dump_print_xattr(ctx, desc);
    5312           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5313           0 :                         bs_dump_print_xattr(ctx, desc);
    5314           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5315           0 :                         bs_dump_print_type_flags(ctx, desc);
    5316           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5317           0 :                         bs_dump_print_extent_table(ctx, desc);
    5318             :                 } else {
    5319             :                         /* Error */
    5320           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5321             :                 }
    5322             :                 /* Advance to the next descriptor */
    5323           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5324           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5325           0 :                         break;
    5326             :                 }
    5327           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5328             :         }
    5329           0 : }
    5330             : 
    5331             : static void
    5332           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5333             : {
    5334           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5335             : 
    5336           0 :         if (bserrno != 0) {
    5337           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5338           0 :                 return;
    5339             :         }
    5340             : 
    5341           0 :         if (ctx->page->id != 0) {
    5342           0 :                 bs_dump_print_md_page(ctx);
    5343             :         }
    5344             : 
    5345           0 :         ctx->cur_page++;
    5346             : 
    5347           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5348           0 :                 bs_dump_read_md_page(seq, ctx);
    5349             :         } else {
    5350           0 :                 spdk_free(ctx->page);
    5351           0 :                 bs_dump_finish(seq, ctx, 0);
    5352             :         }
    5353             : }
    5354             : 
    5355             : static void
    5356           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5357             : {
    5358           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5359             :         uint64_t lba;
    5360             : 
    5361           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5362           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5363           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5364           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5365             :                              bs_dump_read_md_page_cpl, ctx);
    5366           0 : }
    5367             : 
    5368             : static void
    5369           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5370             : {
    5371           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5372             :         int rc;
    5373             : 
    5374           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5375           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5376             :                    sizeof(ctx->super->signature)) != 0) {
    5377           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5378           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5379           0 :                 return;
    5380             :         } else {
    5381           0 :                 fprintf(ctx->fp, "(OK)\n");
    5382             :         }
    5383           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5384           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5385           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5386           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5387           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5388           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5389           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5390           0 :                 fprintf(ctx->fp, "(None)\n");
    5391             :         } else {
    5392           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5393             :         }
    5394           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5395           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5396           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5397           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5398           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5399           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5400           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5401           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5402           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5403             : 
    5404           0 :         ctx->cur_page = 0;
    5405           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5406             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5407           0 :         if (!ctx->page) {
    5408           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5409           0 :                 return;
    5410             :         }
    5411             : 
    5412           0 :         rc = bs_parse_super(ctx);
    5413           0 :         if (rc < 0) {
    5414           0 :                 bs_load_ctx_fail(ctx, rc);
    5415           0 :                 return;
    5416             :         }
    5417             : 
    5418           0 :         bs_load_read_used_pages(ctx);
    5419             : }
    5420             : 
    5421             : void
    5422           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5423             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5424             : {
    5425           0 :         struct spdk_blob_store  *bs;
    5426           0 :         struct spdk_bs_cpl      cpl;
    5427           0 :         struct spdk_bs_load_ctx *ctx;
    5428           0 :         struct spdk_bs_opts     opts = {};
    5429             :         int err;
    5430             : 
    5431           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5432             : 
    5433           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5434             : 
    5435           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5436           0 :         if (err) {
    5437           0 :                 dev->destroy(dev);
    5438           0 :                 cb_fn(cb_arg, err);
    5439           0 :                 return;
    5440             :         }
    5441             : 
    5442           0 :         ctx->dumping = true;
    5443           0 :         ctx->fp = fp;
    5444           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5445             : 
    5446           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5447           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5448           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5449             : 
    5450           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5451           0 :         if (!ctx->seq) {
    5452           0 :                 spdk_free(ctx->super);
    5453           0 :                 free(ctx);
    5454           0 :                 bs_free(bs);
    5455           0 :                 cb_fn(cb_arg, -ENOMEM);
    5456           0 :                 return;
    5457             :         }
    5458             : 
    5459             :         /* Read the super block */
    5460           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5461           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5462             :                              bs_dump_super_cpl, ctx);
    5463             : }
    5464             : 
    5465             : /* END spdk_bs_dump */
    5466             : 
    5467             : /* START spdk_bs_init */
    5468             : 
    5469             : static void
    5470         472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5471             : {
    5472         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5473             : 
    5474         472 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5475         472 :         spdk_free(ctx->super);
    5476         472 :         free(ctx);
    5477             : 
    5478         472 :         bs_sequence_finish(seq, bserrno);
    5479         472 : }
    5480             : 
    5481             : static void
    5482         472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5483             : {
    5484         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5485             : 
    5486             :         /* Write super block */
    5487         472 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5488         472 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5489             :                               bs_init_persist_super_cpl, ctx);
    5490         472 : }
    5491             : 
    5492             : void
    5493         488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5494             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5495             : {
    5496         488 :         struct spdk_bs_load_ctx *ctx;
    5497         488 :         struct spdk_blob_store  *bs;
    5498         488 :         struct spdk_bs_cpl      cpl;
    5499             :         spdk_bs_sequence_t      *seq;
    5500             :         spdk_bs_batch_t         *batch;
    5501             :         uint64_t                num_md_lba;
    5502             :         uint64_t                num_md_pages;
    5503             :         uint64_t                num_md_clusters;
    5504             :         uint64_t                max_used_cluster_mask_len;
    5505             :         uint32_t                i;
    5506         488 :         struct spdk_bs_opts     opts = {};
    5507             :         int                     rc;
    5508             :         uint64_t                lba, lba_count;
    5509             : 
    5510         488 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5511             : 
    5512         488 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5513           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5514             :                             dev->blocklen);
    5515           4 :                 dev->destroy(dev);
    5516           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5517           4 :                 return;
    5518             :         }
    5519             : 
    5520         484 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5521         484 :         if (o) {
    5522         182 :                 if (bs_opts_copy(o, &opts)) {
    5523           0 :                         return;
    5524             :                 }
    5525             :         }
    5526             : 
    5527         484 :         if (bs_opts_verify(&opts) != 0) {
    5528           4 :                 dev->destroy(dev);
    5529           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5530           4 :                 return;
    5531             :         }
    5532             : 
    5533         480 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5534         480 :         if (rc) {
    5535           4 :                 dev->destroy(dev);
    5536           4 :                 cb_fn(cb_arg, NULL, rc);
    5537           4 :                 return;
    5538             :         }
    5539             : 
    5540         476 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5541             :                 /* By default, allocate 1 page per cluster.
    5542             :                  * Technically, this over-allocates metadata
    5543             :                  * because more metadata will reduce the number
    5544             :                  * of usable clusters. This can be addressed with
    5545             :                  * more complex math in the future.
    5546             :                  */
    5547         468 :                 bs->md_len = bs->total_clusters;
    5548             :         } else {
    5549           8 :                 bs->md_len = opts.num_md_pages;
    5550             :         }
    5551         476 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5552         476 :         if (rc < 0) {
    5553           0 :                 spdk_free(ctx->super);
    5554           0 :                 free(ctx);
    5555           0 :                 bs_free(bs);
    5556           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5557           0 :                 return;
    5558             :         }
    5559             : 
    5560         476 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5561         476 :         if (rc < 0) {
    5562           0 :                 spdk_free(ctx->super);
    5563           0 :                 free(ctx);
    5564           0 :                 bs_free(bs);
    5565           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5566           0 :                 return;
    5567             :         }
    5568             : 
    5569         476 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5570         476 :         if (rc < 0) {
    5571           0 :                 spdk_free(ctx->super);
    5572           0 :                 free(ctx);
    5573           0 :                 bs_free(bs);
    5574           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5575           0 :                 return;
    5576             :         }
    5577             : 
    5578         476 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5579             :                sizeof(ctx->super->signature));
    5580         476 :         ctx->super->version = SPDK_BS_VERSION;
    5581         476 :         ctx->super->length = sizeof(*ctx->super);
    5582         476 :         ctx->super->super_blob = bs->super_blob;
    5583         476 :         ctx->super->clean = 0;
    5584         476 :         ctx->super->cluster_size = bs->cluster_sz;
    5585         476 :         ctx->super->io_unit_size = bs->io_unit_size;
    5586         476 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5587             : 
    5588             :         /* Calculate how many pages the metadata consumes at the front
    5589             :          * of the disk.
    5590             :          */
    5591             : 
    5592             :         /* The super block uses 1 page */
    5593         476 :         num_md_pages = 1;
    5594             : 
    5595             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5596             :          * up to the nearest page, plus a header.
    5597             :          */
    5598         476 :         ctx->super->used_page_mask_start = num_md_pages;
    5599         476 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5600         476 :                                          spdk_divide_round_up(bs->md_len, 8),
    5601             :                                          SPDK_BS_PAGE_SIZE);
    5602         476 :         num_md_pages += ctx->super->used_page_mask_len;
    5603             : 
    5604             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5605             :          * up to the nearest page, plus a header.
    5606             :          */
    5607         476 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5608         476 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5609         476 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5610             :                                             SPDK_BS_PAGE_SIZE);
    5611             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5612             :          * Here we calculate the max clusters we can support according to the
    5613             :          * num_md_pages (bs->md_len).
    5614             :          */
    5615         476 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5616         476 :                                     spdk_divide_round_up(bs->md_len, 8),
    5617             :                                     SPDK_BS_PAGE_SIZE);
    5618         476 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5619             :                                              ctx->super->used_cluster_mask_len);
    5620         476 :         num_md_pages += max_used_cluster_mask_len;
    5621             : 
    5622             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5623             :          * up to the nearest page, plus a header.
    5624             :          */
    5625         476 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5626         476 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5627         476 :                                            spdk_divide_round_up(bs->md_len, 8),
    5628             :                                            SPDK_BS_PAGE_SIZE);
    5629         476 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5630             : 
    5631             :         /* The metadata region size was chosen above */
    5632         476 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5633         476 :         ctx->super->md_len = bs->md_len;
    5634         476 :         num_md_pages += bs->md_len;
    5635             : 
    5636         476 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5637             : 
    5638         476 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5639             : 
    5640         476 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5641             : 
    5642         476 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5643         476 :         if (num_md_clusters > bs->total_clusters) {
    5644           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5645             :                             "please decrease number of pages reserved for metadata "
    5646             :                             "or increase cluster size.\n");
    5647           4 :                 spdk_free(ctx->super);
    5648           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5649           4 :                 free(ctx);
    5650           4 :                 bs_free(bs);
    5651           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5652           4 :                 return;
    5653             :         }
    5654             :         /* Claim all of the clusters used by the metadata */
    5655       75700 :         for (i = 0; i < num_md_clusters; i++) {
    5656       75228 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5657             :         }
    5658             : 
    5659         472 :         bs->num_free_clusters -= num_md_clusters;
    5660         472 :         bs->total_data_clusters = bs->num_free_clusters;
    5661             : 
    5662         472 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5663         472 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5664         472 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5665         472 :         cpl.u.bs_handle.bs = bs;
    5666             : 
    5667         472 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5668         472 :         if (!seq) {
    5669           0 :                 spdk_free(ctx->super);
    5670           0 :                 free(ctx);
    5671           0 :                 bs_free(bs);
    5672           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5673           0 :                 return;
    5674             :         }
    5675             : 
    5676         472 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5677             : 
    5678             :         /* Clear metadata space */
    5679         472 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5680             : 
    5681         472 :         lba = num_md_lba;
    5682         472 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5683         472 :         switch (opts.clear_method) {
    5684         456 :         case BS_CLEAR_WITH_UNMAP:
    5685             :                 /* Trim data clusters */
    5686         456 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5687         456 :                 break;
    5688           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5689             :                 /* Write_zeroes to data clusters */
    5690           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5691           0 :                 break;
    5692          16 :         case BS_CLEAR_WITH_NONE:
    5693             :         default:
    5694          16 :                 break;
    5695             :         }
    5696             : 
    5697         472 :         bs_batch_close(batch);
    5698             : }
    5699             : 
    5700             : /* END spdk_bs_init */
    5701             : 
    5702             : /* START spdk_bs_destroy */
    5703             : 
    5704             : static void
    5705           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5706             : {
    5707           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5708           4 :         struct spdk_blob_store *bs = ctx->bs;
    5709             : 
    5710             :         /*
    5711             :          * We need to defer calling bs_call_cpl() until after
    5712             :          * dev destruction, so tuck these away for later use.
    5713             :          */
    5714           4 :         bs->unload_err = bserrno;
    5715           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5716           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5717             : 
    5718           4 :         bs_sequence_finish(seq, bserrno);
    5719             : 
    5720           4 :         bs_free(bs);
    5721           4 :         free(ctx);
    5722           4 : }
    5723             : 
    5724             : void
    5725           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5726             :                 void *cb_arg)
    5727             : {
    5728           4 :         struct spdk_bs_cpl      cpl;
    5729             :         spdk_bs_sequence_t      *seq;
    5730             :         struct spdk_bs_load_ctx *ctx;
    5731             : 
    5732           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5733             : 
    5734           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5735           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5736           0 :                 cb_fn(cb_arg, -EBUSY);
    5737           0 :                 return;
    5738             :         }
    5739             : 
    5740           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5741           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5742           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5743             : 
    5744           4 :         ctx = calloc(1, sizeof(*ctx));
    5745           4 :         if (!ctx) {
    5746           0 :                 cb_fn(cb_arg, -ENOMEM);
    5747           0 :                 return;
    5748             :         }
    5749             : 
    5750           4 :         ctx->bs = bs;
    5751             : 
    5752           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5753           4 :         if (!seq) {
    5754           0 :                 free(ctx);
    5755           0 :                 cb_fn(cb_arg, -ENOMEM);
    5756           0 :                 return;
    5757             :         }
    5758             : 
    5759             :         /* Write zeroes to the super block */
    5760           4 :         bs_sequence_write_zeroes_dev(seq,
    5761             :                                      bs_page_to_lba(bs, 0),
    5762             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5763             :                                      bs_destroy_trim_cpl, ctx);
    5764             : }
    5765             : 
    5766             : /* END spdk_bs_destroy */
    5767             : 
    5768             : /* START spdk_bs_unload */
    5769             : 
    5770             : static void
    5771         654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5772             : {
    5773         654 :         spdk_bs_sequence_t *seq = ctx->seq;
    5774             : 
    5775         654 :         spdk_free(ctx->super);
    5776             : 
    5777             :         /*
    5778             :          * We need to defer calling bs_call_cpl() until after
    5779             :          * dev destruction, so tuck these away for later use.
    5780             :          */
    5781         654 :         ctx->bs->unload_err = bserrno;
    5782         654 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5783         654 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5784             : 
    5785         654 :         bs_sequence_finish(seq, bserrno);
    5786             : 
    5787         654 :         bs_free(ctx->bs);
    5788         654 :         free(ctx);
    5789         654 : }
    5790             : 
    5791             : static void
    5792         654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5793             : {
    5794         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5795             : 
    5796         654 :         bs_unload_finish(ctx, bserrno);
    5797         654 : }
    5798             : 
    5799             : static void
    5800         654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5801             : {
    5802         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5803             : 
    5804         654 :         spdk_free(ctx->mask);
    5805             : 
    5806         654 :         if (bserrno != 0) {
    5807           0 :                 bs_unload_finish(ctx, bserrno);
    5808           0 :                 return;
    5809             :         }
    5810             : 
    5811         654 :         ctx->super->clean = 1;
    5812             : 
    5813         654 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5814             : }
    5815             : 
    5816             : static void
    5817         654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5818             : {
    5819         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5820             : 
    5821         654 :         spdk_free(ctx->mask);
    5822         654 :         ctx->mask = NULL;
    5823             : 
    5824         654 :         if (bserrno != 0) {
    5825           0 :                 bs_unload_finish(ctx, bserrno);
    5826           0 :                 return;
    5827             :         }
    5828             : 
    5829         654 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5830             : }
    5831             : 
    5832             : static void
    5833         654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5834             : {
    5835         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5836             : 
    5837         654 :         spdk_free(ctx->mask);
    5838         654 :         ctx->mask = NULL;
    5839             : 
    5840         654 :         if (bserrno != 0) {
    5841           0 :                 bs_unload_finish(ctx, bserrno);
    5842           0 :                 return;
    5843             :         }
    5844             : 
    5845         654 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5846             : }
    5847             : 
    5848             : static void
    5849         654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5850             : {
    5851         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5852             :         int rc;
    5853             : 
    5854         654 :         if (bserrno != 0) {
    5855           0 :                 bs_unload_finish(ctx, bserrno);
    5856           0 :                 return;
    5857             :         }
    5858             : 
    5859         654 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5860         654 :         if (rc != 0) {
    5861           0 :                 bs_unload_finish(ctx, rc);
    5862           0 :                 return;
    5863             :         }
    5864             : 
    5865         654 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5866             : }
    5867             : 
    5868             : void
    5869         662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5870             : {
    5871         662 :         struct spdk_bs_cpl      cpl;
    5872             :         struct spdk_bs_load_ctx *ctx;
    5873             : 
    5874         662 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5875             : 
    5876             :         /*
    5877             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5878             :          * unload is deferred until after the channel destruction completes.
    5879             :          */
    5880         662 :         if (bs->esnap_channels_unloading != 0) {
    5881           4 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5882           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5883           0 :                         cb_fn(cb_arg, -EBUSY);
    5884           0 :                         return;
    5885             :                 }
    5886           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5887             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5888           4 :                 bs->esnap_unload_cb_fn = cb_fn;
    5889           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5890           4 :                 return;
    5891             :         }
    5892         658 :         if (bs->esnap_unload_cb_fn != NULL) {
    5893           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5894           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5895           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5896           4 :                 bs->esnap_unload_cb_fn = NULL;
    5897           4 :                 bs->esnap_unload_cb_arg = NULL;
    5898             :         }
    5899             : 
    5900         658 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5901           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5902           4 :                 cb_fn(cb_arg, -EBUSY);
    5903           4 :                 return;
    5904             :         }
    5905             : 
    5906         654 :         ctx = calloc(1, sizeof(*ctx));
    5907         654 :         if (!ctx) {
    5908           0 :                 cb_fn(cb_arg, -ENOMEM);
    5909           0 :                 return;
    5910             :         }
    5911             : 
    5912         654 :         ctx->bs = bs;
    5913             : 
    5914         654 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5915             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5916         654 :         if (!ctx->super) {
    5917           0 :                 free(ctx);
    5918           0 :                 cb_fn(cb_arg, -ENOMEM);
    5919           0 :                 return;
    5920             :         }
    5921             : 
    5922         654 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5923         654 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5924         654 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5925             : 
    5926         654 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5927         654 :         if (!ctx->seq) {
    5928           0 :                 spdk_free(ctx->super);
    5929           0 :                 free(ctx);
    5930           0 :                 cb_fn(cb_arg, -ENOMEM);
    5931           0 :                 return;
    5932             :         }
    5933             : 
    5934             :         /* Read super block */
    5935         654 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5936         654 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5937             :                              bs_unload_read_super_cpl, ctx);
    5938             : }
    5939             : 
    5940             : /* END spdk_bs_unload */
    5941             : 
    5942             : /* START spdk_bs_set_super */
    5943             : 
    5944             : struct spdk_bs_set_super_ctx {
    5945             :         struct spdk_blob_store          *bs;
    5946             :         struct spdk_bs_super_block      *super;
    5947             : };
    5948             : 
    5949             : static void
    5950           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5951             : {
    5952           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5953             : 
    5954           8 :         if (bserrno != 0) {
    5955           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5956             :         }
    5957             : 
    5958           8 :         spdk_free(ctx->super);
    5959             : 
    5960           8 :         bs_sequence_finish(seq, bserrno);
    5961             : 
    5962           8 :         free(ctx);
    5963           8 : }
    5964             : 
    5965             : static void
    5966           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5967             : {
    5968           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5969             :         int rc;
    5970             : 
    5971           8 :         if (bserrno != 0) {
    5972           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5973           0 :                 spdk_free(ctx->super);
    5974           0 :                 bs_sequence_finish(seq, bserrno);
    5975           0 :                 free(ctx);
    5976           0 :                 return;
    5977             :         }
    5978             : 
    5979           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5980           8 :         if (rc != 0) {
    5981           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5982           0 :                 spdk_free(ctx->super);
    5983           0 :                 bs_sequence_finish(seq, rc);
    5984           0 :                 free(ctx);
    5985           0 :                 return;
    5986             :         }
    5987             : 
    5988           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    5989             : }
    5990             : 
    5991             : void
    5992           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5993             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5994             : {
    5995           8 :         struct spdk_bs_cpl              cpl;
    5996             :         spdk_bs_sequence_t              *seq;
    5997             :         struct spdk_bs_set_super_ctx    *ctx;
    5998             : 
    5999           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    6000             : 
    6001           8 :         ctx = calloc(1, sizeof(*ctx));
    6002           8 :         if (!ctx) {
    6003           0 :                 cb_fn(cb_arg, -ENOMEM);
    6004           0 :                 return;
    6005             :         }
    6006             : 
    6007           8 :         ctx->bs = bs;
    6008             : 
    6009           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    6010             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    6011           8 :         if (!ctx->super) {
    6012           0 :                 free(ctx);
    6013           0 :                 cb_fn(cb_arg, -ENOMEM);
    6014           0 :                 return;
    6015             :         }
    6016             : 
    6017           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    6018           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    6019           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    6020             : 
    6021           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6022           8 :         if (!seq) {
    6023           0 :                 spdk_free(ctx->super);
    6024           0 :                 free(ctx);
    6025           0 :                 cb_fn(cb_arg, -ENOMEM);
    6026           0 :                 return;
    6027             :         }
    6028             : 
    6029           8 :         bs->super_blob = blobid;
    6030             : 
    6031             :         /* Read super block */
    6032           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6033           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6034             :                              bs_set_super_read_cpl, ctx);
    6035             : }
    6036             : 
    6037             : /* END spdk_bs_set_super */
    6038             : 
    6039             : void
    6040          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6041             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6042             : {
    6043          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6044           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6045             :         } else {
    6046           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6047             :         }
    6048          12 : }
    6049             : 
    6050             : uint64_t
    6051         132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6052             : {
    6053         132 :         return bs->cluster_sz;
    6054             : }
    6055             : 
    6056             : uint64_t
    6057          68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6058             : {
    6059          68 :         return SPDK_BS_PAGE_SIZE;
    6060             : }
    6061             : 
    6062             : uint64_t
    6063         738 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6064             : {
    6065         738 :         return bs->io_unit_size;
    6066             : }
    6067             : 
    6068             : uint64_t
    6069         540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6070             : {
    6071         540 :         return bs->num_free_clusters;
    6072             : }
    6073             : 
    6074             : uint64_t
    6075          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6076             : {
    6077          92 :         return bs->total_data_clusters;
    6078             : }
    6079             : 
    6080             : static int
    6081         780 : bs_register_md_thread(struct spdk_blob_store *bs)
    6082             : {
    6083         780 :         bs->md_channel = spdk_get_io_channel(bs);
    6084         780 :         if (!bs->md_channel) {
    6085           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6086           0 :                 return -1;
    6087             :         }
    6088             : 
    6089         780 :         return 0;
    6090             : }
    6091             : 
    6092             : static int
    6093         780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6094             : {
    6095         780 :         spdk_put_io_channel(bs->md_channel);
    6096             : 
    6097         780 :         return 0;
    6098             : }
    6099             : 
    6100             : spdk_blob_id
    6101         562 : spdk_blob_get_id(struct spdk_blob *blob)
    6102             : {
    6103         562 :         assert(blob != NULL);
    6104             : 
    6105         562 :         return blob->id;
    6106             : }
    6107             : 
    6108             : uint64_t
    6109          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    6110             : {
    6111          24 :         assert(blob != NULL);
    6112             : 
    6113          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    6114             : }
    6115             : 
    6116             : uint64_t
    6117          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6118             : {
    6119          24 :         assert(blob != NULL);
    6120             : 
    6121          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    6122             : }
    6123             : 
    6124             : uint64_t
    6125         569 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6126             : {
    6127         569 :         assert(blob != NULL);
    6128             : 
    6129         569 :         return blob->active.num_clusters;
    6130             : }
    6131             : 
    6132             : uint64_t
    6133         330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6134             : {
    6135         330 :         assert(blob != NULL);
    6136             : 
    6137         330 :         return blob->active.num_allocated_clusters;
    6138             : }
    6139             : 
    6140             : static uint64_t
    6141          24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6142             : {
    6143          24 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6144             : 
    6145          44 :         while (offset < blob_io_unit_num) {
    6146          40 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6147          20 :                         return offset;
    6148             :                 }
    6149             : 
    6150          20 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    6151             :         }
    6152             : 
    6153           4 :         return UINT64_MAX;
    6154             : }
    6155             : 
    6156             : uint64_t
    6157          12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6158             : {
    6159          12 :         return blob_find_io_unit(blob, offset, true);
    6160             : }
    6161             : 
    6162             : uint64_t
    6163          12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6164             : {
    6165          12 :         return blob_find_io_unit(blob, offset, false);
    6166             : }
    6167             : 
    6168             : /* START spdk_bs_create_blob */
    6169             : 
    6170             : static void
    6171        1878 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6172             : {
    6173        1878 :         struct spdk_blob *blob = cb_arg;
    6174        1878 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6175             : 
    6176        1878 :         if (bserrno != 0) {
    6177           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6178           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6179           0 :                 bs_release_md_page(blob->bs, page_idx);
    6180           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6181             :         }
    6182             : 
    6183        1878 :         blob_free(blob);
    6184             : 
    6185        1878 :         bs_sequence_finish(seq, bserrno);
    6186        1878 : }
    6187             : 
    6188             : static int
    6189        3776 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6190             :                 bool internal)
    6191             : {
    6192             :         uint64_t i;
    6193        3776 :         size_t value_len = 0;
    6194             :         int rc;
    6195        3776 :         const void *value = NULL;
    6196        3776 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6197           8 :                 return -EINVAL;
    6198             :         }
    6199        4084 :         for (i = 0; i < xattrs->count; i++) {
    6200         320 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6201         320 :                 if (value == NULL || value_len == 0) {
    6202           4 :                         return -EINVAL;
    6203             :                 }
    6204         316 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6205         316 :                 if (rc < 0) {
    6206           0 :                         return rc;
    6207             :                 }
    6208             :         }
    6209        3764 :         return 0;
    6210             : }
    6211             : 
    6212             : static void
    6213        1862 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6214             : {
    6215             : #define FIELD_OK(field) \
    6216             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6217             : 
    6218             : #define SET_FIELD(field) \
    6219             :         if (FIELD_OK(field)) { \
    6220             :                 dst->field = src->field; \
    6221             :         } \
    6222             : 
    6223        1862 :         SET_FIELD(num_clusters);
    6224        1862 :         SET_FIELD(thin_provision);
    6225        1862 :         SET_FIELD(clear_method);
    6226             : 
    6227        1862 :         if (FIELD_OK(xattrs)) {
    6228        1862 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6229             :         }
    6230             : 
    6231        1862 :         SET_FIELD(use_extent_table);
    6232        1862 :         SET_FIELD(esnap_id);
    6233        1862 :         SET_FIELD(esnap_id_len);
    6234             : 
    6235        1862 :         dst->opts_size = src->opts_size;
    6236             : 
    6237             :         /* You should not remove this statement, but need to update the assert statement
    6238             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    6239             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6240             : 
    6241             : #undef FIELD_OK
    6242             : #undef SET_FIELD
    6243        1862 : }
    6244             : 
    6245             : static void
    6246        1894 : bs_create_blob(struct spdk_blob_store *bs,
    6247             :                const struct spdk_blob_opts *opts,
    6248             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6249             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6250             : {
    6251             :         struct spdk_blob        *blob;
    6252             :         uint32_t                page_idx;
    6253        1894 :         struct spdk_bs_cpl      cpl;
    6254        1894 :         struct spdk_blob_opts   opts_local;
    6255        1894 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6256             :         spdk_bs_sequence_t      *seq;
    6257             :         spdk_blob_id            id;
    6258             :         int rc;
    6259             : 
    6260        1894 :         assert(spdk_get_thread() == bs->md_thread);
    6261             : 
    6262        1894 :         spdk_spin_lock(&bs->used_lock);
    6263        1894 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6264        1894 :         if (page_idx == UINT32_MAX) {
    6265           0 :                 spdk_spin_unlock(&bs->used_lock);
    6266           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6267           0 :                 return;
    6268             :         }
    6269        1894 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6270        1894 :         bs_claim_md_page(bs, page_idx);
    6271        1894 :         spdk_spin_unlock(&bs->used_lock);
    6272             : 
    6273        1894 :         id = bs_page_to_blobid(page_idx);
    6274             : 
    6275        1894 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6276             : 
    6277        1894 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6278        1894 :         if (opts) {
    6279        1862 :                 blob_opts_copy(opts, &opts_local);
    6280             :         }
    6281             : 
    6282        1894 :         blob = blob_alloc(bs, id);
    6283        1894 :         if (!blob) {
    6284           0 :                 rc = -ENOMEM;
    6285           0 :                 goto error;
    6286             :         }
    6287             : 
    6288        1894 :         blob->use_extent_table = opts_local.use_extent_table;
    6289        1894 :         if (blob->use_extent_table) {
    6290         968 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6291             :         }
    6292             : 
    6293        1894 :         if (!internal_xattrs) {
    6294        1622 :                 blob_xattrs_init(&internal_xattrs_default);
    6295        1622 :                 internal_xattrs = &internal_xattrs_default;
    6296             :         }
    6297             : 
    6298        1894 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6299        1894 :         if (rc < 0) {
    6300          12 :                 goto error;
    6301             :         }
    6302             : 
    6303        1882 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6304        1882 :         if (rc < 0) {
    6305           0 :                 goto error;
    6306             :         }
    6307             : 
    6308        1882 :         if (opts_local.thin_provision) {
    6309         356 :                 blob_set_thin_provision(blob);
    6310             :         }
    6311             : 
    6312        1882 :         blob_set_clear_method(blob, opts_local.clear_method);
    6313             : 
    6314        1882 :         if (opts_local.esnap_id != NULL) {
    6315          60 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6316           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 "is too long\n",
    6317             :                                     opts_local.esnap_id_len);
    6318           0 :                         rc = -EINVAL;
    6319           0 :                         goto error;
    6320             : 
    6321             :                 }
    6322          60 :                 blob_set_thin_provision(blob);
    6323          60 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6324          60 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6325          60 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6326          60 :                 if (rc != 0) {
    6327           0 :                         goto error;
    6328             :                 }
    6329             :         }
    6330             : 
    6331        1882 :         rc = blob_resize(blob, opts_local.num_clusters);
    6332        1882 :         if (rc < 0) {
    6333           4 :                 goto error;
    6334             :         }
    6335        1878 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6336        1878 :         cpl.u.blobid.cb_fn = cb_fn;
    6337        1878 :         cpl.u.blobid.cb_arg = cb_arg;
    6338        1878 :         cpl.u.blobid.blobid = blob->id;
    6339             : 
    6340        1878 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6341        1878 :         if (!seq) {
    6342           0 :                 rc = -ENOMEM;
    6343           0 :                 goto error;
    6344             :         }
    6345             : 
    6346        1878 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6347        1878 :         return;
    6348             : 
    6349          16 : error:
    6350          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %lu (clusters)\n",
    6351             :                     spdk_strerror(rc), opts_local.num_clusters);
    6352          16 :         if (blob != NULL) {
    6353          16 :                 blob_free(blob);
    6354             :         }
    6355          16 :         spdk_spin_lock(&bs->used_lock);
    6356          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6357          16 :         bs_release_md_page(bs, page_idx);
    6358          16 :         spdk_spin_unlock(&bs->used_lock);
    6359          16 :         cb_fn(cb_arg, 0, rc);
    6360             : }
    6361             : 
    6362             : void
    6363          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6364             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6365             : {
    6366          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6367          16 : }
    6368             : 
    6369             : void
    6370        1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6371             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6372             : {
    6373        1598 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6374        1598 : }
    6375             : 
    6376             : /* END spdk_bs_create_blob */
    6377             : 
    6378             : /* START blob_cleanup */
    6379             : 
    6380             : struct spdk_clone_snapshot_ctx {
    6381             :         struct spdk_bs_cpl      cpl;
    6382             :         int bserrno;
    6383             :         bool frozen;
    6384             : 
    6385             :         struct spdk_io_channel *channel;
    6386             : 
    6387             :         /* Current cluster for inflate operation */
    6388             :         uint64_t cluster;
    6389             : 
    6390             :         /* For inflation force allocation of all unallocated clusters and remove
    6391             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6392             :         bool allocate_all;
    6393             : 
    6394             :         struct {
    6395             :                 spdk_blob_id id;
    6396             :                 struct spdk_blob *blob;
    6397             :                 bool md_ro;
    6398             :         } original;
    6399             :         struct {
    6400             :                 spdk_blob_id id;
    6401             :                 struct spdk_blob *blob;
    6402             :         } new;
    6403             : 
    6404             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6405             :          * the original blobs xattrs. */
    6406             :         const struct spdk_blob_xattr_opts *xattrs;
    6407             : };
    6408             : 
    6409             : static void
    6410         346 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6411             : {
    6412         346 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6413         346 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6414             : 
    6415         346 :         if (bserrno != 0) {
    6416           6 :                 if (ctx->bserrno != 0) {
    6417           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6418             :                 } else {
    6419           6 :                         ctx->bserrno = bserrno;
    6420             :                 }
    6421             :         }
    6422             : 
    6423         346 :         switch (cpl->type) {
    6424         282 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6425         282 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6426         282 :                 break;
    6427          64 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6428          64 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6429          64 :                 break;
    6430           0 :         default:
    6431           0 :                 SPDK_UNREACHABLE();
    6432             :                 break;
    6433             :         }
    6434             : 
    6435         346 :         free(ctx);
    6436         346 : }
    6437             : 
    6438             : static void
    6439         332 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6440             : {
    6441         332 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6442         332 :         struct spdk_blob *origblob = ctx->original.blob;
    6443             : 
    6444         332 :         if (bserrno != 0) {
    6445           0 :                 if (ctx->bserrno != 0) {
    6446           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6447             :                 } else {
    6448           0 :                         ctx->bserrno = bserrno;
    6449             :                 }
    6450             :         }
    6451             : 
    6452         332 :         ctx->original.id = origblob->id;
    6453         332 :         origblob->locked_operation_in_progress = false;
    6454             : 
    6455             :         /* Revert md_ro to original state */
    6456         332 :         origblob->md_ro = ctx->original.md_ro;
    6457             : 
    6458         332 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6459         332 : }
    6460             : 
    6461             : static void
    6462         332 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6463             : {
    6464         332 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6465         332 :         struct spdk_blob *origblob = ctx->original.blob;
    6466             : 
    6467         332 :         if (bserrno != 0) {
    6468          24 :                 if (ctx->bserrno != 0) {
    6469           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6470             :                 } else {
    6471          20 :                         ctx->bserrno = bserrno;
    6472             :                 }
    6473             :         }
    6474             : 
    6475         332 :         if (ctx->frozen) {
    6476             :                 /* Unfreeze any outstanding I/O */
    6477         212 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6478             :         } else {
    6479         120 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6480             :         }
    6481             : 
    6482         332 : }
    6483             : 
    6484             : static void
    6485           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6486             : {
    6487           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6488             : 
    6489           4 :         if (bserrno != 0) {
    6490           4 :                 if (ctx->bserrno != 0) {
    6491           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6492             :                 } else {
    6493           4 :                         ctx->bserrno = bserrno;
    6494             :                 }
    6495             :         }
    6496             : 
    6497           4 :         ctx->new.id = newblob->id;
    6498           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6499           4 : }
    6500             : 
    6501             : /* END blob_cleanup */
    6502             : 
    6503             : /* START spdk_bs_create_snapshot */
    6504             : 
    6505             : static void
    6506         220 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6507             : {
    6508             :         uint64_t *cluster_temp;
    6509             :         uint64_t num_allocated_clusters_temp;
    6510             :         uint32_t *extent_page_temp;
    6511             : 
    6512         220 :         cluster_temp = blob1->active.clusters;
    6513         220 :         blob1->active.clusters = blob2->active.clusters;
    6514         220 :         blob2->active.clusters = cluster_temp;
    6515             : 
    6516         220 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6517         220 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6518         220 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6519             : 
    6520         220 :         extent_page_temp = blob1->active.extent_pages;
    6521         220 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6522         220 :         blob2->active.extent_pages = extent_page_temp;
    6523         220 : }
    6524             : 
    6525             : /* Copies an internal xattr */
    6526             : static int
    6527          28 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6528             : {
    6529          28 :         const void      *val = NULL;
    6530          28 :         size_t          len;
    6531             :         int             bserrno;
    6532             : 
    6533          28 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6534          28 :         if (bserrno != 0) {
    6535           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6536           0 :                 return bserrno;
    6537             :         }
    6538             : 
    6539          28 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6540          28 :         if (bserrno != 0) {
    6541           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6542             :                             name, toblob->id);
    6543           0 :                 return bserrno;
    6544             :         }
    6545          28 :         return 0;
    6546             : }
    6547             : 
    6548             : static void
    6549         208 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6550             : {
    6551         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6552         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6553         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6554             : 
    6555         208 :         if (bserrno != 0) {
    6556           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6557           4 :                 if (blob_is_esnap_clone(newblob)) {
    6558           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6559           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6560             :                 }
    6561           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6562           4 :                 return;
    6563             :         }
    6564             : 
    6565             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6566         204 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6567         204 :         if (bserrno != 0) {
    6568           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6569           0 :                 return;
    6570             :         }
    6571             : 
    6572         204 :         bs_blob_list_add(ctx->original.blob);
    6573             : 
    6574         204 :         spdk_blob_set_read_only(newblob);
    6575             : 
    6576             :         /* sync snapshot metadata */
    6577         204 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6578             : }
    6579             : 
    6580             : static void
    6581         212 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6582             : {
    6583         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6584         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6585         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6586             : 
    6587         212 :         if (bserrno != 0) {
    6588             :                 /* return cluster map back to original */
    6589           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6590             : 
    6591             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6592             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6593             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6594           4 :                 blob_set_thin_provision(newblob);
    6595           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6596             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6597           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6598             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6599             : 
    6600           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6601           4 :                 return;
    6602             :         }
    6603             : 
    6604             :         /* Set internal xattr for snapshot id */
    6605         208 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6606         208 :         if (bserrno != 0) {
    6607             :                 /* return cluster map back to original */
    6608           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6609           0 :                 blob_set_thin_provision(newblob);
    6610           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6611           0 :                 return;
    6612             :         }
    6613             : 
    6614             :         /* Create new back_bs_dev for snapshot */
    6615         208 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6616         208 :         if (origblob->back_bs_dev == NULL) {
    6617             :                 /* return cluster map back to original */
    6618           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6619           0 :                 blob_set_thin_provision(newblob);
    6620           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6621           0 :                 return;
    6622             :         }
    6623             : 
    6624             :         /* Remove the xattr that references an external snapshot */
    6625         208 :         if (blob_is_esnap_clone(origblob)) {
    6626          16 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6627          16 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6628          16 :                 if (bserrno != 0) {
    6629           0 :                         if (bserrno == -ENOENT) {
    6630           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6631             :                                             " xattr to remove\n", origblob->id);
    6632           0 :                                 assert(false);
    6633             :                         } else {
    6634             :                                 /* return cluster map back to original */
    6635           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6636           0 :                                 blob_set_thin_provision(newblob);
    6637           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6638           0 :                                 return;
    6639             :                         }
    6640             :                 }
    6641             :         }
    6642             : 
    6643         208 :         bs_blob_list_remove(origblob);
    6644         208 :         origblob->parent_id = newblob->id;
    6645             :         /* set clone blob as thin provisioned */
    6646         208 :         blob_set_thin_provision(origblob);
    6647             : 
    6648         208 :         bs_blob_list_add(newblob);
    6649             : 
    6650             :         /* sync clone metadata */
    6651         208 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6652             : }
    6653             : 
    6654             : static void
    6655         212 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6656             : {
    6657         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6658         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6659         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6660             :         int bserrno;
    6661             : 
    6662         212 :         if (rc != 0) {
    6663           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6664           0 :                 return;
    6665             :         }
    6666             : 
    6667         212 :         ctx->frozen = true;
    6668             : 
    6669         212 :         if (blob_is_esnap_clone(origblob)) {
    6670             :                 /* Clean up any channels associated with the original blob id because future IO will
    6671             :                  * perform IO using the snapshot blob_id.
    6672             :                  */
    6673          16 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6674             :         }
    6675         212 :         if (newblob->back_bs_dev) {
    6676         212 :                 blob_back_bs_destroy(newblob);
    6677             :         }
    6678             :         /* set new back_bs_dev for snapshot */
    6679         212 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6680             :         /* Set invalid flags from origblob */
    6681         212 :         newblob->invalid_flags = origblob->invalid_flags;
    6682             : 
    6683             :         /* inherit parent from original blob if set */
    6684         212 :         newblob->parent_id = origblob->parent_id;
    6685         212 :         switch (origblob->parent_id) {
    6686          16 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6687          16 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6688          16 :                 if (bserrno != 0) {
    6689           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6690           0 :                         return;
    6691             :                 }
    6692          16 :                 break;
    6693         144 :         case SPDK_BLOBID_INVALID:
    6694         144 :                 break;
    6695          52 :         default:
    6696             :                 /* Set internal xattr for snapshot id */
    6697          52 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6698          52 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6699          52 :                 if (bserrno != 0) {
    6700           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6701           0 :                         return;
    6702             :                 }
    6703             :         }
    6704             : 
    6705             :         /* swap cluster maps */
    6706         212 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6707             : 
    6708             :         /* Set the clear method on the new blob to match the original. */
    6709         212 :         blob_set_clear_method(newblob, origblob->clear_method);
    6710             : 
    6711             :         /* sync snapshot metadata */
    6712         212 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6713             : }
    6714             : 
    6715             : static void
    6716         216 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6717             : {
    6718         216 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6719         216 :         struct spdk_blob *origblob = ctx->original.blob;
    6720         216 :         struct spdk_blob *newblob = _blob;
    6721             : 
    6722         216 :         if (bserrno != 0) {
    6723           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6724           4 :                 return;
    6725             :         }
    6726             : 
    6727         212 :         ctx->new.blob = newblob;
    6728         212 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6729         212 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6730             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6731         212 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6732             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6733             : 
    6734         212 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6735             : }
    6736             : 
    6737             : static void
    6738         220 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6739             : {
    6740         220 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6741         220 :         struct spdk_blob *origblob = ctx->original.blob;
    6742             : 
    6743         220 :         if (bserrno != 0) {
    6744           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6745           4 :                 return;
    6746             :         }
    6747             : 
    6748         216 :         ctx->new.id = blobid;
    6749         216 :         ctx->cpl.u.blobid.blobid = blobid;
    6750             : 
    6751         216 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6752             : }
    6753             : 
    6754             : 
    6755             : static void
    6756         220 : bs_xattr_snapshot(void *arg, const char *name,
    6757             :                   const void **value, size_t *value_len)
    6758             : {
    6759         220 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6760             : 
    6761         220 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6762         220 :         *value = &blob->id;
    6763         220 :         *value_len = sizeof(blob->id);
    6764         220 : }
    6765             : 
    6766             : static void
    6767         230 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6768             : {
    6769         230 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6770         230 :         struct spdk_blob_opts opts;
    6771         230 :         struct spdk_blob_xattr_opts internal_xattrs;
    6772         230 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6773             : 
    6774         230 :         if (bserrno != 0) {
    6775           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6776           6 :                 return;
    6777             :         }
    6778             : 
    6779         224 :         ctx->original.blob = _blob;
    6780             : 
    6781         224 :         if (_blob->data_ro || _blob->md_ro) {
    6782           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6783             :                               PRIx64 "\n", _blob->id);
    6784           4 :                 ctx->bserrno = -EINVAL;
    6785           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6786           4 :                 return;
    6787             :         }
    6788             : 
    6789         220 :         if (_blob->locked_operation_in_progress) {
    6790           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6791           0 :                 ctx->bserrno = -EBUSY;
    6792           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6793           0 :                 return;
    6794             :         }
    6795             : 
    6796         220 :         _blob->locked_operation_in_progress = true;
    6797             : 
    6798         220 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6799         220 :         blob_xattrs_init(&internal_xattrs);
    6800             : 
    6801             :         /* Change the size of new blob to the same as in original blob,
    6802             :          * but do not allocate clusters */
    6803         220 :         opts.thin_provision = true;
    6804         220 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6805         220 :         opts.use_extent_table = _blob->use_extent_table;
    6806             : 
    6807             :         /* If there are any xattrs specified for snapshot, set them now */
    6808         220 :         if (ctx->xattrs) {
    6809           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6810             :         }
    6811             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6812         220 :         internal_xattrs.count = 1;
    6813         220 :         internal_xattrs.ctx = _blob;
    6814         220 :         internal_xattrs.names = xattrs_names;
    6815         220 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6816             : 
    6817         220 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6818             :                        bs_snapshot_newblob_create_cpl, ctx);
    6819             : }
    6820             : 
    6821             : void
    6822         230 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6823             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6824             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6825             : {
    6826         230 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6827             : 
    6828         230 :         if (!ctx) {
    6829           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6830           0 :                 return;
    6831             :         }
    6832         230 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6833         230 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6834         230 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6835         230 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6836         230 :         ctx->bserrno = 0;
    6837         230 :         ctx->frozen = false;
    6838         230 :         ctx->original.id = blobid;
    6839         230 :         ctx->xattrs = snapshot_xattrs;
    6840             : 
    6841         230 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6842             : }
    6843             : /* END spdk_bs_create_snapshot */
    6844             : 
    6845             : /* START spdk_bs_create_clone */
    6846             : 
    6847             : static void
    6848          48 : bs_xattr_clone(void *arg, const char *name,
    6849             :                const void **value, size_t *value_len)
    6850             : {
    6851          48 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6852             : 
    6853          48 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6854          48 :         *value = &blob->id;
    6855          48 :         *value_len = sizeof(blob->id);
    6856          48 : }
    6857             : 
    6858             : static void
    6859          48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6860             : {
    6861          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6862          48 :         struct spdk_blob *clone = _blob;
    6863             : 
    6864          48 :         ctx->new.blob = clone;
    6865          48 :         bs_blob_list_add(clone);
    6866             : 
    6867          48 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6868          48 : }
    6869             : 
    6870             : static void
    6871          48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6872             : {
    6873          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6874             : 
    6875          48 :         ctx->cpl.u.blobid.blobid = blobid;
    6876          48 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6877          48 : }
    6878             : 
    6879             : static void
    6880          52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6881             : {
    6882          52 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6883          52 :         struct spdk_blob_opts           opts;
    6884          52 :         struct spdk_blob_xattr_opts internal_xattrs;
    6885          52 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6886             : 
    6887          52 :         if (bserrno != 0) {
    6888           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6889           0 :                 return;
    6890             :         }
    6891             : 
    6892          52 :         ctx->original.blob = _blob;
    6893          52 :         ctx->original.md_ro = _blob->md_ro;
    6894             : 
    6895          52 :         if (!_blob->data_ro || !_blob->md_ro) {
    6896           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6897           4 :                 ctx->bserrno = -EINVAL;
    6898           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6899           4 :                 return;
    6900             :         }
    6901             : 
    6902          48 :         if (_blob->locked_operation_in_progress) {
    6903           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6904           0 :                 ctx->bserrno = -EBUSY;
    6905           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6906           0 :                 return;
    6907             :         }
    6908             : 
    6909          48 :         _blob->locked_operation_in_progress = true;
    6910             : 
    6911          48 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6912          48 :         blob_xattrs_init(&internal_xattrs);
    6913             : 
    6914          48 :         opts.thin_provision = true;
    6915          48 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6916          48 :         opts.use_extent_table = _blob->use_extent_table;
    6917          48 :         if (ctx->xattrs) {
    6918           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6919             :         }
    6920             : 
    6921             :         /* Set internal xattr BLOB_SNAPSHOT */
    6922          48 :         internal_xattrs.count = 1;
    6923          48 :         internal_xattrs.ctx = _blob;
    6924          48 :         internal_xattrs.names = xattr_names;
    6925          48 :         internal_xattrs.get_value = bs_xattr_clone;
    6926             : 
    6927          48 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6928             :                        bs_clone_newblob_create_cpl, ctx);
    6929             : }
    6930             : 
    6931             : void
    6932          52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6933             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6934             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6935             : {
    6936          52 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6937             : 
    6938          52 :         if (!ctx) {
    6939           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6940           0 :                 return;
    6941             :         }
    6942             : 
    6943          52 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6944          52 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6945          52 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6946          52 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6947          52 :         ctx->bserrno = 0;
    6948          52 :         ctx->xattrs = clone_xattrs;
    6949          52 :         ctx->original.id = blobid;
    6950             : 
    6951          52 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6952             : }
    6953             : 
    6954             : /* END spdk_bs_create_clone */
    6955             : 
    6956             : /* START spdk_bs_inflate_blob */
    6957             : 
    6958             : static void
    6959          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6960             : {
    6961          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6962          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6963             : 
    6964          12 :         if (bserrno != 0) {
    6965           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6966           0 :                 return;
    6967             :         }
    6968             : 
    6969             :         /* Temporarily override md_ro flag for MD modification */
    6970          12 :         _blob->md_ro = false;
    6971             : 
    6972          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6973          12 :         if (bserrno != 0) {
    6974           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6975           0 :                 return;
    6976             :         }
    6977             : 
    6978          12 :         assert(_parent != NULL);
    6979             : 
    6980          12 :         bs_blob_list_remove(_blob);
    6981          12 :         _blob->parent_id = _parent->id;
    6982             : 
    6983          12 :         blob_back_bs_destroy(_blob);
    6984          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6985          12 :         bs_blob_list_add(_blob);
    6986             : 
    6987          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6988             : }
    6989             : 
    6990             : static void
    6991           4 : bs_inflate_blob_set_esnap_refs(struct spdk_clone_snapshot_ctx *ctx)
    6992             : {
                               /*
                                * Re-parent ctx->original.blob when the blob behind its back_bs_dev has
                                * parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT: the blob drops its
                                * BLOB_SNAPSHOT xattr, receives the parent's BLOB_EXTERNAL_SNAPSHOT_ID
                                * xattr and back_bs_dev, and its metadata is synced.
                                *
                                * Completion is always reported through
                                * bs_clone_snapshot_origblob_cleanup(): directly on the error path, or as
                                * the spdk_blob_sync_md() callback otherwise.
                                */
    6993           4 :         struct spdk_blob *_blob = ctx->original.blob;
    6994           4 :         struct spdk_blob *_parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    6995             :         int bserrno;
    6996             : 
    6997           4 :         assert(_parent != NULL);
    6998           4 :         assert(_parent->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT);
    6999             : 
    7000             :         /* Temporarily override md_ro flag for MD modification */
    7001           4 :         _blob->md_ro = false;
    7002             : 
                               /* The return value of blob_remove_xattr() is not checked here. */
    7003           4 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7004           4 :         bserrno = bs_snapshot_copy_xattr(_blob, _parent, BLOB_EXTERNAL_SNAPSHOT_ID);
    7005           4 :         if (bserrno != 0) {
    7006           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7007           0 :                 return;
    7008             :         }
    7009             : 
    7010           4 :         bs_blob_list_remove(_blob);
    7011             : 
    7012           4 :         _blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7013           4 :         _blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7014             : 
                               /*
                                * Pointer copy only: after this the blob and _parent reference the same
                                * back_bs_dev object; no new device is created in this function.
                                */
    7015           4 :         blob_back_bs_destroy(_blob);
    7016           4 :         _blob->back_bs_dev = _parent->back_bs_dev;
    7017             : 
                               /* Link the blob directly after _parent via the back_bs_dev_link field. */
    7018           4 :         LIST_INSERT_AFTER(_parent, _blob, back_bs_dev_link);
    7019             : 
    7020           4 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7021             : }
    7022             : 
    7023             : static void
    7024          60 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    7025             : {
                               /*
                                * Final metadata step of inflate / decouple-parent. Called by
                                * bs_inflate_blob_touch_next() once no cluster below active.num_clusters
                                * still needs allocation.
                                *
                                * - allocate_all set: the blob loses its parent reference (snapshot or
                                *   external-snapshot xattr), its THIN_PROV flag and its back_bs_dev.
                                * - allocate_all clear: the outcome depends on the parent's own parent_id;
                                *   two of the three cases hand off to another function and return early.
                                *
                                * Paths that reach the bottom sync the metadata with
                                * bs_clone_snapshot_origblob_cleanup() as the completion callback.
                                */
    7026          60 :         struct spdk_blob *_blob = ctx->original.blob;
    7027             :         struct spdk_blob *_parent;
    7028             : 
    7029          60 :         if (ctx->allocate_all) {
    7030             :                 /* remove thin provisioning */
                                       /*
                                        * NOTE(review): the blob_remove_xattr() calls in this branch run before
                                        * the md_ro override further down and ignore their return value; confirm
                                        * blob_remove_xattr() cannot fail here when md_ro is set.
                                        */
    7031          32 :                 bs_blob_list_remove(_blob);
    7032          32 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7033           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7034           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7035             :                 } else {
    7036          24 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7037             :                 }
    7038          32 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    7039          32 :                 blob_back_bs_destroy(_blob);
    7040          32 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7041             :         } else {
    7042             :                 /* For now, esnap clones always have allocate_all set. */
    7043          28 :                 assert(!blob_is_esnap_clone(_blob));
    7044             : 
    7045          28 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    7046          28 :                 switch (_parent->parent_id) {
    7047          12 :                 case SPDK_BLOBID_INVALID:
                                               /* Parent has no parent: the blob becomes parentless, backed by a zeroes dev. */
    7048          12 :                         bs_blob_list_remove(_blob);
    7049          12 :                         _blob->parent_id = SPDK_BLOBID_INVALID;
    7050          12 :                         blob_back_bs_destroy(_blob);
    7051          12 :                         _blob->back_bs_dev = bs_create_zeroes_dev();
    7052          12 :                         break;
    7053           4 :                 case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
                                               /* That function performs its own metadata sync, hence the early return. */
    7054           4 :                         bs_inflate_blob_set_esnap_refs(ctx);
    7055           4 :                         return;
    7056          12 :                 default:
    7057             :                         /* We must change the parent of the inflated blob */
                                               /* Opens the parent's parent; work continues in bs_inflate_blob_set_parent_cpl. */
    7058          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    7059             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    7060          12 :                         return;
    7061             :                 }
    7062             :         }
    7063             : 
    7064             :         /* Temporarily override md_ro flag for MD modification */
    7065          44 :         _blob->md_ro = false;
                               /* Return value ignored; on the allocate_all path the xattr may already have been removed above. */
    7066          44 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7067          44 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7068             : 
    7069          44 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7070             : }
    7071             : 
    7072             : /*
                        * Check if cluster needs allocation.
                        *
                        * Returns:
                        *  - false if blob->active.clusters[cluster] is already non-zero;
                        *  - allocate_all if the blob has no parent (SPDK_BLOBID_INVALID);
                        *  - true if the parent is an external snapshot;
                        *  - otherwise true when allocate_all is set or the blob behind back_bs_dev
                        *    has a non-zero entry for the same cluster index.
                        *
                        * NOTE(review): `cluster` is not range-checked here. The callers visible in
                        * this file section iterate below blob->active.num_clusters; the lookup in the
                        * parent assumes its cluster array is at least as long - confirm.
                        */
    7073             : static inline bool
    7074        1280 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7075             : {
    7076             :         struct spdk_blob_bs_dev *b;
    7077             : 
    7078        1280 :         assert(blob != NULL);
    7079             : 
    7080        1280 :         if (blob->active.clusters[cluster] != 0) {
    7081             :                 /* Cluster is already allocated */
    7082          32 :                 return false;
    7083             :         }
    7084             : 
    7085        1248 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7086             :                 /* Blob has no parent blob */
    7087          80 :                 return allocate_all;
    7088             :         }
    7089             : 
    7090        1168 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
                                       /* External snapshot parent: every unallocated cluster is reported as needed. */
    7091          64 :                 return true;
    7092             :         }
    7093             : 
                               /* Regular parent: back_bs_dev is treated as a spdk_blob_bs_dev wrapping the parent blob. */
    7094        1104 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7095        1104 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7096             : }
    7097             : 
    7098             : static void
    7099         512 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7100             : {
                               /*
                                * One step of the cluster walk used by inflate / decouple-parent.
                                *
                                * cb_arg is the struct spdk_clone_snapshot_ctx; bserrno is the status of the
                                * previous step (0 on the first call from bs_inflate_blob_open_cpl()).
                                * Starting at ctx->cluster, finds the next cluster that needs allocation and
                                * submits an allocate-and-copy for it; when none is left, finishes through
                                * bs_inflate_blob_done(). Errors go to bs_clone_snapshot_origblob_cleanup().
                                */
    7101         512 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7102         512 :         struct spdk_blob *_blob = ctx->original.blob;
    7103         512 :         struct spdk_bs_cpl cpl;
    7104             :         spdk_bs_user_op_t *op;
    7105             :         uint64_t offset;
    7106             : 
    7107         512 :         if (bserrno != 0) {
    7108           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7109           0 :                 return;
    7110             :         }
    7111             : 
                               /* Skip clusters that need no work; ctx->cluster stops on the first one that does. */
    7112         700 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7113         640 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7114         452 :                         break;
    7115             :                 }
    7116             :         }
    7117             : 
    7118         512 :         if (ctx->cluster < _blob->active.num_clusters) {
    7119         452 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7120             : 
    7121             :                 /* We may safely increment a cluster before copying */
    7122         452 :                 ctx->cluster++;
    7123             : 
    7124             :                 /* Use a dummy 0B read as a context for cluster copy */
                                       /*
                                        * The completion points back at this function, so the walk presumably
                                        * resumes here once the user op completes.
                                        */
    7125         452 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7126         452 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7127         452 :                 cpl.u.blob_basic.cb_arg = ctx;
    7128             : 
    7129         452 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7130             :                                       NULL, 0, offset, 0);
    7131         452 :                 if (!op) {
    7132           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7133           0 :                         return;
    7134             :                 }
    7135             : 
    7136         452 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7137             :         } else {
                                       /* Walked past the last cluster: move on to the metadata update. */
    7138          60 :                 bs_inflate_blob_done(ctx);
    7139             :         }
    7140             : }
    7141             : 
    7142             : static void
    7143          64 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7144             : {
                               /*
                                * Completion of the spdk_bs_open_blob() issued by bs_inflate_blob().
                                *
                                * cb_arg  - the struct spdk_clone_snapshot_ctx for this operation
                                * _blob   - the opened blob (used only when bserrno == 0)
                                * bserrno - status of the open
                                *
                                * Takes the blob's locked_operation_in_progress flag, validates the request
                                * against the blob's parent_id, checks that enough free clusters exist, and
                                * then starts the cluster walk in bs_inflate_blob_touch_next().
                                */
    7145          64 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7146             :         uint64_t clusters_needed;
    7147             :         uint64_t i;
    7148             : 
    7149          64 :         if (bserrno != 0) {
    7150           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7151           0 :                 return;
    7152             :         }
    7153             : 
                               /* Remember the blob and the md_ro value it had when it was opened. */
    7154          64 :         ctx->original.blob = _blob;
    7155          64 :         ctx->original.md_ro = _blob->md_ro;
    7156             : 
    7157          64 :         if (_blob->locked_operation_in_progress) {
                                       /* The flag is left untouched on this path; the blob is only closed. */
    7158           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7159           0 :                 ctx->bserrno = -EBUSY;
    7160           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7161           0 :                 return;
    7162             :         }
    7163             : 
    7164          64 :         _blob->locked_operation_in_progress = true;
    7165             : 
    7166          64 :         switch (_blob->parent_id) {
    7167           8 :         case SPDK_BLOBID_INVALID:
    7168           8 :                 if (!ctx->allocate_all) {
    7169             :                         /* This blob has no parent, so we cannot decouple it. */
    7170           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7171           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7172           4 :                         return;
    7173             :                 }
    7174           4 :                 break;
    7175           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7176             :                 /*
    7177             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7178             :                  * clusters require allocation. Until there is a blobstore consumer that
    7179             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7180             :                  * worth the effort.
    7181             :                  */
                                       /* Forced even for decouple-parent requests (allocate_all == false on entry). */
    7182           8 :                 ctx->allocate_all = true;
    7183           8 :                 break;
    7184          48 :         default:
    7185          48 :                 break;
    7186             :         }
    7187             : 
    7188          60 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7189             :                 /* This is not thin provisioned blob. No need to inflate. */
    7190           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7191           0 :                 return;
    7192             :         }
    7193             : 
    7194             :         /* Do two passes - one to verify that we can obtain enough clusters
    7195             :          * and another to actually claim them.
    7196             :          */
                               /*
                                * This loop is the verification pass: it only counts. The second pass is
                                * the walk performed by bs_inflate_blob_touch_next().
                                */
    7197          60 :         clusters_needed = 0;
    7198         700 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7199         640 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7200         452 :                         clusters_needed++;
    7201             :                 }
    7202             :         }
    7203             : 
    7204          60 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7205             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7206           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7207           0 :                 return;
    7208             :         }
    7209             : 
    7210          60 :         ctx->cluster = 0;
    7211          60 :         bs_inflate_blob_touch_next(ctx, 0);
    7212             : }
    7213             : 
    7214             : static void
    7215          64 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7216             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7217             : {
                               /*
                                * Shared entry point for spdk_bs_inflate_blob() (allocate_all == true) and
                                * spdk_bs_blob_decouple_parent() (allocate_all == false).
                                *
                                * Allocates the operation context, stores the caller's callback and
                                * arguments in it, and opens the blob; the work continues in
                                * bs_inflate_blob_open_cpl(). If the context cannot be allocated, cb_fn is
                                * called immediately with -ENOMEM.
                                */
    7218          64 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7219             : 
    7220          64 :         if (!ctx) {
    7221           0 :                 cb_fn(cb_arg, -ENOMEM);
    7222           0 :                 return;
    7223             :         }
                               /*
                                * NOTE(review): the type is BLOB_BASIC but the callback is stored through
                                * the bs_basic union member; presumably both members have the same layout -
                                * confirm against the spdk_bs_cpl definition.
                                */
    7224          64 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7225          64 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7226          64 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7227          64 :         ctx->bserrno = 0;
    7228          64 :         ctx->original.id = blobid;
    7229          64 :         ctx->channel = channel;
    7230          64 :         ctx->allocate_all = allocate_all;
    7231             : 
    7232          64 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7233             : }
    7234             : 
    7235             : void
    7236          28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7237             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7238             : {
                               /*
                                * Public entry point: runs bs_inflate_blob() with allocate_all == true.
                                * The result is delivered through cb_fn(cb_arg, bserrno).
                                */
    7239          28 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7240          28 : }
    7241             : 
    7242             : void
    7243          36 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7244             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7245             : {
                               /*
                                * Public entry point: runs bs_inflate_blob() with allocate_all == false.
                                * The result is delivered through cb_fn(cb_arg, bserrno).
                                */
    7246          36 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7247          36 : }
    7248             : /* END spdk_bs_inflate_blob */
    7249             : 
    7250             : /* START spdk_bs_blob_shallow_copy */
    7251             : 
    7252             : struct shallow_copy_ctx {
                               /*
                                * State carried across the asynchronous steps of a shallow copy. Allocated
                                * with calloc() in spdk_bs_blob_shallow_copy() and freed in
                                * bs_shallow_copy_cleanup_finish().
                                */

                               /* Caller's completion callback and argument */
    7253             :         struct spdk_bs_cpl cpl;
                               /* Status reported to the caller's completion callback */
    7254             :         int bserrno;
    7255             : 
    7256             :         /* Blob source for copy */
    7257             :         struct spdk_blob_store *bs;
    7258             :         spdk_blob_id blobid;
                               /* Set in bs_shallow_copy_blob_open_cpl() only after validation passed */
    7259             :         struct spdk_blob *blob;
    7260             :         struct spdk_io_channel *blob_channel;
    7261             : 
    7262             :         /* Destination device for copy */
    7263             :         struct spdk_bs_dev *ext_dev;
    7264             :         struct spdk_io_channel *ext_channel;
    7265             : 
    7266             :         /* Current cluster for copy operation */
    7267             :         uint64_t cluster;
    7268             : 
    7269             :         /* Buffer for blob reading */
    7270             :         uint8_t *read_buff;
    7271             : 
    7272             :         /* Struct for external device writing */
    7273             :         struct spdk_bs_dev_cb_args ext_args;
    7274             : 
    7275             :         /* Actual number of copied clusters (only advanced when status_cb is set) */
    7276             :         uint64_t copied_clusters_count;
    7277             : 
    7278             :         /* Status callback for updates about the ongoing operation */
    7279             :         spdk_blob_shallow_copy_status status_cb;
    7280             : 
    7281             :         /* Argument passed to function status_cb */
    7282             :         void *status_cb_arg;
    7283             : };
    7284             : 
    7285             : static void
    7286          16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7287             : {
                               /*
                                * Last step of a shallow copy: releases the external device channel and the
                                * read buffer, reports ctx->bserrno to the caller's callback and frees the
                                * context. A non-zero bserrno passed in replaces any status already stored
                                * in ctx->bserrno.
                                */
    7288          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7289          16 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7290             : 
    7291          16 :         if (bserrno != 0) {
                                       /*
                                        * NOTE(review): ctx->blob is assigned in bs_shallow_copy_blob_open_cpl()
                                        * only after its validation checks, while the earlier failure paths there
                                        * close the blob with this function as callback. If such a close reports
                                        * an error, ctx->blob is still NULL (ctx comes from calloc) and the log
                                        * statement below would dereference it - confirm and guard.
                                        */
    7292           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
    7293           0 :                 ctx->bserrno = bserrno;
    7294             :         }
    7295             : 
    7296          16 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7297          16 :         spdk_free(ctx->read_buff);
    7298             : 
    7299          16 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7300             : 
    7301          16 :         free(ctx);
    7302          16 : }
    7303             : 
    7304             : static void
    7305           8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7306             : {
                               /*
                                * Completion callback for the ext_dev->write() issued in
                                * bs_shallow_copy_blob_read_cpl(). On error the operation stops: the status
                                * is stored, the blob's lock flag is released and the blob is closed.
                                * Otherwise the copy advances to the next cluster. `channel` is unused.
                                */
    7307           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7308           8 :         struct spdk_blob *_blob = ctx->blob;
    7309             : 
    7310           8 :         if (bserrno != 0) {
    7311           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7312           0 :                 ctx->bserrno = bserrno;
    7313           0 :                 _blob->locked_operation_in_progress = false;
    7314           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7315           0 :                 return;
    7316             :         }
    7317             : 
    7318           8 :         ctx->cluster++;
                               /* Progress is counted and reported only when a status callback was supplied. */
    7319           8 :         if (ctx->status_cb) {
    7320           8 :                 ctx->copied_clusters_count++;
    7321           8 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7322             :         }
    7323             : 
    7324           8 :         bs_shallow_copy_cluster_find_next(ctx);
    7325             : }
    7326             : 
    7327             : static void
    7328           8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7329             : {
                               /*
                                * Completion callback for the blob read submitted in
                                * bs_shallow_copy_cluster_find_next(). On success the buffer just read is
                                * written to the external device; bs_shallow_copy_bdev_write_cpl() runs when
                                * that write completes. On error the status is stored, the lock flag is
                                * released and the blob is closed.
                                */
    7330           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7331           8 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7332           8 :         struct spdk_blob *_blob = ctx->blob;
    7333             : 
    7334           8 :         if (bserrno != 0) {
    7335           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7336           0 :                 ctx->bserrno = bserrno;
    7337           0 :                 _blob->locked_operation_in_progress = false;
    7338           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7339           0 :                 return;
    7340             :         }
    7341             : 
    7342           8 :         ctx->ext_args.channel = ctx->ext_channel;
    7343           8 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7344           8 :         ctx->ext_args.cb_arg = ctx;
    7345             : 
                               /*
                                * NOTE(review): the LBA and length passed to ext_dev->write() are computed
                                * from the blobstore and its own device (_blob->bs, _blob->bs->dev), not
                                * from ext_dev. The open callback only requires the blobstore block size to
                                * be a multiple of ext_dev->blocklen - confirm the units are right when the
                                * two block sizes differ.
                                */
    7346           8 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7347           8 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7348           8 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7349             :                        &ctx->ext_args);
    7350             : }
    7351             : 
    7352             : static void
    7353          12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7354             : {
                               /*
                                * Advances ctx->cluster to the next cluster with a non-zero entry in the
                                * blob's own active.clusters[] and submits a one-cluster read for it
                                * (continues in bs_shallow_copy_blob_read_cpl()). When the end of the blob
                                * is reached, releases the lock flag and closes the blob.
                                */
    7355          12 :         struct shallow_copy_ctx *ctx = cb_arg;
    7356          12 :         struct spdk_blob *_blob = ctx->blob;
    7357             : 
                               /* Entries equal to 0 are skipped, i.e. never read or written out. */
    7358          20 :         while (ctx->cluster < _blob->active.num_clusters) {
    7359          16 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7360           8 :                         break;
    7361             :                 }
    7362             : 
    7363           8 :                 ctx->cluster++;
    7364             :         }
    7365             : 
    7366          12 :         if (ctx->cluster < _blob->active.num_clusters) {
    7367           8 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7368           8 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7369           8 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7370             :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7371             :         } else {
                                       /* Nothing left to copy: ctx->bserrno keeps whatever was stored so far. */
    7372           4 :                 _blob->locked_operation_in_progress = false;
    7373           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7374             :         }
    7375          12 : }
    7376             : 
    7377             : static void
    7378          16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7379             : {
                               /*
                                * Completion of the spdk_bs_open_blob() issued by
                                * spdk_bs_blob_shallow_copy(). Validates the request and starts the copy:
                                *
                                *  - blob not read only                          -> -EPERM
                                *  - external device smaller than the blob       -> -EINVAL
                                *  - blobstore block size not a multiple of the
                                *    external device block size                  -> -EINVAL
                                *  - another locked operation in progress        -> -EBUSY
                                *
                                * Every failure after a successful open closes the blob and finishes in
                                * bs_shallow_copy_cleanup_finish().
                                */
    7380          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7381          16 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7382             :         uint32_t blob_block_size;
    7383             :         uint64_t blob_total_size;
    7384             : 
    7385          16 :         if (bserrno != 0) {
                                       /* 0 is passed on purpose: the open error is already stored in ctx->bserrno. */
    7386           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7387           0 :                 ctx->bserrno = bserrno;
    7388           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7389           0 :                 return;
    7390             :         }
    7391             : 
    7392          16 :         if (!spdk_blob_is_read_only(_blob)) {
    7393           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7394           4 :                 ctx->bserrno = -EPERM;
    7395           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7396           4 :                 return;
    7397             :         }
    7398             : 
    7399          12 :         blob_block_size = _blob->bs->dev->blocklen;
    7400          12 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7401             : 
    7402          12 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7403           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7404             :                             _blob->id);
    7405           4 :                 ctx->bserrno = -EINVAL;
    7406           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7407           4 :                 return;
    7408             :         }
    7409             : 
    7410           8 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7411           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7412             : blobstore block size\n", _blob->id);
    7413           4 :                 ctx->bserrno = -EINVAL;
    7414           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7415           4 :                 return;
    7416             :         }
    7417             : 
                               /* Up to this point ctx->blob has not been assigned; the paths above used _blob directly. */
    7418           4 :         ctx->blob = _blob;
    7419             : 
    7420           4 :         if (_blob->locked_operation_in_progress) {
                                       /* The flag is left untouched on this path; the blob is only closed. */
    7421           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7422           0 :                 ctx->bserrno = -EBUSY;
    7423           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7424           0 :                 return;
    7425             :         }
    7426             : 
    7427           4 :         _blob->locked_operation_in_progress = true;
    7428             : 
    7429           4 :         ctx->cluster = 0;
    7430           4 :         bs_shallow_copy_cluster_find_next(ctx);
    7431             : }
    7432             : 
    7433             : int
    7434          16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7435             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7436             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7437             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7438             : {
                               /*
                                * Start copying the clusters that are allocated in blob `blobid` itself to
                                * the external device `ext_dev`.
                                *
                                * bs            - blobstore containing the blob
                                * channel       - I/O channel used for reading the blob
                                * blobid        - blob to copy from
                                * ext_dev       - destination device; a channel is created on it here
                                * status_cb_fn  - optional progress callback, called after each written
                                *                 cluster with the running count
                                * status_cb_arg - argument for status_cb_fn
                                * cb_fn, cb_arg - completion callback and its argument
                                *
                                * Returns 0 once the blob open has been issued; the final status is then
                                * delivered through cb_fn. Returns -ENOMEM, without calling cb_fn, when the
                                * context, the read buffer or the external channel cannot be created.
                                */
    7439             :         struct shallow_copy_ctx *ctx;
    7440             :         struct spdk_io_channel *ext_channel;
    7441             : 
    7442          16 :         ctx = calloc(1, sizeof(*ctx));
    7443          16 :         if (!ctx) {
    7444           0 :                 return -ENOMEM;
    7445             :         }
    7446             : 
    7447          16 :         ctx->bs = bs;
    7448          16 :         ctx->blobid = blobid;
                               /*
                                * NOTE(review): the callback is stored through u.bs_basic but invoked through
                                * u.blob_basic in bs_shallow_copy_cleanup_finish(); presumably both union
                                * members have the same layout - confirm.
                                */
    7449          16 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7450          16 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7451          16 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7452          16 :         ctx->bserrno = 0;
    7453          16 :         ctx->blob_channel = channel;
    7454          16 :         ctx->status_cb = status_cb_fn;
    7455          16 :         ctx->status_cb_arg = status_cb_arg;
                               /* One cluster-sized buffer, reused for every cluster that is copied. */
    7456          16 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7457             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7458          16 :         if (!ctx->read_buff) {
    7459           0 :                 free(ctx);
    7460           0 :                 return -ENOMEM;
    7461             :         }
    7462             : 
    7463          16 :         ext_channel = ext_dev->create_channel(ext_dev);
    7464          16 :         if (!ext_channel) {
    7465           0 :                 spdk_free(ctx->read_buff);
    7466           0 :                 free(ctx);
    7467           0 :                 return -ENOMEM;
    7468             :         }
    7469          16 :         ctx->ext_dev = ext_dev;
    7470          16 :         ctx->ext_channel = ext_channel;
    7471             : 
    7472          16 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7473             : 
    7474          16 :         return 0;
    7475             : }
    7476             : /* END spdk_bs_blob_shallow_copy */
    7477             : 
    7478             : /* START spdk_bs_blob_set_parent */
    7479             : 
    7480             : struct set_parent_ctx {
                               /* State carried across the asynchronous steps of a set-parent operation. */
    7481             :         struct spdk_blob_store *bs;
    7482             :         int                     bserrno;	/* first error recorded; passed to cb_fn at the end */
    7483             :         spdk_bs_op_complete     cb_fn;		/* caller's completion callback */
    7484             :         void                    *cb_arg;	/* argument for cb_fn */
    7485             : 
    7486             :         struct spdk_blob        *blob;		/* blob whose parent is being changed */
    7487             :         bool                    blob_md_ro;	/* value written back to blob->md_ro in bs_set_parent_close_blob() */
    7488             : 
    7489             :         struct blob_parent      parent;		/* new parent; the functions here use parent.u.snapshot */
    7490             : };
    7491             : 
    7492             : static void
    7493          24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7494             : {
                               /*
                                * Last step of set-parent: reports ctx->bserrno to the caller's callback
                                * and frees the context. A non-zero bserrno is logged and stored only if no
                                * earlier error was recorded.
                                */
    7495          24 :         struct set_parent_ctx *ctx = cb_arg;
    7496             : 
    7497          24 :         assert(ctx != NULL);
    7498             : 
    7499          24 :         if (bserrno != 0) {
    7500           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7501           0 :                 if (ctx->bserrno == 0) {
    7502           0 :                         ctx->bserrno = bserrno;
    7503             :                 }
    7504             :         }
    7505             : 
    7506          24 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7507             : 
    7508          24 :         free(ctx);
    7509          24 : }
    7510             : 
    7511             : static void
    7512          20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7513             : {
                               /*
                                * Used as the completion callback of the blob close in
                                * bs_set_parent_close_blob(). If an error was already recorded in
                                * ctx->bserrno, the snapshot handle in ctx->parent.u.snapshot.blob is closed
                                * and the operation finishes in bs_set_parent_cleanup_finish(); the bserrno
                                * argument is not looked at on that path. Otherwise bserrno is recorded and
                                * the operation finishes immediately.
                                *
                                * NOTE(review): on the success path the snapshot handle is not closed here;
                                * presumably the reference is kept on purpose for the blob's new
                                * back_bs_dev - confirm.
                                */
    7514          20 :         struct set_parent_ctx *ctx = cb_arg;
    7515             : 
    7516          20 :         if (ctx->bserrno != 0) {
    7517           8 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7518           8 :                 return;
    7519             :         }
    7520             : 
    7521          12 :         if (bserrno != 0) {
    7522           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7523           0 :                 ctx->bserrno = bserrno;
    7524             :         }
    7525             : 
    7526          12 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7527             : }
    7528             : 
    7529             : static void
    7530          12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7531             : {
                               /*
                                * Runs after the metadata sync (or directly from
                                * bs_set_parent_set_back_bs_dev_done() on error). Restores the blob's md_ro
                                * flag, releases the lock flags on both the blob and the snapshot, and
                                * closes the blob; bs_set_parent_close_snapshot() runs when that close
                                * completes. bserrno is logged and stored only if no earlier error exists.
                                */
    7532          12 :         struct set_parent_ctx *ctx = cb_arg;
    7533          12 :         struct spdk_blob *blob = ctx->blob;
    7534          12 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7535             : 
    7536          12 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7537           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7538           0 :                 ctx->bserrno = bserrno;
    7539             :         }
    7540             : 
    7541             :         /* Revert md_ro to original state */
    7542          12 :         blob->md_ro = ctx->blob_md_ro;
    7543             : 
    7544          12 :         blob->locked_operation_in_progress = false;
    7545          12 :         snapshot->locked_operation_in_progress = false;
    7546             : 
    7547          12 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7548          12 : }
    7549             : 
    7550             : static void
    7551          12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7552             : {
                               /*
                                * Completion callback for the step that installs the blob's new back_bs_dev.
                                * On success the blob's metadata is synced and bs_set_parent_close_blob()
                                * runs afterwards; on error the sync is skipped and
                                * bs_set_parent_close_blob() is called directly.
                                */
    7553          12 :         struct set_parent_ctx *ctx = cb_arg;
    7554          12 :         struct spdk_blob *blob = ctx->blob;
    7555             : 
    7556          12 :         if (bserrno != 0) {
                                       /* The error is stored first, so bs_set_parent_close_blob() does not log it again. */
    7557           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7558           0 :                 ctx->bserrno = bserrno;
    7559           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7560           0 :                 return;
    7561             :         }
    7562             : 
    7563          12 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7564             : }
    7565             : 
    7566             : static int
    7567          12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7568             : {
    7569             :         int rc;
    7570             : 
    7571          12 :         bs_blob_list_remove(blob);
    7572             : 
    7573          12 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7574          12 :         if (rc != 0) {
    7575           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7576           0 :                 return rc;
    7577             :         }
    7578          12 :         blob->parent_id = parent->u.snapshot.id;
    7579             : 
    7580          12 :         if (blob_is_esnap_clone(blob)) {
    7581             :                 /* Remove the xattr that references the external snapshot */
    7582           4 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7583           4 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7584             :         }
    7585             : 
    7586          12 :         bs_blob_list_add(blob);
    7587             : 
    7588          12 :         return 0;
    7589             : }
    7590             : 
    7591             : static void
    7592          20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7593             : {
    7594          20 :         struct set_parent_ctx *ctx = cb_arg;
    7595          20 :         struct spdk_blob *blob = ctx->blob;
    7596             :         struct spdk_bs_dev *back_bs_dev;
    7597             : 
    7598          20 :         if (bserrno != 0) {
    7599           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7600           0 :                 ctx->bserrno = bserrno;
    7601           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7602           0 :                 return;
    7603             :         }
    7604             : 
    7605          20 :         ctx->parent.u.snapshot.blob = snapshot;
    7606          20 :         ctx->parent.u.snapshot.id = snapshot->id;
    7607             : 
    7608          20 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7609           4 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7610           4 :                 ctx->bserrno = -EINVAL;
    7611           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7612           4 :                 return;
    7613             :         }
    7614             : 
    7615          16 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7616           4 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7617           4 :                 ctx->bserrno = -EINVAL;
    7618           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7619           4 :                 return;
    7620             :         }
    7621             : 
    7622          12 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7623           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7624           0 :                 ctx->bserrno = -EBUSY;
    7625           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7626           0 :                 return;
    7627             :         }
    7628             : 
    7629          12 :         blob->locked_operation_in_progress = true;
    7630          12 :         snapshot->locked_operation_in_progress = true;
    7631             : 
    7632             :         /* Temporarily override md_ro flag for MD modification */
    7633          12 :         blob->md_ro = false;
    7634             : 
    7635          12 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7636             : 
    7637          12 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7638             :                              bs_set_parent_set_back_bs_dev_done,
    7639             :                              ctx);
    7640             : }
    7641             : 
    7642             : static void
    7643          24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7644             : {
    7645          24 :         struct set_parent_ctx *ctx = cb_arg;
    7646             : 
    7647          24 :         if (bserrno != 0) {
    7648           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7649           0 :                 ctx->bserrno = bserrno;
    7650           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7651           0 :                 return;
    7652             :         }
    7653             : 
    7654          24 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7655           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7656           4 :                 ctx->bserrno = -EINVAL;
    7657           4 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7658           4 :                 return;
    7659             :         }
    7660             : 
    7661          20 :         ctx->blob = blob;
    7662          20 :         ctx->blob_md_ro = blob->md_ro;
    7663             : 
    7664          20 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7665             : }
    7666             : 
    7667             : void
    7668          36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7669             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7670             : {
    7671             :         struct set_parent_ctx *ctx;
    7672             : 
    7673          36 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7674           4 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7675           4 :                 cb_fn(cb_arg, -EINVAL);
    7676           4 :                 return;
    7677             :         }
    7678             : 
    7679          32 :         if (blob_id == snapshot_id) {
    7680           4 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7681           4 :                 cb_fn(cb_arg, -EINVAL);
    7682           4 :                 return;
    7683             :         }
    7684             : 
    7685          28 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7686           4 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7687           4 :                 cb_fn(cb_arg, -EEXIST);
    7688           4 :                 return;
    7689             :         }
    7690             : 
    7691          24 :         ctx = calloc(1, sizeof(*ctx));
    7692          24 :         if (!ctx) {
    7693           0 :                 cb_fn(cb_arg, -ENOMEM);
    7694           0 :                 return;
    7695             :         }
    7696             : 
    7697          24 :         ctx->bs = bs;
    7698          24 :         ctx->parent.u.snapshot.id = snapshot_id;
    7699          24 :         ctx->cb_fn = cb_fn;
    7700          24 :         ctx->cb_arg = cb_arg;
    7701          24 :         ctx->bserrno = 0;
    7702             : 
    7703          24 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7704             : }
    7705             : /* END spdk_bs_blob_set_parent */
    7706             : 
    7707             : /* START spdk_bs_blob_set_external_parent */
    7708             : 
    7709             : static void
    7710          16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7711             : {
    7712          16 :         struct set_parent_ctx *ctx = cb_arg;
    7713             : 
    7714          16 :         if (bserrno != 0) {
    7715           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7716           0 :                 if (ctx->bserrno == 0) {
    7717           0 :                         ctx->bserrno = bserrno;
    7718             :                 }
    7719             :         }
    7720             : 
    7721          16 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7722             : 
    7723          16 :         free(ctx->parent.u.esnap.id);
    7724          16 :         free(ctx);
    7725          16 : }
    7726             : 
    7727             : static void
    7728           8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7729             : {
    7730           8 :         struct set_parent_ctx *ctx = cb_arg;
    7731           8 :         struct spdk_blob *blob = ctx->blob;
    7732             : 
    7733           8 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7734           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7735           0 :                 ctx->bserrno = bserrno;
    7736             :         }
    7737             : 
    7738             :         /* Revert md_ro to original state */
    7739           8 :         blob->md_ro = ctx->blob_md_ro;
    7740             : 
    7741           8 :         blob->locked_operation_in_progress = false;
    7742             : 
    7743           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7744           8 : }
    7745             : 
    7746             : static void
    7747           8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7748             : {
    7749           8 :         struct set_parent_ctx *ctx = cb_arg;
    7750           8 :         struct spdk_blob *blob = ctx->blob;
    7751             : 
    7752           8 :         if (bserrno != 0) {
    7753           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7754           0 :                 ctx->bserrno = bserrno;
    7755           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7756           0 :                 return;
    7757             :         }
    7758             : 
    7759           8 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7760             : }
    7761             : 
    7762             : static int
    7763           8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7764             : {
    7765             :         int rc;
    7766             : 
    7767           8 :         bs_blob_list_remove(blob);
    7768             : 
    7769           8 :         if (spdk_blob_is_clone(blob)) {
    7770             :                 /* Remove the xattr that references the snapshot */
    7771           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7772           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7773             :         }
    7774             : 
    7775           8 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7776           8 :                             parent->u.esnap.id_len, true);
    7777           8 :         if (rc != 0) {
    7778           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7779           0 :                 return rc;
    7780             :         }
    7781           8 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7782           8 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7783             : 
    7784           8 :         bs_blob_list_add(blob);
    7785             : 
    7786           8 :         return 0;
    7787             : }
    7788             : 
    7789             : static void
    7790          16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7791             : {
    7792          16 :         struct set_parent_ctx *ctx = cb_arg;
    7793          16 :         const void *esnap_id;
    7794          16 :         size_t esnap_id_len;
    7795             :         int rc;
    7796             : 
    7797          16 :         if (bserrno != 0) {
    7798           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7799           0 :                 ctx->bserrno = bserrno;
    7800           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7801           0 :                 return;
    7802             :         }
    7803             : 
    7804          16 :         ctx->blob = blob;
    7805          16 :         ctx->blob_md_ro = blob->md_ro;
    7806             : 
    7807          16 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7808          16 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7809           4 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7810           4 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7811           4 :                 ctx->bserrno = -EEXIST;
    7812           4 :                 goto error;
    7813             :         }
    7814             : 
    7815          12 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7816           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7817           4 :                 ctx->bserrno = -EINVAL;
    7818           4 :                 goto error;
    7819             :         }
    7820             : 
    7821           8 :         if (blob->locked_operation_in_progress) {
    7822           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7823           0 :                 ctx->bserrno = -EBUSY;
    7824           0 :                 goto error;
    7825             :         }
    7826             : 
    7827           8 :         blob->locked_operation_in_progress = true;
    7828             : 
    7829             :         /* Temporarily override md_ro flag for MD modification */
    7830           8 :         blob->md_ro = false;
    7831             : 
    7832           8 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7833             :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7834           8 :         return;
    7835             : 
    7836           8 : error:
    7837           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7838             : }
    7839             : 
    7840             : void
    7841          24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7842             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7843             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7844             : {
    7845             :         struct set_parent_ctx *ctx;
    7846             :         uint64_t esnap_dev_size, cluster_sz;
    7847             : 
    7848          24 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7849           4 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7850           4 :                 cb_fn(cb_arg, -EINVAL);
    7851           4 :                 return;
    7852             :         }
    7853             : 
    7854          20 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7855          20 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7856          20 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7857           4 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7858             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7859           4 :                 cb_fn(cb_arg, -EINVAL);
    7860           4 :                 return;
    7861             :         }
    7862             : 
    7863          16 :         ctx = calloc(1, sizeof(*ctx));
    7864          16 :         if (!ctx) {
    7865           0 :                 cb_fn(cb_arg, -ENOMEM);
    7866           0 :                 return;
    7867             :         }
    7868             : 
    7869          16 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7870          16 :         if (!ctx->parent.u.esnap.id) {
    7871           0 :                 free(ctx);
    7872           0 :                 cb_fn(cb_arg, -ENOMEM);
    7873           0 :                 return;
    7874             :         }
    7875             : 
    7876          16 :         ctx->bs = bs;
    7877          16 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7878          16 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7879          16 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7880          16 :         ctx->cb_fn = cb_fn;
    7881          16 :         ctx->cb_arg = cb_arg;
    7882          16 :         ctx->bserrno = 0;
    7883             : 
    7884          16 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7885             : }
    7886             : /* END spdk_bs_blob_set_external_parent */
    7887             : 
    7888             : /* START spdk_blob_resize */
    7889             : struct spdk_bs_resize_ctx { /* State carried through the freeze -> resize -> unfreeze steps of spdk_blob_resize() */
    7890             :         spdk_blob_op_complete cb_fn; /* User completion, invoked once with the final result */
    7891             :         void *cb_arg; /* Opaque argument handed back to cb_fn */
    7892             :         struct spdk_blob *blob; /* Blob being resized; holds locked_operation_in_progress meanwhile */
    7893             :         uint64_t sz; /* Requested size as passed to blob_resize() (logged as a cluster count) */
    7894             :         int rc; /* Return value of blob_resize(), reported after I/O is unfrozen */
    7895             : };
    7896             : 
    7897             : static void
    7898         202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7899             : {
    7900         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7901             : 
    7902         202 :         if (rc != 0) {
    7903           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7904             :         }
    7905             : 
    7906         202 :         if (ctx->rc != 0) {
    7907           4 :                 SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc);
    7908           4 :                 rc = ctx->rc;
    7909             :         }
    7910             : 
    7911         202 :         ctx->blob->locked_operation_in_progress = false;
    7912             : 
    7913         202 :         ctx->cb_fn(ctx->cb_arg, rc);
    7914         202 :         free(ctx);
    7915         202 : }
    7916             : 
    7917             : static void
    7918         202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7919             : {
    7920         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7921             : 
    7922         202 :         if (rc != 0) {
    7923           0 :                 ctx->blob->locked_operation_in_progress = false;
    7924           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7925           0 :                 free(ctx);
    7926           0 :                 return;
    7927             :         }
    7928             : 
    7929         202 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7930             : 
    7931         202 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7932             : }
    7933             : 
    7934             : void
    7935         216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7936             : {
    7937             :         struct spdk_bs_resize_ctx *ctx;
    7938             : 
    7939         216 :         blob_verify_md_op(blob);
    7940             : 
    7941         216 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7942             : 
    7943         216 :         if (blob->md_ro) {
    7944           4 :                 cb_fn(cb_arg, -EPERM);
    7945           4 :                 return;
    7946             :         }
    7947             : 
    7948         212 :         if (sz == blob->active.num_clusters) {
    7949          10 :                 cb_fn(cb_arg, 0);
    7950          10 :                 return;
    7951             :         }
    7952             : 
    7953         202 :         if (blob->locked_operation_in_progress) {
    7954           0 :                 cb_fn(cb_arg, -EBUSY);
    7955           0 :                 return;
    7956             :         }
    7957             : 
    7958         202 :         ctx = calloc(1, sizeof(*ctx));
    7959         202 :         if (!ctx) {
    7960           0 :                 cb_fn(cb_arg, -ENOMEM);
    7961           0 :                 return;
    7962             :         }
    7963             : 
    7964         202 :         blob->locked_operation_in_progress = true;
    7965         202 :         ctx->cb_fn = cb_fn;
    7966         202 :         ctx->cb_arg = cb_arg;
    7967         202 :         ctx->blob = blob;
    7968         202 :         ctx->sz = sz;
    7969         202 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7970             : }
    7971             : 
    7972             : /* END spdk_blob_resize */
    7973             : 
    7974             : 
    7975             : /* START spdk_bs_delete_blob */
    7976             : 
    7977             : static void
    7978        1492 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7979             : {
    7980        1492 :         spdk_bs_sequence_t *seq = cb_arg;
    7981             : 
    7982        1492 :         bs_sequence_finish(seq, bserrno);
    7983        1492 : }
    7984             : 
    7985             : static void
    7986        1492 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7987             : {
    7988        1492 :         struct spdk_blob *blob = cb_arg;
    7989             : 
    7990        1492 :         if (bserrno != 0) {
    7991             :                 /*
    7992             :                  * We already removed this blob from the blobstore tailq, so
    7993             :                  *  we need to free it here since this is the last reference
    7994             :                  *  to it.
    7995             :                  */
    7996           0 :                 blob_free(blob);
    7997           0 :                 bs_delete_close_cpl(seq, bserrno);
    7998           0 :                 return;
    7999             :         }
    8000             : 
    8001             :         /*
    8002             :          * This will immediately decrement the ref_count and call
    8003             :          *  the completion routine since the metadata state is clean.
    8004             :          *  By calling spdk_blob_close, we reduce the number of call
    8005             :          *  points into code that touches the blob->open_ref count
    8006             :          *  and the blobstore's blob list.
    8007             :          */
    8008        1492 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    8009             : }
    8010             : 
    8011             : struct delete_snapshot_ctx { /* State shared by the delete-blob / delete-snapshot callback chain */
    8012             :         struct spdk_blob_list *parent_snapshot_entry; /* NOTE(review): not referenced in the callbacks visible here - confirm usage */
    8013             :         struct spdk_blob *snapshot; /* Blob being deleted; passed to cb_fn when the chain finishes */
    8014             :         struct spdk_blob_md_page *page; /* Released with spdk_free() in delete_blob_cleanup_finish() */
    8015             :         bool snapshot_md_ro; /* Saved snapshot->md_ro, restored during cleanup */
    8016             :         struct spdk_blob *clone; /* The snapshot's clone that takes part in the operation */
    8017             :         bool clone_md_ro; /* Saved clone->md_ro, restored during cleanup */
    8018             :         spdk_blob_op_with_handle_complete cb_fn; /* Completion invoked with (cb_arg, snapshot, bserrno) */
    8019             :         void *cb_arg; /* Opaque argument handed back to cb_fn */
    8020             :         int bserrno; /* Error recorded by the chain; 0 while everything succeeded */
    8021             :         uint32_t next_extent_page; /* NOTE(review): not referenced in the callbacks visible here - confirm usage */
    8022             : };
    8023             : 
    8024             : static void
    8025         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    8026             : {
    8027         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8028             : 
    8029         110 :         if (bserrno != 0) {
    8030           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    8031             :         }
    8032             : 
    8033         110 :         assert(ctx != NULL);
    8034             : 
    8035         110 :         if (bserrno != 0 && ctx->bserrno == 0) {
    8036           0 :                 ctx->bserrno = bserrno;
    8037             :         }
    8038             : 
    8039         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    8040         110 :         spdk_free(ctx->page);
    8041         110 :         free(ctx);
    8042         110 : }
    8043             : 
    8044             : static void
    8045          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    8046             : {
    8047          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8048             : 
    8049          22 :         if (bserrno != 0) {
    8050           0 :                 ctx->bserrno = bserrno;
    8051           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    8052             :         }
    8053             : 
    8054          22 :         if (ctx->bserrno != 0) {
    8055          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    8056          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    8057          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    8058             :         }
    8059             : 
    8060          22 :         ctx->snapshot->locked_operation_in_progress = false;
    8061          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8062             : 
    8063          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8064          22 : }
    8065             : 
    8066             : static void
    8067          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8068             : {
    8069          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8070             : 
    8071          12 :         ctx->clone->locked_operation_in_progress = false;
    8072          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8073             : 
    8074          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8075          12 : }
    8076             : 
    8077             : static void
    8078          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8079             : {
    8080          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8081             : 
    8082          48 :         if (bserrno) {
    8083           0 :                 ctx->bserrno = bserrno;
    8084           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8085           0 :                 return;
    8086             :         }
    8087             : 
    8088          48 :         ctx->clone->locked_operation_in_progress = false;
    8089          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8090             : }
    8091             : 
    8092             : static void
    8093          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8094             : {
    8095          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8096          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8097          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    8098          52 :         struct spdk_blob_list *clone_entry = NULL;
    8099          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8100             : 
    8101          52 :         if (bserrno) {
    8102           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8103           4 :                 ctx->bserrno = bserrno;
    8104           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8105           4 :                 return;
    8106             :         }
    8107             : 
    8108             :         /* Get snapshot entry for the snapshot we want to remove */
    8109          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8110             : 
    8111          48 :         assert(snapshot_entry != NULL);
    8112             : 
    8113             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8114          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8115          48 :         assert(clone_entry != NULL);
    8116          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8117          48 :         snapshot_entry->clone_count--;
    8118          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8119             : 
    8120          48 :         switch (ctx->snapshot->parent_id) {
    8121          40 :         case SPDK_BLOBID_INVALID:
    8122             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8123             :                 /* No parent snapshot - just remove clone entry */
    8124          40 :                 free(clone_entry);
    8125          40 :                 break;
    8126           8 :         default:
    8127             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8128             :                  * update parent snapshot (remove current clone, add new one inherited from
    8129             :                  * the snapshot that is being removed) */
    8130             : 
    8131             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8132             :                  * snapshot that we are removing */
    8133           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8134             :                                                     &snapshot_clone_entry);
    8135             : 
    8136             :                 /* Switch clone entry in parent snapshot */
    8137           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8138           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8139           8 :                 free(snapshot_clone_entry);
    8140             :         }
    8141             : 
    8142             :         /* Restore md_ro flags */
    8143          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8144          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8145             : 
    8146          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8147             : }
    8148             : 
    8149             : static void
    8150          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8151             : {
    8152          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8153             :         uint64_t i;
    8154             : 
    8155          56 :         ctx->snapshot->md_ro = false;
    8156             : 
    8157          56 :         if (bserrno) {
    8158           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8159           4 :                 ctx->bserrno = bserrno;
    8160             : 
    8161             :                 /* Restore snapshot to previous state */
    8162           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8163           4 :                 if (bserrno != 0) {
    8164           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8165           0 :                         return;
    8166             :                 }
    8167             : 
    8168           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8169           4 :                 return;
    8170             :         }
    8171             : 
    8172             :         /* Clear cluster map entries for snapshot */
    8173         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8174         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8175         492 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8176         328 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8177             :                         }
    8178         492 :                         ctx->snapshot->active.clusters[i] = 0;
    8179             :                 }
    8180             :         }
    8181          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8182          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    8183          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8184          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8185             :                 }
    8186             :         }
    8187             : 
    8188          52 :         blob_set_thin_provision(ctx->snapshot);
    8189          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8190             : 
    8191          52 :         if (ctx->parent_snapshot_entry != NULL) {
    8192           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8193             :         }
    8194             : 
    8195          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8196             : }
    8197             : 
    8198             : static void
    8199          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8200             : {
    8201             :         int bserrno;
    8202             : 
    8203             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8204          56 :         blob_back_bs_destroy(ctx->clone);
    8205             : 
    8206             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8207          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8208           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8209             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8210           8 :                 if (bserrno != 0) {
    8211           0 :                         ctx->bserrno = bserrno;
    8212             : 
    8213             :                         /* Restore snapshot to previous state */
    8214           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8215           0 :                         if (bserrno != 0) {
    8216           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8217           0 :                                 return;
    8218             :                         }
    8219             : 
    8220           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8221           0 :                         return;
    8222             :                 }
    8223           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8224           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8225             :                 /* Do not delete the external snapshot along with this snapshot */
    8226           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8227           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8228          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8229             :                 /* ...to parent snapshot */
    8230           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8231           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8232           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8233             :                                sizeof(spdk_blob_id),
    8234             :                                true);
    8235             :         } else {
    8236             :                 /* ...to blobid invalid and zeroes dev */
    8237          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8238          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8239          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8240             :         }
    8241             : 
    8242          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8243             : }
    8244             : 
    8245             : static void
    8246          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8247             : {
    8248          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8249             :         uint32_t *extent_page;
    8250             :         uint64_t i;
    8251             : 
    8252          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8253          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    8254          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8255             :                         /* No extent page to use from snapshot */
    8256           8 :                         continue;
    8257             :                 }
    8258             : 
    8259          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8260          20 :                 if (*extent_page == 0) {
    8261             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8262          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8263          18 :                         continue;
    8264             :                 }
    8265             : 
    8266             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8267             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8268           2 :                 ctx->next_extent_page = i + 1;
    8269           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8270             : 
    8271           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8272             :                                        delete_snapshot_update_extent_pages, ctx);
    8273           2 :                 return;
    8274             :         }
    8275          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8276             : }
    8277             : 
    8278             : static void
    8279          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8280             : {
    8281          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8282             :         uint64_t i;
    8283             : 
    8284             :         /* Temporarily override md_ro flag for clone for MD modification */
    8285          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8286          60 :         ctx->clone->md_ro = false;
    8287             : 
    8288          60 :         if (bserrno) {
    8289           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8290           4 :                 ctx->bserrno = bserrno;
    8291           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8292           4 :                 return;
    8293             :         }
    8294             : 
    8295             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    8296         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8297         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    8298         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8299         532 :                         if (ctx->clone->active.clusters[i] != 0) {
    8300         368 :                                 ctx->clone->active.num_allocated_clusters++;
    8301             :                         }
    8302             :                 }
    8303             :         }
    8304          56 :         ctx->next_extent_page = 0;
    8305          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    8306             : }
    8307             : 
    8308             : static void
    8309           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8310             : {
    8311           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8312             : 
    8313           8 :         if (bserrno != 0) {
    8314           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8315             :                             blob->id, bserrno);
    8316             :                 /* That error should not stop us from syncing metadata. */
    8317             :         }
    8318             : 
    8319           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8320           8 : }
    8321             : 
    8322             : static void
    8323          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8324             : {
    8325          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8326             : 
    8327          60 :         if (bserrno) {
    8328           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8329           0 :                 ctx->bserrno = bserrno;
    8330           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8331           0 :                 return;
    8332             :         }
    8333             : 
    8334             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8335          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8336          60 :         ctx->snapshot->md_ro = false;
    8337             : 
    8338             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8339          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8340             :                                       sizeof(spdk_blob_id), true);
    8341          60 :         if (ctx->bserrno != 0) {
    8342           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8343           0 :                 return;
    8344             :         }
    8345             : 
    8346          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8347           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8348             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8349             :                                                    ctx);
    8350           8 :                 return;
    8351             :         }
    8352             : 
    8353          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8354             : }
    8355             : 
    8356             : static void
    8357          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8358             : {
    8359          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8360             : 
    8361          70 :         if (bserrno) {
    8362          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    8363          10 :                 ctx->bserrno = bserrno;
    8364          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8365          10 :                 return;
    8366             :         }
    8367             : 
    8368          60 :         ctx->clone = clone;
    8369             : 
    8370          60 :         if (clone->locked_operation_in_progress) {
    8371           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8372           0 :                 ctx->bserrno = -EBUSY;
    8373           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8374           0 :                 return;
    8375             :         }
    8376             : 
    8377          60 :         clone->locked_operation_in_progress = true;
    8378             : 
    8379          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8380             : }
    8381             : 
    8382             : static void
    8383          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8384             : {
    8385          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    8386          70 :         struct spdk_blob_list *clone_entry = NULL;
    8387          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8388             : 
    8389             :         /* Get snapshot entry for the snapshot we want to remove */
    8390          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8391             : 
    8392          70 :         assert(snapshot_entry != NULL);
    8393             : 
    8394             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8395          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8396          70 :         assert(snapshot_entry->clone_count == 1);
    8397          70 :         assert(clone_entry != NULL);
    8398             : 
    8399             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8400             :          * snapshot that we are removing */
    8401          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8402             :                                             &snapshot_clone_entry);
    8403             : 
    8404          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8405          70 : }
    8406             : 
    8407             : static void
    8408        1554 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8409             : {
    8410        1554 :         spdk_bs_sequence_t *seq = cb_arg;
    8411        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8412             :         uint32_t page_num;
    8413             : 
    8414        1554 :         if (bserrno) {
    8415          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8416          62 :                 bs_sequence_finish(seq, bserrno);
    8417          62 :                 return;
    8418             :         }
    8419             : 
    8420             :         /* Remove snapshot from the list */
    8421        1492 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8422        1492 :         if (snapshot_entry != NULL) {
    8423         144 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8424         144 :                 free(snapshot_entry);
    8425             :         }
    8426             : 
    8427        1492 :         page_num = bs_blobid_to_page(blob->id);
    8428        1492 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8429        1492 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8430        1492 :         blob->active.num_pages = 0;
    8431        1492 :         blob_resize(blob, 0);
    8432             : 
    8433        1492 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8434             : }
    8435             : 
    8436             : static int
    8437        1554 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8438             : {
    8439        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8440        1554 :         struct spdk_blob_list *clone_entry = NULL;
    8441        1554 :         struct spdk_blob *clone = NULL;
    8442        1554 :         bool has_one_clone = false;
    8443             : 
    8444             :         /* Check if this is a snapshot with clones */
    8445        1554 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8446        1554 :         if (snapshot_entry != NULL) {
    8447         194 :                 if (snapshot_entry->clone_count > 1) {
    8448          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8449          24 :                         return -EBUSY;
    8450         170 :                 } else if (snapshot_entry->clone_count == 1) {
    8451          70 :                         has_one_clone = true;
    8452             :                 }
    8453             :         }
    8454             : 
    8455             :         /* Check if someone has this blob open (besides this delete context):
    8456             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8457             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8458             :          *      and that is ok, because we will update it accordingly */
    8459        1530 :         if (blob->open_ref <= 2 && has_one_clone) {
    8460          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8461          70 :                 assert(clone_entry != NULL);
    8462          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8463             : 
    8464          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    8465             :                         /* Clone is closed and someone else opened this blob */
    8466           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8467           0 :                         return -EBUSY;
    8468             :                 }
    8469             : 
    8470          70 :                 *update_clone = true;
    8471          70 :                 return 0;
    8472             :         }
    8473             : 
    8474        1460 :         if (blob->open_ref > 1) {
    8475          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8476          16 :                 return -EBUSY;
    8477             :         }
    8478             : 
    8479        1444 :         assert(has_one_clone == false);
    8480        1444 :         *update_clone = false;
    8481        1444 :         return 0;
    8482             : }
    8483             : 
    8484             : static void
    8485           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8486             : {
    8487           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8488             : 
    8489           0 :         bs_sequence_finish(seq, -ENOMEM);
    8490           0 : }
    8491             : 
    8492             : static void
    8493        1564 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8494             : {
    8495        1564 :         spdk_bs_sequence_t *seq = cb_arg;
    8496             :         struct delete_snapshot_ctx *ctx;
    8497        1564 :         bool update_clone = false;
    8498             : 
    8499        1564 :         if (bserrno != 0) {
    8500          10 :                 bs_sequence_finish(seq, bserrno);
    8501          10 :                 return;
    8502             :         }
    8503             : 
    8504        1554 :         blob_verify_md_op(blob);
    8505             : 
    8506        1554 :         ctx = calloc(1, sizeof(*ctx));
    8507        1554 :         if (ctx == NULL) {
    8508           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8509           0 :                 return;
    8510             :         }
    8511             : 
    8512        1554 :         ctx->snapshot = blob;
    8513        1554 :         ctx->cb_fn = bs_delete_blob_finish;
    8514        1554 :         ctx->cb_arg = seq;
    8515             : 
    8516             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8517        1554 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8518        1554 :         if (ctx->bserrno) {
    8519          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8520          40 :                 return;
    8521             :         }
    8522             : 
    8523        1514 :         if (blob->locked_operation_in_progress) {
    8524           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8525           0 :                 ctx->bserrno = -EBUSY;
    8526           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8527           0 :                 return;
    8528             :         }
    8529             : 
    8530        1514 :         blob->locked_operation_in_progress = true;
    8531             : 
    8532             :         /*
    8533             :          * Remove the blob from the blob_store list now, to ensure it does not
    8534             :          *  get returned after this point by blob_lookup().
    8535             :          */
    8536        1514 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8537        1514 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8538             : 
    8539        1514 :         if (update_clone) {
    8540          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    8541          70 :                 if (!ctx->page) {
    8542           0 :                         ctx->bserrno = -ENOMEM;
    8543           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8544           0 :                         return;
    8545             :                 }
    8546             :                 /* This blob is a snapshot with active clone - update clone first */
    8547          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8548             :         } else {
    8549             :                 /* This blob does not have any clones - just remove it */
    8550        1444 :                 bs_blob_list_remove(blob);
    8551        1444 :                 bs_delete_blob_finish(seq, blob, 0);
    8552        1444 :                 free(ctx);
    8553             :         }
    8554             : }
    8555             : 
    8556             : void
    8557        1564 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8558             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8559             : {
    8560        1564 :         struct spdk_bs_cpl      cpl;
    8561             :         spdk_bs_sequence_t      *seq;
    8562             : 
    8563        1564 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8564             : 
    8565        1564 :         assert(spdk_get_thread() == bs->md_thread);
    8566             : 
    8567        1564 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8568        1564 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8569        1564 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8570             : 
    8571        1564 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8572        1564 :         if (!seq) {
    8573           0 :                 cb_fn(cb_arg, -ENOMEM);
    8574           0 :                 return;
    8575             :         }
    8576             : 
    8577        1564 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8578             : }
    8579             : 
    8580             : /* END spdk_bs_delete_blob */
    8581             : 
    8582             : /* START spdk_bs_open_blob */
    8583             : 
    8584             : static void
    8585        3474 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8586             : {
    8587        3474 :         struct spdk_blob *blob = cb_arg;
    8588             :         struct spdk_blob *existing;
    8589             : 
    8590        3474 :         if (bserrno != 0) {
    8591          64 :                 blob_free(blob);
    8592          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    8593          64 :                 bs_sequence_finish(seq, bserrno);
    8594          64 :                 return;
    8595             :         }
    8596             : 
    8597        3410 :         existing = blob_lookup(blob->bs, blob->id);
    8598        3410 :         if (existing) {
    8599           4 :                 blob_free(blob);
    8600           4 :                 existing->open_ref++;
    8601           4 :                 seq->cpl.u.blob_handle.blob = existing;
    8602           4 :                 bs_sequence_finish(seq, 0);
    8603           4 :                 return;
    8604             :         }
    8605             : 
    8606        3406 :         blob->open_ref++;
    8607             : 
    8608        3406 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8609        3406 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8610             : 
    8611        3406 :         bs_sequence_finish(seq, bserrno);
    8612             : }
    8613             : 
    8614             : static inline void
    8615           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8616             : {
    8617             : #define FIELD_OK(field) \
    8618             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8619             : 
    8620             : #define SET_FIELD(field) \
    8621             :         if (FIELD_OK(field)) { \
    8622             :                 dst->field = src->field; \
    8623             :         } \
    8624             : 
    8625           4 :         SET_FIELD(clear_method);
    8626           4 :         SET_FIELD(esnap_ctx);
    8627             : 
    8628           4 :         dst->opts_size = src->opts_size;
    8629             : 
    8630             :         /* You should not remove this statement, but need to update the assert statement
    8631             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8632             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8633             : 
    8634             : #undef FIELD_OK
    8635             : #undef SET_FIELD
    8636           4 : }
    8637             : 
    8638             : static void
    8639        4279 : bs_open_blob(struct spdk_blob_store *bs,
    8640             :              spdk_blob_id blobid,
    8641             :              struct spdk_blob_open_opts *opts,
    8642             :              spdk_blob_op_with_handle_complete cb_fn,
    8643             :              void *cb_arg)
    8644             : {
    8645             :         struct spdk_blob                *blob;
    8646        4279 :         struct spdk_bs_cpl              cpl;
    8647        4279 :         struct spdk_blob_open_opts      opts_local;
    8648             :         spdk_bs_sequence_t              *seq;
    8649             :         uint32_t                        page_num;
    8650             : 
    8651        4279 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8652        4279 :         assert(spdk_get_thread() == bs->md_thread);
    8653             : 
    8654        4279 :         page_num = bs_blobid_to_page(blobid);
    8655        4279 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8656             :                 /* Invalid blobid */
    8657          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8658          48 :                 return;
    8659             :         }
    8660             : 
    8661        4231 :         blob = blob_lookup(bs, blobid);
    8662        4231 :         if (blob) {
    8663         757 :                 blob->open_ref++;
    8664         757 :                 cb_fn(cb_arg, blob, 0);
    8665         757 :                 return;
    8666             :         }
    8667             : 
    8668        3474 :         blob = blob_alloc(bs, blobid);
    8669        3474 :         if (!blob) {
    8670           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8671           0 :                 return;
    8672             :         }
    8673             : 
    8674        3474 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8675        3474 :         if (opts) {
    8676           4 :                 blob_open_opts_copy(opts, &opts_local);
    8677             :         }
    8678             : 
    8679        3474 :         blob->clear_method = opts_local.clear_method;
    8680             : 
    8681        3474 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8682        3474 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8683        3474 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8684        3474 :         cpl.u.blob_handle.blob = blob;
    8685        3474 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8686             : 
    8687        3474 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8688        3474 :         if (!seq) {
    8689           0 :                 blob_free(blob);
    8690           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8691           0 :                 return;
    8692             :         }
    8693             : 
    8694        3474 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8695             : }
    8696             : 
    8697             : void
    8698        4275 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8699             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8700             : {
                               /* Open a blob without caller-supplied options: forwards to
                                * bs_open_blob() with a NULL opts pointer. The outcome is
                                * reported through cb_fn(cb_arg, blob, bserrno). */
    8701        4275 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8702        4275 : }
    8703             : 
    8704             : void
    8705           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8706             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8707             : {
                               /* Open a blob with caller-supplied options: forwards every
                                * argument, including opts, unchanged to bs_open_blob(). */
    8708           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8709           4 : }
    8710             : 
    8711             : /* END spdk_bs_open_blob */
    8712             : 
    8713             : /* START spdk_blob_set_read_only */
    8714             : int
    8715         236 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8716             : {
                               /* Request that the blob become read-only. Only the
                                * SPDK_BLOB_READ_ONLY bit in data_ro_flags is set here and the blob
                                * is marked dirty; the in-memory data_ro/md_ro switches are flipped
                                * by blob_sync_md_cpl() once a metadata sync succeeds.
                                * Always returns 0. */
    8717         236 :         blob_verify_md_op(blob);
    8718             : 
    8719         236 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8720             : 
    8721         236 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8722         236 :         return 0;
    8723             : }
    8724             : /* END spdk_blob_set_read_only */
    8725             : 
    8726             : /* START spdk_blob_sync_md */
    8727             : 
    8728             : static void
    8729        1607 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8730             : {
                               /* Completion of the blob_persist() started by blob_sync_md().
                                * cb_arg is the blob that was persisted. */
    8731        1607 :         struct spdk_blob *blob = cb_arg;
    8732             : 
                               /* Only after a successful persist does a pending read-only
                                * request take effect for both data and metadata. */
    8733        1607 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8734         404 :                 blob->data_ro = true;
    8735         404 :                 blob->md_ro = true;
    8736             :         }
    8737             : 
    8738        1607 :         bs_sequence_finish(seq, bserrno);
    8739        1607 : }
    8740             : 
    8741             : static void
    8742        1607 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8743             : {
                               /* Persist the blob's metadata. A sequence is started on the
                                * blobstore's md_channel with cb_fn/cb_arg as its completion,
                                * then blob_persist() is issued with blob_sync_md_cpl() as the
                                * continuation. If the sequence cannot be started, cb_fn is
                                * called directly with -ENOMEM. */
    8744        1607 :         struct spdk_bs_cpl      cpl;
    8745             :         spdk_bs_sequence_t      *seq;
    8746             : 
    8747        1607 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8748        1607 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8749        1607 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8750             : 
    8751        1607 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8752        1607 :         if (!seq) {
    8753           0 :                 cb_fn(cb_arg, -ENOMEM);
    8754           0 :                 return;
    8755             :         }
    8756             : 
    8757        1607 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8758             : }
    8759             : 
    8760             : void
    8761        1097 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8762             : {
                               /* Public entry point for syncing a blob's metadata.
                                * cb_fn(cb_arg, bserrno) receives the result. */
    8763        1097 :         blob_verify_md_op(blob);
    8764             : 
    8765        1097 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8766             : 
                               /* Metadata that is already read-only is expected to be clean,
                                * so the call completes immediately with success. */
    8767        1097 :         if (blob->md_ro) {
    8768           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8769           4 :                 cb_fn(cb_arg, 0);
    8770           4 :                 return;
    8771             :         }
    8772             : 
    8773        1093 :         blob_sync_md(blob, cb_fn, cb_arg);
    8774             : }
    8775             : 
    8776             : /* END spdk_blob_sync_md */
    8777             : 
    8778             : struct spdk_blob_cluster_op_ctx {
                               /* Context for a cluster insert/free request that is handed to
                                * the metadata thread and then completed back on the thread that
                                * issued it. Allocated by blob_insert_cluster_on_md_thread() /
                                * blob_free_cluster_on_md_thread() and freed by
                                * blob_op_cluster_msg_cpl(). */
                               /* thread: the thread that issued the request; the final
                                * completion message is sent back to it. */
    8779             :         struct spdk_thread      *thread;
    8780             :         struct spdk_blob        *blob;
    8781             :         uint32_t                cluster_num;    /* cluster index in blob */
                               /* NOTE(review): blob_insert_cluster_on_md_thread() stores a
                                * uint64_t argument into this 32-bit field - confirm the value
                                * always fits. */
    8782             :         uint32_t                cluster;        /* cluster on disk */
    8783             :         uint32_t                extent_page;    /* extent page on disk */
    8784             :         struct spdk_blob_md_page *page; /* preallocated extent page */
                               /* rc: status handed to cb_fn when the request completes. */
    8785             :         int                     rc;
    8786             :         spdk_blob_op_complete   cb_fn;
    8787             :         void                    *cb_arg;
    8788             : };
    8789             : 
    8790             : static void
    8791         876 : blob_op_cluster_msg_cpl(void *arg)
    8792             : {
                               /* Final step of a cluster operation: the message is sent to
                                * ctx->thread by the callers in this file. Reports ctx->rc to the
                                * user's callback and releases the context. */
    8793         876 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8794             : 
    8795         876 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8796         876 :         free(ctx);
    8797         876 : }
    8798             : 
    8799             : static void
    8800         846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8801             : {
                               /* Completion callback for the metadata-side work of a cluster
                                * operation: stores the result in the context and sends
                                * blob_op_cluster_msg_cpl() to the thread recorded in ctx->thread. */
    8802         846 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8803             : 
    8804         846 :         ctx->rc = bserrno;
    8805         846 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8806         846 : }
    8807             : 
    8808             : static void
    8809          82 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8810             : {
                               /* Callback given to blob_write_extent_page() by
                                * blob_insert_cluster_msg() when a new extent page was written.
                                * Records the page in the blob's extent-page slot for
                                * ctx->cluster_num, marks the blob dirty and syncs its metadata;
                                * blob_op_cluster_msg_cb() then completes the request.
                                * NOTE(review): bserrno is not inspected, so the slot is updated
                                * even if the extent page write reported an error - confirm this
                                * is intended. */
    8811          82 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8812             :         uint32_t *extent_page;
    8813             : 
    8814          82 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8815          82 :         *extent_page = ctx->extent_page;
    8816          82 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8817          82 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8818          82 : }
    8819             : 
    8820             : struct spdk_blob_write_extent_page_ctx {
                               /* State carried across the asynchronous steps of
                                * blob_write_extent_page(); freed in
                                * blob_persist_extent_page_cpl(). */
    8821             :         struct spdk_blob_store          *bs;
    8822             : 
                               /* extent: metadata page index that is converted to an LBA for
                                * the write. page: buffer that is written to that location. */
    8823             :         uint32_t                        extent;
    8824             :         struct spdk_blob_md_page        *page;
    8825             : };
    8826             : 
    8827             : static void
    8828          26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8829             : {
                               /* Last metadata-side step of freeing a cluster when the extent
                                * table is in use: returns ctx->cluster to the blobstore while
                                * holding used_lock, then completes the request on ctx->thread
                                * with bserrno.
                                * NOTE(review): the cluster is released regardless of bserrno -
                                * confirm this is intended on the error path. */
    8830          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8831             : 
    8832          26 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8833          26 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8834          26 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8835             : 
    8836          26 :         ctx->rc = bserrno;
    8837          26 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8838          26 : }
    8839             : 
    8840             : static void
    8841          26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8842             : {
                               /* Callback given to blob_write_extent_page() by
                                * blob_free_cluster_msg() when the extent page was updated but is
                                * still in use. */
    8843          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8844             : 
                               /* On a write error, or when the blobstore's clean flag is 0,
                                * skip the metadata sync and finish the request right away. */
    8845          26 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8846          26 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8847          26 :                 return;
    8848             :         }
    8849             : 
    8850           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8851           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8852             : }
    8853             : 
    8854             : static void
    8855           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8856             : {
                               /* Callback given to blob_write_extent_page() by
                                * blob_free_cluster_msg() when the extent page no longer maps any
                                * cluster. Releases the metadata page ctx->extent_page under
                                * used_lock, marks the blob dirty and syncs its metadata;
                                * blob_free_cluster_msg_cb() then releases the cluster.
                                * NOTE(review): bserrno is not inspected here - confirm the page
                                * should be released when the write failed. */
    8857           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8858             : 
    8859           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8860           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8861           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8862           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8863           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8864           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8865           0 : }
    8866             : 
    8867             : static void
    8868         434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8869             : {
                               /* End of an extent page write: frees the write context and
                                * finishes the sequence with bserrno. */
    8870         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8871             : 
    8872         434 :         free(ctx);
    8873         434 :         bs_sequence_finish(seq, bserrno);
    8874         434 : }
    8875             : 
    8876             : static void
    8877         434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8878             : {
                               /* Continuation passed to bs_mark_dirty() by
                                * blob_write_extent_page(). */
    8879         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8880             : 
                               /* If the preceding step failed, clean up and finish the
                                * sequence without issuing the write. */
    8881         434 :         if (bserrno != 0) {
    8882           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8883           0 :                 return;
    8884             :         }
                               /* Write one SPDK_BS_PAGE_SIZE worth of LBAs from ctx->page to
                                * the LBA of metadata page ctx->extent. */
    8885         434 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8886         434 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    8887             :                               blob_persist_extent_page_cpl, ctx);
    8888             : }
    8889             : 
    8890             : static void
    8891         434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8892             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8893             : {
                               /* Serialize the extent page covering cluster_num into the
                                * caller-provided page buffer and write it to metadata page
                                * index `extent`. cb_fn(cb_arg, bserrno) is the completion of
                                * the sequence; allocation failures are reported through cb_fn
                                * with -ENOMEM. */
    8894             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8895             :         spdk_bs_sequence_t                      *seq;
    8896         434 :         struct spdk_bs_cpl                      cpl;
    8897             : 
    8898         434 :         ctx = calloc(1, sizeof(*ctx));
    8899         434 :         if (!ctx) {
    8900           0 :                 cb_fn(cb_arg, -ENOMEM);
    8901           0 :                 return;
    8902             :         }
    8903         434 :         ctx->bs = blob->bs;
    8904         434 :         ctx->extent = extent;
    8905         434 :         ctx->page = page;
    8906             : 
    8907         434 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8908         434 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8909         434 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8910             : 
    8911         434 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8912         434 :         if (!seq) {
    8913           0 :                 free(ctx);
    8914           0 :                 cb_fn(cb_arg, -ENOMEM);
    8915           0 :                 return;
    8916             :         }
    8917             : 
                               /* Fill in the page header: no next page, owned by this blob,
                                * sequence number 0. */
    8918         434 :         assert(page);
    8919         434 :         page->next = SPDK_INVALID_MD_PAGE;
    8920         434 :         page->id = blob->id;
    8921         434 :         page->sequence_num = 0;
    8922             : 
    8923         434 :         blob_serialize_extent_page(blob, cluster_num, page);
    8924             : 
                               /* The CRC is computed after the page contents are serialized. */
    8925         434 :         page->crc = blob_md_page_calc_crc(page);
    8926             : 
                               /* The target metadata page must already be marked as used. */
    8927         434 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8928             : 
                               /* The device write itself is issued from
                                * blob_write_extent_page_ready(). */
    8929         434 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8930             : }
    8931             : 
    8932             : static void
    8933         816 : blob_insert_cluster_msg(void *arg)
    8934             : {
                               /* Message handler sent to the blobstore's md_thread by
                                * blob_insert_cluster_on_md_thread(). Inserts ctx->cluster at
                                * index ctx->cluster_num and persists the change; the result is
                                * delivered back on ctx->thread. */
    8935         816 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8936             :         uint32_t *extent_page;
    8937             : 
    8938         816 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8939         816 :         if (ctx->rc != 0) {
    8940           4 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8941           4 :                 return;
    8942             :         }
    8943             : 
    8944         812 :         if (ctx->blob->use_extent_table == false) {
    8945             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8946         406 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8947         406 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8948         406 :                 return;
    8949             :         }
    8950             : 
    8951         406 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8952         406 :         if (*extent_page == 0) {
    8953             :                 /* Extent page requires allocation.
    8954             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8955          82 :                 assert(ctx->extent_page != 0);
    8956          82 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8957          82 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8958             :                                        blob_insert_new_ep_cb, ctx);
    8959             :         } else {
    8960             :                 /* It is possible for the original thread to allocate an extent page for
    8961             :                  * a different cluster in the same extent page. In such a case proceed with
    8962             :                  * updating the existing extent page, but release the additional one. */
    8963         324 :                 if (ctx->extent_page != 0) {
    8964           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8965           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8966           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8967           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8968           0 :                         ctx->extent_page = 0;
    8969             :                 }
    8970             :                 /* Extent page already allocated.
    8971             :                  * Every cluster allocation requires just an update of a single extent page. */
    8972         324 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8973             :                                        blob_op_cluster_msg_cb, ctx);
    8974             :         }
    8975             : }
    8976             : 
    8977             : static void
    8978         816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8979             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8980             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8981             : {
                               /* Package a cluster insertion into a context and send it to the
                                * blobstore's md_thread, where blob_insert_cluster_msg() runs it.
                                * The calling thread is recorded so the completion can be sent
                                * back to it. If the context cannot be allocated, cb_fn is called
                                * directly with -ENOMEM. */
    8982             :         struct spdk_blob_cluster_op_ctx *ctx;
    8983             : 
    8984         816 :         ctx = calloc(1, sizeof(*ctx));
    8985         816 :         if (ctx == NULL) {
    8986           0 :                 cb_fn(cb_arg, -ENOMEM);
    8987           0 :                 return;
    8988             :         }
    8989             : 
    8990         816 :         ctx->thread = spdk_get_thread();
    8991         816 :         ctx->blob = blob;
    8992         816 :         ctx->cluster_num = cluster_num;
    8993         816 :         ctx->cluster = cluster;
    8994         816 :         ctx->extent_page = extent_page;
    8995         816 :         ctx->page = page;
    8996         816 :         ctx->cb_fn = cb_fn;
    8997         816 :         ctx->cb_arg = cb_arg;
    8998             : 
    8999         816 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    9000             : }
    9001             : 
    9002             : static void
    9003          60 : blob_free_cluster_msg(void *arg)
    9004             : {
                               /* Message handler sent to the blobstore's md_thread by
                                * blob_free_cluster_on_md_thread(). Clears the blob's mapping for
                                * ctx->cluster_num and persists the change; the result is
                                * delivered back on ctx->thread. */
    9005          60 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    9006             :         uint32_t *extent_page;
    9007             :         uint32_t start_cluster_idx;
    9008          60 :         bool free_extent_page = true;
    9009             :         size_t i;
    9010             : 
                               /* The on-disk cluster is derived from the LBA currently stored
                                * for this cluster index. */
    9011          60 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    9012             : 
    9013             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    9014          60 :         if (ctx->cluster == 0) {
    9015           8 :                 blob_op_cluster_msg_cb(ctx, 0);
    9016           8 :                 return;
    9017             :         }
    9018             : 
    9019          52 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
                               /* NOTE(review): ctx->cluster is known to be non-zero here
                                * because of the early return above, so this check always
                                * passes. */
    9020          52 :         if (ctx->cluster != 0) {
    9021          52 :                 ctx->blob->active.num_allocated_clusters--;
    9022             :         }
    9023             : 
    9024          52 :         if (ctx->blob->use_extent_table == false) {
    9025             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
                                       /* In this path the cluster is released before the
                                        * metadata sync is started. */
    9026          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    9027          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    9028          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    9029          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    9030          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    9031          26 :                 return;
    9032             :         }
    9033             : 
    9034          26 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    9035             : 
    9036             :         /* There shouldn't be parallel release operations on same cluster */
    9037          26 :         assert(*extent_page == ctx->extent_page);
    9038             : 
                               /* Scan every cluster slot that shares this cluster's extent
                                * page; the page can be freed only if all of them are now 0.
                                * NOTE(review): no bound against the size of active.clusters is
                                * visible here - confirm the array always covers a whole extent
                                * page. */
    9039          26 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    9040          48 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    9041          48 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    9042          26 :                         free_extent_page = false;
    9043          26 :                         break;
    9044             :                 }
    9045             :         }
    9046             : 
    9047          26 :         if (free_extent_page) {
                                       /* Drop the blob's reference to the extent page, write
                                        * the page out, then release the metadata page in
                                        * blob_free_cluster_free_ep_cb(). */
    9048           0 :                 assert(ctx->extent_page != 0);
    9049           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    9050           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    9051           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    9052             :                                        blob_free_cluster_free_ep_cb, ctx);
    9053             :         } else {
                                       /* The extent page still maps other clusters: rewrite it
                                        * in place. */
    9054          26 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    9055             :                                        blob_free_cluster_update_ep_cb, ctx);
    9056             :         }
    9057             : }
    9058             : 
    9059             : 
    9060             : static void
    9061          60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9062             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9063             : {
                               /* Package a cluster release into a context and send it to the
                                * blobstore's md_thread, where blob_free_cluster_msg() runs it.
                                * ctx->cluster is left zeroed by calloc() and is filled in by
                                * blob_free_cluster_msg(). If the context cannot be allocated,
                                * cb_fn is called directly with -ENOMEM. */
    9064             :         struct spdk_blob_cluster_op_ctx *ctx;
    9065             : 
    9066          60 :         ctx = calloc(1, sizeof(*ctx));
    9067          60 :         if (ctx == NULL) {
    9068           0 :                 cb_fn(cb_arg, -ENOMEM);
    9069           0 :                 return;
    9070             :         }
    9071             : 
    9072          60 :         ctx->thread = spdk_get_thread();
    9073          60 :         ctx->blob = blob;
    9074          60 :         ctx->cluster_num = cluster_num;
    9075          60 :         ctx->extent_page = extent_page;
    9076          60 :         ctx->page = page;
    9077          60 :         ctx->cb_fn = cb_fn;
    9078          60 :         ctx->cb_arg = cb_arg;
    9079             : 
    9080          60 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9081             : }
    9082             : 
    9083             : /* START spdk_blob_close */
    9084             : 
    9085             : static void
    9086        4167 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9087             : {
                               /* Completion of the blob_persist() issued while closing a blob.
                                * On success one open reference is dropped and the blob is freed
                                * when that was the last one. On failure open_ref is left
                                * untouched. The sequence is finished with bserrno either way. */
    9088        4167 :         struct spdk_blob *blob = cb_arg;
    9089             : 
    9090        4167 :         if (bserrno == 0) {
    9091        4167 :                 blob->open_ref--;
    9092        4167 :                 if (blob->open_ref == 0) {
    9093             :                         /*
    9094             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9095             :                          *  These blobs are removed from the blob_store list
    9096             :                          *  when the deletion process starts - so don't try to
    9097             :                          *  remove them again.
    9098             :                          */
    9099        3406 :                         if (blob->active.num_pages > 0) {
    9100        1914 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9101        1914 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9102             :                         }
    9103        3406 :                         blob_free(blob);
    9104             :                 }
    9105             :         }
    9106             : 
    9107        4167 :         bs_sequence_finish(seq, bserrno);
    9108        4167 : }
    9109             : 
    9110             : static void
    9111         120 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9112             : {
                               /* Callback given to blob_esnap_destroy_bs_dev_channels() by
                                * spdk_blob_close(). cb_arg is the close sequence. On error the
                                * sequence is finished with bserrno; otherwise the close continues
                                * with the metadata persist. */
    9113         120 :         spdk_bs_sequence_t      *seq = cb_arg;
    9114             : 
    9115         120 :         if (bserrno != 0) {
    9116           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9117             :                               blob->id, bserrno);
    9118           0 :                 bs_sequence_finish(seq, bserrno);
    9119           0 :                 return;
    9120             :         }
    9121             : 
    9122         120 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9123             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9124             : 
    9125             :         /* Sync metadata */
    9126         120 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9127             : }
    9128             : 
    9129             : void
    9130        4167 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9131             : {
                               /* Close one open reference to a blob. The metadata is persisted
                                * first; blob_close_cpl() then drops the reference.
                                * cb_fn(cb_arg, bserrno) receives the result: -EBADF if the blob
                                * has no open references, -ENOMEM if no sequence could be
                                * started. */
    9132        4167 :         struct spdk_bs_cpl      cpl;
    9133             :         spdk_bs_sequence_t      *seq;
    9134             : 
    9135        4167 :         blob_verify_md_op(blob);
    9136             : 
    9137        4167 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9138             : 
    9139        4167 :         if (blob->open_ref == 0) {
    9140           0 :                 cb_fn(cb_arg, -EBADF);
    9141           0 :                 return;
    9142             :         }
    9143             : 
    9144        4167 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9145        4167 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9146        4167 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9147             : 
    9148        4167 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9149        4167 :         if (!seq) {
    9150           0 :                 cb_fn(cb_arg, -ENOMEM);
    9151           0 :                 return;
    9152             :         }
    9153             : 
                               /* When the last reference to an esnap clone is being closed,
                                * its bs_dev channels are destroyed first; the persist is then
                                * issued from blob_close_esnap_done(). */
    9154        4167 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9155         120 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9156         120 :                 return;
    9157             :         }
    9158             : 
    9159             :         /* Sync metadata */
    9160        4047 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9161             : }
    9162             : 
    9163             : /* END spdk_blob_close */
    9164             : 
    9165         233 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    9166             : {
                               /* Returns whatever spdk_get_io_channel() yields when called
                                * with the blobstore pointer itself as the argument. */
    9167         233 :         return spdk_get_io_channel(bs);
    9168             : }
    9169             : 
    9170             : void
    9171         233 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    9172             : {
                               /* Tear down esnap state attached to the channel's context
                                * before dropping the channel reference. */
    9173         233 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    9174         233 :         spdk_put_io_channel(channel);
    9175         233 : }
    9176             : 
    9177             : void
    9178         108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9179             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9180             : {
                               /* Submit an SPDK_BLOB_UNMAP request for [offset, offset + length)
                                * via blob_request_submit_op(); no payload buffer is passed. */
    9181         108 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9182             :                                SPDK_BLOB_UNMAP);
    9183         108 : }
    9184             : 
    9185             : void
    9186          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9187             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9188             : {
                               /* Submit an SPDK_BLOB_WRITE_ZEROES request via
                                * blob_request_submit_op(); no payload buffer is passed. */
    9189          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9190             :                                SPDK_BLOB_WRITE_ZEROES);
    9191          48 : }
    9192             : 
    9193             : void
    9194       20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9195             :                    void *payload, uint64_t offset, uint64_t length,
    9196             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9197             : {
                               /* Submit an SPDK_BLOB_WRITE request with a single contiguous
                                * payload buffer via blob_request_submit_op(). */
    9198       20868 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9199             :                                SPDK_BLOB_WRITE);
    9200       20868 : }
    9201             : 
    9202             : void
    9203       17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9204             :                   void *payload, uint64_t offset, uint64_t length,
    9205             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9206             : {
                               /* Submit an SPDK_BLOB_READ request with a single contiguous
                                * payload buffer via blob_request_submit_op(). */
    9207       17500 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9208             :                                SPDK_BLOB_READ);
    9209       17500 : }
    9210             : 
    9211             : void
    9212         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9213             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9214             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9215             : {
                               /* Vectored write: forwards to blob_request_submit_rw_iov() with
                                * the boolean argument set to false (the readv variants pass
                                * true) and no extended I/O options. */
    9216         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9217         140 : }
    9218             : 
    9219             : void
    9220         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9221             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9222             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9223             : {
                               /* Vectored read: forwards to blob_request_submit_rw_iov() with
                                * the boolean argument set to true (the writev variants pass
                                * false) and no extended I/O options. */
    9224         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9225         940 : }
    9226             : 
    9227             : void
    9228         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9229             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9230             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9231             : {
                               /* Vectored write with caller-supplied extended I/O options:
                                * same call as spdk_blob_io_writev() except that io_opts is
                                * passed through instead of NULL. */
    9232         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9233             :                                    io_opts);
    9234         208 : }
    9235             : 
    9236             : void
    9237        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9238             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9239             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9240             : {
                               /* Vectored read with caller-supplied extended I/O options:
                                * same call as spdk_blob_io_readv() except that io_opts is
                                * passed through instead of NULL. */
    9241        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9242             :                                    io_opts);
    9243        1300 : }
    9244             : 
    9245             : struct spdk_bs_iter_ctx { /* State for one spdk_bs_iter_first()/spdk_bs_iter_next() call; freed by bs_iter_cpl(). */
    9246             :         int64_t page_num; /* index last examined in used_blobids; spdk_bs_iter_first() seeds it with -1 */
    9247             :         struct spdk_blob_store *bs; /* blobstore being iterated */
    9248             : 
    9249             :         spdk_blob_op_with_handle_complete cb_fn; /* caller callback; bs_iter_cpl() invokes it with a blob or with -ENOENT */
    9250             :         void *cb_arg; /* opaque argument handed back to cb_fn */
    9251             : };
    9252             : 
    9253             : static void /* Iteration step: deliver an opened blob to the caller, or try to open the next id set in used_blobids. */
    9254        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9255             : {
    9256        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9257        1164 :         struct spdk_blob_store *bs = ctx->bs;
    9258             :         spdk_blob_id id;
    9259             :         /* bserrno == 0: the previous open succeeded, so report the blob and release the context. */
    9260        1164 :         if (bserrno == 0) {
    9261         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9262         444 :                 free(ctx);
    9263         444 :                 return;
    9264             :         }
    9265             :         /* Any non-zero bserrno (a failed open, or the -1 passed by the iterator entry points) advances the search. */
    9266         720 :         ctx->page_num++;
    9267         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9268         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) { /* result at or past the array capacity: end of iteration */
    9269         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9270         268 :                 free(ctx);
    9271         268 :                 return;
    9272             :         }
    9273             : 
    9274         452 :         id = bs_page_to_blobid(ctx->page_num);
    9275             :         /* The open completion re-enters this function with the same context. */
    9276         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9277             : }
    9278             : 
    9279             : void /* Start iterating the blobs of bs: allocate an iterator context and run the first search step. */
    9280         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9281             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9282             : {
    9283             :         struct spdk_bs_iter_ctx *ctx;
    9284             : 
    9285         292 :         ctx = calloc(1, sizeof(*ctx));
    9286         292 :         if (!ctx) {
    9287           0 :                 cb_fn(cb_arg, NULL, -ENOMEM); /* allocation failure is reported through the callback */
    9288           0 :                 return;
    9289             :         }
    9290             : 
    9291         292 :         ctx->page_num = -1; /* bs_iter_cpl() increments before searching, so the search starts at index 0 */
    9292         292 :         ctx->bs = bs;
    9293         292 :         ctx->cb_fn = cb_fn;
    9294         292 :         ctx->cb_arg = cb_arg;
    9295             :         /* A non-zero bserrno sends bs_iter_cpl() down its search path; ctx is owned by it from here on. */
    9296         292 :         bs_iter_cpl(ctx, NULL, -1);
    9297             : }
    9298             : 
    9299             : static void /* Close completion used by spdk_bs_iter_next(); the close result (bserrno) is not inspected. */
    9300         420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9301             : {
    9302         420 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9303             : 
    9304         420 :         bs_iter_cpl(ctx, NULL, -1); /* non-zero bserrno: continue searching from ctx->page_num */
    9305         420 : }
    9306             : 
    9307             : void /* Continue iteration after blob: close it, then search for the next blob id. blob must not be NULL (asserted). */
    9308         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9309             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9310             : {
    9311             :         struct spdk_bs_iter_ctx *ctx;
    9312             : 
    9313         420 :         assert(blob != NULL);
    9314             : 
    9315         420 :         ctx = calloc(1, sizeof(*ctx));
    9316         420 :         if (!ctx) {
    9317           0 :                 cb_fn(cb_arg, NULL, -ENOMEM); /* on this path the blob is not closed */
    9318           0 :                 return;
    9319             :         }
    9320             : 
    9321         420 :         ctx->page_num = bs_blobid_to_page(blob->id); /* read the id before the close below; the search resumes after this index */
    9322         420 :         ctx->bs = bs;
    9323         420 :         ctx->cb_fn = cb_fn;
    9324         420 :         ctx->cb_arg = cb_arg;
    9325             : 
    9326             :         /* Close the existing blob; bs_iter_close_cpl() then continues the iteration. */
    9327         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9328             : }
    9329             : 
    9330             : static int /* Create or overwrite xattr name in the internal or user list. Returns 0, -EPERM (md_ro) or -ENOMEM. */
    9331         959 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9332             :                uint16_t value_len, bool internal)
    9333             : {
    9334             :         struct spdk_xattr_tailq *xattrs;
    9335             :         struct spdk_xattr       *xattr;
    9336             :         size_t                  desc_size;
    9337             :         void                    *tmp;
    9338             : 
    9339         959 :         blob_verify_md_op(blob);
    9340             : 
    9341         959 :         if (blob->md_ro) {
    9342           4 :                 return -EPERM;
    9343             :         }
    9344             :         /* Descriptor header + name (no terminator counted) + value must stay within SPDK_BS_MAX_DESC_SIZE. */
    9345         955 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9346         955 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9347           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fit into single page %zu\n", name,
    9348             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9349           4 :                 return -ENOMEM; /* an oversized xattr is reported with the same code as an allocation failure */
    9350             :         }
    9351             : 
    9352         951 :         if (internal) {
    9353         740 :                 xattrs = &blob->xattrs_internal;
    9354         740 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR; /* NOTE(review): set before the allocations below, so it stays set if they fail */
    9355             :         } else {
    9356         211 :                 xattrs = &blob->xattrs;
    9357             :         }
    9358             :         /* Existing name: replace the value in place. */
    9359        1182 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9360         340 :                 if (!strcmp(name, xattr->name)) {
    9361         109 :                         tmp = malloc(value_len); /* allocate first so a failure leaves the old value untouched */
    9362         109 :                         if (!tmp) {
    9363           0 :                                 return -ENOMEM;
    9364             :                         }
    9365             : 
    9366         109 :                         free(xattr->value);
    9367         109 :                         xattr->value_len = value_len;
    9368         109 :                         xattr->value = tmp;
    9369         109 :                         memcpy(xattr->value, value, value_len);
    9370             : 
    9371         109 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9372             : 
    9373         109 :                         return 0;
    9374             :                 }
    9375             :         }
    9376             :         /* New name: build an entry holding private copies of name and value, then append it. */
    9377         842 :         xattr = calloc(1, sizeof(*xattr));
    9378         842 :         if (!xattr) {
    9379           0 :                 return -ENOMEM;
    9380             :         }
    9381             : 
    9382         842 :         xattr->name = strdup(name);
    9383         842 :         if (!xattr->name) {
    9384           0 :                 free(xattr);
    9385           0 :                 return -ENOMEM;
    9386             :         }
    9387             : 
    9388         842 :         xattr->value_len = value_len;
    9389         842 :         xattr->value = malloc(value_len);
    9390         842 :         if (!xattr->value) {
    9391           0 :                 free(xattr->name);
    9392           0 :                 free(xattr);
    9393           0 :                 return -ENOMEM;
    9394             :         }
    9395         842 :         memcpy(xattr->value, value, value_len);
    9396         842 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9397             :         /* The change is only in memory here; the blob is marked dirty. */
    9398         842 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9399             : 
    9400         842 :         return 0;
    9401             : }
    9402             : 
    9403             : int /* Public setter: stores the xattr in the user (non-internal) list via blob_set_xattr(). */
    9404         183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9405             :                     uint16_t value_len)
    9406             : {
    9407         183 :         return blob_set_xattr(blob, name, value, value_len, false); /* false = not internal */
    9408             : }
    9409             : 
    9410             : static int /* Remove xattr name from the internal or user list. Returns 0, -EPERM (md_ro) or -ENOENT (not found). */
    9411         416 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9412             : {
    9413             :         struct spdk_xattr_tailq *xattrs;
    9414             :         struct spdk_xattr       *xattr;
    9415             : 
    9416         416 :         blob_verify_md_op(blob);
    9417             : 
    9418         416 :         if (blob->md_ro) {
    9419           4 :                 return -EPERM;
    9420             :         }
    9421         412 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9422             :         /* Returning right after the removal keeps the plain TAILQ_FOREACH from touching the freed entry. */
    9423         424 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9424         372 :                 if (!strcmp(name, xattr->name)) {
    9425         360 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9426         360 :                         free(xattr->value);
    9427         360 :                         free(xattr->name);
    9428         360 :                         free(xattr);
    9429             :                         /* When the last internal xattr is gone, clear the flag that blob_set_xattr() set. */
    9430         360 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9431         244 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9432             :                         }
    9433         360 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9434             : 
    9435         360 :                         return 0;
    9436             :                 }
    9437             :         }
    9438             : 
    9439          52 :         return -ENOENT;
    9440             : }
    9441             : 
    9442             : int /* Public remover: deletes the xattr from the user (non-internal) list via blob_remove_xattr(). */
    9443          36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    9444             : {
    9445          36 :         return blob_remove_xattr(blob, name, false); /* false = not internal */
    9446             : }
    9447             : 
    9448             : static int /* Look up xattr name in the internal or user list. Returns 0 or -ENOENT; outputs are written only on success. */
    9449        2292 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9450             :                      const void **value, size_t *value_len, bool internal)
    9451             : {
    9452             :         struct spdk_xattr       *xattr;
    9453             :         struct spdk_xattr_tailq *xattrs;
    9454             : 
    9455        2292 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9456             : 
    9457        2922 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9458        1396 :                 if (!strcmp(name, xattr->name)) {
    9459         766 :                         *value = xattr->value; /* pointer to the stored buffer, not a copy */
    9460         766 :                         *value_len = xattr->value_len;
    9461         766 :                         return 0;
    9462             :                 }
    9463             :         }
    9464        1526 :         return -ENOENT;
    9465             : }
    9466             : 
    9467             : int /* Public lookup in the user (non-internal) xattr list; see blob_get_xattr_value() for the return values. */
    9468         154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9469             :                           const void **value, size_t *value_len)
    9470             : {
    9471         154 :         blob_verify_md_op(blob);
    9472             : 
    9473         154 :         return blob_get_xattr_value(blob, name, value, value_len, false); /* false = not internal */
    9474             : }
    9475             : 
    9476             : struct spdk_xattr_names { /* Name list built by blob_get_xattr_names() as a single allocation. */
    9477             :         uint32_t        count; /* number of filled entries in names */
    9478             :         const char      *names[0]; /* trailing array; entries alias the xattr name strings, they are not copies. NOTE(review): [0] is a zero-length array; the C99 flexible array member spelling is names[] */
    9479             : };
    9480             : 
    9481             : static int /* Allocate *names and fill it with a pointer to the name of every xattr in the list. Returns 0 or -ENOMEM. */
    9482           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9483             : {
    9484             :         struct spdk_xattr       *xattr;
    9485           4 :         int                     count = 0;
    9486             :         /* First pass: count the entries so the trailing array can be sized. */
    9487          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9488           8 :                 count++;
    9489             :         }
    9490             :         /* calloc zeroes (*names)->count, which the second pass uses as its write index. */
    9491           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9492           4 :         if (*names == NULL) {
    9493           0 :                 return -ENOMEM;
    9494             :         }
    9495             :         /* Second pass: store the name pointers; the strings themselves are not duplicated. */
    9496          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9497           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    9498             :         }
    9499             : 
    9500           4 :         return 0;
    9501             : }
    9502             : 
    9503             : int /* Public listing of the user (non-internal) xattr names; *names is allocated by blob_get_xattr_names(). */
    9504           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9505             : {
    9506           4 :         blob_verify_md_op(blob);
    9507             : 
    9508           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    9509             : }
    9510             : 
    9511             : uint32_t /* Number of entries stored in names; names must not be NULL (asserted). */
    9512           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9513             : {
    9514           4 :         assert(names != NULL);
    9515             : 
    9516           4 :         return names->count;
    9517             : }
    9518             : 
    9519             : const char * /* Name stored at index, or NULL when index is out of range. */
    9520           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9521             : {
    9522           8 :         if (index >= names->count) { /* NOTE(review): names is dereferenced without the NULL assert that spdk_xattr_names_get_count() has */
    9523           0 :                 return NULL;
    9524             :         }
    9525             : 
    9526           8 :         return names->names[index]; /* aliases the xattr name string stored by blob_get_xattr_names() */
    9527             : }
    9528             : 
    9529             : void /* Release a name list; only the container is freed, not the name strings it points at. */
    9530           4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    9531             : {
    9532           4 :         free(names);
    9533           4 : }
    9534             : 
    9535             : struct spdk_bs_type /* Returns the bstype of the blobstore by value (a copy). */
    9536           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9537             : {
    9538           2 :         return bs->bstype;
    9539             : }
    9540             : 
    9541             : void /* Overwrites the in-memory bstype of the blobstore with the given value; nothing else is done here. */
    9542           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9543             : {
    9544           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9545           0 : }
    9546             : 
    9547             : bool /* True when either the data_ro or the md_ro flag of the blob is set; blob must not be NULL (asserted). */
    9548          48 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9549             : {
    9550          48 :         assert(blob != NULL);
    9551          48 :         return (blob->data_ro || blob->md_ro);
    9552             : }
    9553             : 
    9554             : bool /* True when bs_get_snapshot_entry() finds an entry for this blob id in its blobstore. */
    9555          52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9556             : {
    9557             :         struct spdk_blob_list *snapshot_entry;
    9558             : 
    9559          52 :         assert(blob != NULL);
    9560             : 
    9561          52 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9562          52 :         if (snapshot_entry == NULL) {
    9563          28 :                 return false;
    9564             :         }
    9565             : 
    9566          24 :         return true;
    9567             : }
    9568             : 
    9569             : bool /* True when parent_id is neither SPDK_BLOBID_INVALID nor SPDK_BLOBID_EXTERNAL_SNAPSHOT. */
    9570          68 : spdk_blob_is_clone(struct spdk_blob *blob)
    9571             : {
    9572          68 :         assert(blob != NULL);
    9573             : 
    9574          68 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9575          52 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9576          40 :                 assert(spdk_blob_is_thin_provisioned(blob)); /* a blob with such a parent is expected to be thin provisioned */
    9577          40 :                 return true;
    9578             :         }
    9579             : 
    9580          28 :         return false;
    9581             : }
    9582             : 
    9583             : bool /* True when the SPDK_BLOB_THIN_PROV bit is set in invalid_flags; blob must not be NULL (asserted). */
    9584       36536 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9585             : {
    9586       36536 :         assert(blob != NULL);
    9587       36536 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV); /* !! collapses the masked value to 0 or 1 */
    9588             : }
    9589             : 
    9590             : bool /* Public wrapper that returns the result of blob_is_esnap_clone() unchanged. */
    9591       40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    9592             : {
    9593       40888 :         return blob_is_esnap_clone(blob);
    9594             : }
    9595             : 
    9596             : static void /* Reconcile blob->clear_method with the clear method encoded in md_ro_flags. */
    9597        3434 : blob_update_clear_method(struct spdk_blob *blob)
    9598             : {
    9599             :         enum blob_clear_method stored_cm;
    9600             : 
    9601        3434 :         assert(blob != NULL);
    9602             : 
    9603             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9604             :          * in metadata previously.  If something other than the default was
    9605             :          * specified, ignore the stored value and use what was passed in.
    9606             :          */
    9607        3434 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9608             : 
    9609        3434 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9610        3434 :                 blob->clear_method = stored_cm;
    9611           0 :         } else if (blob->clear_method != stored_cm) { /* explicit method wins; the mismatch is only logged */
    9612           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9613             :                              blob->clear_method, stored_cm);
    9614             :         }
    9615        3434 : }
    9616             : 
    9617             : spdk_blob_id /* Id of the snapshot whose clone list contains blob_id, or SPDK_BLOBID_INVALID when no list contains it. */
    9618         258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9619             : {
    9620         258 :         struct spdk_blob_list *snapshot_entry = NULL;
    9621         258 :         struct spdk_blob_list *clone_entry = NULL;
    9622             :         /* Scan every snapshot in bs->snapshots and, for each, every entry of its clones list. */
    9623         494 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9624         732 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9625         496 :                         if (clone_entry->id == blob_id) {
    9626         168 :                                 return snapshot_entry->id;
    9627             :                         }
    9628             :                 }
    9629             :         }
    9630             : 
    9631          90 :         return SPDK_BLOBID_INVALID;
    9632             : }
    9633             : 
    9634             : int /* Copy the clone ids of snapshot blobid into ids. *count is the capacity of ids on entry and the clone count on return. */
    9635         196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9636             :                      size_t *count)
    9637             : {
    9638             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9639             :         size_t n;
    9640             : 
    9641         196 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9642         196 :         if (snapshot_entry == NULL) { /* no snapshot entry for blobid: success with zero clones */
    9643          28 :                 *count = 0;
    9644          28 :                 return 0;
    9645             :         }
    9646             :         /* With no buffer or too small a buffer, *count is still set to the required size before -ENOMEM is returned. */
    9647         168 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9648           8 :                 *count = snapshot_entry->clone_count;
    9649           8 :                 return -ENOMEM;
    9650             :         }
    9651         160 :         *count = snapshot_entry->clone_count;
    9652             : 
    9653         160 :         n = 0;
    9654         340 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9655         180 :                 ids[n++] = clone_entry->id;
    9656             :         }
    9657             : 
    9658         160 :         return 0;
    9659             : }
    9660             : 
    9661             : static void /* Fill the in-memory blobstore from ctx->super during a grow-load, then go on to bs_load_read_used_pages(). */
    9662           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9663             : {
    9664             :         int rc;
    9665             :         /* A zero size in the super block is replaced by the device size (blockcnt * blocklen). */
    9666           4 :         if (ctx->super->size == 0) {
    9667           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9668             :         }
    9669             :         /* A zero io_unit_size is replaced by SPDK_BS_PAGE_SIZE. */
    9670           4 :         if (ctx->super->io_unit_size == 0) {
    9671           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9672             :         }
    9673             : 
    9674             :         /* Parse the super block */
    9675           4 :         ctx->bs->clean = 1;
    9676           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9677           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9678           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    9679           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) { /* the shift is only assigned when pages_per_cluster is a power of two */
    9680           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    9681             :         }
    9682           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9683           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9684           4 :         if (rc < 0) { /* any resize failure is reported as -ENOMEM */
    9685           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9686           0 :                 return;
    9687             :         }
    9688           4 :         ctx->bs->md_start = ctx->super->md_start;
    9689           4 :         ctx->bs->md_len = ctx->super->md_len;
    9690           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9691           4 :         if (rc < 0) {
    9692           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9693           0 :                 return;
    9694             :         }
    9695             :         /* Data clusters = all clusters minus the clusters needed to hold md_start + md_len pages (rounded up). */
    9696           8 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9697           4 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9698           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    9699           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9700             :         /* A missing used-blobid mask or a cleared clean flag is treated as an unclean blobstore: fail with -EIO. */
    9701           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9702           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9703           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9704           0 :                 return;
    9705             :         } else {
    9706           4 :                 bs_load_read_used_pages(ctx);
    9707             :         }
    9708             : }
    9709             : 
    9710             : static void /* Completion of the super block write issued by bs_load_grow_used_clusters_write_cpl(). */
    9711           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9712             : {
    9713           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9714             : 
    9715           4 :         if (bserrno != 0) {
    9716           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9717           0 :                 return;
    9718             :         }
    9719           4 :         bs_load_grow_continue(ctx); /* super block written: carry on with the load */
    9720             : }
    9721             : 
    9722             : static void /* Completion of the used-clusters mask write: release the mask buffer, then write the super block. */
    9723           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9724             : {
    9725           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9726             : 
    9727           4 :         if (bserrno != 0) { /* NOTE(review): ctx->mask is not freed on this path; confirm bs_load_ctx_fail() releases it */
    9728           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9729           0 :                 return;
    9730             :         }
    9731             : 
    9732           4 :         spdk_free(ctx->mask); /* the pointer is not reset to NULL here */
    9733             :         /* Write ctx->super at the LBA of page 0; the length is sizeof(*ctx->super) converted to LBAs. */
    9734           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9735           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9736             :                               bs_load_grow_super_write_cpl, ctx);
    9737             : }
    9738             : 
    9739             : static void /* Completion of the used-clusters mask read: set the mask length to the new cluster count and write it back. */
    9740           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9741             : {
    9742           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9743             :         uint64_t                lba, lba_count;
    9744             :         uint64_t                dev_size;
    9745             :         uint64_t                total_clusters;
    9746             : 
    9747           4 :         if (bserrno != 0) {
    9748           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9749           0 :                 return;
    9750             :         }
    9751             : 
    9752             :         /* The type must be correct */
    9753           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9754             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9755           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9756             :                                              struct spdk_blob_md_page) * 8));
    9757           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9758           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9759           4 :         ctx->mask->length = total_clusters; /* only the length field changes; the mask bits that were read are kept */
    9760             :         /* Write back the same page range (used_cluster_mask_start, used_cluster_mask_len) from the super block. */
    9761           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9762           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9763           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9764             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9765             : }
    9766             : 
    9767             : static void /* Grow the blobstore to the device size when the super block size is smaller and the larger cluster mask fits. */
    9768           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9769             : {
    9770             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9771             :         uint64_t lba, lba_count, mask_size;
    9772             : 
    9773           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9774           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9775           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) + /* mask header plus one bit per cluster ... */
    9776           4 :                                 spdk_divide_round_up(total_clusters, 8),
    9777             :                                 SPDK_BS_PAGE_SIZE); /* ... rounded up to whole pages */
    9778           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start; /* pages between the start of the cluster mask and the start of the blobid mask */
    9779             :         /* Not necessary to grow, or no room to grow: continue the load with the existing size */
    9780           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9781           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9782           0 :                 bs_load_grow_continue(ctx);
    9783           0 :                 return;
    9784             :         }
    9785             : 
    9786           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9787             :         /* Update the in-memory super block and recompute its CRC after the change. */
    9788           4 :         ctx->super->size = dev_size;
    9789           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9790           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9791             :         /* Allocate a zeroed buffer for the enlarged mask, 0x1000-aligned, with the SPDK_MALLOC_DMA flag. */
    9792           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    9793           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    9794             :                                  SPDK_MALLOC_DMA);
    9795           4 :         if (!ctx->mask) {
    9796           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9797           0 :                 return;
    9798             :         }
    9799           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9800           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len); /* uses the new mask length assigned above */
    9801           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9802             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9803             : }
    9804             : 
    9805             : static void /* Completion callback: validate ctx->super and, if valid, attempt to grow the blobstore. */
    9806           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9807             : {
    9808           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9809             :         int rc;
    9810             :         /* NOTE(review): bserrno is never checked here, unlike the sibling completions; a failed I/O is only caught if bs_super_validate() rejects the buffer. */
    9811           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9812           4 :         if (rc != 0) {
    9813           0 :                 bs_load_ctx_fail(ctx, rc);
    9814           0 :                 return;
    9815             :         }
    9816             : 
    9817           4 :         bs_load_try_to_grow(ctx);
    9818             : }
    9819             : 
    9820             : struct spdk_bs_grow_ctx { /* State for a live grow; torn down by bs_grow_live_done(). */
    9821             :         struct spdk_blob_store          *bs; /* blobstore being grown */
    9822             :         struct spdk_bs_super_block      *super; /* super block buffer; released with spdk_free() */
    9823             : 
    9824             :         struct spdk_bit_pool            *new_used_clusters; /* freed by bs_grow_live_done() only when the grow fails */
    9825             :         struct spdk_bs_md_mask          *new_used_clusters_mask; /* scratch mask; released with free() */
    9826             : 
    9827             :         spdk_bs_sequence_t              *seq; /* finished with the final status in bs_grow_live_done() */
    9828             : };
    9829             : 
    9830             : static void /* Finish a live grow with status bserrno and release the context. */
    9831          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9832             : {
    9833          32 :         if (bserrno != 0) { /* on failure the new bit pool is freed here; on success it is left alone */
    9834           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9835             :         }
    9836             :         /* The sequence is finished before the remaining buffers and ctx itself are released. */
    9837          32 :         bs_sequence_finish(ctx->seq, bserrno);
    9838          32 :         free(ctx->new_used_clusters_mask);
    9839          32 :         spdk_free(ctx->super);
    9840          32 :         free(ctx);
    9841          32 : }
    9842             : 
    9843             : static void
    9844           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9845             : {
    9846           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9847           8 :         struct spdk_blob_store *bs = ctx->bs;
    9848             :         uint64_t total_clusters;
    9849             : 
    9850           8 :         if (bserrno != 0) {
    9851           0 :                 bs_grow_live_done(ctx, bserrno);
    9852           0 :                 return;
    9853             :         }
    9854             : 
    9855             :         /*
    9856             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9857             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9858             :          * yet execute.
    9859             :          * When cleanly unloaded, the used md pages will be written out.
    9860             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9861             :          * filling out the used_clusters with new size and writing it out.
    9862             :          */
    9863           8 :         bs->clean = 0;
    9864             : 
    9865             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9866             :          * that require to do so. */
    9867           8 :         spdk_spin_lock(&bs->used_lock);
    9868             : 
    9869           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9870             : 
    9871           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9872           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9873             : 
    9874           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9875           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9876             : 
    9877           8 :         spdk_bit_pool_free(&bs->used_clusters);
    9878           8 :         bs->used_clusters = ctx->new_used_clusters;
    9879             : 
    9880           8 :         bs->total_clusters = total_clusters;
    9881          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9882           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9883             : 
    9884           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9885           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9886           8 :         spdk_spin_unlock(&bs->used_lock);
    9887             : 
    9888           8 :         bs_grow_live_done(ctx, 0);
    9889             : }
    9890             : 
    9891             : static void
    9892          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9893             : {
    9894          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9895             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9896             :         int rc;
    9897             : 
    9898          32 :         if (bserrno != 0) {
    9899           0 :                 bs_grow_live_done(ctx, bserrno);
    9900           0 :                 return;
    9901             :         }
    9902             : 
    9903          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9904          32 :         if (rc != 0) {
    9905           4 :                 bs_grow_live_done(ctx, rc);
    9906           4 :                 return;
    9907             :         }
    9908             : 
    9909          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9910          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    9911          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9912          28 :                                 spdk_divide_round_up(total_clusters, 8),
    9913             :                                 SPDK_BS_PAGE_SIZE);
    9914          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9915             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9916          28 :         if (dev_size == ctx->super->size) {
    9917          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9918          16 :                 bs_grow_live_done(ctx, 0);
    9919          16 :                 return;
    9920             :         }
    9921             :         /*
    9922             :          * Blobstore cannot be shrunk, so check before if:
    9923             :          * - new size of the device is smaller than size in super_block
    9924             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9925             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9926             :          */
    9927          12 :         if (dev_size < ctx->super->size ||
    9928          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9929             :             used_cluster_mask_len > max_used_cluster_mask) {
    9930           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9931           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    9932           4 :                 return;
    9933             :         }
    9934             : 
    9935           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9936             : 
    9937           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9938           8 :         if (!ctx->new_used_clusters_mask) {
    9939           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9940           0 :                 return;
    9941             :         }
    9942           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9943           8 :         if (!ctx->new_used_clusters) {
    9944           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9945           0 :                 return;
    9946             :         }
    9947             : 
    9948           8 :         ctx->super->clean = 0;
    9949           8 :         ctx->super->size = dev_size;
    9950           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9951           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9952             : }
    9953             : 
    9954             : void
    9955          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9956             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9957             : {
    9958          32 :         struct spdk_bs_cpl      cpl;
    9959             :         struct spdk_bs_grow_ctx *ctx;
    9960             : 
    9961          32 :         assert(spdk_get_thread() == bs->md_thread);
    9962             : 
    9963          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9964             : 
    9965          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9966          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9967          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9968             : 
    9969          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9970          32 :         if (!ctx) {
    9971           0 :                 cb_fn(cb_arg, -ENOMEM);
    9972           0 :                 return;
    9973             :         }
    9974          32 :         ctx->bs = bs;
    9975             : 
    9976          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9977             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    9978          32 :         if (!ctx->super) {
    9979           0 :                 free(ctx);
    9980           0 :                 cb_fn(cb_arg, -ENOMEM);
    9981           0 :                 return;
    9982             :         }
    9983             : 
    9984          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9985          32 :         if (!ctx->seq) {
    9986           0 :                 spdk_free(ctx->super);
    9987           0 :                 free(ctx);
    9988           0 :                 cb_fn(cb_arg, -ENOMEM);
    9989           0 :                 return;
    9990             :         }
    9991             : 
    9992             :         /* Read the super block */
    9993          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9994          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9995             :                              bs_grow_live_load_super_cpl, ctx);
    9996             : }
    9997             : 
    9998             : void
    9999           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
   10000             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
   10001             : {
   10002           4 :         struct spdk_blob_store  *bs;
   10003           4 :         struct spdk_bs_cpl      cpl;
   10004           4 :         struct spdk_bs_load_ctx *ctx;
   10005           4 :         struct spdk_bs_opts     opts = {};
   10006             :         int err;
   10007             : 
   10008           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
   10009             : 
   10010           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
   10011           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
   10012           0 :                 dev->destroy(dev);
   10013           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10014           0 :                 return;
   10015             :         }
   10016             : 
   10017           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
   10018           4 :         if (o) {
   10019           4 :                 if (bs_opts_copy(o, &opts)) {
   10020           0 :                         return;
   10021             :                 }
   10022             :         }
   10023             : 
   10024           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
   10025           0 :                 dev->destroy(dev);
   10026           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10027           0 :                 return;
   10028             :         }
   10029             : 
   10030           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
   10031           4 :         if (err) {
   10032           0 :                 dev->destroy(dev);
   10033           0 :                 cb_fn(cb_arg, NULL, err);
   10034           0 :                 return;
   10035             :         }
   10036             : 
   10037           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
   10038           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
   10039           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
   10040           4 :         cpl.u.bs_handle.bs = bs;
   10041             : 
   10042           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
   10043           4 :         if (!ctx->seq) {
   10044           0 :                 spdk_free(ctx->super);
   10045           0 :                 free(ctx);
   10046           0 :                 bs_free(bs);
   10047           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
   10048           0 :                 return;
   10049             :         }
   10050             : 
   10051             :         /* Read the super block */
   10052           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
   10053           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
   10054             :                              bs_grow_load_super_cpl, ctx);
   10055             : }
   10056             : 
   10057             : int
   10058          24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
   10059             : {
   10060          24 :         if (!blob_is_esnap_clone(blob)) {
   10061          12 :                 return -EINVAL;
   10062             :         }
   10063             : 
   10064          12 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10065             : }
   10066             : 
   10067             : struct spdk_io_channel *
   10068        8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10069             : {
   10070        8840 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10071        8840 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10072        8840 :         struct blob_esnap_channel       find = {};
   10073             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10074             : 
   10075        8840 :         find.blob_id = blob->id;
   10076        8840 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10077        8840 :         if (spdk_likely(esnap_channel != NULL)) {
   10078        8796 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10079             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10080        8796 :                 return esnap_channel->channel;
   10081             :         }
   10082             : 
   10083          44 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10084             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10085             : 
   10086          44 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10087          44 :         if (esnap_channel == NULL) {
   10088           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10089             :                                find.blob_id);
   10090           0 :                 return NULL;
   10091             :         }
   10092          44 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10093          44 :         if (esnap_channel->channel == NULL) {
   10094           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10095           0 :                 free(esnap_channel);
   10096           0 :                 return NULL;
   10097             :         }
   10098          44 :         esnap_channel->blob_id = find.blob_id;
   10099          44 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10100          44 :         if (spdk_unlikely(existing != NULL)) {
   10101             :                 /*
   10102             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10103             :                  */
   10104           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10105           0 :                 assert(false);
   10106             : 
   10107             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10108             :                 free(esnap_channel);
   10109             : 
   10110             :                 return existing->channel;
   10111             :         }
   10112             : 
   10113          44 :         return esnap_channel->channel;
   10114             : }
   10115             : 
   10116             : static int
   10117        8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10118             : {
   10119        8816 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10120             : }
   10121             : 
   10122             : struct blob_esnap_destroy_ctx {
   10123             :         spdk_blob_op_with_handle_complete       cb_fn;
   10124             :         void                                    *cb_arg;
   10125             :         struct spdk_blob                        *blob;
   10126             :         struct spdk_bs_dev                      *back_bs_dev;
   10127             :         bool                                    abort_io;
   10128             : };
   10129             : 
   10130             : static void
   10131         152 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10132             : {
   10133         152 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10134         152 :         struct spdk_blob                *blob = ctx->blob;
   10135         152 :         struct spdk_blob_store          *bs = blob->bs;
   10136             : 
   10137         152 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10138             :                       blob->id);
   10139             : 
   10140         152 :         if (ctx->cb_fn != NULL) {
   10141         136 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10142             :         }
   10143         152 :         free(ctx);
   10144             : 
   10145         152 :         bs->esnap_channels_unloading--;
   10146         152 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10147           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10148             :         }
   10149         152 : }
   10150             : 
   10151             : static void
   10152         160 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10153             : {
   10154         160 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10155         160 :         struct spdk_blob                *blob = ctx->blob;
   10156         160 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10157         160 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10158         160 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10159             :         struct blob_esnap_channel       *esnap_channel;
   10160         160 :         struct blob_esnap_channel       find = {};
   10161             : 
   10162         160 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10163             : 
   10164         160 :         find.blob_id = blob->id;
   10165         160 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10166         160 :         if (esnap_channel != NULL) {
   10167          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10168             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10169          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10170             : 
   10171          12 :                 if (ctx->abort_io) {
   10172             :                         spdk_bs_user_op_t *op, *tmp;
   10173             : 
   10174           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10175           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10176           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10177           0 :                                         bs_user_op_abort(op, -EIO);
   10178             :                                 }
   10179             :                         }
   10180             :                 }
   10181             : 
   10182          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10183          12 :                 free(esnap_channel);
   10184             :         }
   10185             : 
   10186         160 :         spdk_for_each_channel_continue(i, 0);
   10187         160 : }
   10188             : 
   10189             : /*
   10190             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10191             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10192             :  */
   10193             : static void
   10194         500 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10195             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10196             : {
   10197             :         struct blob_esnap_destroy_ctx   *ctx;
   10198             : 
   10199         500 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10200         348 :                 if (cb_fn != NULL) {
   10201         348 :                         cb_fn(cb_arg, blob, 0);
   10202             :                 }
   10203         348 :                 return;
   10204             :         }
   10205             : 
   10206         152 :         ctx = calloc(1, sizeof(*ctx));
   10207         152 :         if (ctx == NULL) {
   10208           0 :                 if (cb_fn != NULL) {
   10209           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10210             :                 }
   10211           0 :                 return;
   10212             :         }
   10213         152 :         ctx->cb_fn = cb_fn;
   10214         152 :         ctx->cb_arg = cb_arg;
   10215         152 :         ctx->blob = blob;
   10216         152 :         ctx->back_bs_dev = blob->back_bs_dev;
   10217         152 :         ctx->abort_io = abort_io;
   10218             : 
   10219         152 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10220             :                       blob->id);
   10221             : 
   10222         152 :         blob->bs->esnap_channels_unloading++;
   10223         152 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10224             :                               blob_esnap_destroy_channels_done);
   10225             : }
   10226             : 
   10227             : /*
   10228             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10229             :  * bs_channel is destroyed.
   10230             :  */
   10231             : static void
   10232        1029 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10233             : {
   10234             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10235             : 
   10236        1029 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10237             : 
   10238        1029 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10239             :                       spdk_thread_get_name(spdk_get_thread()));
   10240        1061 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10241             :                         esnap_channel_tmp) {
   10242          32 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10243             :                               ": destroying one channel in thread %s\n",
   10244             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10245          32 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10246          32 :                 spdk_put_io_channel(esnap_channel->channel);
   10247          32 :                 free(esnap_channel);
   10248             :         }
   10249        1029 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10250             :                       spdk_thread_get_name(spdk_get_thread()));
   10251        1029 : }
   10252             : 
   10253             : static void
   10254          28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10255             : {
   10256          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10257             : 
   10258          28 :         if (bserrno != 0) {
   10259             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10260           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10261             :                             bserrno);
   10262             :         }
   10263          28 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10264          28 :         free(ctx);
   10265          28 : }
   10266             : 
   10267             : static void
   10268          28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10269             : {
   10270          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10271             :         int rc;
   10272             : 
   10273          28 :         if (bserrno != 0) {
   10274           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10275             :                             blob->id, bserrno);
   10276           0 :                 ctx->bserrno = bserrno;
   10277           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10278           0 :                 return;
   10279             :         }
   10280             : 
   10281          28 :         if (blob->back_bs_dev != NULL) {
   10282          28 :                 blob_back_bs_dev_unref(blob);
   10283             :         }
   10284             : 
   10285          28 :         if (ctx->parent_refs_cb_fn) {
   10286          20 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10287          20 :                 if (rc != 0) {
   10288           0 :                         ctx->bserrno = rc;
   10289           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10290           0 :                         return;
   10291             :                 }
   10292             :         }
   10293             : 
   10294          28 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10295          28 :         blob->back_bs_dev = ctx->back_bs_dev;
   10296          28 :         ctx->bserrno = 0;
   10297             : 
   10298          28 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10299             : }
   10300             : 
   10301             : static void
   10302          28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10303             : {
   10304          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10305          28 :         struct spdk_blob        *blob = ctx->blob;
   10306             : 
   10307          28 :         if (bserrno != 0) {
   10308           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10309             :                             bserrno);
   10310           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10311           0 :                 free(ctx);
   10312           0 :                 return;
   10313             :         }
   10314             : 
   10315             :         /*
   10316             :          * This does not prevent future reads from the esnap device because any future IO will
   10317             :          * lazily create a new esnap IO channel.
   10318             :          */
   10319          28 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10320             : }
   10321             : 
   10322             : void
   10323           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10324             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10325             : {
   10326           8 :         if (!blob_is_esnap_clone(blob)) {
   10327           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10328           0 :                 cb_fn(cb_arg, -EINVAL);
   10329           0 :                 return;
   10330             :         }
   10331             : 
   10332           8 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10333             : }
   10334             : 
   10335             : struct spdk_bs_dev *
   10336           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10337             : {
   10338           4 :         if (!blob_is_esnap_clone(blob)) {
   10339           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10340           0 :                 return NULL;
   10341             :         }
   10342             : 
   10343           4 :         return blob->back_bs_dev;
   10344             : }
   10345             : 
   10346             : bool
   10347          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10348             : {
   10349          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10350           4 :                 return true;
   10351             :         }
   10352          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10353          12 :                 return false;
   10354             :         }
   10355             : 
   10356          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10357             : }
   10358             : 
   10359           3 : SPDK_LOG_REGISTER_COMPONENT(blob) /* component name passed to the SPDK_DEBUGLOG(blob, ...) calls in this file */
   10360           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap) /* component name passed to the SPDK_DEBUGLOG(blob_esnap, ...) calls in this file */

Generated by: LCOV version 1.15