Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2017 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/blob.h"
10 : #include "spdk/crc32.h"
11 : #include "spdk/env.h"
12 : #include "spdk/queue.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/bit_array.h"
15 : #include "spdk/bit_pool.h"
16 : #include "spdk/likely.h"
17 : #include "spdk/util.h"
18 : #include "spdk/string.h"
19 :
20 : #include "spdk_internal/assert.h"
21 : #include "spdk/log.h"
22 :
23 : #include "blobstore.h"
24 :
25 : #define BLOB_CRC32C_INITIAL 0xffffffffUL
26 :
27 : static int bs_register_md_thread(struct spdk_blob_store *bs);
28 : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
29 : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
30 : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
31 : uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
32 : spdk_blob_op_complete cb_fn, void *cb_arg);
33 : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
34 : uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
35 :
36 : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
37 : uint16_t value_len, bool internal);
38 : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
39 : const void **value, size_t *value_len, bool internal);
40 : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
41 :
42 : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
43 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
44 : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
45 :
46 : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
47 :
48 : /*
49 : * External snapshots require a channel per thread per esnap bdev. The tree
50 : * is populated lazily as blob IOs are handled by the back_bs_dev. When the
51 : * owning bs channel is destroyed, all the channels in the tree are destroyed.
52 : */
53 :
54 : struct blob_esnap_channel {
55 : RB_ENTRY(blob_esnap_channel) node;
56 : spdk_blob_id blob_id;
57 : struct spdk_io_channel *channel;
58 : };
59 :
60 : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
61 : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
62 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
63 : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
64 : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
65 10225 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
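/*
 * Illustration (annotation, not part of the source): a minimal sketch of how
 * the lazily populated tree above can be consulted on the I/O path. It assumes
 * the tree head is declared elsewhere as
 * RB_HEAD(blob_esnap_channel_tree, blob_esnap_channel); the helper name is
 * hypothetical, and the comparator is assumed to order entries by blob_id.
 */
static inline struct spdk_io_channel *
example_esnap_channel_find(struct blob_esnap_channel_tree *tree, spdk_blob_id blob_id)
{
	struct blob_esnap_channel find = { .blob_id = blob_id };
	struct blob_esnap_channel *entry;

	entry = RB_FIND(blob_esnap_channel_tree, tree, &find);
	return entry != NULL ? entry->channel : NULL;
}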
66 :
67 : static inline bool
68 49534 : blob_is_esnap_clone(const struct spdk_blob *blob)
69 : {
70 49534 : assert(blob != NULL);
71 49534 : return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
72 : }
73 :
74 : static int
75 2273 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
76 : {
77 2273 : assert(blob1 != NULL && blob2 != NULL);
78 2273 : return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
79 : }
80 :
81 14685 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
82 :
83 : static void
84 36883 : blob_verify_md_op(struct spdk_blob *blob)
85 : {
86 36883 : assert(blob != NULL);
87 36883 : assert(spdk_get_thread() == blob->bs->md_thread);
88 36883 : assert(blob->state != SPDK_BLOB_STATE_LOADING);
89 36883 : }
90 :
91 : static struct spdk_blob_list *
92 3816 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
93 : {
94 3816 : struct spdk_blob_list *snapshot_entry = NULL;
95 :
96 4796 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
97 1748 : if (snapshot_entry->id == blobid) {
98 768 : break;
99 : }
100 : }
101 :
102 3816 : return snapshot_entry;
103 : }
104 :
105 : static void
106 2900 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
107 : {
108 2900 : assert(spdk_spin_held(&bs->used_lock));
109 2900 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
110 2900 : assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
111 :
112 2900 : spdk_bit_array_set(bs->used_md_pages, page);
113 2900 : }
114 :
115 : static void
116 2196 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
117 : {
118 2196 : assert(spdk_spin_held(&bs->used_lock));
119 2196 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
120 2196 : assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
121 :
122 2196 : spdk_bit_array_clear(bs->used_md_pages, page);
123 2196 : }
124 :
125 : static uint32_t
126 8220 : bs_claim_cluster(struct spdk_blob_store *bs)
127 : {
128 : uint32_t cluster_num;
129 :
130 8220 : assert(spdk_spin_held(&bs->used_lock));
131 :
132 8220 : cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
133 8220 : if (cluster_num == UINT32_MAX) {
134 0 : return UINT32_MAX;
135 : }
136 :
137 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
138 8220 : bs->num_free_clusters--;
139 :
140 8220 : return cluster_num;
141 : }
142 :
143 : static void
144 2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
145 : {
146 2399 : assert(spdk_spin_held(&bs->used_lock));
147 2399 : assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
148 2399 : assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
149 2399 : assert(bs->num_free_clusters < bs->total_clusters);
150 :
151 2399 : SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
152 :
153 2399 : spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
154 2399 : bs->num_free_clusters++;
155 2399 : }
156 :
157 : static int
158 8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
159 : {
160 8220 : uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
161 :
162 8220 : blob_verify_md_op(blob);
163 :
164 8220 : if (*cluster_lba != 0) {
165 4 : return -EEXIST;
166 : }
167 :
168 8216 : *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
169 8216 : blob->active.num_allocated_clusters++;
170 :
171 8216 : return 0;
172 : }
173 :
174 : static int
175 8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
176 : uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
177 : {
178 8220 : uint32_t *extent_page = 0;
179 :
180 8220 : assert(spdk_spin_held(&blob->bs->used_lock));
181 :
182 8220 : *cluster = bs_claim_cluster(blob->bs);
183 8220 : if (*cluster == UINT32_MAX) {
184 : /* No more free clusters. Cannot satisfy the request */
185 0 : return -ENOSPC;
186 : }
187 :
188 8220 : if (blob->use_extent_table) {
189 4168 : extent_page = bs_cluster_to_extent_page(blob, cluster_num);
190 4168 : if (*extent_page == 0) {
191 : /* Extent page shall never occupy md_page so start the search from 1 */
192 728 : if (*lowest_free_md_page == 0) {
193 726 : *lowest_free_md_page = 1;
194 : }
195 : /* No extent_page is allocated for the cluster */
196 728 : *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
197 : *lowest_free_md_page);
198 728 : if (*lowest_free_md_page == UINT32_MAX) {
199 : /* No more free md pages. Cannot satisfy the request */
200 0 : bs_release_cluster(blob->bs, *cluster);
201 0 : return -ENOSPC;
202 : }
203 728 : bs_claim_md_page(blob->bs, *lowest_free_md_page);
204 : }
205 : }
206 :
207 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
208 : blob->id);
209 :
210 8220 : if (update_map) {
211 7404 : blob_insert_cluster(blob, cluster_num, *cluster);
212 7404 : if (blob->use_extent_table && *extent_page == 0) {
213 644 : *extent_page = *lowest_free_md_page;
214 : }
215 : }
216 :
217 8220 : return 0;
218 : }
219 :
220 : static void
221 5570 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
222 : {
223 5570 : xattrs->count = 0;
224 5570 : xattrs->names = NULL;
225 5570 : xattrs->ctx = NULL;
226 5570 : xattrs->get_value = NULL;
227 5570 : }
228 :
229 : void
230 3680 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
231 : {
232 3680 : if (!opts) {
233 0 : SPDK_ERRLOG("opts should not be NULL\n");
234 0 : return;
235 : }
236 :
237 3680 : if (!opts_size) {
238 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
239 0 : return;
240 : }
241 :
242 3680 : memset(opts, 0, opts_size);
243 3680 : opts->opts_size = opts_size;
244 :
245 : #define FIELD_OK(field) \
246 : offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
247 :
248 : #define SET_FIELD(field, value) \
249 : if (FIELD_OK(field)) { \
250 : opts->field = value; \
251 : } \
252 :
253 3680 : SET_FIELD(num_clusters, 0);
254 3680 : SET_FIELD(thin_provision, false);
255 3680 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
256 :
257 3680 : if (FIELD_OK(xattrs)) {
258 3680 : blob_xattrs_init(&opts->xattrs);
259 : }
260 :
261 3680 : SET_FIELD(use_extent_table, true);
262 :
263 : #undef FIELD_OK
264 : #undef SET_FIELD
265 : }
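/*
 * Usage sketch (annotation, not part of the source): the opts_size handshake
 * above preserves ABI compatibility. SET_FIELD()/FIELD_OK() only touch fields
 * that fit within the size the caller was compiled with, so binaries built
 * against an older, smaller struct keep working. The helper name below is
 * hypothetical; the SPDK calls are real.
 */
static void
example_create_thin_blob(struct spdk_blob_store *bs,
			 spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
{
	struct spdk_blob_opts opts;

	spdk_blob_opts_init(&opts, sizeof(opts)); /* always pass sizeof(opts) */
	opts.thin_provision = true;
	opts.num_clusters = 16;
	spdk_bs_create_blob_ext(bs, &opts, cb_fn, cb_arg);
}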
266 :
267 : void
268 3470 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
269 : {
270 3470 : if (!opts) {
271 0 : SPDK_ERRLOG("opts should not be NULL\n");
272 0 : return;
273 : }
274 :
275 3470 : if (!opts_size) {
276 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
277 0 : return;
278 : }
279 :
280 3470 : memset(opts, 0, opts_size);
281 3470 : opts->opts_size = opts_size;
282 :
283 : #define FIELD_OK(field) \
284 : offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
285 :
286 : #define SET_FIELD(field, value) \
287 : if (FIELD_OK(field)) { \
288 : opts->field = value; \
289 : } \
290 :
291 3470 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
292 :
293 : #undef FIELD_OK
294 : #undef SET_FIELD
295 : }
296 :
297 : static struct spdk_blob *
298 5356 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
299 : {
300 : struct spdk_blob *blob;
301 :
302 5356 : blob = calloc(1, sizeof(*blob));
303 5356 : if (!blob) {
304 0 : return NULL;
305 : }
306 :
307 5356 : blob->id = id;
308 5356 : blob->bs = bs;
309 :
310 5356 : blob->parent_id = SPDK_BLOBID_INVALID;
311 :
312 5356 : blob->state = SPDK_BLOB_STATE_DIRTY;
313 5356 : blob->extent_rle_found = false;
314 5356 : blob->extent_table_found = false;
315 5356 : blob->active.num_pages = 1;
316 5356 : blob->active.pages = calloc(1, sizeof(*blob->active.pages));
317 5356 : if (!blob->active.pages) {
318 0 : free(blob);
319 0 : return NULL;
320 : }
321 :
322 5356 : blob->active.pages[0] = bs_blobid_to_page(id);
323 :
324 5356 : TAILQ_INIT(&blob->xattrs);
325 5356 : TAILQ_INIT(&blob->xattrs_internal);
326 5356 : TAILQ_INIT(&blob->pending_persists);
327 5356 : TAILQ_INIT(&blob->persists_to_complete);
328 :
329 5356 : return blob;
330 : }
331 :
332 : static void
333 10712 : xattrs_free(struct spdk_xattr_tailq *xattrs)
334 : {
335 : struct spdk_xattr *xattr, *xattr_tmp;
336 :
337 12466 : TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
338 1754 : TAILQ_REMOVE(xattrs, xattr, link);
339 1754 : free(xattr->name);
340 1754 : free(xattr->value);
341 1754 : free(xattr);
342 : }
343 10712 : }
344 :
345 : static void
346 5356 : blob_free(struct spdk_blob *blob)
347 : {
348 5356 : assert(blob != NULL);
349 5356 : assert(TAILQ_EMPTY(&blob->pending_persists));
350 5356 : assert(TAILQ_EMPTY(&blob->persists_to_complete));
351 :
352 5356 : free(blob->active.extent_pages);
353 5356 : free(blob->clean.extent_pages);
354 5356 : free(blob->active.clusters);
355 5356 : free(blob->clean.clusters);
356 5356 : free(blob->active.pages);
357 5356 : free(blob->clean.pages);
358 :
359 5356 : xattrs_free(&blob->xattrs);
360 5356 : xattrs_free(&blob->xattrs_internal);
361 :
362 5356 : if (blob->back_bs_dev) {
363 1080 : blob->back_bs_dev->destroy(blob->back_bs_dev);
364 : }
365 :
366 5356 : free(blob);
367 5356 : }
368 :
369 : static void
370 320 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
371 : {
372 320 : struct spdk_bs_dev *bs_dev = ctx;
373 :
374 320 : if (bserrno != 0) {
375 : /*
376 : * This is probably due to a memory allocation failure when creating the
377 : * blob_esnap_destroy_ctx before iterating threads.
378 : */
379 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
380 : blob->id, bserrno);
381 0 : assert(false);
382 : }
383 :
384 320 : if (bs_dev == NULL) {
385 : /*
386 : * This check exists to make scanbuild happy.
387 : *
388 : * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
389 : * the blobstore is being loaded. It could also be NULL if there was an error
390 : * opening the esnap device. In each of these cases, no channels could have been
391 : * created because back_bs_dev->create_channel() would have led to a NULL pointer
392 : * deref.
393 : */
394 0 : assert(false);
395 : return;
396 : }
397 :
398 320 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
399 320 : bs_dev->destroy(bs_dev);
400 : }
401 :
402 : static void
403 320 : blob_back_bs_destroy(struct spdk_blob *blob)
404 : {
405 320 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
406 : blob->id);
407 :
408 320 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
409 320 : blob->back_bs_dev);
410 320 : blob->back_bs_dev = NULL;
411 320 : }
412 :
413 : struct blob_parent {
414 : union {
415 : struct {
416 : spdk_blob_id id;
417 : struct spdk_blob *blob;
418 : } snapshot;
419 :
420 : struct {
421 : void *id;
422 : uint32_t id_len;
423 : struct spdk_bs_dev *back_bs_dev;
424 : } esnap;
425 : } u;
426 : };
427 :
428 : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
429 :
430 : struct set_bs_dev_ctx {
431 : struct spdk_blob *blob;
432 : struct spdk_bs_dev *back_bs_dev;
433 :
434 : /*
435 : * This callback is used during a set parent operation to change the references
436 : * to the parent of the blob.
437 : */
438 : set_parent_refs_cb parent_refs_cb_fn;
439 : struct blob_parent *parent_refs_cb_arg;
440 :
441 : spdk_blob_op_complete cb_fn;
442 : void *cb_arg;
443 : int bserrno;
444 : };
445 :
446 : static void
447 28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
448 : set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
449 : spdk_blob_op_complete cb_fn, void *cb_arg)
450 : {
451 : struct set_bs_dev_ctx *ctx;
452 :
453 28 : ctx = calloc(1, sizeof(*ctx));
454 28 : if (ctx == NULL) {
455 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
456 : blob->id);
457 0 : cb_fn(cb_arg, -ENOMEM);
458 0 : return;
459 : }
460 :
461 28 : ctx->parent_refs_cb_fn = parent_refs_cb_fn;
462 28 : ctx->parent_refs_cb_arg = parent_refs_cb_arg;
463 28 : ctx->cb_fn = cb_fn;
464 28 : ctx->cb_arg = cb_arg;
465 28 : ctx->back_bs_dev = back_bs_dev;
466 28 : ctx->blob = blob;
467 :
468 28 : blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
469 : }
470 :
471 : struct freeze_io_ctx {
472 : struct spdk_bs_cpl cpl;
473 : struct spdk_blob *blob;
474 : };
475 :
476 : static void
477 526 : blob_io_sync(struct spdk_io_channel_iter *i)
478 : {
479 526 : spdk_for_each_channel_continue(i, 0);
480 526 : }
481 :
482 : static void
483 514 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
484 : {
485 514 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
486 514 : struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
487 514 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
488 : struct spdk_bs_request_set *set;
489 : struct spdk_bs_user_op_args *args;
490 : spdk_bs_user_op_t *op, *tmp;
491 :
492 518 : TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
493 4 : set = (struct spdk_bs_request_set *)op;
494 4 : args = &set->u.user_op;
495 :
496 4 : if (args->blob == ctx->blob) {
497 4 : TAILQ_REMOVE(&ch->queued_io, op, link);
498 4 : bs_user_op_execute(op);
499 : }
500 : }
501 :
502 514 : spdk_for_each_channel_continue(i, 0);
503 514 : }
504 :
505 : static void
506 1008 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
507 : {
508 1008 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
509 :
510 1008 : ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
511 :
512 1008 : free(ctx);
513 1008 : }
514 :
515 : static void
516 510 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
517 : {
518 : struct freeze_io_ctx *ctx;
519 :
520 510 : blob_verify_md_op(blob);
521 :
522 510 : ctx = calloc(1, sizeof(*ctx));
523 510 : if (!ctx) {
524 0 : cb_fn(cb_arg, -ENOMEM);
525 0 : return;
526 : }
527 :
528 510 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
529 510 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
530 510 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
531 510 : ctx->blob = blob;
532 :
533 : /* Freeze I/O on blob */
534 510 : blob->frozen_refcnt++;
535 :
536 510 : spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
537 : }
538 :
539 : static void
540 498 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
541 : {
542 : struct freeze_io_ctx *ctx;
543 :
544 498 : blob_verify_md_op(blob);
545 :
546 498 : ctx = calloc(1, sizeof(*ctx));
547 498 : if (!ctx) {
548 0 : cb_fn(cb_arg, -ENOMEM);
549 0 : return;
550 : }
551 :
552 498 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
553 498 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
554 498 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
555 498 : ctx->blob = blob;
556 :
557 498 : assert(blob->frozen_refcnt > 0);
558 :
559 498 : blob->frozen_refcnt--;
560 :
561 498 : spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
562 : }
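/*
 * Usage sketch (annotation, not part of the source): freeze/unfreeze are
 * reference counted via frozen_refcnt, so nested metadata operations stack
 * safely. A typical pairing (callbacks elided, names hypothetical):
 *
 *   blob_freeze_io(blob, do_md_update, ctx);    // frozen_refcnt 0 -> 1
 *   ... mutate blob metadata with I/O quiesced ...
 *   blob_unfreeze_io(blob, update_done, ctx);   // 1 -> 0, replays queued I/O
 */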
563 :
564 : static int
565 8442 : blob_mark_clean(struct spdk_blob *blob)
566 : {
567 8442 : uint32_t *extent_pages = NULL;
568 8442 : uint64_t *clusters = NULL;
569 8442 : uint32_t *pages = NULL;
570 :
571 8442 : assert(blob != NULL);
572 :
573 8442 : if (blob->active.num_extent_pages) {
574 2845 : assert(blob->active.extent_pages);
575 2845 : extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
576 2845 : if (!extent_pages) {
577 0 : return -ENOMEM;
578 : }
579 2845 : memcpy(extent_pages, blob->active.extent_pages,
580 2845 : blob->active.num_extent_pages * sizeof(*extent_pages));
581 : }
582 :
583 8442 : if (blob->active.num_clusters) {
584 5918 : assert(blob->active.clusters);
585 5918 : clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
586 5918 : if (!clusters) {
587 0 : free(extent_pages);
588 0 : return -ENOMEM;
589 : }
590 5918 : memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
591 : }
592 :
593 8442 : if (blob->active.num_pages) {
594 6958 : assert(blob->active.pages);
595 6958 : pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
596 6958 : if (!pages) {
597 0 : free(extent_pages);
598 0 : free(clusters);
599 0 : return -ENOMEM;
600 : }
601 6958 : memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
602 : }
603 :
604 8442 : free(blob->clean.extent_pages);
605 8442 : free(blob->clean.clusters);
606 8442 : free(blob->clean.pages);
607 :
608 8442 : blob->clean.num_extent_pages = blob->active.num_extent_pages;
609 8442 : blob->clean.extent_pages = blob->active.extent_pages;
610 8442 : blob->clean.num_clusters = blob->active.num_clusters;
611 8442 : blob->clean.clusters = blob->active.clusters;
612 8442 : blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
613 8442 : blob->clean.num_pages = blob->active.num_pages;
614 8442 : blob->clean.pages = blob->active.pages;
615 :
616 8442 : blob->active.extent_pages = extent_pages;
617 8442 : blob->active.clusters = clusters;
618 8442 : blob->active.pages = pages;
619 :
620 : /* If the metadata was dirtied again while the metadata was being written to disk,
621 : * we do not want to revert the DIRTY state back to CLEAN here.
622 : */
623 8442 : if (blob->state == SPDK_BLOB_STATE_LOADING) {
624 3402 : blob->state = SPDK_BLOB_STATE_CLEAN;
625 : }
626 :
627 8442 : return 0;
628 : }
629 :
630 : static int
631 1276 : blob_deserialize_xattr(struct spdk_blob *blob,
632 : struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
633 : {
634 : struct spdk_xattr *xattr;
635 :
636 1276 : if (desc_xattr->length != sizeof(desc_xattr->name_length) +
637 : sizeof(desc_xattr->value_length) +
638 1276 : desc_xattr->name_length + desc_xattr->value_length) {
639 0 : return -EINVAL;
640 : }
641 :
642 1276 : xattr = calloc(1, sizeof(*xattr));
643 1276 : if (xattr == NULL) {
644 0 : return -ENOMEM;
645 : }
646 :
647 1276 : xattr->name = malloc(desc_xattr->name_length + 1);
648 1276 : if (xattr->name == NULL) {
649 0 : free(xattr);
650 0 : return -ENOMEM;
651 : }
652 :
653 1276 : xattr->value = malloc(desc_xattr->value_length);
654 1276 : if (xattr->value == NULL) {
655 0 : free(xattr->name);
656 0 : free(xattr);
657 0 : return -ENOMEM;
658 : }
659 :
660 1276 : memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
661 1276 : xattr->name[desc_xattr->name_length] = '\0';
662 1276 : xattr->value_len = desc_xattr->value_length;
663 1276 : memcpy(xattr->value,
664 1276 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
665 1276 : desc_xattr->value_length);
666 :
667 1276 : TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
668 :
669 1276 : return 0;
670 : }
671 :
672 :
673 : static int
674 4580 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
675 : {
676 : struct spdk_blob_md_descriptor *desc;
677 4580 : size_t cur_desc = 0;
678 : void *tmp;
679 :
680 4580 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
681 13444 : while (cur_desc < sizeof(page->descriptors)) {
682 13444 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
683 4532 : if (desc->length == 0) {
684 : /* If padding and length are 0, this terminates the page */
685 4532 : break;
686 : }
687 8912 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
688 : struct spdk_blob_md_descriptor_flags *desc_flags;
689 :
690 3434 : desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
691 :
692 3434 : if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
693 0 : return -EINVAL;
694 : }
695 :
696 3434 : if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
697 : SPDK_BLOB_INVALID_FLAGS_MASK) {
698 8 : return -EINVAL;
699 : }
700 :
701 3426 : if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
702 : SPDK_BLOB_DATA_RO_FLAGS_MASK) {
703 12 : blob->data_ro = true;
704 12 : blob->md_ro = true;
705 : }
706 :
707 3426 : if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
708 : SPDK_BLOB_MD_RO_FLAGS_MASK) {
709 12 : blob->md_ro = true;
710 : }
711 :
712 3426 : if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
713 562 : blob->data_ro = true;
714 562 : blob->md_ro = true;
715 : }
716 :
717 3426 : blob->invalid_flags = desc_flags->invalid_flags;
718 3426 : blob->data_ro_flags = desc_flags->data_ro_flags;
719 3426 : blob->md_ro_flags = desc_flags->md_ro_flags;
720 :
721 5478 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
722 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
723 : unsigned int i, j;
724 1392 : unsigned int cluster_count = blob->active.num_clusters;
725 :
726 1392 : if (blob->extent_table_found) {
727 : /* Extent Table already present in the md,
728 : * both descriptors should never be at the same time. */
729 0 : return -EINVAL;
730 : }
731 1392 : blob->extent_rle_found = true;
732 :
733 1392 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
734 :
735 1392 : if (desc_extent_rle->length == 0 ||
736 1392 : (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
737 0 : return -EINVAL;
738 : }
739 :
740 2962 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
741 21238 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
742 19668 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
743 6692 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
744 6692 : desc_extent_rle->extents[i].cluster_idx + j)) {
745 0 : return -EINVAL;
746 : }
747 : }
748 19668 : cluster_count++;
749 : }
750 : }
751 :
752 1392 : if (cluster_count == 0) {
753 0 : return -EINVAL;
754 : }
755 1392 : tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
756 1392 : if (tmp == NULL) {
757 0 : return -ENOMEM;
758 : }
759 1392 : blob->active.clusters = tmp;
760 1392 : blob->active.cluster_array_size = cluster_count;
761 :
762 2962 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
763 21238 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
764 19668 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
765 13384 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
766 6692 : desc_extent_rle->extents[i].cluster_idx + j);
767 6692 : blob->active.num_allocated_clusters++;
768 12976 : } else if (spdk_blob_is_thin_provisioned(blob)) {
769 12976 : blob->active.clusters[blob->active.num_clusters++] = 0;
770 : } else {
771 0 : return -EINVAL;
772 : }
773 : }
774 : }
775 4086 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
776 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
777 1764 : uint32_t num_extent_pages = blob->active.num_extent_pages;
778 : uint32_t i, j;
779 : size_t extent_pages_length;
780 :
781 1764 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
782 1764 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
783 :
784 1764 : if (blob->extent_rle_found) {
785 : /* This means that Extent RLE is present in MD,
786 : * both should never be at the same time. */
787 0 : return -EINVAL;
788 1764 : } else if (blob->extent_table_found &&
789 0 : desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
790 : /* The number of clusters in this ET does not match the number
791 : * from a previously read EXTENT_TABLE. */
792 0 : return -EINVAL;
793 : }
794 :
795 1764 : if (desc_extent_table->length == 0 ||
796 1764 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
797 0 : return -EINVAL;
798 : }
799 :
800 1764 : blob->extent_table_found = true;
801 :
802 3238 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
803 1474 : num_extent_pages += desc_extent_table->extent_page[i].num_pages;
804 : }
805 :
806 1764 : if (num_extent_pages > 0) {
807 1458 : tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
808 1458 : if (tmp == NULL) {
809 0 : return -ENOMEM;
810 : }
811 1458 : blob->active.extent_pages = tmp;
812 : }
813 1764 : blob->active.extent_pages_array_size = num_extent_pages;
814 :
815 1764 : blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
816 :
817 : /* Extent table entries contain md page numbers for extent pages.
818 : * Zeroes represent unallocated extent pages, those are run-length-encoded.
819 : */
820 3238 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
821 1474 : if (desc_extent_table->extent_page[i].page_idx != 0) {
822 1052 : assert(desc_extent_table->extent_page[i].num_pages == 1);
823 1052 : blob->active.extent_pages[blob->active.num_extent_pages++] =
824 1052 : desc_extent_table->extent_page[i].page_idx;
825 422 : } else if (spdk_blob_is_thin_provisioned(blob)) {
826 844 : for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
827 422 : blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
828 : }
829 : } else {
830 0 : return -EINVAL;
831 : }
832 : }
833 2322 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
834 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
835 : unsigned int i;
836 1046 : unsigned int cluster_count = 0;
837 : size_t cluster_idx_length;
838 :
839 1046 : if (blob->extent_rle_found) {
840 : /* This means that Extent RLE is present in MD,
841 : * both should never be at the same time. */
842 0 : return -EINVAL;
843 : }
844 :
845 1046 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
846 1046 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
847 :
848 1046 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
849 1046 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
850 0 : return -EINVAL;
851 : }
852 :
853 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
854 15298 : if (desc_extent->cluster_idx[i] != 0) {
855 6962 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
856 0 : return -EINVAL;
857 : }
858 : }
859 15298 : cluster_count++;
860 : }
861 :
862 1046 : if (cluster_count == 0) {
863 0 : return -EINVAL;
864 : }
865 :
866 : /* When reading extent pages sequentially starting cluster idx should match
867 : * current size of a blob.
868 : * If changed to batch reading, this check shall be removed. */
869 1046 : if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
870 0 : return -EINVAL;
871 : }
872 :
873 1046 : tmp = realloc(blob->active.clusters,
874 1046 : (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
875 1046 : if (tmp == NULL) {
876 0 : return -ENOMEM;
877 : }
878 1046 : blob->active.clusters = tmp;
879 1046 : blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
880 :
881 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
882 15298 : if (desc_extent->cluster_idx[i] != 0) {
883 6962 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
884 : desc_extent->cluster_idx[i]);
885 6962 : blob->active.num_allocated_clusters++;
886 8336 : } else if (spdk_blob_is_thin_provisioned(blob)) {
887 8336 : blob->active.clusters[blob->active.num_clusters++] = 0;
888 : } else {
889 0 : return -EINVAL;
890 : }
891 : }
892 1046 : assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
893 1046 : assert(blob->remaining_clusters_in_et >= cluster_count);
894 1046 : blob->remaining_clusters_in_et -= cluster_count;
895 1276 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
896 : int rc;
897 :
898 394 : rc = blob_deserialize_xattr(blob,
899 : (struct spdk_blob_md_descriptor_xattr *) desc, false);
900 394 : if (rc != 0) {
901 0 : return rc;
902 : }
903 882 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
904 : int rc;
905 :
906 882 : rc = blob_deserialize_xattr(blob,
907 : (struct spdk_blob_md_descriptor_xattr *) desc, true);
908 882 : if (rc != 0) {
909 0 : return rc;
910 : }
911 : } else {
912 : /* Unrecognized descriptor type. Do not fail - just continue to the
913 : * next descriptor. If this descriptor is associated with some feature
914 : * defined in a newer version of blobstore, that version of blobstore
915 : * should create and set an associated feature flag to specify if this
916 : * blob can be loaded or not.
917 : */
918 : }
919 :
920 : /* Advance to the next descriptor */
921 8904 : cur_desc += sizeof(*desc) + desc->length;
922 8904 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
923 40 : break;
924 : }
925 8864 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
926 : }
927 :
928 4572 : return 0;
929 : }
930 :
931 : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
932 :
933 : static int
934 1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
935 : {
936 1046 : assert(blob != NULL);
937 1046 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
938 :
939 1046 : if (bs_load_cur_extent_page_valid(extent_page) == false) {
940 0 : return -ENOENT;
941 : }
942 :
943 1046 : return blob_parse_page(extent_page, blob);
944 : }
945 :
946 : static int
947 3438 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
948 : struct spdk_blob *blob)
949 : {
950 : const struct spdk_blob_md_page *page;
951 : uint32_t i;
952 : int rc;
953 : void *tmp;
954 :
955 3438 : assert(page_count > 0);
956 3438 : assert(pages[0].sequence_num == 0);
957 3438 : assert(blob != NULL);
958 3438 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
959 3438 : assert(blob->active.clusters == NULL);
960 :
961 : /* The blobid provided doesn't match what's in the MD, this can
962 : * happen for example if a bogus blobid is passed in through open.
963 : */
964 3438 : if (blob->id != pages[0].id) {
965 4 : SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
966 : "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
967 4 : return -ENOENT;
968 : }
969 :
970 3434 : tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
971 3434 : if (!tmp) {
972 0 : return -ENOMEM;
973 : }
974 3434 : blob->active.pages = tmp;
975 :
976 3434 : blob->active.pages[0] = pages[0].id;
977 :
978 3534 : for (i = 1; i < page_count; i++) {
979 100 : assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
980 100 : blob->active.pages[i] = pages[i - 1].next;
981 : }
982 3434 : blob->active.num_pages = page_count;
983 :
984 6960 : for (i = 0; i < page_count; i++) {
985 3534 : page = &pages[i];
986 :
987 3534 : assert(page->id == blob->id);
988 3534 : assert(page->sequence_num == i);
989 :
990 3534 : rc = blob_parse_page(page, blob);
991 3534 : if (rc != 0) {
992 8 : return rc;
993 : }
994 : }
995 :
996 3426 : return 0;
997 : }
998 :
999 : static int
1000 4350 : blob_serialize_add_page(const struct spdk_blob *blob,
1001 : struct spdk_blob_md_page **pages,
1002 : uint32_t *page_count,
1003 : struct spdk_blob_md_page **last_page)
1004 : {
1005 : struct spdk_blob_md_page *page, *tmp_pages;
1006 :
1007 4350 : assert(pages != NULL);
1008 4350 : assert(page_count != NULL);
1009 :
1010 4350 : *last_page = NULL;
1011 4350 : if (*page_count == 0) {
1012 4262 : assert(*pages == NULL);
1013 4262 : *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
1014 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1015 4262 : if (*pages == NULL) {
1016 0 : return -ENOMEM;
1017 : }
1018 4262 : *page_count = 1;
1019 : } else {
1020 88 : assert(*pages != NULL);
1021 88 : tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
1022 88 : if (tmp_pages == NULL) {
1023 0 : return -ENOMEM;
1024 : }
1025 88 : (*page_count)++;
1026 88 : *pages = tmp_pages;
1027 : }
1028 :
1029 4350 : page = &(*pages)[*page_count - 1];
1030 4350 : memset(page, 0, sizeof(*page));
1031 4350 : page->id = blob->id;
1032 4350 : page->sequence_num = *page_count - 1;
1033 4350 : page->next = SPDK_INVALID_MD_PAGE;
1034 4350 : *last_page = page;
1035 :
1036 4350 : return 0;
1037 : }
1038 :
1039 : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
1040 : * Update required_sz on both success and failure.
1041 : *
1042 : */
1043 : static int
1044 1771 : blob_serialize_xattr(const struct spdk_xattr *xattr,
1045 : uint8_t *buf, size_t buf_sz,
1046 : size_t *required_sz, bool internal)
1047 : {
1048 : struct spdk_blob_md_descriptor_xattr *desc;
1049 :
1050 1771 : *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
1051 1771 : strlen(xattr->name) +
1052 1771 : xattr->value_len;
1053 :
1054 1771 : if (buf_sz < *required_sz) {
1055 48 : return -1;
1056 : }
1057 :
1058 1723 : desc = (struct spdk_blob_md_descriptor_xattr *)buf;
1059 :
1060 1723 : desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
1061 1723 : desc->length = sizeof(desc->name_length) +
1062 : sizeof(desc->value_length) +
1063 1723 : strlen(xattr->name) +
1064 1723 : xattr->value_len;
1065 1723 : desc->name_length = strlen(xattr->name);
1066 1723 : desc->value_length = xattr->value_len;
1067 :
1068 1723 : memcpy(desc->name, xattr->name, desc->name_length);
1069 1723 : memcpy((void *)((uintptr_t)desc->name + desc->name_length),
1070 1723 : xattr->value,
1071 1723 : desc->value_length);
1072 :
1073 1723 : return 0;
1074 : }
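/*
 * Worked example (annotation, not part of the source): for an xattr named
 * "name" (4 bytes) with an 8-byte value, the descriptor consumes
 * sizeof(struct spdk_blob_md_descriptor_xattr) + 4 + 8 bytes. If buf_sz is
 * smaller than that, the function returns -1 with *required_sz set to the
 * total, which blob_serialize_xattrs() below uses to decide when to open a
 * new metadata page and retry.
 */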
1075 :
1076 : static void
1077 1685 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
1078 : uint64_t start_ep, uint64_t *next_ep,
1079 : uint8_t **buf, size_t *remaining_sz)
1080 : {
1081 : struct spdk_blob_md_descriptor_extent_table *desc;
1082 : size_t cur_sz;
1083 : uint64_t i, et_idx;
1084 : uint32_t extent_page, ep_len;
1085 :
1086 : /* The buffer must have room for at least the num_clusters entry */
1087 1685 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
1088 1685 : if (*remaining_sz < cur_sz) {
1089 20 : *next_ep = start_ep;
1090 20 : return;
1091 : }
1092 :
1093 1665 : desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
1094 1665 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
1095 :
1096 1665 : desc->num_clusters = blob->active.num_clusters;
1097 :
1098 1665 : ep_len = 1;
1099 1665 : et_idx = 0;
1100 4236 : for (i = start_ep; i < blob->active.num_extent_pages; i++) {
1101 2571 : if (*remaining_sz < cur_sz + sizeof(desc->extent_page[0])) {
1102 : /* If we ran out of buffer space, stop */
1103 0 : break;
1104 : }
1105 :
1106 2571 : extent_page = blob->active.extent_pages[i];
1107 : /* Verify that next extent_page is unallocated */
1108 2571 : if (extent_page == 0 &&
1109 1518 : (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
1110 1078 : ep_len++;
1111 1078 : continue;
1112 : }
1113 1493 : desc->extent_page[et_idx].page_idx = extent_page;
1114 1493 : desc->extent_page[et_idx].num_pages = ep_len;
1115 1493 : et_idx++;
1116 :
1117 1493 : ep_len = 1;
1118 1493 : cur_sz += sizeof(desc->extent_page[et_idx]);
1119 : }
1120 1665 : *next_ep = i;
1121 :
1122 1665 : desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
1123 1665 : *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
1124 1665 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
1125 : }
1126 :
1127 : static int
1128 1667 : blob_serialize_extent_table(const struct spdk_blob *blob,
1129 : struct spdk_blob_md_page **pages,
1130 : struct spdk_blob_md_page *cur_page,
1131 : uint32_t *page_count, uint8_t **buf,
1132 : size_t *remaining_sz)
1133 : {
1134 1667 : uint64_t last_extent_page;
1135 : int rc;
1136 :
1137 1667 : last_extent_page = 0;
1138 : /* At least a single extent table entry always has to be persisted.
1139 : * That case occurs when num_extent_pages == 0. */
1140 1685 : while (last_extent_page <= blob->active.num_extent_pages) {
1141 1685 : blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
1142 : remaining_sz);
1143 :
1144 1685 : if (last_extent_page == blob->active.num_extent_pages) {
1145 1667 : break;
1146 : }
1147 :
1148 18 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1149 18 : if (rc < 0) {
1150 0 : return rc;
1151 : }
1152 :
1153 18 : *buf = (uint8_t *)cur_page->descriptors;
1154 18 : *remaining_sz = sizeof(cur_page->descriptors);
1155 : }
1156 :
1157 1667 : return 0;
1158 : }
1159 :
1160 : static void
1161 1737 : blob_serialize_extent_rle(const struct spdk_blob *blob,
1162 : uint64_t start_cluster, uint64_t *next_cluster,
1163 : uint8_t **buf, size_t *buf_sz)
1164 : {
1165 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
1166 : size_t cur_sz;
1167 : uint64_t i, extent_idx;
1168 : uint64_t lba, lba_per_cluster, lba_count;
1169 :
1170 : /* The buffer must have room for at least one extent */
1171 1737 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
1172 1737 : if (*buf_sz < cur_sz) {
1173 18 : *next_cluster = start_cluster;
1174 18 : return;
1175 : }
1176 :
1177 1719 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
1178 1719 : desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
1179 :
1180 1719 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1181 : /* Assert for scan-build false positive */
1182 1719 : assert(lba_per_cluster > 0);
1183 :
1184 1719 : lba = blob->active.clusters[start_cluster];
1185 1719 : lba_count = lba_per_cluster;
1186 1719 : extent_idx = 0;
1187 810450 : for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
1188 808735 : if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
1189 : /* Run-length encode sequential non-zero LBA */
1190 7276 : lba_count += lba_per_cluster;
1191 7276 : continue;
1192 801459 : } else if (lba == 0 && blob->active.clusters[i] == 0) {
1193 : /* Run-length encode unallocated clusters */
1194 800266 : lba_count += lba_per_cluster;
1195 800266 : continue;
1196 : }
1197 1193 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1198 1193 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1199 1193 : extent_idx++;
1200 :
1201 1193 : cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
1202 :
1203 1193 : if (*buf_sz < cur_sz) {
1204 : /* If we ran out of buffer space, return */
1205 4 : *next_cluster = i;
1206 4 : break;
1207 : }
1208 :
1209 1189 : lba = blob->active.clusters[i];
1210 1189 : lba_count = lba_per_cluster;
1211 : }
1212 :
1213 1719 : if (*buf_sz >= cur_sz) {
1214 1715 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1215 1715 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1216 1715 : extent_idx++;
1217 :
1218 1715 : *next_cluster = blob->active.num_clusters;
1219 : }
1220 :
1221 1719 : desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
1222 1719 : *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1223 1719 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1224 : }
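/*
 * Worked example (annotation, not part of the source): assume a hypothetical
 * geometry with lba_per_cluster == 8 and
 * active.clusters == { 64, 72, 0, 0, 0, 128 }. The loop above emits
 *   extents[0] = { .cluster_idx = 8,  .length = 2 }   64 and 72 are sequential
 *   extents[1] = { .cluster_idx = 0,  .length = 3 }   run of unallocated clusters
 *   extents[2] = { .cluster_idx = 16, .length = 1 }   128 / 8
 * and sets desc_extent_rle->length to 3 * sizeof(extents[0]).
 */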
1225 :
1226 : static int
1227 1929 : blob_serialize_extents_rle(const struct spdk_blob *blob,
1228 : struct spdk_blob_md_page **pages,
1229 : struct spdk_blob_md_page *cur_page,
1230 : uint32_t *page_count, uint8_t **buf,
1231 : size_t *remaining_sz)
1232 : {
1233 1929 : uint64_t last_cluster;
1234 : int rc;
1235 :
1236 1929 : last_cluster = 0;
1237 1951 : while (last_cluster < blob->active.num_clusters) {
1238 1737 : blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
1239 :
1240 1737 : if (last_cluster == blob->active.num_clusters) {
1241 1715 : break;
1242 : }
1243 :
1244 22 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1245 22 : if (rc < 0) {
1246 0 : return rc;
1247 : }
1248 :
1249 22 : *buf = (uint8_t *)cur_page->descriptors;
1250 22 : *remaining_sz = sizeof(cur_page->descriptors);
1251 : }
1252 :
1253 1929 : return 0;
1254 : }
1255 :
1256 : static void
1257 1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
1258 : uint64_t cluster, struct spdk_blob_md_page *page)
1259 : {
1260 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
1261 : uint64_t i, extent_idx;
1262 : uint64_t lba, lba_per_cluster;
1263 1100 : uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
1264 :
1265 1100 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
1266 1100 : desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
1267 :
1268 1100 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1269 :
1270 1100 : desc_extent->start_cluster_idx = start_cluster_idx;
1271 1100 : extent_idx = 0;
1272 42406 : for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
1273 41372 : lba = blob->active.clusters[i];
1274 41372 : desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
1275 41372 : if (extent_idx >= SPDK_EXTENTS_PER_EP) {
1276 66 : break;
1277 : }
1278 : }
1279 1100 : desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
1280 : sizeof(desc_extent->cluster_idx[0]) * extent_idx;
1281 1100 : }
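/*
 * Worked example (annotation, not part of the source): extent pages cover
 * fixed-size, aligned windows of the cluster map. Assuming, for illustration,
 * SPDK_EXTENTS_PER_EP == 512, serializing cluster 1000 yields
 * start_cluster_idx = (1000 / 512) * 512 = 512, i.e. the page describes
 * clusters [512, 1024) no matter which cluster in that window triggered it.
 */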
1282 :
1283 : static void
1284 3596 : blob_serialize_flags(const struct spdk_blob *blob,
1285 : uint8_t *buf, size_t *buf_sz)
1286 : {
1287 : struct spdk_blob_md_descriptor_flags *desc;
1288 :
1289 : /*
1290 : * Flags get serialized first, so we should always have room for the flags
1291 : * descriptor.
1292 : */
1293 3596 : assert(*buf_sz >= sizeof(*desc));
1294 :
1295 3596 : desc = (struct spdk_blob_md_descriptor_flags *)buf;
1296 3596 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
1297 3596 : desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
1298 3596 : desc->invalid_flags = blob->invalid_flags;
1299 3596 : desc->data_ro_flags = blob->data_ro_flags;
1300 3596 : desc->md_ro_flags = blob->md_ro_flags;
1301 :
1302 3596 : *buf_sz -= sizeof(*desc);
1303 3596 : }
1304 :
1305 : static int
1306 7192 : blob_serialize_xattrs(const struct spdk_blob *blob,
1307 : const struct spdk_xattr_tailq *xattrs, bool internal,
1308 : struct spdk_blob_md_page **pages,
1309 : struct spdk_blob_md_page *cur_page,
1310 : uint32_t *page_count, uint8_t **buf,
1311 : size_t *remaining_sz)
1312 : {
1313 : const struct spdk_xattr *xattr;
1314 : int rc;
1315 :
1316 8915 : TAILQ_FOREACH(xattr, xattrs, link) {
1317 1723 : size_t required_sz = 0;
1318 :
1319 1723 : rc = blob_serialize_xattr(xattr,
1320 : *buf, *remaining_sz,
1321 : &required_sz, internal);
1322 1723 : if (rc < 0) {
1323 : /* Need to add a new page to the chain */
1324 48 : rc = blob_serialize_add_page(blob, pages, page_count,
1325 : &cur_page);
1326 48 : if (rc < 0) {
1327 0 : spdk_free(*pages);
1328 0 : *pages = NULL;
1329 0 : *page_count = 0;
1330 0 : return rc;
1331 : }
1332 :
1333 48 : *buf = (uint8_t *)cur_page->descriptors;
1334 48 : *remaining_sz = sizeof(cur_page->descriptors);
1335 :
1336 : /* Try again */
1337 48 : required_sz = 0;
1338 48 : rc = blob_serialize_xattr(xattr,
1339 : *buf, *remaining_sz,
1340 : &required_sz, internal);
1341 :
1342 48 : if (rc < 0) {
1343 0 : spdk_free(*pages);
1344 0 : *pages = NULL;
1345 0 : *page_count = 0;
1346 0 : return rc;
1347 : }
1348 : }
1349 :
1350 1723 : *remaining_sz -= required_sz;
1351 1723 : *buf += required_sz;
1352 : }
1353 :
1354 7192 : return 0;
1355 : }
1356 :
1357 : static int
1358 3596 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
1359 : uint32_t *page_count)
1360 : {
1361 3596 : struct spdk_blob_md_page *cur_page;
1362 : int rc;
1363 3596 : uint8_t *buf;
1364 3596 : size_t remaining_sz;
1365 :
1366 3596 : assert(pages != NULL);
1367 3596 : assert(page_count != NULL);
1368 3596 : assert(blob != NULL);
1369 3596 : assert(blob->state == SPDK_BLOB_STATE_DIRTY);
1370 :
1371 3596 : *pages = NULL;
1372 3596 : *page_count = 0;
1373 :
1374 : /* A blob always has at least 1 page, even if it has no descriptors */
1375 3596 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1376 3596 : if (rc < 0) {
1377 0 : return rc;
1378 : }
1379 :
1380 3596 : buf = (uint8_t *)cur_page->descriptors;
1381 3596 : remaining_sz = sizeof(cur_page->descriptors);
1382 :
1383 : /* Serialize flags */
1384 3596 : blob_serialize_flags(blob, buf, &remaining_sz);
1385 3596 : buf += sizeof(struct spdk_blob_md_descriptor_flags);
1386 :
1387 : /* Serialize xattrs */
1388 3596 : rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
1389 : pages, cur_page, page_count, &buf, &remaining_sz);
1390 3596 : if (rc < 0) {
1391 0 : return rc;
1392 : }
1393 :
1394 : /* Serialize internal xattrs */
1395 3596 : rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
1396 : pages, cur_page, page_count, &buf, &remaining_sz);
1397 3596 : if (rc < 0) {
1398 0 : return rc;
1399 : }
1400 :
1401 3596 : if (blob->use_extent_table) {
1402 : /* Serialize extent table */
1403 1667 : rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1404 : } else {
1405 : /* Serialize extents */
1406 1929 : rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1407 : }
1408 :
1409 3596 : return rc;
1410 : }
1411 :
1412 : struct spdk_blob_load_ctx {
1413 : struct spdk_blob *blob;
1414 :
1415 : struct spdk_blob_md_page *pages;
1416 : uint32_t num_pages;
1417 : uint32_t next_extent_page;
1418 : spdk_bs_sequence_t *seq;
1419 :
1420 : spdk_bs_sequence_cpl cb_fn;
1421 : void *cb_arg;
1422 : };
1423 :
1424 : static uint32_t
1425 19930 : blob_md_page_calc_crc(void *page)
1426 : {
1427 : uint32_t crc;
1428 :
1429 19930 : crc = BLOB_CRC32C_INITIAL;
1430 19930 : crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
1431 19930 : crc ^= BLOB_CRC32C_INITIAL;
1432 :
1433 19930 : return crc;
1434 :
1435 : }
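/*
 * Sketch (annotation, not part of the source): the crc field occupies the
 * final 4 bytes of the metadata page, which is why the computation above
 * stops at SPDK_BS_PAGE_SIZE - 4. Validation, as the load path below does
 * inline, amounts to:
 */
static inline bool
example_md_page_crc_ok(struct spdk_blob_md_page *page)
{
	return blob_md_page_calc_crc(page) == page->crc;
}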
1436 :
1437 : static void
1438 3466 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
1439 : {
1440 3466 : struct spdk_blob *blob = ctx->blob;
1441 :
1442 3466 : if (bserrno == 0) {
1443 3402 : blob_mark_clean(blob);
1444 : }
1445 :
1446 3466 : ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
1447 :
1448 : /* Free the memory */
1449 3466 : spdk_free(ctx->pages);
1450 3466 : free(ctx);
1451 3466 : }
1452 :
1453 : static void
1454 454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
1455 : {
1456 454 : struct spdk_blob_load_ctx *ctx = cb_arg;
1457 454 : struct spdk_blob *blob = ctx->blob;
1458 :
1459 454 : if (bserrno == 0) {
1460 448 : blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
1461 448 : if (blob->back_bs_dev == NULL) {
1462 0 : bserrno = -ENOMEM;
1463 : }
1464 : }
1465 454 : if (bserrno != 0) {
1466 6 : SPDK_ERRLOG("Failed to load snapshot\n");
1467 : }
1468 :
1469 454 : blob_load_final(ctx, bserrno);
1470 454 : }
1471 :
1472 : static void blob_update_clear_method(struct spdk_blob *blob);
1473 :
1474 : static int
1475 120 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
1476 : {
1477 120 : struct spdk_blob_store *bs = blob->bs;
1478 120 : struct spdk_bs_dev *bs_dev = NULL;
1479 120 : const void *esnap_id = NULL;
1480 120 : size_t id_len = 0;
1481 : int rc;
1482 :
1483 120 : if (bs->esnap_bs_dev_create == NULL) {
1484 8 : SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
1485 : "without support for esnap clones\n", blob->id);
1486 8 : return -ENOTSUP;
1487 : }
1488 112 : assert(blob->back_bs_dev == NULL);
1489 :
1490 112 : rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
1491 112 : if (rc != 0) {
1492 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
1493 0 : return -EINVAL;
1494 : }
1495 112 : assert(id_len > 0 && id_len < UINT32_MAX);
1496 :
1497 112 : SPDK_INFOLOG(blob, "Creating external snapshot device\n");
1498 :
1499 112 : rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
1500 : &bs_dev);
1501 112 : if (rc != 0) {
1502 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
1503 : "with error %d\n", blob->id, rc);
1504 0 : return rc;
1505 : }
1506 :
1507 : /*
1508 : * Note: bs_dev might be NULL if the consumer chose not to open the external snapshot.
1509 : * This can happen especially during spdk_bs_load() iteration.
1510 : */
1511 112 : if (bs_dev != NULL) {
1512 112 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
1513 112 : if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
1514 4 : SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
1515 : "is not compatible with blobstore block size %u\n",
1516 : blob->id, bs_dev->blocklen, bs->io_unit_size);
1517 4 : bs_dev->destroy(bs_dev);
1518 4 : return -EINVAL;
1519 : }
1520 : }
1521 :
1522 108 : blob->back_bs_dev = bs_dev;
1523 108 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
1524 :
1525 108 : return 0;
1526 : }
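/*
 * Worked example (annotation, not part of the source): the check above
 * requires the blobstore io_unit_size to be a whole multiple of the esnap
 * device's block size. With io_unit_size == 4096, an esnap bdev with
 * blocklen 512 or 4096 passes (4096 % 512 == 0), while blocklen 520 or 8192
 * fails and the device is destroyed with -EINVAL.
 */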
1527 :
1528 : static void
1529 3420 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
1530 : {
1531 3420 : struct spdk_blob_load_ctx *ctx = cb_arg;
1532 3420 : struct spdk_blob *blob = ctx->blob;
1533 3420 : const void *value;
1534 3420 : size_t len;
1535 : int rc;
1536 :
1537 3420 : if (blob_is_esnap_clone(blob)) {
1538 120 : rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
1539 120 : blob_load_final(ctx, rc);
1540 120 : return;
1541 : }
1542 :
1543 3300 : if (spdk_blob_is_thin_provisioned(blob)) {
1544 1030 : rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
1545 1030 : if (rc == 0) {
1546 454 : if (len != sizeof(spdk_blob_id)) {
1547 0 : blob_load_final(ctx, -EINVAL);
1548 0 : return;
1549 : }
1550 : /* open snapshot blob and continue in the callback function */
1551 454 : blob->parent_id = *(spdk_blob_id *)value;
1552 454 : spdk_bs_open_blob(blob->bs, blob->parent_id,
1553 : blob_load_snapshot_cpl, ctx);
1554 454 : return;
1555 : } else {
1556 : /* add zeroes_dev for thin provisioned blob */
1557 576 : blob->back_bs_dev = bs_create_zeroes_dev();
1558 : }
1559 : } else {
1560 : /* standard blob */
1561 2270 : blob->back_bs_dev = NULL;
1562 : }
1563 2846 : blob_load_final(ctx, 0);
1564 : }
1565 :
1566 : static void
1567 2816 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1568 : {
1569 2816 : struct spdk_blob_load_ctx *ctx = cb_arg;
1570 2816 : struct spdk_blob *blob = ctx->blob;
1571 : struct spdk_blob_md_page *page;
1572 : uint64_t i;
1573 : uint32_t crc;
1574 : uint64_t lba;
1575 : void *tmp;
1576 : uint64_t sz;
1577 :
1578 2816 : if (bserrno) {
1579 6 : SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
1580 6 : blob_load_final(ctx, bserrno);
1581 6 : return;
1582 : }
1583 :
1584 2810 : if (ctx->pages == NULL) {
1585 : /* First iteration of this function; allocate a buffer for a single EXTENT_PAGE */
1586 1764 : ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
1587 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1588 1764 : if (!ctx->pages) {
1589 0 : blob_load_final(ctx, -ENOMEM);
1590 0 : return;
1591 : }
1592 1764 : ctx->num_pages = 1;
1593 1764 : ctx->next_extent_page = 0;
1594 : } else {
1595 1046 : page = &ctx->pages[0];
1596 1046 : crc = blob_md_page_calc_crc(page);
1597 1046 : if (crc != page->crc) {
1598 0 : blob_load_final(ctx, -EINVAL);
1599 0 : return;
1600 : }
1601 :
1602 1046 : if (page->next != SPDK_INVALID_MD_PAGE) {
1603 0 : blob_load_final(ctx, -EINVAL);
1604 0 : return;
1605 : }
1606 :
1607 1046 : bserrno = blob_parse_extent_page(page, blob);
1608 1046 : if (bserrno) {
1609 0 : blob_load_final(ctx, bserrno);
1610 0 : return;
1611 : }
1612 : }
1613 :
1614 3232 : for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
1615 1474 : if (blob->active.extent_pages[i] != 0) {
1616 : /* Extent page was allocated, read and parse it. */
1617 1052 : lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
1618 1052 : ctx->next_extent_page = i + 1;
1619 :
1620 1052 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1621 1052 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
1622 : blob_load_cpl_extents_cpl, ctx);
1623 1052 : return;
1624 : } else {
1625 : /* Thin provisioned blobs can point to unallocated extent pages.
1626 : * In this case the blob size is increased by up to the amount left in remaining_clusters_in_et. */
1627 :
1628 422 : sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
1629 422 : blob->active.num_clusters += sz;
1630 422 : blob->remaining_clusters_in_et -= sz;
1631 :
1632 422 : assert(spdk_blob_is_thin_provisioned(blob));
1633 422 : assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
1634 :
1635 422 : tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
1636 422 : if (tmp == NULL) {
1637 0 : blob_load_final(ctx, -ENOMEM);
1638 0 : return;
1639 : }
1640 422 : memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
1641 422 : sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
1642 422 : blob->active.clusters = tmp;
1643 422 : blob->active.cluster_array_size = blob->active.num_clusters;
1644 : }
1645 : }
1646 :
1647 1758 : blob_load_backing_dev(seq, ctx);
1648 : }
1649 :
1650 : static void
1651 3566 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1652 : {
1653 3566 : struct spdk_blob_load_ctx *ctx = cb_arg;
1654 3566 : struct spdk_blob *blob = ctx->blob;
1655 : struct spdk_blob_md_page *page;
1656 : int rc;
1657 : uint32_t crc;
1658 : uint32_t current_page;
1659 :
1660 3566 : if (ctx->num_pages == 1) {
1661 3466 : current_page = bs_blobid_to_page(blob->id);
1662 : } else {
1663 100 : assert(ctx->num_pages != 0);
1664 100 : page = &ctx->pages[ctx->num_pages - 2];
1665 100 : current_page = page->next;
1666 : }
1667 :
1668 3566 : if (bserrno) {
1669 20 : SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
1670 : current_page, blob->id, bserrno);
1671 20 : blob_load_final(ctx, bserrno);
1672 20 : return;
1673 : }
1674 :
1675 3546 : page = &ctx->pages[ctx->num_pages - 1];
1676 3546 : crc = blob_md_page_calc_crc(page);
1677 3546 : if (crc != page->crc) {
1678 8 : SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
1679 : current_page, blob->id);
1680 8 : blob_load_final(ctx, -EINVAL);
1681 8 : return;
1682 : }
1683 :
1684 3538 : if (page->next != SPDK_INVALID_MD_PAGE) {
1685 : struct spdk_blob_md_page *tmp_pages;
1686 100 : uint32_t next_page = page->next;
1687 100 : uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
1688 :
1689 : /* Read the next page */
1690 100 : tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
1691 100 : if (tmp_pages == NULL) {
1692 0 : blob_load_final(ctx, -ENOMEM);
1693 0 : return;
1694 : }
1695 100 : ctx->num_pages++;
1696 100 : ctx->pages = tmp_pages;
1697 :
1698 100 : bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
1699 : next_lba,
1700 100 : bs_byte_to_lba(blob->bs, sizeof(*page)),
1701 : blob_load_cpl, ctx);
1702 100 : return;
1703 : }
1704 :
1705 : /* Parse the pages */
1706 3438 : rc = blob_parse(ctx->pages, ctx->num_pages, blob);
1707 3438 : if (rc) {
1708 12 : blob_load_final(ctx, rc);
1709 12 : return;
1710 : }
1711 :
1712 3426 : if (blob->extent_table_found == true) {
1713 : /* If EXTENT_TABLE was found, that means support for it should be enabled. */
1714 1764 : assert(blob->extent_rle_found == false);
1715 1764 : blob->use_extent_table = true;
1716 : } else {
1717 : /* If EXTENT_RLE was found, or no extent_* descriptor at all, disable support
1718 : * for the extent table. Having no extent_* descriptors means the blob has a
1719 : * length of 0 and no extent_rle descriptors were persisted for it.
1720 : * EXTENT_TABLE, if used, is always present in the metadata regardless of length. */
1721 1662 : blob->use_extent_table = false;
1722 : }
1723 :
1724 : /* Check the clear_method stored in metadata vs what may have been passed
1725 : * via spdk_bs_open_blob_ext() and update accordingly.
1726 : */
1727 3426 : blob_update_clear_method(blob);
1728 :
1729 3426 : spdk_free(ctx->pages);
1730 3426 : ctx->pages = NULL;
1731 :
1732 3426 : if (blob->extent_table_found) {
1733 1764 : blob_load_cpl_extents_cpl(seq, ctx, 0);
1734 : } else {
1735 1662 : blob_load_backing_dev(seq, ctx);
1736 : }
1737 : }
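/*
 * The load path above walks an on-disk singly linked list: every md page
 * names its successor in page->next, the chain ends at SPDK_INVALID_MD_PAGE,
 * and each page is CRC-checked before it is trusted. A minimal in-memory
 * sketch of the same traversal (md_chain_length() is a hypothetical helper
 * that assumes all pages are already resident and indexable by page number):
 *
 *	static int
 *	md_chain_length(struct spdk_blob_md_page *pages, uint32_t first,
 *			uint32_t *len)
 *	{
 *		uint32_t cur = first, n = 0;
 *
 *		while (cur != SPDK_INVALID_MD_PAGE) {
 *			if (pages[cur].crc != blob_md_page_calc_crc(&pages[cur])) {
 *				return -EINVAL;	// corrupt page, stop walking
 *			}
 *			n++;
 *			cur = pages[cur].next;
 *		}
 *		*len = n;
 *		return 0;
 *	}
 *
 * blob_load_cpl() performs the same walk, but re-enters itself through
 * bs_sequence_read_dev() because each next page must first be read from disk.
 */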
1738 :
1739 : /* Load a blob from disk given a blobid */
1740 : static void
1741 3466 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
1742 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
1743 : {
1744 : struct spdk_blob_load_ctx *ctx;
1745 : struct spdk_blob_store *bs;
1746 : uint32_t page_num;
1747 : uint64_t lba;
1748 :
1749 3466 : blob_verify_md_op(blob);
1750 :
1751 3466 : bs = blob->bs;
1752 :
1753 3466 : ctx = calloc(1, sizeof(*ctx));
1754 3466 : if (!ctx) {
1755 0 : cb_fn(seq, cb_arg, -ENOMEM);
1756 0 : return;
1757 : }
1758 :
1759 3466 : ctx->blob = blob;
1760 3466 : ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
1761 3466 : if (!ctx->pages) {
1762 0 : free(ctx);
1763 0 : cb_fn(seq, cb_arg, -ENOMEM);
1764 0 : return;
1765 : }
1766 3466 : ctx->num_pages = 1;
1767 3466 : ctx->cb_fn = cb_fn;
1768 3466 : ctx->cb_arg = cb_arg;
1769 3466 : ctx->seq = seq;
1770 :
1771 3466 : page_num = bs_blobid_to_page(blob->id);
1772 3466 : lba = bs_md_page_to_lba(blob->bs, page_num);
1773 :
1774 3466 : blob->state = SPDK_BLOB_STATE_LOADING;
1775 :
1776 3466 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1777 3466 : bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
1778 : blob_load_cpl, ctx);
1779 : }
1780 :
1781 : struct spdk_blob_persist_ctx {
1782 : struct spdk_blob *blob;
1783 :
1784 : struct spdk_blob_md_page *pages;
1785 : uint32_t next_extent_page;
1786 : struct spdk_blob_md_page *extent_page;
1787 :
1788 : spdk_bs_sequence_t *seq;
1789 : spdk_bs_sequence_cpl cb_fn;
1790 : void *cb_arg;
1791 : TAILQ_ENTRY(spdk_blob_persist_ctx) link;
1792 : };
1793 :
1794 : static void
1795 1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
1796 : uint64_t lba_count)
1797 : {
1798 1262 : switch (blob->clear_method) {
1799 1262 : case BLOB_CLEAR_WITH_DEFAULT:
1800 : case BLOB_CLEAR_WITH_UNMAP:
1801 1262 : bs_batch_unmap_dev(batch, lba, lba_count);
1802 1262 : break;
1803 0 : case BLOB_CLEAR_WITH_WRITE_ZEROES:
1804 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1805 0 : break;
1806 0 : case BLOB_CLEAR_WITH_NONE:
1807 : default:
1808 0 : break;
1809 : }
1810 1262 : }
1811 :
1812 : static int
1813 1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
1814 : {
1815 : uint32_t crc;
1816 : static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
1817 :
1818 1152 : if (super->version > SPDK_BS_VERSION ||
1819 1148 : super->version < SPDK_BS_INITIAL_VERSION) {
1820 8 : return -EILSEQ;
1821 : }
1822 :
1823 1144 : if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
1824 : sizeof(super->signature)) != 0) {
1825 0 : return -EILSEQ;
1826 : }
1827 :
1828 1144 : crc = blob_md_page_calc_crc(super);
1829 1144 : if (crc != super->crc) {
1830 4 : return -EILSEQ;
1831 : }
1832 :
1833 1140 : if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1834 1126 : SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
1835 14 : } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1836 6 : SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless of bstype\n");
1837 : } else {
1838 8 : SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
1839 8 : SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1840 8 : SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1841 8 : return -ENXIO;
1842 : }
1843 :
1844 1132 : if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
1845 8 : SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
1846 : bs->dev->blockcnt * bs->dev->blocklen, super->size);
1847 8 : return -EILSEQ;
1848 : }
1849 :
1850 1124 : return 0;
1851 : }
1852 :
1853 : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
1854 : spdk_bs_sequence_cpl cb_fn, void *cb_arg);
1855 :
1856 : static void
1857 5092 : blob_persist_complete_cb(void *arg)
1858 : {
1859 5092 : struct spdk_blob_persist_ctx *ctx = arg;
1860 :
1861 : /* Call user callback */
1862 5092 : ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
1863 :
1864 : /* Free the memory */
1865 5092 : spdk_free(ctx->pages);
1866 5092 : free(ctx);
1867 5092 : }
1868 :
1869 : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
1870 :
1871 : static void
1872 5092 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
1873 : {
1874 : struct spdk_blob_persist_ctx *next_persist, *tmp;
1875 5092 : struct spdk_blob *blob = ctx->blob;
1876 :
1877 5092 : if (bserrno == 0) {
1878 5040 : blob_mark_clean(blob);
1879 : }
1880 :
1881 5092 : assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
1882 :
1883 : /* Complete all persists that were pending when the current persist started */
1884 10184 : TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
1885 5092 : TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
1886 5092 : spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
1887 : }
1888 :
1889 5092 : if (TAILQ_EMPTY(&blob->pending_persists)) {
1890 5069 : return;
1891 : }
1892 :
1893 : /* Queue up all pending persists for completion and start blob persist with the first one */
1894 23 : TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
1895 23 : next_persist = TAILQ_FIRST(&blob->persists_to_complete);
1896 :
1897 23 : blob->state = SPDK_BLOB_STATE_DIRTY;
1898 23 : bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
1899 : }
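/*
 * Persists are pipelined through two queues: persists_to_complete holds the
 * requests covered by the metadata write that just finished, while
 * pending_persists collects requests that arrived in the meantime. A condensed
 * sketch of the hand-off (assuming, as this file does elsewhere, that all md
 * operations run on one thread; struct persist_req is hypothetical):
 *
 *	struct persist_req { TAILQ_ENTRY(persist_req) link; };
 *	TAILQ_HEAD(persist_q, persist_req);
 *
 *	static void
 *	persist_done(struct persist_q *to_complete, struct persist_q *pending)
 *	{
 *		struct persist_req *req, *tmp;
 *
 *		TAILQ_FOREACH_SAFE(req, to_complete, link, tmp) {
 *			TAILQ_REMOVE(to_complete, req, link);
 *			// deliver this request's completion
 *		}
 *		if (!TAILQ_EMPTY(pending)) {
 *			// everything queued meanwhile rides one new write
 *			TAILQ_SWAP(to_complete, pending, persist_req, link);
 *			// ...kick off the next metadata write here...
 *		}
 *	}
 *
 * The effect is that any number of syncs issued while a persist is in flight
 * collapse into a single follow-up metadata write.
 */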
1900 :
1901 : static void
1902 5040 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1903 : {
1904 5040 : struct spdk_blob_persist_ctx *ctx = cb_arg;
1905 5040 : struct spdk_blob *blob = ctx->blob;
1906 5040 : struct spdk_blob_store *bs = blob->bs;
1907 : size_t i;
1908 :
1909 5040 : if (bserrno != 0) {
1910 0 : blob_persist_complete(seq, ctx, bserrno);
1911 0 : return;
1912 : }
1913 :
1914 5040 : spdk_spin_lock(&bs->used_lock);
1915 :
1916 : /* Release all extent_pages that were truncated */
1917 6774 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1918 : /* Nothing to release if it was not allocated */
1919 1734 : if (blob->active.extent_pages[i] != 0) {
1920 626 : bs_release_md_page(bs, blob->active.extent_pages[i]);
1921 : }
1922 : }
1923 :
1924 5040 : spdk_spin_unlock(&bs->used_lock);
1925 :
1926 5040 : if (blob->active.num_extent_pages == 0) {
1927 3637 : free(blob->active.extent_pages);
1928 3637 : blob->active.extent_pages = NULL;
1929 3637 : blob->active.extent_pages_array_size = 0;
1930 1403 : } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
1931 : #ifndef __clang_analyzer__
1932 : void *tmp;
1933 :
1934 : /* scan-build really can't figure out reallocs; work around it */
1935 2 : tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
1936 2 : assert(tmp != NULL);
1937 2 : blob->active.extent_pages = tmp;
1938 : #endif
1939 2 : blob->active.extent_pages_array_size = blob->active.num_extent_pages;
1940 : }
1941 :
1942 5040 : blob_persist_complete(seq, ctx, bserrno);
1943 : }
1944 :
1945 : static void
1946 5040 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
1947 : {
1948 5040 : struct spdk_blob *blob = ctx->blob;
1949 5040 : struct spdk_blob_store *bs = blob->bs;
1950 : size_t i;
1951 : uint64_t lba;
1952 : uint64_t lba_count;
1953 : spdk_bs_batch_t *batch;
1954 :
1955 5040 : batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
1956 5040 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
1957 :
1958 : /* Clear all extent_pages that were truncated */
1959 6774 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1960 : /* Nothing to clear if it was not allocated */
1961 1734 : if (blob->active.extent_pages[i] != 0) {
1962 626 : lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
1963 626 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1964 : }
1965 : }
1966 :
1967 5040 : bs_batch_close(batch);
1968 5040 : }
1969 :
1970 : static void
1971 5040 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1972 : {
1973 5040 : struct spdk_blob_persist_ctx *ctx = cb_arg;
1974 5040 : struct spdk_blob *blob = ctx->blob;
1975 5040 : struct spdk_blob_store *bs = blob->bs;
1976 : size_t i;
1977 :
1978 5040 : if (bserrno != 0) {
1979 0 : blob_persist_complete(seq, ctx, bserrno);
1980 0 : return;
1981 : }
1982 :
1983 5040 : spdk_spin_lock(&bs->used_lock);
1984 : /* Release all clusters that were truncated */
1985 1074047 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
1986 1069007 : uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
1987 :
1988 : /* Nothing to release if it was not allocated */
1989 1069007 : if (blob->active.clusters[i] != 0) {
1990 2343 : bs_release_cluster(bs, cluster_num);
1991 : }
1992 : }
1993 5040 : spdk_spin_unlock(&bs->used_lock);
1994 :
1995 5040 : if (blob->active.num_clusters == 0) {
1996 1940 : free(blob->active.clusters);
1997 1940 : blob->active.clusters = NULL;
1998 1940 : blob->active.cluster_array_size = 0;
1999 3100 : } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
2000 : #ifndef __clang_analyzer__
2001 : void *tmp;
2002 :
2003 : /* scan-build really can't figure reallocs, workaround it */
2004 : /* scan-build really can't figure out reallocs; work around it */
2005 14 : assert(tmp != NULL);
2006 14 : blob->active.clusters = tmp;
2007 :
2008 : #endif
2009 14 : blob->active.cluster_array_size = blob->active.num_clusters;
2010 : }
2011 :
2012 : /* Move on to clearing extent pages */
2013 5040 : blob_persist_clear_extents(seq, ctx);
2014 : }
2015 :
2016 : static void
2017 5040 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2018 : {
2019 5040 : struct spdk_blob *blob = ctx->blob;
2020 5040 : struct spdk_blob_store *bs = blob->bs;
2021 : spdk_bs_batch_t *batch;
2022 : size_t i;
2023 : uint64_t lba;
2024 : uint64_t lba_count;
2025 :
2026 : /* Clusters don't move around in blobs. The list shrinks or grows
2027 : * at the end, but no changes ever occur in the middle of the list.
2028 : */
2029 :
2030 5040 : batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
2031 :
2032 : /* Clear all clusters that were truncated */
2033 5040 : lba = 0;
2034 5040 : lba_count = 0;
2035 1074047 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
2036 1069007 : uint64_t next_lba = blob->active.clusters[i];
2037 1069007 : uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
2038 :
2039 1069007 : if (next_lba > 0 && (lba + lba_count) == next_lba) {
2040 : /* This cluster is contiguous with the previous one. */
2041 1085 : lba_count += next_lba_count;
2042 1085 : continue;
2043 1067922 : } else if (next_lba == 0) {
2044 1066664 : continue;
2045 : }
2046 :
2047 : /* This cluster is not contiguous with the previous one. */
2048 :
2049 : /* If a run of LBAs was previously accumulated, clear it now */
2050 1258 : if (lba_count > 0) {
2051 36 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2052 : }
2053 :
2054 : /* Start building the next batch */
2055 1258 : lba = next_lba;
2056 1258 : if (next_lba > 0) {
2057 1258 : lba_count = next_lba_count;
2058 : } else {
2059 0 : lba_count = 0;
2060 : }
2061 : }
2062 :
2063 : /* If we ended with a contiguous set of LBAs, clear them now */
2064 5040 : if (lba_count > 0) {
2065 1222 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2066 : }
2067 :
2068 5040 : bs_batch_close(batch);
2069 5040 : }
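/*
 * Worked example of the run coalescing above (illustrative numbers only).
 * With a cluster size of 8 LBAs and truncated clusters at LBAs
 * { 64, 72, 0, 128 }, where 0 means "never allocated":
 *
 *	i == 0: start a run: lba = 64, lba_count = 8
 *	i == 1: 64 + 8 == 72, contiguous -> lba_count = 16
 *	i == 2: next_lba == 0            -> skipped
 *	i == 3: 64 + 16 != 128           -> clear [64, 64 + 16), restart at 128
 *	tail:   lba_count == 8 > 0       -> clear [128, 128 + 8)
 *
 * bs_batch_clear_dev() is therefore invoked twice (for runs of 16 and 8 LBAs)
 * rather than once per allocated cluster; whether that becomes an unmap or a
 * write-zeroes on the device depends on blob->clear_method.
 */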
2070 :
2071 : static void
2072 5044 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2073 : {
2074 5044 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2075 5044 : struct spdk_blob *blob = ctx->blob;
2076 5044 : struct spdk_blob_store *bs = blob->bs;
2077 : size_t i;
2078 :
2079 5044 : if (bserrno != 0) {
2080 4 : blob_persist_complete(seq, ctx, bserrno);
2081 4 : return;
2082 : }
2083 :
2084 5040 : spdk_spin_lock(&bs->used_lock);
2085 :
2086 : /* This loop starts at 1 because the first page is special and handled
2087 : * below. The pages (except the first) are never written in place,
2088 : * so any pages in the clean list must be released.
2089 : */
2090 5108 : for (i = 1; i < blob->clean.num_pages; i++) {
2091 68 : bs_release_md_page(bs, blob->clean.pages[i]);
2092 : }
2093 :
2094 5040 : if (blob->active.num_pages == 0) {
2095 : uint32_t page_num;
2096 :
2097 1484 : page_num = bs_blobid_to_page(blob->id);
2098 1484 : bs_release_md_page(bs, page_num);
2099 : }
2100 :
2101 5040 : spdk_spin_unlock(&bs->used_lock);
2102 :
2103 : /* Move on to clearing clusters */
2104 5040 : blob_persist_clear_clusters(seq, ctx);
2105 : }
2106 :
2107 : static void
2108 5084 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2109 : {
2110 5084 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2111 5084 : struct spdk_blob *blob = ctx->blob;
2112 5084 : struct spdk_blob_store *bs = blob->bs;
2113 : uint64_t lba;
2114 : uint64_t lba_count;
2115 : spdk_bs_batch_t *batch;
2116 : size_t i;
2117 :
2118 5084 : if (bserrno != 0) {
2119 40 : blob_persist_complete(seq, ctx, bserrno);
2120 40 : return;
2121 : }
2122 :
2123 5044 : batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
2124 :
2125 5044 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
2126 :
2127 : /* This loop starts at 1 because the first page is special and handled
2128 : * below. The pages (except the first) are never written in place,
2129 : * so any pages in the clean list must be zeroed.
2130 : */
2131 5112 : for (i = 1; i < blob->clean.num_pages; i++) {
2132 68 : lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
2133 :
2134 68 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2135 : }
2136 :
2137 : /* The first page will only be zeroed if this is a delete. */
2138 5044 : if (blob->active.num_pages == 0) {
2139 : uint32_t page_num;
2140 :
2141 : /* The first page in the metadata goes where the blobid indicates */
2142 1488 : page_num = bs_blobid_to_page(blob->id);
2143 1488 : lba = bs_md_page_to_lba(bs, page_num);
2144 :
2145 1488 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2146 : }
2147 :
2148 5044 : bs_batch_close(batch);
2149 : }
2150 :
2151 : static void
2152 3596 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2153 : {
2154 3596 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2155 3596 : struct spdk_blob *blob = ctx->blob;
2156 3596 : struct spdk_blob_store *bs = blob->bs;
2157 : uint64_t lba;
2158 : uint32_t lba_count;
2159 : struct spdk_blob_md_page *page;
2160 :
2161 3596 : if (bserrno != 0) {
2162 0 : blob_persist_complete(seq, ctx, bserrno);
2163 0 : return;
2164 : }
2165 :
2166 3596 : if (blob->active.num_pages == 0) {
2167 : /* Move on to the next step */
2168 0 : blob_persist_zero_pages(seq, ctx, 0);
2169 0 : return;
2170 : }
2171 :
2172 3596 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2173 :
2174 3596 : page = &ctx->pages[0];
2175 : /* The first page in the metadata goes where the blobid indicates */
2176 3596 : lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
2177 :
2178 3596 : bs_sequence_write_dev(seq, page, lba, lba_count,
2179 : blob_persist_zero_pages, ctx);
2180 : }
2181 :
2182 : static void
2183 3596 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2184 : {
2185 3596 : struct spdk_blob *blob = ctx->blob;
2186 3596 : struct spdk_blob_store *bs = blob->bs;
2187 : uint64_t lba;
2188 : uint32_t lba_count;
2189 : struct spdk_blob_md_page *page;
2190 : spdk_bs_batch_t *batch;
2191 : size_t i;
2192 :
2193 : /* Non-root metadata pages are never written in place - each persist
2194 : * claims fresh md pages for them, and the root page is written last.
2195 : */
2196 :
2197 3596 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2198 :
2199 3596 : batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
2200 :
2201 : /* This starts at 1. The root page is not written until
2202 : * all of the others are finished
2203 : */
2204 3684 : for (i = 1; i < blob->active.num_pages; i++) {
2205 88 : page = &ctx->pages[i];
2206 88 : assert(page->sequence_num == i);
2207 :
2208 88 : lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
2209 :
2210 88 : bs_batch_write_dev(batch, page, lba, lba_count);
2211 : }
2212 :
2213 3596 : bs_batch_close(batch);
2214 3596 : }
2215 :
2216 : static int
2217 3568 : blob_resize(struct spdk_blob *blob, uint64_t sz)
2218 : {
2219 : uint64_t i;
2220 : uint64_t *tmp;
2221 3568 : uint64_t cluster;
2222 3568 : uint32_t lfmd; /* lowest free md page */
2223 : uint64_t num_clusters;
2224 : uint32_t *ep_tmp;
2225 3568 : uint64_t new_num_ep = 0, current_num_ep = 0;
2226 : struct spdk_blob_store *bs;
2227 : int rc;
2228 :
2229 3568 : bs = blob->bs;
2230 :
2231 3568 : blob_verify_md_op(blob);
2232 :
2233 3568 : if (blob->active.num_clusters == sz) {
2234 456 : return 0;
2235 : }
2236 :
2237 3112 : if (blob->active.num_clusters < blob->active.cluster_array_size) {
2238 : /* If this blob was resized to be larger, then smaller, then
2239 : * larger without syncing, then the cluster array already
2240 : * contains spare assigned clusters we can use.
2241 : */
2242 0 : num_clusters = spdk_min(blob->active.cluster_array_size,
2243 : sz);
2244 : } else {
2245 3112 : num_clusters = blob->active.num_clusters;
2246 : }
2247 :
2248 3112 : if (blob->use_extent_table) {
2249 : /* Round up, since every cluster beyond the current Extent Table size
2250 : * requires a new extent page. */
2251 1578 : new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
2252 1578 : current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
2253 : }
2254 :
2255 3112 : assert(!spdk_spin_held(&bs->used_lock));
2256 :
2257 : /* Check first that we have enough clusters and md pages before we start claiming them.
2258 : * bs->used_lock is held to ensure that clusters we think are free are still free when we go
2259 : * to claim them later in this function.
2260 : */
2261 3112 : if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
2262 1302 : spdk_spin_lock(&bs->used_lock);
2263 1302 : if ((sz - num_clusters) > bs->num_free_clusters) {
2264 8 : rc = -ENOSPC;
2265 8 : goto out;
2266 : }
2267 1294 : lfmd = 0;
2268 1938 : for (i = current_num_ep; i < new_num_ep ; i++) {
2269 644 : lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
2270 644 : if (lfmd == UINT32_MAX) {
2271 : /* No more free md pages. Cannot satisfy the request */
2272 0 : rc = -ENOSPC;
2273 0 : goto out;
2274 : }
2275 : }
2276 : }
2277 :
2278 3104 : if (sz > num_clusters) {
2279 : /* Expand the cluster array if necessary.
2280 : * We only shrink the array when persisting.
2281 : */
2282 1702 : tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
2283 1702 : if (sz > 0 && tmp == NULL) {
2284 0 : rc = -ENOMEM;
2285 0 : goto out;
2286 : }
2287 1702 : memset(tmp + blob->active.cluster_array_size, 0,
2288 1702 : sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
2289 1702 : blob->active.clusters = tmp;
2290 1702 : blob->active.cluster_array_size = sz;
2291 :
2292 : /* Expand the extents table, only if enough clusters were added */
2293 1702 : if (new_num_ep > current_num_ep && blob->use_extent_table) {
2294 840 : ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
2295 840 : if (new_num_ep > 0 && ep_tmp == NULL) {
2296 0 : rc = -ENOMEM;
2297 0 : goto out;
2298 : }
2299 840 : memset(ep_tmp + blob->active.extent_pages_array_size, 0,
2300 840 : sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
2301 840 : blob->active.extent_pages = ep_tmp;
2302 840 : blob->active.extent_pages_array_size = new_num_ep;
2303 : }
2304 : }
2305 :
2306 3104 : blob->state = SPDK_BLOB_STATE_DIRTY;
2307 :
2308 3104 : if (spdk_blob_is_thin_provisioned(blob) == false) {
2309 2428 : cluster = 0;
2310 2428 : lfmd = 0;
2311 9832 : for (i = num_clusters; i < sz; i++) {
2312 7404 : bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
2313 : /* Do not increment lfmd here. lfmd gets updated
2314 : * to the md_page allocated (if any) when a new extent
2315 : * page is needed. Just pass that value again;
2316 : * bs_allocate_cluster will start at that index
2317 : * to find the next free md_page when needed.
2318 : */
2319 : }
2320 : }
2321 :
2322 : /* If we are shrinking the blob, we must adjust num_allocated_clusters */
2323 1072151 : for (i = sz; i < num_clusters; i++) {
2324 1069047 : if (blob->active.clusters[i] != 0) {
2325 2343 : blob->active.num_allocated_clusters--;
2326 : }
2327 : }
2328 :
2329 3104 : blob->active.num_clusters = sz;
2330 3104 : blob->active.num_extent_pages = new_num_ep;
2331 :
2332 3104 : rc = 0;
2333 3112 : out:
2334 3112 : if (spdk_spin_held(&bs->used_lock)) {
2335 1302 : spdk_spin_unlock(&bs->used_lock);
2336 : }
2337 :
2338 3112 : return rc;
2339 : }
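/*
 * A worked example of the extent-page accounting above (illustrative only;
 * assume SPDK_EXTENTS_PER_EP == 512). Growing an extent-table blob from 100
 * to 1100 clusters:
 *
 *	current_num_ep = divide_round_up( 100, 512) = 1
 *	new_num_ep     = divide_round_up(1100, 512) = 3
 *
 * so before any cluster is claimed, the lfmd scan over used_md_pages must be
 * able to find two more free md pages, one per additional extent page.
 */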
2340 :
2341 : static void
2342 3596 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
2343 : {
2344 3596 : spdk_bs_sequence_t *seq = ctx->seq;
2345 3596 : struct spdk_blob *blob = ctx->blob;
2346 3596 : struct spdk_blob_store *bs = blob->bs;
2347 : uint64_t i;
2348 : uint32_t page_num;
2349 : void *tmp;
2350 : int rc;
2351 :
2352 : /* Generate the new metadata */
2353 3596 : rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
2354 3596 : if (rc < 0) {
2355 0 : blob_persist_complete(seq, ctx, rc);
2356 0 : return;
2357 : }
2358 :
2359 3596 : assert(blob->active.num_pages >= 1);
2360 :
2361 : /* Resize the cache of page indices */
2362 3596 : tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
2363 3596 : if (!tmp) {
2364 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2365 0 : return;
2366 : }
2367 3596 : blob->active.pages = tmp;
2368 :
2369 : /* Assign this metadata to pages. This requires two passes - one to verify that there are
2370 : * enough pages and a second to actually claim them. The used_lock is held across
2371 : * both passes to ensure things don't change in the middle.
2372 : */
2373 3596 : spdk_spin_lock(&bs->used_lock);
2374 3596 : page_num = 0;
2375 : /* Note that this loop starts at one. The first page location is fixed by the blobid. */
2376 3684 : for (i = 1; i < blob->active.num_pages; i++) {
2377 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2378 88 : if (page_num == UINT32_MAX) {
2379 0 : spdk_spin_unlock(&bs->used_lock);
2380 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2381 0 : return;
2382 : }
2383 88 : page_num++;
2384 : }
2385 :
2386 3596 : page_num = 0;
2387 3596 : blob->active.pages[0] = bs_blobid_to_page(blob->id);
2388 3684 : for (i = 1; i < blob->active.num_pages; i++) {
2389 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2390 88 : ctx->pages[i - 1].next = page_num;
2391 : /* Now that previous metadata page is complete, calculate the crc for it. */
2392 88 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2393 88 : blob->active.pages[i] = page_num;
2394 88 : bs_claim_md_page(bs, page_num);
2395 88 : SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
2396 : blob->id);
2397 88 : page_num++;
2398 : }
2399 3596 : spdk_spin_unlock(&bs->used_lock);
2400 3596 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2401 : /* Start writing the metadata from last page to first */
2402 3596 : blob->state = SPDK_BLOB_STATE_CLEAN;
2403 3596 : blob_persist_write_page_chain(seq, ctx);
2404 : }
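/*
 * Note the sealing order in the loop above: a page's CRC can only be computed
 * once its next pointer is final, so iteration i finishes page i - 1 (sets
 * its next, then its crc), and the last page is sealed after the loop.
 * Reduced to the invariant (page_num_for() is a hypothetical lookup):
 *
 *	for (i = 1; i < num_pages; i++) {
 *		pages[i - 1].next = page_num_for(i);
 *		pages[i - 1].crc  = blob_md_page_calc_crc(&pages[i - 1]);
 *	}
 *	pages[num_pages - 1].crc = blob_md_page_calc_crc(&pages[num_pages - 1]);
 */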
2405 :
2406 : static void
2407 2354 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2408 : {
2409 2354 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2410 2354 : struct spdk_blob *blob = ctx->blob;
2411 : size_t i;
2412 : uint32_t extent_page_id;
2413 2354 : uint32_t page_count = 0;
2414 : int rc;
2415 :
2416 2354 : if (ctx->extent_page != NULL) {
2417 666 : spdk_free(ctx->extent_page);
2418 666 : ctx->extent_page = NULL;
2419 : }
2420 :
2421 2354 : if (bserrno != 0) {
2422 0 : blob_persist_complete(seq, ctx, bserrno);
2423 0 : return;
2424 : }
2425 :
2426 : /* Only write out Extent Pages when blob was resized. */
2427 4608 : for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
2428 2920 : extent_page_id = blob->active.extent_pages[i];
2429 2920 : if (extent_page_id == 0) {
2430 : /* No Extent Page to persist */
2431 2254 : assert(spdk_blob_is_thin_provisioned(blob));
2432 2254 : continue;
2433 : }
2434 666 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
2435 666 : ctx->next_extent_page = i + 1;
2436 666 : rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
2437 666 : if (rc < 0) {
2438 0 : blob_persist_complete(seq, ctx, rc);
2439 0 : return;
2440 : }
2441 :
2442 666 : blob->state = SPDK_BLOB_STATE_DIRTY;
2443 666 : blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
2444 :
2445 666 : ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
2446 :
2447 666 : bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
2448 666 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
2449 : blob_persist_write_extent_pages, ctx);
2450 666 : return;
2451 : }
2452 :
2453 1688 : blob_persist_generate_new_md(ctx);
2454 : }
2455 :
2456 : static void
2457 5092 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2458 : {
2459 5092 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2460 5092 : struct spdk_blob *blob = ctx->blob;
2461 :
2462 5092 : if (bserrno != 0) {
2463 8 : blob_persist_complete(seq, ctx, bserrno);
2464 8 : return;
2465 : }
2466 :
2467 5084 : if (blob->active.num_pages == 0) {
2468 : /* This is the signal that the blob should be deleted.
2469 : * Immediately jump to the clean up routine. */
2470 1488 : assert(blob->clean.num_pages > 0);
2471 1488 : blob->state = SPDK_BLOB_STATE_CLEAN;
2472 1488 : blob_persist_zero_pages(seq, ctx, 0);
2473 1488 : return;
2474 :
2475 : }
2476 :
2477 3596 : if (blob->clean.num_clusters < blob->active.num_clusters) {
2478 : /* Blob was resized up */
2479 1674 : assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
2480 1674 : ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
2481 1922 : } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
2482 : /* Blob was resized down */
2483 14 : assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
2484 14 : ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
2485 : } else {
2486 : /* No change in size occurred */
2487 1908 : blob_persist_generate_new_md(ctx);
2488 1908 : return;
2489 : }
2490 :
2491 1688 : blob_persist_write_extent_pages(seq, ctx, 0);
2492 : }
2493 :
2494 : struct spdk_bs_mark_dirty {
2495 : struct spdk_blob_store *bs;
2496 : struct spdk_bs_super_block *super;
2497 : spdk_bs_sequence_cpl cb_fn;
2498 : void *cb_arg;
2499 : };
2500 :
2501 : static void
2502 158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2503 : {
2504 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2505 :
2506 158 : if (bserrno == 0) {
2507 150 : ctx->bs->clean = 0;
2508 : }
2509 :
2510 158 : ctx->cb_fn(seq, ctx->cb_arg, bserrno);
2511 :
2512 158 : spdk_free(ctx->super);
2513 158 : free(ctx);
2514 158 : }
2515 :
2516 : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2517 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
2518 :
2519 :
2520 : static void
2521 158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2522 : {
2523 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2524 : int rc;
2525 :
2526 158 : if (bserrno != 0) {
2527 4 : bs_mark_dirty_write_cpl(seq, ctx, bserrno);
2528 4 : return;
2529 : }
2530 :
2531 154 : rc = bs_super_validate(ctx->super, ctx->bs);
2532 154 : if (rc != 0) {
2533 0 : bs_mark_dirty_write_cpl(seq, ctx, rc);
2534 0 : return;
2535 : }
2536 :
2537 154 : ctx->super->clean = 0;
2538 154 : if (ctx->super->size == 0) {
2539 4 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
2540 : }
2541 :
2542 154 : bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
2543 : }
2544 :
2545 : static void
2546 5526 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2547 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2548 : {
2549 : struct spdk_bs_mark_dirty *ctx;
2550 :
2551 : /* Blobstore is already marked dirty */
2552 5526 : if (bs->clean == 0) {
2553 5368 : cb_fn(seq, cb_arg, 0);
2554 5368 : return;
2555 : }
2556 :
2557 158 : ctx = calloc(1, sizeof(*ctx));
2558 158 : if (!ctx) {
2559 0 : cb_fn(seq, cb_arg, -ENOMEM);
2560 0 : return;
2561 : }
2562 158 : ctx->bs = bs;
2563 158 : ctx->cb_fn = cb_fn;
2564 158 : ctx->cb_arg = cb_arg;
2565 :
2566 158 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
2567 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2568 158 : if (!ctx->super) {
2569 0 : free(ctx);
2570 0 : cb_fn(seq, cb_arg, -ENOMEM);
2571 0 : return;
2572 : }
2573 :
2574 158 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
2575 158 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
2576 : bs_mark_dirty_write, ctx);
2577 : }
2578 :
2579 : /* Write a blob to disk */
2580 : static void
2581 9104 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
2582 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2583 : {
2584 : struct spdk_blob_persist_ctx *ctx;
2585 :
2586 9104 : blob_verify_md_op(blob);
2587 :
2588 9104 : if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
2589 4012 : cb_fn(seq, cb_arg, 0);
2590 4012 : return;
2591 : }
2592 :
2593 5092 : ctx = calloc(1, sizeof(*ctx));
2594 5092 : if (!ctx) {
2595 0 : cb_fn(seq, cb_arg, -ENOMEM);
2596 0 : return;
2597 : }
2598 5092 : ctx->blob = blob;
2599 5092 : ctx->seq = seq;
2600 5092 : ctx->cb_fn = cb_fn;
2601 5092 : ctx->cb_arg = cb_arg;
2602 :
2603 : /* Multiple blob persists can affect one another, via blob->state or
2604 : * changes to the blob's mutable data. To prevent that, queue up the persists. */
2605 5092 : if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
2606 23 : TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
2607 23 : return;
2608 : }
2609 5069 : TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
2610 :
2611 5069 : bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
2612 : }
2613 :
2614 : struct spdk_blob_copy_cluster_ctx {
2615 : struct spdk_blob *blob;
2616 : uint8_t *buf;
2617 : uint64_t page;
2618 : uint64_t new_cluster;
2619 : uint32_t new_extent_page;
2620 : spdk_bs_sequence_t *seq;
2621 : struct spdk_blob_md_page *new_cluster_page;
2622 : };
2623 :
2624 : struct spdk_blob_free_cluster_ctx {
2625 : struct spdk_blob *blob;
2626 : uint64_t page;
2627 : struct spdk_blob_md_page *md_page;
2628 : uint64_t cluster_num;
2629 : uint32_t extent_page;
2630 : spdk_bs_sequence_t *seq;
2631 : };
2632 :
2633 : static void
2634 812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
2635 : {
2636 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2637 812 : struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
2638 812 : TAILQ_HEAD(, spdk_bs_request_set) requests;
2639 : spdk_bs_user_op_t *op;
2640 :
2641 812 : TAILQ_INIT(&requests);
2642 812 : TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
2643 :
2644 1624 : while (!TAILQ_EMPTY(&requests)) {
2645 812 : op = TAILQ_FIRST(&requests);
2646 812 : TAILQ_REMOVE(&requests, op, link);
2647 812 : if (bserrno == 0) {
2648 812 : bs_user_op_execute(op);
2649 : } else {
2650 0 : bs_user_op_abort(op, bserrno);
2651 : }
2652 : }
2653 :
2654 812 : spdk_free(ctx->buf);
2655 812 : free(ctx);
2656 812 : }
2657 :
2658 : static void
2659 60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
2660 : {
2661 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
2662 60 : spdk_bs_sequence_t *seq = ctx->seq;
2663 :
2664 60 : bs_sequence_finish(seq, bserrno);
2665 :
2666 60 : free(ctx);
2667 60 : }
2668 :
2669 : static void
2670 4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
2671 : {
2672 4 : spdk_spin_lock(&ctx->blob->bs->used_lock);
2673 4 : bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
2674 4 : if (ctx->new_extent_page != 0) {
2675 2 : bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
2676 : }
2677 4 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
2678 4 : }
2679 :
2680 : static void
2681 4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
2682 : {
2683 4 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2684 :
2685 4 : if (bserrno) {
2686 0 : SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
2687 : }
2688 :
2689 4 : blob_insert_cluster_revert(ctx);
2690 4 : bs_sequence_finish(ctx->seq, bserrno);
2691 4 : }
2692 :
2693 : static void
2694 4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
2695 : {
2696 4 : struct spdk_bs_cpl cpl;
2697 : spdk_bs_batch_t *batch;
2698 4 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
2699 :
2700 : /*
2701 : * We allocated a cluster and copied data into it, but it turned out we don't need
2702 : * this cluster after all and want to release it. Before releasing it, we must
2703 : * ensure that the data on the cluster is cleared.
2704 : * The cluster may later be re-allocated, for example by a thick-provisioned blob;
2705 : * a read from that blob before any write must then return zeroes.
2706 : */
2707 :
2708 4 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2709 4 : cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
2710 4 : cpl.u.blob_basic.cb_arg = ctx;
2711 :
2712 4 : batch = bs_batch_open(ch, &cpl, ctx->blob);
2713 4 : if (!batch) {
2714 0 : blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
2715 0 : return;
2716 : }
2717 :
2718 4 : bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2719 4 : bs_cluster_to_lba(ctx->blob->bs, 1));
2720 4 : bs_batch_close(batch);
2721 : }
2722 :
2723 : static void
2724 812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
2725 : {
2726 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2727 :
2728 812 : if (bserrno) {
2729 4 : if (bserrno == -EEXIST) {
2730 : /* The metadata insert failed because another thread
2731 : * allocated the cluster first. Clear and free our cluster
2732 : * but continue without error. */
2733 4 : blob_insert_cluster_clear(ctx);
2734 4 : return;
2735 : }
2736 :
2737 0 : blob_insert_cluster_revert(ctx);
2738 : }
2739 :
2740 808 : bs_sequence_finish(ctx->seq, bserrno);
2741 : }
2742 :
2743 : static void
2744 408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2745 : {
2746 408 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2747 : uint32_t cluster_number;
2748 :
2749 408 : if (bserrno) {
2750 : /* The write failed, so jump to the final completion handler */
2751 0 : bs_sequence_finish(seq, bserrno);
2752 0 : return;
2753 : }
2754 :
2755 408 : cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
2756 :
2757 408 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2758 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2759 : }
2760 :
2761 : static void
2762 280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2763 : {
2764 280 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2765 :
2766 280 : if (bserrno != 0) {
2767 : /* The read failed, so jump to the final completion handler */
2768 0 : bs_sequence_finish(seq, bserrno);
2769 0 : return;
2770 : }
2771 :
2772 : /* Write whole cluster */
2773 280 : bs_sequence_write_dev(seq, ctx->buf,
2774 280 : bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2775 280 : bs_cluster_to_lba(ctx->blob->bs, 1),
2776 : blob_write_copy_cpl, ctx);
2777 : }
2778 :
2779 : static bool
2780 796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
2781 : {
2782 796 : uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
2783 :
2784 1146 : return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
2785 350 : blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
2786 : }
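/*
 * In words: copy offload is usable only when the blob is not an esnap clone,
 * the blobstore device implements ->copy, and the backing dev can translate
 * the page's LBA into an address on the same underlying device (returned via
 * *base_lba). If any of these fail, CoW falls back to an explicit
 * read-from-parent plus write-to-cluster, as seen in
 * bs_allocate_and_copy_cluster() below.
 */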
2787 :
2788 : static void
2789 128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
2790 : {
2791 128 : struct spdk_blob *blob = ctx->blob;
2792 128 : uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
2793 :
2794 128 : bs_sequence_copy_dev(ctx->seq,
2795 128 : bs_cluster_to_lba(blob->bs, ctx->new_cluster),
2796 : src_lba,
2797 : lba_count,
2798 : blob_write_copy_cpl, ctx);
2799 128 : }
2800 :
2801 : static void
2802 812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
2803 : struct spdk_io_channel *_ch,
2804 : uint64_t io_unit, spdk_bs_user_op_t *op)
2805 : {
2806 812 : struct spdk_bs_cpl cpl;
2807 : struct spdk_bs_channel *ch;
2808 : struct spdk_blob_copy_cluster_ctx *ctx;
2809 : uint32_t cluster_start_page;
2810 : uint32_t cluster_number;
2811 : bool is_zeroes;
2812 : bool can_copy;
2813 : bool is_valid_range;
2814 812 : uint64_t copy_src_lba;
2815 : int rc;
2816 :
2817 812 : ch = spdk_io_channel_get_ctx(_ch);
2818 :
2819 812 : if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
2820 : /* There are already operations pending. Queue this user op
2821 : * and return because it will be re-executed when the outstanding
2822 : * cluster allocation completes. */
2823 0 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2824 0 : return;
2825 : }
2826 :
2827 : /* Round the io_unit offset down to the first page in the cluster */
2828 812 : cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
2829 :
2830 : /* Calculate which index in the metadata cluster array the corresponding
2831 : * cluster is supposed to be at. */
2832 812 : cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
2833 :
2834 812 : ctx = calloc(1, sizeof(*ctx));
2835 812 : if (!ctx) {
2836 0 : bs_user_op_abort(op, -ENOMEM);
2837 0 : return;
2838 : }
2839 :
2840 812 : assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
2841 :
2842 812 : ctx->blob = blob;
2843 812 : ctx->page = cluster_start_page;
2844 812 : ctx->new_cluster_page = ch->new_cluster_page;
2845 812 : memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
2846 :
2847 : /* Check that the cluster we intend to do CoW for is valid for
2848 : * the backing dev. For a zeroes backing dev it is always valid.
2849 : * For another backing dev, e.g. a snapshot, it can be invalid if
2850 : * the blob was resized after the snapshot was taken. */
2851 812 : is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
2852 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2853 812 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2854 :
2855 812 : can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, ©_src_lba);
2856 :
2857 1608 : is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
2858 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2859 796 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2860 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
2861 280 : ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
2862 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2863 280 : if (!ctx->buf) {
2864 0 : SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
2865 : blob->bs->cluster_sz);
2866 0 : free(ctx);
2867 0 : bs_user_op_abort(op, -ENOMEM);
2868 0 : return;
2869 : }
2870 : }
2871 :
2872 812 : spdk_spin_lock(&blob->bs->used_lock);
2873 812 : rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
2874 : false);
2875 812 : spdk_spin_unlock(&blob->bs->used_lock);
2876 812 : if (rc != 0) {
2877 0 : spdk_free(ctx->buf);
2878 0 : free(ctx);
2879 0 : bs_user_op_abort(op, rc);
2880 0 : return;
2881 : }
2882 :
2883 812 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2884 812 : cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
2885 812 : cpl.u.blob_basic.cb_arg = ctx;
2886 :
2887 812 : ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
2888 812 : if (!ctx->seq) {
2889 0 : spdk_spin_lock(&blob->bs->used_lock);
2890 0 : bs_release_cluster(blob->bs, ctx->new_cluster);
2891 0 : spdk_spin_unlock(&blob->bs->used_lock);
2892 0 : spdk_free(ctx->buf);
2893 0 : free(ctx);
2894 0 : bs_user_op_abort(op, -ENOMEM);
2895 0 : return;
2896 : }
2897 :
2898 : /* Queue the user op to block other incoming operations */
2899 812 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2900 :
2901 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
2902 408 : if (can_copy) {
2903 128 : blob_copy(ctx, op, copy_src_lba);
2904 : } else {
2905 : /* Read cluster from backing device */
2906 280 : bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
2907 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2908 280 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
2909 : blob_write_copy, ctx);
2910 : }
2911 :
2912 : } else {
2913 404 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2914 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2915 : }
2916 : }
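/*
 * Summary of the CoW decision above, given a parent (parent_id valid) and an
 * in-range backing cluster:
 *
 *	parent data is all zeroes -> no copy; just insert the fresh cluster
 *	copy offload available    -> bs_sequence_copy_dev() moves the cluster
 *	otherwise                 -> read the whole cluster into ctx->buf,
 *	                             then write it out (blob_write_copy)
 *
 * In every case the triggering user op sits on ch->need_cluster_alloc until
 * blob_insert_cluster_cpl() has added the cluster to the blob's metadata, so
 * concurrent I/O to the same cluster cannot race the allocation.
 */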
2917 :
2918 : static inline bool
2919 40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
2920 : uint64_t *lba, uint64_t *lba_count)
2921 : {
2922 40206 : *lba_count = length;
2923 :
2924 40206 : if (!bs_io_unit_is_allocated(blob, io_unit)) {
2925 2992 : assert(blob->back_bs_dev != NULL);
2926 2992 : *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
2927 2992 : *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
2928 2992 : return false;
2929 : } else {
2930 37214 : *lba = bs_blob_io_unit_to_lba(blob, io_unit);
2931 37214 : return true;
2932 : }
2933 : }
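/*
 * A minimal usage sketch (hypothetical caller; read_from() stands in for
 * bs_batch_read_dev()/bs_batch_read_bs_dev() as used by
 * blob_request_submit_op_single() below). The boolean result selects the
 * target device, and *lba/*lba_count come back already translated into that
 * device's block size:
 *
 *	uint64_t lba, lba_count;
 *
 *	if (blob_calculate_lba_and_lba_count(blob, io_unit, length,
 *					     &lba, &lba_count)) {
 *		read_from(blob->bs->dev, lba, lba_count);	// allocated
 *	} else {
 *		read_from(blob->back_bs_dev, lba, lba_count);	// backing dev
 *	}
 */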
2934 :
2935 : struct op_split_ctx {
2936 : struct spdk_blob *blob;
2937 : struct spdk_io_channel *channel;
2938 : uint64_t io_unit_offset;
2939 : uint64_t io_units_remaining;
2940 : void *curr_payload;
2941 : enum spdk_blob_op_type op_type;
2942 : spdk_bs_sequence_t *seq;
2943 : bool in_submit_ctx;
2944 : bool completed_in_submit_ctx;
2945 : bool done;
2946 : };
2947 :
2948 : static void
2949 774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
2950 : {
2951 774 : struct op_split_ctx *ctx = cb_arg;
2952 774 : struct spdk_blob *blob = ctx->blob;
2953 774 : struct spdk_io_channel *ch = ctx->channel;
2954 774 : enum spdk_blob_op_type op_type = ctx->op_type;
2955 : uint8_t *buf;
2956 : uint64_t offset;
2957 : uint64_t length;
2958 : uint64_t op_length;
2959 :
2960 774 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
2961 178 : bs_sequence_finish(ctx->seq, bserrno);
2962 178 : if (ctx->in_submit_ctx) {
2963 : /* Defer freeing of the ctx object, since it will be
2964 : * accessed when this unwinds back to the submission
2965 : * context.
2966 : */
2967 40 : ctx->done = true;
2968 : } else {
2969 138 : free(ctx);
2970 : }
2971 178 : return;
2972 : }
2973 :
2974 596 : if (ctx->in_submit_ctx) {
2975 : /* If this split operation completed in the context
2976 : * of its submission, mark the flag and return immediately
2977 : * to avoid recursion.
2978 : */
2979 68 : ctx->completed_in_submit_ctx = true;
2980 68 : return;
2981 : }
2982 :
2983 : while (true) {
2984 596 : ctx->completed_in_submit_ctx = false;
2985 :
2986 596 : offset = ctx->io_unit_offset;
2987 596 : length = ctx->io_units_remaining;
2988 596 : buf = ctx->curr_payload;
2989 596 : op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
2990 : offset));
2991 :
2992 : /* Update length and payload for next operation */
2993 596 : ctx->io_units_remaining -= op_length;
2994 596 : ctx->io_unit_offset += op_length;
2995 596 : if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
2996 528 : ctx->curr_payload += op_length * blob->bs->io_unit_size;
2997 : }
2998 :
2999 596 : assert(!ctx->in_submit_ctx);
3000 596 : ctx->in_submit_ctx = true;
3001 :
3002 596 : switch (op_type) {
3003 418 : case SPDK_BLOB_READ:
3004 418 : spdk_blob_io_read(blob, ch, buf, offset, op_length,
3005 : blob_request_submit_op_split_next, ctx);
3006 418 : break;
3007 110 : case SPDK_BLOB_WRITE:
3008 110 : spdk_blob_io_write(blob, ch, buf, offset, op_length,
3009 : blob_request_submit_op_split_next, ctx);
3010 110 : break;
3011 36 : case SPDK_BLOB_UNMAP:
3012 36 : spdk_blob_io_unmap(blob, ch, offset, op_length,
3013 : blob_request_submit_op_split_next, ctx);
3014 36 : break;
3015 32 : case SPDK_BLOB_WRITE_ZEROES:
3016 32 : spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
3017 : blob_request_submit_op_split_next, ctx);
3018 32 : break;
3019 0 : case SPDK_BLOB_READV:
3020 : case SPDK_BLOB_WRITEV:
3021 0 : SPDK_ERRLOG("readv/writev not valid\n");
3022 0 : bs_sequence_finish(ctx->seq, -EINVAL);
3023 0 : free(ctx);
3024 0 : return;
3025 : }
3026 :
3027 : #ifndef __clang_analyzer__
3028 : /* scan-build reports a false positive around accessing the ctx here. It
3029 : * forms a path that recursively calls this function, but then says
3030 : * "assuming ctx->in_submit_ctx is false", when that isn't possible.
3031 : * This path does free(ctx), returns to here, and reports a use-after-free
3032 : * bug. Wrapping this bit of code so that scan-build doesn't see it
3033 : * works around the scan-build bug.
3034 : */
3035 596 : assert(ctx->in_submit_ctx);
3036 596 : ctx->in_submit_ctx = false;
3037 :
3038 : /* If the operation completed immediately, loop back and submit the
3039 : * next operation. Otherwise we can return and the next split
3040 : * operation will get submitted when this current operation is
3041 : * later completed asynchronously.
3042 : */
3043 596 : if (ctx->completed_in_submit_ctx) {
3044 68 : continue;
3045 528 : } else if (ctx->done) {
3046 40 : free(ctx);
3047 : }
3048 : #endif
3049 528 : break;
3050 : }
3051 : }
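/*
 * The in_submit_ctx/completed_in_submit_ctx pair above is a general guard
 * against unbounded recursion when an asynchronous API may complete
 * synchronously. The pattern, reduced to a skeleton (a sketch, not SPDK API;
 * submit() is any call that may invoke on_done() inline):
 *
 *	static void
 *	on_done(struct split_state *s)
 *	{
 *		if (s->in_submit) {
 *			s->completed_in_submit = true;	// let the loop continue
 *			return;				// instead of recursing
 *		}
 *		while (has_more_work(s)) {
 *			s->in_submit = true;
 *			submit(s, on_done);	// may call on_done() inline
 *			s->in_submit = false;
 *			if (!s->completed_in_submit) {
 *				break;		// async path; resume on callback
 *			}
 *			s->completed_in_submit = false;
 *		}
 *	}
 *
 * This turns what would be recursion (submit -> on_done -> submit -> ...)
 * into iteration, bounding stack depth for long runs of synchronous
 * completions.
 */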
3052 :
3053 : static void
3054 178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
3055 : void *payload, uint64_t offset, uint64_t length,
3056 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3057 : {
3058 : struct op_split_ctx *ctx;
3059 : spdk_bs_sequence_t *seq;
3060 178 : struct spdk_bs_cpl cpl;
3061 :
3062 178 : assert(blob != NULL);
3063 :
3064 178 : ctx = calloc(1, sizeof(struct op_split_ctx));
3065 178 : if (ctx == NULL) {
3066 0 : cb_fn(cb_arg, -ENOMEM);
3067 0 : return;
3068 : }
3069 :
3070 178 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3071 178 : cpl.u.blob_basic.cb_fn = cb_fn;
3072 178 : cpl.u.blob_basic.cb_arg = cb_arg;
3073 :
3074 178 : seq = bs_sequence_start_blob(ch, &cpl, blob);
3075 178 : if (!seq) {
3076 0 : free(ctx);
3077 0 : cb_fn(cb_arg, -ENOMEM);
3078 0 : return;
3079 : }
3080 :
3081 178 : ctx->blob = blob;
3082 178 : ctx->channel = ch;
3083 178 : ctx->curr_payload = payload;
3084 178 : ctx->io_unit_offset = offset;
3085 178 : ctx->io_units_remaining = length;
3086 178 : ctx->op_type = op_type;
3087 178 : ctx->seq = seq;
3088 :
3089 178 : blob_request_submit_op_split_next(ctx, 0);
3090 : }
3091 :
3092 : static void
3093 60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
3094 : {
3095 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
3096 :
3097 60 : if (bserrno) {
3098 0 : bs_sequence_finish(ctx->seq, bserrno);
3099 0 : free(ctx);
3100 0 : return;
3101 : }
3102 :
3103 60 : blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
3104 : ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
3105 : }
3106 :
3107 : static void
3108 37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
3109 : void *payload, uint64_t offset, uint64_t length,
3110 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3111 : {
3112 37834 : struct spdk_bs_cpl cpl;
3113 37834 : uint64_t lba;
3114 37834 : uint64_t lba_count;
3115 : bool is_allocated;
3116 :
3117 37834 : assert(blob != NULL);
3118 :
3119 37834 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3120 37834 : cpl.u.blob_basic.cb_fn = cb_fn;
3121 37834 : cpl.u.blob_basic.cb_arg = cb_arg;
3122 :
3123 37834 : if (blob->frozen_refcnt) {
3124 : /* This blob I/O is frozen */
3125 : spdk_bs_user_op_t *op;
3126 4 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3127 :
3128 4 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3129 4 : if (!op) {
3130 0 : cb_fn(cb_arg, -ENOMEM);
3131 0 : return;
3132 : }
3133 :
3134 4 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3135 :
3136 4 : return;
3137 : }
3138 :
3139 37830 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3140 :
3141 37830 : switch (op_type) {
3142 16887 : case SPDK_BLOB_READ: {
3143 : spdk_bs_batch_t *batch;
3144 :
3145 16887 : batch = bs_batch_open(_ch, &cpl, blob);
3146 16887 : if (!batch) {
3147 0 : cb_fn(cb_arg, -ENOMEM);
3148 0 : return;
3149 : }
3150 :
3151 16887 : if (is_allocated) {
3152 : /* Read from the blob */
3153 15799 : bs_batch_read_dev(batch, payload, lba, lba_count);
3154 : } else {
3155 : /* Read from the backing block device */
3156 1088 : bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
3157 : }
3158 :
3159 16887 : bs_batch_close(batch);
3160 16887 : break;
3161 : }
3162 20851 : case SPDK_BLOB_WRITE:
3163 : case SPDK_BLOB_WRITE_ZEROES: {
3164 20851 : if (is_allocated) {
3165 : /* Write to the blob */
3166 : spdk_bs_batch_t *batch;
3167 :
3168 20507 : if (lba_count == 0) {
3169 0 : cb_fn(cb_arg, 0);
3170 0 : return;
3171 : }
3172 :
3173 20507 : batch = bs_batch_open(_ch, &cpl, blob);
3174 20507 : if (!batch) {
3175 0 : cb_fn(cb_arg, -ENOMEM);
3176 0 : return;
3177 : }
3178 :
3179 20507 : if (op_type == SPDK_BLOB_WRITE) {
3180 20475 : bs_batch_write_dev(batch, payload, lba, lba_count);
3181 : } else {
3182 32 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
3183 : }
3184 :
3185 20507 : bs_batch_close(batch);
3186 : } else {
3187 : /* Queue this operation and allocate the cluster */
3188 : spdk_bs_user_op_t *op;
3189 :
3190 344 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3191 344 : if (!op) {
3192 0 : cb_fn(cb_arg, -ENOMEM);
3193 0 : return;
3194 : }
3195 :
3196 344 : bs_allocate_and_copy_cluster(blob, _ch, offset, op);
3197 : }
3198 20851 : break;
3199 : }
3200 92 : case SPDK_BLOB_UNMAP: {
3201 92 : struct spdk_blob_free_cluster_ctx *ctx = NULL;
3202 : spdk_bs_batch_t *batch;
3203 :
3204 : /* If the unmap is aligned to a whole cluster, release that cluster */
3205 160 : if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
3206 68 : bs_io_units_per_cluster(blob) == length) {
3207 60 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3208 : uint32_t cluster_start_page;
3209 : uint32_t cluster_number;
3210 :
3211 60 : assert(offset % bs_io_units_per_cluster(blob) == 0);
3212 :
3213 : /* Round the io_unit offset down to the first page in the cluster */
3214 60 : cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
3215 :
3216 : /* Calculate which index in the metadata cluster array the corresponding
3217 : * cluster is supposed to be at. */
3218 60 : cluster_number = bs_io_unit_to_cluster_number(blob, offset);
3219 :
3220 60 : ctx = calloc(1, sizeof(*ctx));
3221 60 : if (!ctx) {
3222 0 : cb_fn(cb_arg, -ENOMEM);
3223 0 : return;
3224 : }
3225 : /* When freeing a cluster the flow should be (in order):
3226 : * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
3227 : * old data)
3228 : * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
3229 : * cluster), update and sync metadata freeing the cluster
3230 : * 3. Once metadata update is done, complete the user unmap request
3231 : */
3232 60 : ctx->blob = blob;
3233 60 : ctx->page = cluster_start_page;
3234 60 : ctx->cluster_num = cluster_number;
3235 60 : ctx->md_page = bs_channel->new_cluster_page;
3236 60 : ctx->seq = bs_sequence_start_bs(_ch, &cpl);
3237 60 : if (!ctx->seq) {
3238 0 : free(ctx);
3239 0 : cb_fn(cb_arg, -ENOMEM);
3240 0 : return;
3241 : }
3242 :
3243 60 : if (blob->use_extent_table) {
3244 30 : ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
3245 : }
3246 :
3247 60 : cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
3248 60 : cpl.u.blob_basic.cb_arg = ctx;
3249 : }
3250 :
3251 92 : batch = bs_batch_open(_ch, &cpl, blob);
3252 92 : if (!batch) {
3253 0 : free(ctx);
3254 0 : cb_fn(cb_arg, -ENOMEM);
3255 0 : return;
3256 : }
3257 :
3258 92 : if (is_allocated) {
3259 92 : bs_batch_unmap_dev(batch, lba, lba_count);
3260 : }
3261 :
3262 92 : bs_batch_close(batch);
3263 92 : break;
3264 : }
3265 0 : case SPDK_BLOB_READV:
3266 : case SPDK_BLOB_WRITEV:
3267 0 : SPDK_ERRLOG("readv/writev not valid\n");
3268 0 : cb_fn(cb_arg, -EINVAL);
3269 0 : break;
3270 : }
3271 : }
3272 :
3273 : static void
3274 38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3275 : void *payload, uint64_t offset, uint64_t length,
3276 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3277 : {
3278 38524 : assert(blob != NULL);
3279 :
3280 38524 : if (blob->data_ro && op_type != SPDK_BLOB_READ) {
3281 4 : cb_fn(cb_arg, -EPERM);
3282 4 : return;
3283 : }
3284 :
3285 38520 : if (length == 0) {
3286 492 : cb_fn(cb_arg, 0);
3287 492 : return;
3288 : }
3289 :
3290 38028 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3291 24 : cb_fn(cb_arg, -EINVAL);
3292 24 : return;
3293 : }
3294 38004 : if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
3295 37826 : blob_request_submit_op_single(_channel, blob, payload, offset, length,
3296 : cb_fn, cb_arg, op_type);
3297 : } else {
3298 178 : blob_request_submit_op_split(_channel, blob, payload, offset, length,
3299 : cb_fn, cb_arg, op_type);
3300 : }
3301 : }
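     :
     : /*
     :  * A minimal caller-side sketch, not part of blobstore.c (hypothetical names:
     :  * example_write_done(), example_blob_write()). offset and length are in
     :  * io_units; a request that fits within one cluster takes the
     :  * blob_request_submit_op_single() path above, a boundary-crossing one is split.
     :  */
     : #if 0
     : static void
     : example_write_done(void *cb_arg, int bserrno)
     : {
     :         if (bserrno != 0) {
     :                 SPDK_ERRLOG("blob write failed: %d\n", bserrno);
     :         }
     : }
     :
     : static void
     : example_blob_write(struct spdk_blob_store *bs, struct spdk_blob *blob, void *payload)
     : {
     :         /* One channel per thread; release with spdk_bs_free_io_channel() when done. */
     :         struct spdk_io_channel *ch = spdk_bs_alloc_io_channel(bs);
     :
     :         if (ch == NULL) {
     :                 example_write_done(NULL, -ENOMEM);
     :                 return;
     :         }
     :         /* Write one io_unit at offset 0; routed through blob_request_submit_op(). */
     :         spdk_blob_io_write(blob, ch, payload, 0, 1, example_write_done, NULL);
     : }
     : #endif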
3302 :
3303 : struct rw_iov_ctx {
3304 : struct spdk_blob *blob;
3305 : struct spdk_io_channel *channel;
3306 : spdk_blob_op_complete cb_fn;
3307 : void *cb_arg;
3308 : bool read;
3309 : int iovcnt;
3310 : struct iovec *orig_iov;
3311 : uint64_t io_unit_offset;
3312 : uint64_t io_units_remaining;
3313 : uint64_t io_units_done;
3314 : struct spdk_blob_ext_io_opts *ext_io_opts;
3315 : struct iovec iov[0];
3316 : };
3317 :
3318 : static void
3319 2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
3320 : {
3321 2360 : assert(cb_arg == NULL);
3322 2360 : bs_sequence_finish(seq, bserrno);
3323 2360 : }
3324 :
3325 : static void
3326 744 : rw_iov_split_next(void *cb_arg, int bserrno)
3327 : {
3328 744 : struct rw_iov_ctx *ctx = cb_arg;
3329 744 : struct spdk_blob *blob = ctx->blob;
3330 : struct iovec *iov, *orig_iov;
3331 : int iovcnt;
3332 : size_t orig_iovoff;
3333 : uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
3334 : uint64_t byte_count;
3335 :
3336 744 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
3337 204 : ctx->cb_fn(ctx->cb_arg, bserrno);
3338 204 : free(ctx);
3339 204 : return;
3340 : }
3341 :
3342 540 : io_unit_offset = ctx->io_unit_offset;
3343 540 : io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
3344 540 : io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
3345 : /*
3346 : * Get index and offset into the original iov array for our current position in the I/O sequence.
3347 :      * byte_count tracks how many bytes remain until orig_iov and orig_iovoff
3348 :      * point to the current position in the I/O sequence.
3349 : */
3350 540 : byte_count = ctx->io_units_done * blob->bs->io_unit_size;
3351 540 : orig_iov = &ctx->orig_iov[0];
3352 540 : orig_iovoff = 0;
3353 1148 : while (byte_count > 0) {
3354 608 : if (byte_count >= orig_iov->iov_len) {
3355 352 : byte_count -= orig_iov->iov_len;
3356 352 : orig_iov++;
3357 : } else {
3358 256 : orig_iovoff = byte_count;
3359 256 : byte_count = 0;
3360 : }
3361 : }
3362 :
3363 : /*
3364 : * Build an iov array for the next I/O in the sequence. byte_count will keep track of how many
3365 : * bytes of this next I/O remain to be accounted for in the new iov array.
3366 : */
3367 540 : byte_count = io_units_count * blob->bs->io_unit_size;
3368 540 : iov = &ctx->iov[0];
3369 540 : iovcnt = 0;
3370 1380 : while (byte_count > 0) {
3371 840 : assert(iovcnt < ctx->iovcnt);
3372 840 : iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
3373 840 : iov->iov_base = orig_iov->iov_base + orig_iovoff;
3374 840 : byte_count -= iov->iov_len;
3375 840 : orig_iovoff = 0;
3376 840 : orig_iov++;
3377 840 : iov++;
3378 840 : iovcnt++;
3379 : }
3380 :
3381 540 : ctx->io_unit_offset += io_units_count;
3382 540 : ctx->io_units_remaining -= io_units_count;
3383 540 : ctx->io_units_done += io_units_count;
3384 540 : iov = &ctx->iov[0];
3385 :
3386 540 : if (ctx->read) {
3387 408 : spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3388 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3389 : } else {
3390 132 : spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3391 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3392 : }
3393 : }
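     :
     : /*
     :  * A standalone sketch of the first scan above, using a hypothetical helper
     :  * name iov_seek(): given how many bytes of the original iov array have
     :  * already been consumed, locate the iovec and intra-iovec offset where the
     :  * next sub-I/O starts. It mirrors the byte_count loop over ctx->orig_iov.
     :  */
     : #if 0
     : static void
     : iov_seek(struct iovec *iov, uint64_t bytes_done, struct iovec **cur_iov, size_t *cur_off)
     : {
     :         /* Skip iovecs that earlier sub-I/Os consumed completely. */
     :         while (bytes_done > 0 && bytes_done >= iov->iov_len) {
     :                 bytes_done -= iov->iov_len;
     :                 iov++;
     :         }
     :         *cur_iov = iov;
     :         *cur_off = bytes_done;
     : }
     : #endif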
3394 :
3395 : static void
3396 2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3397 : struct iovec *iov, int iovcnt,
3398 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
3399 : struct spdk_blob_ext_io_opts *ext_io_opts)
3400 : {
3401 2588 : struct spdk_bs_cpl cpl;
3402 :
3403 2588 : assert(blob != NULL);
3404 :
3405 2588 : if (!read && blob->data_ro) {
3406 4 : cb_fn(cb_arg, -EPERM);
3407 4 : return;
3408 : }
3409 :
3410 2584 : if (length == 0) {
3411 0 : cb_fn(cb_arg, 0);
3412 0 : return;
3413 : }
3414 :
3415 2584 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3416 0 : cb_fn(cb_arg, -EINVAL);
3417 0 : return;
3418 : }
3419 :
3420 : /*
3421 : * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
3422 : * to split a request that spans a cluster boundary. For I/O that do not span a cluster boundary,
3423 : * there will be no noticeable difference compared to using a batch. For I/O that do span a cluster
3424 : * boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
3425 : * to allocate a separate iov array and split the I/O such that none of the resulting
3426 : * smaller I/O cross a cluster boundary. These smaller I/O will be issued in sequence (not in parallel)
3427 : * but since this case happens very infrequently, any performance impact will be negligible.
3428 : *
3429 : * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
3430 : * for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
3431 : * in a batch. That would also require creating an intermediate spdk_bs_cpl that would get called
3432 : * when the batch was completed, to allow for freeing the memory for the iov arrays.
3433 : */
3434 2584 : if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
3435 2376 : uint64_t lba_count;
3436 2376 : uint64_t lba;
3437 : bool is_allocated;
3438 :
3439 2376 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3440 2376 : cpl.u.blob_basic.cb_fn = cb_fn;
3441 2376 : cpl.u.blob_basic.cb_arg = cb_arg;
3442 :
3443 2376 : if (blob->frozen_refcnt) {
3444 : /* This blob I/O is frozen */
3445 : enum spdk_blob_op_type op_type;
3446 : spdk_bs_user_op_t *op;
3447 0 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
3448 :
3449 0 : op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
3450 0 : op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
3451 0 : if (!op) {
3452 0 : cb_fn(cb_arg, -ENOMEM);
3453 0 : return;
3454 : }
3455 :
3456 0 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3457 :
3458 0 : return;
3459 : }
3460 :
3461 2376 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3462 :
3463 2376 : if (read) {
3464 : spdk_bs_sequence_t *seq;
3465 :
3466 2084 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3467 2084 : if (!seq) {
3468 0 : cb_fn(cb_arg, -ENOMEM);
3469 0 : return;
3470 : }
3471 :
3472 2084 : seq->ext_io_opts = ext_io_opts;
3473 :
3474 2084 : if (is_allocated) {
3475 540 : bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3476 : } else {
3477 1544 : bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
3478 : rw_iov_done, NULL);
3479 : }
3480 : } else {
3481 292 : if (is_allocated) {
3482 : spdk_bs_sequence_t *seq;
3483 :
3484 276 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3485 276 : if (!seq) {
3486 0 : cb_fn(cb_arg, -ENOMEM);
3487 0 : return;
3488 : }
3489 :
3490 276 : seq->ext_io_opts = ext_io_opts;
3491 :
3492 276 : bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3493 : } else {
3494 : /* Queue this operation and allocate the cluster */
3495 : spdk_bs_user_op_t *op;
3496 :
3497 16 : op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
3498 : length);
3499 16 : if (!op) {
3500 0 : cb_fn(cb_arg, -ENOMEM);
3501 0 : return;
3502 : }
3503 :
3504 16 : op->ext_io_opts = ext_io_opts;
3505 :
3506 16 : bs_allocate_and_copy_cluster(blob, _channel, offset, op);
3507 : }
3508 : }
3509 : } else {
3510 : struct rw_iov_ctx *ctx;
3511 :
3512 208 : ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
3513 208 : if (ctx == NULL) {
3514 4 : cb_fn(cb_arg, -ENOMEM);
3515 4 : return;
3516 : }
3517 :
3518 204 : ctx->blob = blob;
3519 204 : ctx->channel = _channel;
3520 204 : ctx->cb_fn = cb_fn;
3521 204 : ctx->cb_arg = cb_arg;
3522 204 : ctx->read = read;
3523 204 : ctx->orig_iov = iov;
3524 204 : ctx->iovcnt = iovcnt;
3525 204 : ctx->io_unit_offset = offset;
3526 204 : ctx->io_units_remaining = length;
3527 204 : ctx->io_units_done = 0;
3528 204 : ctx->ext_io_opts = ext_io_opts;
3529 :
3530 204 : rw_iov_split_next(ctx, 0);
3531 : }
3532 : }
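     :
     : /*
     :  * A minimal readv sketch, not part of blobstore.c (hypothetical names:
     :  * example_read_done(), example_blob_readv(); the length of 8 io_units is
     :  * arbitrary). A request crossing a cluster boundary is split internally by
     :  * rw_iov_split_next() above.
     :  */
     : #if 0
     : static void
     : example_read_done(void *cb_arg, int bserrno)
     : {
     :         SPDK_DEBUGLOG(blob, "readv completed: %d\n", bserrno);
     : }
     :
     : static void
     : example_blob_readv(struct spdk_blob *blob, struct spdk_io_channel *ch,
     :                    struct iovec *iov, int iovcnt)
     : {
     :         struct spdk_blob_ext_io_opts opts = { .opts_size = sizeof(opts) };
     :
     :         spdk_blob_io_readv_ext(blob, ch, iov, iovcnt, 0, 8,
     :                                example_read_done, NULL, &opts);
     : }
     : #endif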
3533 :
3534 : static struct spdk_blob *
3535 7709 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
3536 : {
3537 7709 : struct spdk_blob find;
3538 :
3539 7709 : if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
3540 6932 : return NULL;
3541 : }
3542 :
3543 777 : find.id = blobid;
3544 777 : return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
3545 : }
3546 :
3547 : static void
3548 1798 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
3549 : struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
3550 : {
3551 1798 : assert(blob != NULL);
3552 1798 : *snapshot_entry = NULL;
3553 1798 : *clone_entry = NULL;
3554 :
3555 1798 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
3556 1518 : return;
3557 : }
3558 :
3559 424 : TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
3560 372 : if ((*snapshot_entry)->id == blob->parent_id) {
3561 228 : break;
3562 : }
3563 : }
3564 :
3565 280 : if (*snapshot_entry != NULL) {
3566 272 : TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
3567 272 : if ((*clone_entry)->id == blob->id) {
3568 228 : break;
3569 : }
3570 : }
3571 :
3572 228 : assert(*clone_entry != NULL);
3573 : }
3574 : }
3575 :
3576 : static int
3577 796 : bs_channel_create(void *io_device, void *ctx_buf)
3578 : {
3579 796 : struct spdk_blob_store *bs = io_device;
3580 796 : struct spdk_bs_channel *channel = ctx_buf;
3581 : struct spdk_bs_dev *dev;
3582 796 : uint32_t max_ops = bs->max_channel_ops;
3583 : uint32_t i;
3584 :
3585 796 : dev = bs->dev;
3586 :
3587 796 : channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
3588 796 : if (!channel->req_mem) {
3589 0 : return -1;
3590 : }
3591 :
3592 796 : TAILQ_INIT(&channel->reqs);
3593 :
3594 408348 : for (i = 0; i < max_ops; i++) {
3595 407552 : TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
3596 : }
3597 :
3598 796 : channel->bs = bs;
3599 796 : channel->dev = dev;
3600 796 : channel->dev_channel = dev->create_channel(dev);
3601 :
3602 796 : if (!channel->dev_channel) {
3603 0 : SPDK_ERRLOG("Failed to create device channel.\n");
3604 0 : free(channel->req_mem);
3605 0 : return -1;
3606 : }
3607 :
3608 796 : channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
3609 : SPDK_MALLOC_DMA);
3610 796 : if (!channel->new_cluster_page) {
3611 0 : SPDK_ERRLOG("Failed to allocate new cluster page\n");
3612 0 : free(channel->req_mem);
3613 0 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3614 0 : return -1;
3615 : }
3616 :
3617 796 : TAILQ_INIT(&channel->need_cluster_alloc);
3618 796 : TAILQ_INIT(&channel->queued_io);
3619 796 : RB_INIT(&channel->esnap_channels);
3620 :
3621 796 : return 0;
3622 : }
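     :
     : /*
     :  * Note: bs_channel_create()/bs_channel_destroy() run indirectly. bs_alloc()
     :  * binds them to the bs pointer via spdk_io_device_register(), so
     :  * spdk_bs_alloc_io_channel(bs) constructs one channel (with its own request
     :  * pool and back-end dev channel) per thread on first use.
     :  */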
3623 :
3624 : static void
3625 796 : bs_channel_destroy(void *io_device, void *ctx_buf)
3626 : {
3627 796 : struct spdk_bs_channel *channel = ctx_buf;
3628 : spdk_bs_user_op_t *op;
3629 :
3630 796 : while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
3631 0 : op = TAILQ_FIRST(&channel->need_cluster_alloc);
3632 0 : TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
3633 0 : bs_user_op_abort(op, -EIO);
3634 : }
3635 :
3636 796 : while (!TAILQ_EMPTY(&channel->queued_io)) {
3637 0 : op = TAILQ_FIRST(&channel->queued_io);
3638 0 : TAILQ_REMOVE(&channel->queued_io, op, link);
3639 0 : bs_user_op_abort(op, -EIO);
3640 : }
3641 :
3642 796 : blob_esnap_destroy_bs_channel(channel);
3643 :
3644 796 : free(channel->req_mem);
3645 796 : spdk_free(channel->new_cluster_page);
3646 796 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3647 796 : }
3648 :
3649 : static void
3650 780 : bs_dev_destroy(void *io_device)
3651 : {
3652 780 : struct spdk_blob_store *bs = io_device;
3653 : struct spdk_blob *blob, *blob_tmp;
3654 :
3655 780 : bs->dev->destroy(bs->dev);
3656 :
3657 780 : RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
3658 0 : RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
3659 0 : spdk_bit_array_clear(bs->open_blobids, blob->id);
3660 0 : blob_free(blob);
3661 : }
3662 :
3663 780 : spdk_spin_destroy(&bs->used_lock);
3664 :
3665 780 : spdk_bit_array_free(&bs->open_blobids);
3666 780 : spdk_bit_array_free(&bs->used_blobids);
3667 780 : spdk_bit_array_free(&bs->used_md_pages);
3668 780 : spdk_bit_pool_free(&bs->used_clusters);
3669 : /*
3670 : * If this function is called for any reason except a successful unload,
3671 : * the unload_cpl type will be NONE and this will be a nop.
3672 : */
3673 780 : bs_call_cpl(&bs->unload_cpl, bs->unload_err);
3674 :
3675 780 : free(bs);
3676 780 : }
3677 :
3678 : static int
3679 900 : bs_blob_list_add(struct spdk_blob *blob)
3680 : {
3681 : spdk_blob_id snapshot_id;
3682 900 : struct spdk_blob_list *snapshot_entry = NULL;
3683 900 : struct spdk_blob_list *clone_entry = NULL;
3684 :
3685 900 : assert(blob != NULL);
3686 :
3687 900 : snapshot_id = blob->parent_id;
3688 900 : if (snapshot_id == SPDK_BLOBID_INVALID ||
3689 : snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
3690 488 : return 0;
3691 : }
3692 :
3693 412 : snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
3694 412 : if (snapshot_entry == NULL) {
3695 : /* Snapshot not found */
3696 284 : snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
3697 284 : if (snapshot_entry == NULL) {
3698 0 : return -ENOMEM;
3699 : }
3700 284 : snapshot_entry->id = snapshot_id;
3701 284 : TAILQ_INIT(&snapshot_entry->clones);
3702 284 : TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
3703 : } else {
3704 204 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
3705 76 : if (clone_entry->id == blob->id) {
3706 0 : break;
3707 : }
3708 : }
3709 : }
3710 :
3711 412 : if (clone_entry == NULL) {
3712 : /* Clone not found */
3713 412 : clone_entry = calloc(1, sizeof(struct spdk_blob_list));
3714 412 : if (clone_entry == NULL) {
3715 0 : return -ENOMEM;
3716 : }
3717 412 : clone_entry->id = blob->id;
3718 412 : TAILQ_INIT(&clone_entry->clones);
3719 412 : TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
3720 412 : snapshot_entry->clone_count++;
3721 : }
3722 :
3723 412 : return 0;
3724 : }
3725 :
3726 : static void
3727 1720 : bs_blob_list_remove(struct spdk_blob *blob)
3728 : {
3729 1720 : struct spdk_blob_list *snapshot_entry = NULL;
3730 1720 : struct spdk_blob_list *clone_entry = NULL;
3731 :
3732 1720 : blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
3733 :
3734 1720 : if (snapshot_entry == NULL) {
3735 1508 : return;
3736 : }
3737 :
3738 212 : blob->parent_id = SPDK_BLOBID_INVALID;
3739 212 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3740 212 : free(clone_entry);
3741 :
3742 212 : snapshot_entry->clone_count--;
3743 : }
3744 :
3745 : static int
3746 780 : bs_blob_list_free(struct spdk_blob_store *bs)
3747 : {
3748 : struct spdk_blob_list *snapshot_entry;
3749 : struct spdk_blob_list *snapshot_entry_tmp;
3750 : struct spdk_blob_list *clone_entry;
3751 : struct spdk_blob_list *clone_entry_tmp;
3752 :
3753 924 : TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
3754 296 : TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
3755 152 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3756 152 : free(clone_entry);
3757 : }
3758 144 : TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
3759 144 : free(snapshot_entry);
3760 : }
3761 :
3762 780 : return 0;
3763 : }
3764 :
3765 : static void
3766 780 : bs_free(struct spdk_blob_store *bs)
3767 : {
3768 780 : bs_blob_list_free(bs);
3769 :
3770 780 : bs_unregister_md_thread(bs);
3771 780 : spdk_io_device_unregister(bs, bs_dev_destroy);
3772 780 : }
3773 :
3774 : void
3775 1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
3776 : {
3777 :
3778 1048 : if (!opts) {
3779 0 : SPDK_ERRLOG("opts should not be NULL\n");
3780 0 : return;
3781 : }
3782 :
3783 1048 : if (!opts_size) {
3784 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
3785 0 : return;
3786 : }
3787 :
3788 1048 : memset(opts, 0, opts_size);
3789 1048 : opts->opts_size = opts_size;
3790 :
3791 : #define FIELD_OK(field) \
3792 : offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
3793 :
3794 : #define SET_FIELD(field, value) \
3795 : if (FIELD_OK(field)) { \
3796 : opts->field = value; \
3797 : } \
3798 :
3799 1048 : SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
3800 1048 : SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3801 1048 : SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3802 1048 : SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
3803 1048 : SET_FIELD(clear_method, BS_CLEAR_WITH_UNMAP);
3804 :
3805 1048 : if (FIELD_OK(bstype)) {
3806 1048 : memset(&opts->bstype, 0, sizeof(opts->bstype));
3807 : }
3808 :
3809 1048 : SET_FIELD(iter_cb_fn, NULL);
3810 1048 : SET_FIELD(iter_cb_arg, NULL);
3811 1048 : SET_FIELD(force_recover, false);
3812 1048 : SET_FIELD(esnap_bs_dev_create, NULL);
3813 1048 : SET_FIELD(esnap_ctx, NULL);
3814 :
3815 : #undef FIELD_OK
3816 : #undef SET_FIELD
3817 : }
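     :
     : /*
     :  * A typical usage sketch (hypothetical load_done() callback). Passing
     :  * sizeof(opts) lets the FIELD_OK()/SET_FIELD() checks above skip any fields
     :  * that an older caller-side definition of the struct does not contain.
     :  */
     : #if 0
     : static void
     : load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
     : {
     :         if (bserrno == 0) {
     :                 SPDK_NOTICELOG("blobstore loaded: bs=%p\n", bs);
     :         }
     : }
     :
     : static void
     : example_bs_load(struct spdk_bs_dev *dev)
     : {
     :         struct spdk_bs_opts opts;
     :
     :         spdk_bs_opts_init(&opts, sizeof(opts));
     :         snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), "example");
     :         spdk_bs_load(dev, &opts, load_done, NULL);
     : }
     : #endif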
3818 :
3819 : static int
3820 484 : bs_opts_verify(struct spdk_bs_opts *opts)
3821 : {
3822 484 : if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
3823 480 : opts->max_channel_ops == 0) {
3824 4 : SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
3825 4 : return -1;
3826 : }
3827 :
3828 480 : return 0;
3829 : }
3830 :
3831 : /* START spdk_bs_load */
3832 :
3833 : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
3834 :
3835 : struct spdk_bs_load_ctx {
3836 : struct spdk_blob_store *bs;
3837 : struct spdk_bs_super_block *super;
3838 :
3839 : struct spdk_bs_md_mask *mask;
3840 : bool in_page_chain;
3841 : uint32_t page_index;
3842 : uint32_t cur_page;
3843 : struct spdk_blob_md_page *page;
3844 :
3845 : uint64_t num_extent_pages;
3846 : uint32_t *extent_page_num;
3847 : struct spdk_blob_md_page *extent_pages;
3848 : struct spdk_bit_array *used_clusters;
3849 :
3850 : spdk_bs_sequence_t *seq;
3851 : spdk_blob_op_with_handle_complete iter_cb_fn;
3852 : void *iter_cb_arg;
3853 : struct spdk_blob *blob;
3854 : spdk_blob_id blobid;
3855 :
3856 : bool force_recover;
3857 :
3858 : /* These fields are used in the spdk_bs_dump path. */
3859 : bool dumping;
3860 : FILE *fp;
3861 : spdk_bs_dump_print_xattr print_xattr_fn;
3862 : char xattr_name[4096];
3863 : };
3864 :
3865 : static int
3866 784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
3867 : struct spdk_bs_load_ctx **_ctx)
3868 : {
3869 : struct spdk_blob_store *bs;
3870 : struct spdk_bs_load_ctx *ctx;
3871 : uint64_t dev_size;
3872 : int rc;
3873 :
3874 784 : dev_size = dev->blocklen * dev->blockcnt;
3875 784 : if (dev_size < opts->cluster_sz) {
3876 : /* Device size cannot be smaller than cluster size of blobstore */
3877 0 : SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
3878 : dev_size, opts->cluster_sz);
3879 0 : return -ENOSPC;
3880 : }
3881 784 : if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
3882 : /* Cluster size cannot be smaller than page size */
3883 4 : SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
3884 : opts->cluster_sz, SPDK_BS_PAGE_SIZE);
3885 4 : return -EINVAL;
3886 : }
3887 780 : bs = calloc(1, sizeof(struct spdk_blob_store));
3888 780 : if (!bs) {
3889 0 : return -ENOMEM;
3890 : }
3891 :
3892 780 : ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
3893 780 : if (!ctx) {
3894 0 : free(bs);
3895 0 : return -ENOMEM;
3896 : }
3897 :
3898 780 : ctx->bs = bs;
3899 780 : ctx->iter_cb_fn = opts->iter_cb_fn;
3900 780 : ctx->iter_cb_arg = opts->iter_cb_arg;
3901 780 : ctx->force_recover = opts->force_recover;
3902 :
3903 780 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
3904 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
3905 780 : if (!ctx->super) {
3906 0 : free(ctx);
3907 0 : free(bs);
3908 0 : return -ENOMEM;
3909 : }
3910 :
3911 780 : RB_INIT(&bs->open_blobs);
3912 780 : TAILQ_INIT(&bs->snapshots);
3913 780 : bs->dev = dev;
3914 780 : bs->md_thread = spdk_get_thread();
3915 780 : assert(bs->md_thread != NULL);
3916 :
3917 : /*
3918 : * Do not use bs_lba_to_cluster() here since blockcnt may not be an
3919 : * even multiple of the cluster size.
3920 : */
3921 780 : bs->cluster_sz = opts->cluster_sz;
3922 780 : bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
3923 780 : ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
3924 780 : if (!ctx->used_clusters) {
3925 0 : spdk_free(ctx->super);
3926 0 : free(ctx);
3927 0 : free(bs);
3928 0 : return -ENOMEM;
3929 : }
3930 :
3931 780 : bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
3932 780 : if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
3933 780 : bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
3934 : }
3935 780 : bs->num_free_clusters = bs->total_clusters;
3936 780 : bs->io_unit_size = dev->blocklen;
3937 :
3938 780 : bs->max_channel_ops = opts->max_channel_ops;
3939 780 : bs->super_blob = SPDK_BLOBID_INVALID;
3940 780 : memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
3941 780 : bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
3942 780 : bs->esnap_ctx = opts->esnap_ctx;
3943 :
3944 : /* The metadata is assumed to be at least 1 page */
3945 780 : bs->used_md_pages = spdk_bit_array_create(1);
3946 780 : bs->used_blobids = spdk_bit_array_create(0);
3947 780 : bs->open_blobids = spdk_bit_array_create(0);
3948 :
3949 780 : spdk_spin_init(&bs->used_lock);
3950 :
3951 780 : spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
3952 : sizeof(struct spdk_bs_channel), "blobstore");
3953 780 : rc = bs_register_md_thread(bs);
3954 780 : if (rc == -1) {
3955 0 : spdk_io_device_unregister(bs, NULL);
3956 0 : spdk_spin_destroy(&bs->used_lock);
3957 0 : spdk_bit_array_free(&bs->open_blobids);
3958 0 : spdk_bit_array_free(&bs->used_blobids);
3959 0 : spdk_bit_array_free(&bs->used_md_pages);
3960 0 : spdk_bit_array_free(&ctx->used_clusters);
3961 0 : spdk_free(ctx->super);
3962 0 : free(ctx);
3963 0 : free(bs);
3964 :              /* FIXME: this is a lie, but we don't know how to get a proper error code here */
3965 0 : return -ENOMEM;
3966 : }
3967 :
3968 780 : *_ctx = ctx;
3969 780 : *_bs = bs;
3970 780 : return 0;
3971 : }
3972 :
3973 : static void
3974 24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
3975 : {
3976 24 : assert(bserrno != 0);
3977 :
3978 24 : spdk_free(ctx->super);
3979 24 : bs_sequence_finish(ctx->seq, bserrno);
3980 24 : bs_free(ctx->bs);
3981 24 : spdk_bit_array_free(&ctx->used_clusters);
3982 24 : free(ctx);
3983 24 : }
3984 :
3985 : static void
3986 824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
3987 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
3988 : {
3989 : /* Update the values in the super block */
3990 824 : super->super_blob = bs->super_blob;
3991 824 : memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
3992 824 : super->crc = blob_md_page_calc_crc(super);
3993 824 : bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
3994 824 : bs_byte_to_lba(bs, sizeof(*super)),
3995 : cb_fn, cb_arg);
3996 824 : }
3997 :
3998 : static void
3999 760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4000 : {
4001 760 : struct spdk_bs_load_ctx *ctx = arg;
4002 : uint64_t mask_size, lba, lba_count;
4003 :
4004 : /* Write out the used clusters mask */
4005 760 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4006 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4007 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4008 760 : if (!ctx->mask) {
4009 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4010 0 : return;
4011 : }
4012 :
4013 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
4014 760 : ctx->mask->length = ctx->bs->total_clusters;
4015 : /* We could get here through the normal unload path, or through dirty
4016 : * shutdown recovery. For the normal unload path, we use the mask from
4017 : * the bit pool. For dirty shutdown recovery, we don't have a bit pool yet -
4018 : * only the bit array from the load ctx.
4019 : */
4020 760 : if (ctx->bs->used_clusters) {
4021 654 : assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
4022 654 : spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
4023 : } else {
4024 106 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
4025 106 : spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
4026 : }
4027 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4028 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4029 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4030 : }
4031 :
4032 : static void
4033 760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4034 : {
4035 760 : struct spdk_bs_load_ctx *ctx = arg;
4036 : uint64_t mask_size, lba, lba_count;
4037 :
4038 760 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4039 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4040 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4041 760 : if (!ctx->mask) {
4042 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4043 0 : return;
4044 : }
4045 :
4046 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
4047 760 : ctx->mask->length = ctx->super->md_len;
4048 760 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
4049 :
4050 760 : spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4051 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4052 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4053 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4054 : }
4055 :
4056 : static void
4057 760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4058 : {
4059 760 : struct spdk_bs_load_ctx *ctx = arg;
4060 : uint64_t mask_size, lba, lba_count;
4061 :
4062 760 : if (ctx->super->used_blobid_mask_len == 0) {
4063 : /*
4064 : * This is a pre-v3 on-disk format where the blobid mask does not get
4065 : * written to disk.
4066 : */
4067 24 : cb_fn(seq, arg, 0);
4068 24 : return;
4069 : }
4070 :
4071 736 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4072 736 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4073 : SPDK_MALLOC_DMA);
4074 736 : if (!ctx->mask) {
4075 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4076 0 : return;
4077 : }
4078 :
4079 736 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
4080 736 : ctx->mask->length = ctx->super->md_len;
4081 736 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
4082 :
4083 736 : spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
4084 736 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4085 736 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4086 736 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4087 : }
4088 :
4089 : static void
4090 696 : blob_set_thin_provision(struct spdk_blob *blob)
4091 : {
4092 696 : blob_verify_md_op(blob);
4093 696 : blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
4094 696 : blob->state = SPDK_BLOB_STATE_DIRTY;
4095 696 : }
4096 :
4097 : static void
4098 2086 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
4099 : {
4100 2086 : blob_verify_md_op(blob);
4101 2086 : blob->clear_method = clear_method;
4102 2086 : blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
4103 2086 : blob->state = SPDK_BLOB_STATE_DIRTY;
4104 2086 : }
4105 :
4106 : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
4107 :
4108 : static void
4109 24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
4110 : {
4111 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4112 : spdk_blob_id id;
4113 : int64_t page_num;
4114 :
4115 :      /* Iterate to the next blob (we can't use the spdk_bs_iter_next function as our
4116 :       * last blob has been removed) */
4117 24 : page_num = bs_blobid_to_page(ctx->blobid);
4118 24 : page_num++;
4119 24 : page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
4120 24 : if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
4121 24 : bs_load_iter(ctx, NULL, -ENOENT);
4122 24 : return;
4123 : }
4124 :
4125 0 : id = bs_page_to_blobid(page_num);
4126 :
4127 0 : spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
4128 : }
4129 :
4130 : static void
4131 24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
4132 : {
4133 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4134 :
4135 24 : if (bserrno != 0) {
4136 0 : SPDK_ERRLOG("Failed to close corrupted blob\n");
4137 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4138 0 : return;
4139 : }
4140 :
4141 24 : spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
4142 : }
4143 :
4144 : static void
4145 24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
4146 : {
4147 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4148 : uint64_t i;
4149 :
4150 24 : if (bserrno != 0) {
4151 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4152 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4153 0 : return;
4154 : }
4155 :
4156 :      /* Snapshot and clone have the same copy of the cluster map and extent pages
4157 :       * at this point. Clear both for the snapshot now, so that they won't be
4158 :       * cleared for the clone later when we remove the snapshot. Also set thin
4159 :       * provisioning to pass the data corruption check. */
4160 264 : for (i = 0; i < ctx->blob->active.num_clusters; i++) {
4161 240 : ctx->blob->active.clusters[i] = 0;
4162 : }
4163 36 : for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
4164 12 : ctx->blob->active.extent_pages[i] = 0;
4165 : }
4166 :
4167 24 : ctx->blob->active.num_allocated_clusters = 0;
4168 :
4169 24 : ctx->blob->md_ro = false;
4170 :
4171 24 : blob_set_thin_provision(ctx->blob);
4172 :
4173 24 : ctx->blobid = ctx->blob->id;
4174 :
4175 24 : spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
4176 : }
4177 :
4178 : static void
4179 12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
4180 : {
4181 12 : struct spdk_bs_load_ctx *ctx = cb_arg;
4182 :
4183 12 : if (bserrno != 0) {
4184 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4185 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4186 0 : return;
4187 : }
4188 :
4189 12 : ctx->blob->md_ro = false;
4190 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
4191 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
4192 12 : spdk_blob_set_read_only(ctx->blob);
4193 :
4194 12 : if (ctx->iter_cb_fn) {
4195 0 : ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
4196 : }
4197 12 : bs_blob_list_add(ctx->blob);
4198 :
4199 12 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4200 : }
4201 :
4202 : static void
4203 36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
4204 : {
4205 36 : struct spdk_bs_load_ctx *ctx = cb_arg;
4206 :
4207 36 : if (bserrno != 0) {
4208 0 : SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
4209 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4210 0 : return;
4211 : }
4212 :
4213 36 : if (blob->parent_id == ctx->blob->id) {
4214 : /* Power failure occurred before updating clone (snapshot delete case)
4215 : * or after updating clone (creating snapshot case) - keep snapshot */
4216 12 : spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
4217 : } else {
4218 : /* Power failure occurred after updating clone (snapshot delete case)
4219 : * or before updating clone (creating snapshot case) - remove snapshot */
4220 24 : spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
4221 : }
4222 : }
4223 :
4224 : static void
4225 720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
4226 : {
4227 720 : struct spdk_bs_load_ctx *ctx = arg;
4228 720 : const void *value;
4229 720 : size_t len;
4230 720 : int rc = 0;
4231 :
4232 720 : if (bserrno == 0) {
4233 :              /* Examine the blob for corruption left behind by a power failure. Fix
4234 :               * the ones that can be fixed and remove any other corrupted ones. If the
4235 :               * blob is not corrupted, just process it. */
4236 440 : rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
4237 440 : if (rc != 0) {
4238 420 : rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
4239 420 : if (rc != 0) {
4240 : /* Not corrupted - process it and continue with iterating through blobs */
4241 404 : if (ctx->iter_cb_fn) {
4242 34 : ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
4243 : }
4244 404 : bs_blob_list_add(blob);
4245 404 : spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
4246 404 : return;
4247 : }
4248 :
4249 : }
4250 :
4251 36 : assert(len == sizeof(spdk_blob_id));
4252 :
4253 36 : ctx->blob = blob;
4254 :
4255 : /* Open clone to check if we are able to fix this blob or should we remove it */
4256 36 : spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
4257 36 : return;
4258 280 : } else if (bserrno == -ENOENT) {
4259 280 : bserrno = 0;
4260 : } else {
4261 : /*
4262 : * This case needs to be looked at further. Same problem
4263 : * exists with applications that rely on explicit blob
4264 : * iteration. We should just skip the blob that failed
4265 : * to load and continue on to the next one.
4266 : */
4267 0 : SPDK_ERRLOG("Error in iterating blobs\n");
4268 : }
4269 :
4270 280 : ctx->iter_cb_fn = NULL;
4271 :
4272 280 : spdk_free(ctx->super);
4273 280 : spdk_free(ctx->mask);
4274 280 : bs_sequence_finish(ctx->seq, bserrno);
4275 280 : free(ctx);
4276 : }
4277 :
4278 : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
4279 :
4280 : static void
4281 280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
4282 : {
4283 280 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
4284 280 : if (ctx->dumping) {
4285 0 : bs_dump_read_md_page(ctx->seq, ctx);
4286 0 : return;
4287 : }
4288 280 : spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
4289 : }
4290 :
4291 : static void
4292 174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4293 : {
4294 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4295 : int rc;
4296 :
4297 : /* The type must be correct */
4298 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
4299 :
4300 : /* The length of the mask (in bits) must not be greater than
4301 : * the length of the buffer (converted to bits) */
4302 174 : assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
4303 :
4304 : /* The length of the mask must be exactly equal to the size
4305 : * (in pages) of the metadata region */
4306 174 : assert(ctx->mask->length == ctx->super->md_len);
4307 :
4308 174 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
4309 174 : if (rc < 0) {
4310 0 : spdk_free(ctx->mask);
4311 0 : bs_load_ctx_fail(ctx, rc);
4312 0 : return;
4313 : }
4314 :
4315 174 : spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
4316 174 : bs_load_complete(ctx);
4317 : }
4318 :
4319 : static void
4320 174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4321 : {
4322 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4323 : uint64_t lba, lba_count, mask_size;
4324 : int rc;
4325 :
4326 174 : if (bserrno != 0) {
4327 0 : bs_load_ctx_fail(ctx, bserrno);
4328 0 : return;
4329 : }
4330 :
4331 : /* The type must be correct */
4332 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
4333 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4334 174 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
4335 : struct spdk_blob_md_page) * 8));
4336 : /*
4337 : * The length of the mask must be equal to or larger than the total number of clusters. It may be
4338 :      * larger than the total number of clusters due to a failed spdk_bs_grow().
4339 : */
4340 174 : assert(ctx->mask->length >= ctx->bs->total_clusters);
4341 174 : if (ctx->mask->length > ctx->bs->total_clusters) {
4342 4 :              SPDK_WARNLOG("Shrinking the used_clusters mask length to total_clusters\n");
4343 4 : ctx->mask->length = ctx->bs->total_clusters;
4344 : }
4345 :
4346 174 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
4347 174 : if (rc < 0) {
4348 0 : spdk_free(ctx->mask);
4349 0 : bs_load_ctx_fail(ctx, rc);
4350 0 : return;
4351 : }
4352 :
4353 174 : spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
4354 174 : ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
4355 174 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
4356 :
4357 174 : spdk_free(ctx->mask);
4358 :
4359 : /* Read the used blobids mask */
4360 174 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4361 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4362 : SPDK_MALLOC_DMA);
4363 174 : if (!ctx->mask) {
4364 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4365 0 : return;
4366 : }
4367 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4368 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4369 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4370 : bs_load_used_blobids_cpl, ctx);
4371 : }
4372 :
4373 : static void
4374 174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4375 : {
4376 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4377 : uint64_t lba, lba_count, mask_size;
4378 : int rc;
4379 :
4380 174 : if (bserrno != 0) {
4381 0 : bs_load_ctx_fail(ctx, bserrno);
4382 0 : return;
4383 : }
4384 :
4385 : /* The type must be correct */
4386 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
4387 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4388 174 : assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
4389 : 8));
4390 : /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
4391 174 : if (ctx->mask->length != ctx->super->md_len) {
4392 0 : SPDK_ERRLOG("mismatched md_len in used_pages mask: "
4393 : "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
4394 : ctx->mask->length, ctx->super->md_len);
4395 0 : assert(false);
4396 : }
4397 :
4398 174 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
4399 174 : if (rc < 0) {
4400 0 : spdk_free(ctx->mask);
4401 0 : bs_load_ctx_fail(ctx, rc);
4402 0 : return;
4403 : }
4404 :
4405 174 : spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4406 174 : spdk_free(ctx->mask);
4407 :
4408 : /* Read the used clusters mask */
4409 174 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4410 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4411 : SPDK_MALLOC_DMA);
4412 174 : if (!ctx->mask) {
4413 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4414 0 : return;
4415 : }
4416 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4417 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4418 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4419 : bs_load_used_clusters_cpl, ctx);
4420 : }
4421 :
4422 : static void
4423 174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
4424 : {
4425 : uint64_t lba, lba_count, mask_size;
4426 :
4427 : /* Read the used pages mask */
4428 174 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4429 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4430 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4431 174 : if (!ctx->mask) {
4432 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4433 0 : return;
4434 : }
4435 :
4436 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4437 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4438 174 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
4439 : bs_load_used_pages_cpl, ctx);
4440 : }
4441 :
4442 : static int
4443 246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
4444 : {
4445 246 : struct spdk_blob_store *bs = ctx->bs;
4446 : struct spdk_blob_md_descriptor *desc;
4447 246 : size_t cur_desc = 0;
4448 :
4449 246 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4450 718 : while (cur_desc < sizeof(page->descriptors)) {
4451 718 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
4452 226 : if (desc->length == 0) {
4453 : /* If padding and length are 0, this terminates the page */
4454 226 : break;
4455 : }
4456 492 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
4457 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
4458 : unsigned int i, j;
4459 68 : unsigned int cluster_count = 0;
4460 : uint32_t cluster_idx;
4461 :
4462 68 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
4463 :
4464 136 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
4465 828 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
4466 760 : cluster_idx = desc_extent_rle->extents[i].cluster_idx;
4467 : /*
4468 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4469 : * in the used cluster map.
4470 : */
4471 760 : if (cluster_idx != 0) {
4472 540 : SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
4473 540 : spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
4474 540 : if (bs->num_free_clusters == 0) {
4475 0 : return -ENOSPC;
4476 : }
4477 540 : bs->num_free_clusters--;
4478 : }
4479 760 : cluster_count++;
4480 : }
4481 : }
4482 68 : if (cluster_count == 0) {
4483 0 : return -EINVAL;
4484 : }
4485 424 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4486 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
4487 : uint32_t i;
4488 52 : uint32_t cluster_count = 0;
4489 : uint32_t cluster_idx;
4490 : size_t cluster_idx_length;
4491 :
4492 52 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
4493 52 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
4494 :
4495 52 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
4496 52 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
4497 0 : return -EINVAL;
4498 : }
4499 :
4500 652 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
4501 600 : cluster_idx = desc_extent->cluster_idx[i];
4502 : /*
4503 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4504 : * in the used cluster map.
4505 : */
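     :                          /* Note: as written, this range check is unsatisfiable
     :                           * (cluster_idx cannot be both below start_cluster_idx and
     :                           * at or above start_cluster_idx + cluster_count), so it
     :                           * never rejects an entry. */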
4506 600 : if (cluster_idx != 0) {
4507 600 : if (cluster_idx < desc_extent->start_cluster_idx &&
4508 0 : cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
4509 0 : return -EINVAL;
4510 : }
4511 600 : spdk_bit_array_set(ctx->used_clusters, cluster_idx);
4512 600 : if (bs->num_free_clusters == 0) {
4513 0 : return -ENOSPC;
4514 : }
4515 600 : bs->num_free_clusters--;
4516 : }
4517 600 : cluster_count++;
4518 : }
4519 :
4520 52 : if (cluster_count == 0) {
4521 0 : return -EINVAL;
4522 : }
4523 372 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
4524 : /* Skip this item */
4525 296 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
4526 : /* Skip this item */
4527 236 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
4528 : /* Skip this item */
4529 82 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
4530 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
4531 82 : uint32_t num_extent_pages = ctx->num_extent_pages;
4532 : uint32_t i;
4533 : size_t extent_pages_length;
4534 : void *tmp;
4535 :
4536 82 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
4537 82 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
4538 :
4539 82 : if (desc_extent_table->length == 0 ||
4540 82 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
4541 0 : return -EINVAL;
4542 : }
4543 :
4544 160 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4545 78 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4546 52 : if (desc_extent_table->extent_page[i].num_pages != 1) {
4547 0 : return -EINVAL;
4548 : }
4549 52 : num_extent_pages += 1;
4550 : }
4551 : }
4552 :
4553 82 : if (num_extent_pages > 0) {
4554 52 : tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
4555 52 : if (tmp == NULL) {
4556 0 : return -ENOMEM;
4557 : }
4558 52 : ctx->extent_page_num = tmp;
4559 :
4560 : /* Extent table entries contain md page numbers for extent pages.
4561 :                               * Zeroes represent unallocated extent pages and are stored run-length-encoded.
4562 : */
4563 104 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4564 52 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4565 52 : ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
4566 52 : ctx->num_extent_pages += 1;
4567 : }
4568 : }
4569 : }
4570 : } else {
4571 : /* Error */
4572 0 : return -EINVAL;
4573 : }
4574 : /* Advance to the next descriptor */
4575 492 : cur_desc += sizeof(*desc) + desc->length;
4576 492 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
4577 20 : break;
4578 : }
4579 472 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
4580 : }
4581 246 : return 0;
4582 : }
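     :
     : /*
     :  * Worked example for the EXTENT_RLE branch above: a descriptor with
     :  * extents[0] = { .cluster_idx = 10, .length = 3 } marks physical clusters
     :  * 10, 11 and 12 in ctx->used_clusters, while an entry with cluster_idx = 0
     :  * and length = 3 is a thin-provisioned hole and only advances cluster_count.
     :  */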
4583 :
4584 : static bool
4585 1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
4586 : {
4587 : uint32_t crc;
4588 1296 : struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4589 : size_t desc_len;
4590 :
4591 1296 : crc = blob_md_page_calc_crc(page);
4592 1296 : if (crc != page->crc) {
4593 0 : return false;
4594 : }
4595 :
4596 : /* Extent page should always be of sequence num 0. */
4597 1296 : if (page->sequence_num != 0) {
4598 44 : return false;
4599 : }
4600 :
4601 : /* Descriptor type must be EXTENT_PAGE. */
4602 1252 : if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4603 154 : return false;
4604 : }
4605 :
4606 : /* Descriptor length cannot exceed the page. */
4607 1098 : desc_len = sizeof(*desc) + desc->length;
4608 1098 : if (desc_len > sizeof(page->descriptors)) {
4609 0 : return false;
4610 : }
4611 :
4612 : /* It has to be the only descriptor in the page. */
4613 1098 : if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
4614 1098 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
4615 1098 : if (desc->length != 0) {
4616 0 : return false;
4617 : }
4618 : }
4619 :
4620 1098 : return true;
4621 : }
4622 :
4623 : static bool
4624 6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
4625 : {
4626 : uint32_t crc;
4627 6754 : struct spdk_blob_md_page *page = ctx->page;
4628 :
4629 6754 : crc = blob_md_page_calc_crc(page);
4630 6754 : if (crc != page->crc) {
4631 6538 : return false;
4632 : }
4633 :
4634 : /* First page of a sequence should match the blobid. */
4635 216 : if (page->sequence_num == 0 &&
4636 172 : bs_page_to_blobid(ctx->cur_page) != page->id) {
4637 18 : return false;
4638 : }
4639 198 : assert(bs_load_cur_extent_page_valid(page) == false);
4640 :
4641 198 : return true;
4642 : }
4643 :
4644 : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
4645 :
4646 : static void
4647 106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4648 : {
4649 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4650 :
4651 106 : if (bserrno != 0) {
4652 0 : bs_load_ctx_fail(ctx, bserrno);
4653 0 : return;
4654 : }
4655 :
4656 106 : bs_load_complete(ctx);
4657 : }
4658 :
4659 : static void
4660 106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4661 : {
4662 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4663 :
4664 106 : spdk_free(ctx->mask);
4665 106 : ctx->mask = NULL;
4666 :
4667 106 : if (bserrno != 0) {
4668 0 : bs_load_ctx_fail(ctx, bserrno);
4669 0 : return;
4670 : }
4671 :
4672 106 : bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
4673 : }
4674 :
4675 : static void
4676 106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4677 : {
4678 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4679 :
4680 106 : spdk_free(ctx->mask);
4681 106 : ctx->mask = NULL;
4682 :
4683 106 : if (bserrno != 0) {
4684 0 : bs_load_ctx_fail(ctx, bserrno);
4685 0 : return;
4686 : }
4687 :
4688 106 : bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
4689 : }
4690 :
4691 : static void
4692 106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
4693 : {
4694 106 : bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
4695 106 : }
4696 :
4697 : static void
4698 6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
4699 : {
4700 : uint64_t num_md_clusters;
4701 : uint64_t i;
4702 :
4703 6714 : ctx->in_page_chain = false;
4704 :
4705 : do {
4706 6784 : ctx->page_index++;
4707 6784 : } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
4708 :
4709 6714 : if (ctx->page_index < ctx->super->md_len) {
4710 6608 : ctx->cur_page = ctx->page_index;
4711 6608 : bs_load_replay_cur_md_page(ctx);
4712 : } else {
4713 : /* Claim all of the clusters used by the metadata */
4714 106 : num_md_clusters = spdk_divide_round_up(
4715 106 : ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
4716 480 : for (i = 0; i < num_md_clusters; i++) {
4717 374 : spdk_bit_array_set(ctx->used_clusters, i);
4718 : }
4719 106 : ctx->bs->num_free_clusters -= num_md_clusters;
4720 106 : spdk_free(ctx->page);
4721 106 : bs_load_write_used_md(ctx);
4722 : }
4723 6714 : }
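     :
     : /*
     :  * Example of the traversal above: with md_len = 4 and pages 0 and 2 already
     :  * claimed as members of earlier page chains, the do/while skips them, so the
     :  * replay visits page 1 and then page 3 before claiming the metadata clusters
     :  * and persisting the recovered masks via bs_load_write_used_md().
     :  */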
4724 :
4725 : static void
4726 52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4727 : {
4728 52 : struct spdk_bs_load_ctx *ctx = cb_arg;
4729 : uint32_t page_num;
4730 : uint64_t i;
4731 :
4732 52 : if (bserrno != 0) {
4733 0 : spdk_free(ctx->extent_pages);
4734 0 : bs_load_ctx_fail(ctx, bserrno);
4735 0 : return;
4736 : }
4737 :
4738 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4739 :          /* Extent pages are only read when present within the chain of md pages.
4740 :           * The md integrity is broken if such a page is not a valid extent page. */
4741 52 : if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
4742 0 : spdk_free(ctx->extent_pages);
4743 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4744 0 : return;
4745 : }
4746 :
4747 52 : page_num = ctx->extent_page_num[i];
4748 52 : spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
4749 52 : if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
4750 0 : spdk_free(ctx->extent_pages);
4751 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4752 0 : return;
4753 : }
4754 : }
4755 :
4756 52 : spdk_free(ctx->extent_pages);
4757 52 : free(ctx->extent_page_num);
4758 52 : ctx->extent_page_num = NULL;
4759 52 : ctx->num_extent_pages = 0;
4760 :
4761 52 : bs_load_replay_md_chain_cpl(ctx);
4762 : }
4763 :
4764 : static void
4765 52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
4766 : {
4767 : spdk_bs_batch_t *batch;
4768 : uint32_t page;
4769 : uint64_t lba;
4770 : uint64_t i;
4771 :
4772 52 : ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
4773 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4774 52 : if (!ctx->extent_pages) {
4775 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4776 0 : return;
4777 : }
4778 :
4779 52 : batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
4780 :
4781 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4782 52 : page = ctx->extent_page_num[i];
4783 52 : assert(page < ctx->super->md_len);
4784 52 : lba = bs_md_page_to_lba(ctx->bs, page);
4785 52 : bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
4786 52 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
4787 : }
4788 :
4789 52 : bs_batch_close(batch);
4790 : }
4791 :
4792 : static void
4793 6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4794 : {
4795 6754 : struct spdk_bs_load_ctx *ctx = cb_arg;
4796 : uint32_t page_num;
4797 : struct spdk_blob_md_page *page;
4798 :
4799 6754 : if (bserrno != 0) {
4800 0 : bs_load_ctx_fail(ctx, bserrno);
4801 0 : return;
4802 : }
4803 :
4804 6754 : page_num = ctx->cur_page;
4805 6754 : page = ctx->page;
4806 6754 : if (bs_load_cur_md_page_valid(ctx) == true) {
4807 198 : if (page->sequence_num == 0 || ctx->in_page_chain == true) {
4808 194 : spdk_spin_lock(&ctx->bs->used_lock);
4809 194 : bs_claim_md_page(ctx->bs, page_num);
4810 194 : spdk_spin_unlock(&ctx->bs->used_lock);
4811 194 : if (page->sequence_num == 0) {
4812 154 : SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
4813 154 : spdk_bit_array_set(ctx->bs->used_blobids, page_num);
4814 : }
4815 194 : if (bs_load_replay_md_parse_page(ctx, page)) {
4816 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4817 0 : return;
4818 : }
4819 194 : if (page->next != SPDK_INVALID_MD_PAGE) {
4820 40 : ctx->in_page_chain = true;
4821 40 : ctx->cur_page = page->next;
4822 40 : bs_load_replay_cur_md_page(ctx);
4823 40 : return;
4824 : }
4825 154 : if (ctx->num_extent_pages != 0) {
4826 52 : bs_load_replay_extent_pages(ctx);
4827 52 : return;
4828 : }
4829 : }
4830 : }
4831 6662 : bs_load_replay_md_chain_cpl(ctx);
4832 : }
4833 :
4834 : static void
4835 6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
4836 : {
4837 : uint64_t lba;
4838 :
4839 6754 : assert(ctx->cur_page < ctx->super->md_len);
4840 6754 : lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
4841 6754 : bs_sequence_read_dev(ctx->seq, ctx->page, lba,
4842 6754 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
4843 : bs_load_replay_md_cpl, ctx);
4844 6754 : }
4845 :
4846 : static void
4847 106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
4848 : {
4849 106 : ctx->page_index = 0;
4850 106 : ctx->cur_page = 0;
4851 106 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
4852 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4853 106 : if (!ctx->page) {
4854 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4855 0 : return;
4856 : }
4857 106 : bs_load_replay_cur_md_page(ctx);
4858 : }
4859 :
4860 : static void
4861 106 : bs_recover(struct spdk_bs_load_ctx *ctx)
4862 : {
4863 : int rc;
4864 :
4865 106 : SPDK_NOTICELOG("Performing recovery on blobstore\n");
4866 106 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
4867 106 : if (rc < 0) {
4868 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4869 0 : return;
4870 : }
4871 :
4872 106 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
4873 106 : if (rc < 0) {
4874 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4875 0 : return;
4876 : }
4877 :
4878 106 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4879 106 : if (rc < 0) {
4880 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4881 0 : return;
4882 : }
4883 :
4884 106 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
4885 106 : if (rc < 0) {
4886 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4887 0 : return;
4888 : }
4889 :
4890 106 : ctx->bs->num_free_clusters = ctx->bs->total_clusters;
4891 106 : bs_load_replay_md(ctx);
4892 : }
4893 :
4894 : static int
4895 276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
4896 : {
4897 : int rc;
4898 :
4899 276 : if (ctx->super->size == 0) {
4900 8 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
4901 : }
4902 :
4903 276 : if (ctx->super->io_unit_size == 0) {
4904 8 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
4905 : }
4906 :
4907 276 : ctx->bs->clean = 1;
4908 276 : ctx->bs->cluster_sz = ctx->super->cluster_size;
4909 276 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
4910 276 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
4911 276 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
4912 276 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
4913 : }
4914 276 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
4915 276 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4916 276 : if (rc < 0) {
4917 0 : return -ENOMEM;
4918 : }
4919 276 : ctx->bs->md_start = ctx->super->md_start;
4920 276 : ctx->bs->md_len = ctx->super->md_len;
4921 276 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
4922 276 : if (rc < 0) {
4923 0 : return -ENOMEM;
4924 : }
4925 :
4926 552 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
4927 276 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
4928 276 : ctx->bs->super_blob = ctx->super->super_blob;
4929 276 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
4930 :
4931 276 : return 0;
4932 : }
4933 :
4934 : static void
4935 300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4936 : {
4937 300 : struct spdk_bs_load_ctx *ctx = cb_arg;
4938 : int rc;
4939 :
4940 300 : rc = bs_super_validate(ctx->super, ctx->bs);
4941 300 : if (rc != 0) {
4942 24 : bs_load_ctx_fail(ctx, rc);
4943 24 : return;
4944 : }
4945 :
4946 276 : rc = bs_parse_super(ctx);
4947 276 : if (rc < 0) {
4948 0 : bs_load_ctx_fail(ctx, rc);
4949 0 : return;
4950 : }
4951 :
4952 276 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
4953 106 : bs_recover(ctx);
4954 : } else {
4955 170 : bs_load_read_used_pages(ctx);
4956 : }
4957 : }
4958 :
4959 : static inline int
4960 308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
4961 : {
4963 308 : if (!src->opts_size) {
4964 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
4965 0 : return -1;
4966 : }
4967 :
4968 : #define FIELD_OK(field) \
4969 : offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
4970 :
4971 : #define SET_FIELD(field) \
4972 : if (FIELD_OK(field)) { \
4973 : dst->field = src->field; \
4974 : } \
4975 :
4976 308 : SET_FIELD(cluster_sz);
4977 308 : SET_FIELD(num_md_pages);
4978 308 : SET_FIELD(max_md_ops);
4979 308 : SET_FIELD(max_channel_ops);
4980 308 : SET_FIELD(clear_method);
4981 :
4982 308 : if (FIELD_OK(bstype)) {
4983 308 : memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
4984 : }
4985 308 : SET_FIELD(iter_cb_fn);
4986 308 : SET_FIELD(iter_cb_arg);
4987 308 : SET_FIELD(force_recover);
4988 308 : SET_FIELD(esnap_bs_dev_create);
4989 308 : SET_FIELD(esnap_ctx);
4990 :
4991 308 : dst->opts_size = src->opts_size;
4992 :
4993 : /* Do not remove this statement. When adding a new field, update the
4994 : * static assert below and add a corresponding SET_FIELD statement. */
4995 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
4996 :
4997 : #undef FIELD_OK
4998 : #undef SET_FIELD
4999 :
5000 308 : return 0;
5001 : }
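:
: /*
:  * Editor's note: the FIELD_OK()/SET_FIELD() pattern above copies a field
:  * only when the caller-declared opts_size covers it, so a binary built
:  * against an older, smaller struct spdk_bs_opts keeps working. A minimal
:  * caller sketch (illustrative, not part of this file):
:  *
:  *	struct spdk_bs_opts opts;
:  *
:  *	spdk_bs_opts_init(&opts, sizeof(opts));	(records opts_size)
:  *	opts.cluster_sz = 4 * 1024 * 1024;	(override defaults as needed)
:  *	spdk_bs_load(dev, &opts, load_done_cb, NULL);
:  *
:  * Fields beyond the caller's sizeof(opts) retain the defaults set by
:  * spdk_bs_opts_init().
:  */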
5002 :
5003 : void
5004 312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5005 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5006 : {
5007 312 : struct spdk_blob_store *bs;
5008 312 : struct spdk_bs_cpl cpl;
5009 312 : struct spdk_bs_load_ctx *ctx;
5010 312 : struct spdk_bs_opts opts = {};
5011 : int err;
5012 :
5013 312 : SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
5014 :
5015 312 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5016 4 : SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
5017 4 : dev->destroy(dev);
5018 4 : cb_fn(cb_arg, NULL, -EINVAL);
5019 4 : return;
5020 : }
5021 :
5022 308 : spdk_bs_opts_init(&opts, sizeof(opts));
5023 308 : if (o) {
5024 122 : if (bs_opts_copy(o, &opts)) {
5025 0 : return;
5026 : }
5027 : }
5028 :
5029 308 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
5030 8 : dev->destroy(dev);
5031 8 : cb_fn(cb_arg, NULL, -EINVAL);
5032 8 : return;
5033 : }
5034 :
5035 300 : err = bs_alloc(dev, &opts, &bs, &ctx);
5036 300 : if (err) {
5037 0 : dev->destroy(dev);
5038 0 : cb_fn(cb_arg, NULL, err);
5039 0 : return;
5040 : }
5041 :
5042 300 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5043 300 : cpl.u.bs_handle.cb_fn = cb_fn;
5044 300 : cpl.u.bs_handle.cb_arg = cb_arg;
5045 300 : cpl.u.bs_handle.bs = bs;
5046 :
5047 300 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5048 300 : if (!ctx->seq) {
5049 0 : spdk_free(ctx->super);
5050 0 : free(ctx);
5051 0 : bs_free(bs);
5052 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5053 0 : return;
5054 : }
5055 :
5056 : /* Read the super block */
5057 300 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5058 300 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5059 : bs_load_super_cpl, ctx);
5060 : }
5061 :
5062 : /* END spdk_bs_load */
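:
: /*
:  * Usage sketch (editor's addition): loading an existing blobstore. The
:  * function and callback names are illustrative; force_recover is optional
:  * and forces metadata replay even when the superblock is marked clean.
:  */
: static void
: example_load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
: {
: 	if (bserrno != 0) {
: 		SPDK_ERRLOG("blobstore load failed: %s\n", spdk_strerror(-bserrno));
: 		return;
: 	}
: 	SPDK_NOTICELOG("loaded blobstore, cluster size %" PRIu64 "\n",
: 		       spdk_bs_get_cluster_size(bs));
: }
:
: static void
: example_load(struct spdk_bs_dev *dev)
: {
: 	struct spdk_bs_opts opts;
:
: 	spdk_bs_opts_init(&opts, sizeof(opts));
: 	opts.force_recover = true;
: 	spdk_bs_load(dev, &opts, example_load_done, NULL);
: }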
5063 :
5064 : /* START spdk_bs_dump */
5065 :
5066 : static void
5067 0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
5068 : {
5069 0 : spdk_free(ctx->super);
5070 :
5071 : /*
5072 : * We need to defer calling bs_call_cpl() until after
5073 : * dev destruction, so tuck these away for later use.
5074 : */
5075 0 : ctx->bs->unload_err = bserrno;
5076 0 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5077 0 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5078 :
5079 0 : bs_sequence_finish(seq, 0);
5080 0 : bs_free(ctx->bs);
5081 0 : free(ctx);
5082 0 : }
5083 :
5084 : static void
5085 0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5086 : {
5087 : struct spdk_blob_md_descriptor_xattr *desc_xattr;
5088 : uint32_t i;
5089 : const char *type;
5090 :
5091 0 : desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
5092 :
5093 0 : if (desc_xattr->length !=
5094 : sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
5095 0 : desc_xattr->name_length + desc_xattr->value_length) {
: fprintf(ctx->fp, "Malformed xattr descriptor: length %" PRIu32
: " does not match name/value lengths\n", desc_xattr->length);
5096 : }
5097 :
5098 0 : memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
5099 0 : ctx->xattr_name[desc_xattr->name_length] = '\0';
5100 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5101 0 : type = "XATTR";
5102 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5103 0 : type = "XATTR_INTERNAL";
5104 : } else {
5105 0 : assert(false);
5106 : type = "XATTR_?";
5107 : }
5108 0 : fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
5109 0 : fprintf(ctx->fp, " value = \"");
5110 0 : ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
5111 0 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
5112 0 : desc_xattr->value_length);
5113 0 : fprintf(ctx->fp, "\"\n");
5114 0 : for (i = 0; i < desc_xattr->value_length; i++) {
5115 0 : if (i % 16 == 0) {
5116 0 : fprintf(ctx->fp, " ");
5117 : }
5118 0 : fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
5119 0 : if ((i + 1) % 16 == 0) {
5120 0 : fprintf(ctx->fp, "\n");
5121 : }
5122 : }
5123 0 : if (i % 16 != 0) {
5124 0 : fprintf(ctx->fp, "\n");
5125 : }
5126 0 : }
5127 :
5128 : struct type_flag_desc {
5129 : uint64_t mask;
5130 : uint64_t val;
5131 : const char *name;
5132 : };
5133 :
5134 : static void
5135 0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
5136 : struct type_flag_desc *desc, size_t numflags)
5137 : {
5138 0 : uint64_t covered = 0;
5139 : size_t i;
5140 :
5141 0 : for (i = 0; i < numflags; i++) {
5142 0 : if ((desc[i].mask & flags) != desc[i].val) {
5143 0 : continue;
5144 : }
5145 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
5146 0 : if (desc[i].mask != desc[i].val) {
5147 0 : fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
5148 0 : desc[i].mask, desc[i].val);
5149 : }
5150 0 : fprintf(ctx->fp, "\n");
5151 0 : covered |= desc[i].mask;
5152 : }
5153 0 : if ((flags & ~covered) != 0) {
5154 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
5155 : }
5156 0 : }
5157 :
5158 : static void
5159 0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5160 : {
5161 : struct spdk_blob_md_descriptor_flags *type_desc;
5162 : #define ADD_FLAG(f) { f, f, #f }
5163 : #define ADD_MASK_VAL(m, v) { m, v, #v }
5164 : static struct type_flag_desc invalid[] = {
5165 : ADD_FLAG(SPDK_BLOB_THIN_PROV),
5166 : ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
5167 : ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
5168 : };
5169 : static struct type_flag_desc data_ro[] = {
5170 : ADD_FLAG(SPDK_BLOB_READ_ONLY),
5171 : };
5172 : static struct type_flag_desc md_ro[] = {
5173 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
5174 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
5175 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
5176 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
5177 : };
5178 : #undef ADD_FLAG
5179 : #undef ADD_MASK_VAL
5180 :
5181 0 : type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
5182 0 : fprintf(ctx->fp, "Flags:\n");
5183 0 : fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
5184 0 : bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
5185 : SPDK_COUNTOF(invalid));
5186 0 : fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
5187 0 : bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
5188 : SPDK_COUNTOF(data_ro));
5189 0 : fprintf(ctx->fp, "\t md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
5190 0 : bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
5191 : SPDK_COUNTOF(md_ro));
5192 0 : }
5193 :
5194 : static void
5195 0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5196 : {
5197 : struct spdk_blob_md_descriptor_extent_table *et_desc;
5198 : uint64_t num_extent_pages;
5199 : uint32_t et_idx;
5200 :
5201 0 : et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
5202 0 : num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
5203 : sizeof(et_desc->extent_page[0]);
5204 :
5205 0 : fprintf(ctx->fp, "Extent table:\n");
5206 0 : for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
5207 0 : if (et_desc->extent_page[et_idx].page_idx == 0) {
5208 : /* Zeroes represent unallocated extent pages. */
5209 0 : continue;
5210 : }
5211 0 : fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
5212 : " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
5213 : et_desc->extent_page[et_idx].num_pages,
5214 : bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
5215 : }
5216 0 : }
5217 :
5218 : static void
5219 0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
5220 : {
5221 0 : uint32_t page_idx = ctx->cur_page;
5222 0 : struct spdk_blob_md_page *page = ctx->page;
5223 : struct spdk_blob_md_descriptor *desc;
5224 0 : size_t cur_desc = 0;
5225 : uint32_t crc;
5226 :
5227 0 : fprintf(ctx->fp, "=========\n");
5228 0 : fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
5229 0 : fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
5230 0 : fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
5231 0 : fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
5232 0 : if (page->next == SPDK_INVALID_MD_PAGE) {
5233 0 : fprintf(ctx->fp, "Next: None\n");
5234 : } else {
5235 0 : fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
5236 : }
5237 0 : fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
5238 0 : if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
5239 0 : fprintf(ctx->fp, " md");
5240 : }
5241 0 : if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
5242 0 : fprintf(ctx->fp, " blob");
5243 : }
5244 0 : fprintf(ctx->fp, "\n");
5245 :
5246 0 : crc = blob_md_page_calc_crc(page);
5247 0 : fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
5248 :
5249 0 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
5250 0 : while (cur_desc < sizeof(page->descriptors)) {
5251 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
5252 0 : if (desc->length == 0) {
5253 : /* If padding and length are 0, this terminates the page */
5254 0 : break;
5255 : }
5256 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
5257 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
5258 : unsigned int i;
5259 :
5260 0 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
5261 :
5262 0 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
5263 0 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
5264 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5265 : desc_extent_rle->extents[i].cluster_idx);
5266 : } else {
5267 0 : fprintf(ctx->fp, "Unallocated Extent - ");
5268 : }
5269 0 : fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
5270 0 : fprintf(ctx->fp, "\n");
5271 : }
5272 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
5273 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
5274 : unsigned int i;
5275 :
5276 0 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
5277 :
5278 0 : for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
5279 0 : if (desc_extent->cluster_idx[i] != 0) {
5280 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5281 : desc_extent->cluster_idx[i]);
5282 : } else {
5283 0 : fprintf(ctx->fp, "Unallocated Extent");
5284 : }
5285 0 : fprintf(ctx->fp, "\n");
5286 : }
5287 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5288 0 : bs_dump_print_xattr(ctx, desc);
5289 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5290 0 : bs_dump_print_xattr(ctx, desc);
5291 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
5292 0 : bs_dump_print_type_flags(ctx, desc);
5293 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
5294 0 : bs_dump_print_extent_table(ctx, desc);
5295 : } else {
5296 : /* Error */
5297 0 : fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
5298 : }
5299 : /* Advance to the next descriptor */
5300 0 : cur_desc += sizeof(*desc) + desc->length;
5301 0 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
5302 0 : break;
5303 : }
5304 0 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
5305 : }
5306 0 : }
5307 :
5308 : static void
5309 0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5310 : {
5311 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5312 :
5313 0 : if (bserrno != 0) {
5314 0 : bs_dump_finish(seq, ctx, bserrno);
5315 0 : return;
5316 : }
5317 :
5318 0 : if (ctx->page->id != 0) {
5319 0 : bs_dump_print_md_page(ctx);
5320 : }
5321 :
5322 0 : ctx->cur_page++;
5323 :
5324 0 : if (ctx->cur_page < ctx->super->md_len) {
5325 0 : bs_dump_read_md_page(seq, ctx);
5326 : } else {
5327 0 : spdk_free(ctx->page);
5328 0 : bs_dump_finish(seq, ctx, 0);
5329 : }
5330 : }
5331 :
5332 : static void
5333 0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
5334 : {
5335 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5336 : uint64_t lba;
5337 :
5338 0 : assert(ctx->cur_page < ctx->super->md_len);
5339 0 : lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
5340 0 : bs_sequence_read_dev(seq, ctx->page, lba,
5341 0 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
5342 : bs_dump_read_md_page_cpl, ctx);
5343 0 : }
5344 :
5345 : static void
5346 0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5347 : {
5348 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5349 : int rc;
5350 :
5351 0 : fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
5352 0 : if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5353 : sizeof(ctx->super->signature)) != 0) {
5354 0 : fprintf(ctx->fp, "(Mismatch)\n");
5355 0 : bs_dump_finish(seq, ctx, bserrno);
5356 0 : return;
5357 : } else {
5358 0 : fprintf(ctx->fp, "(OK)\n");
5359 : }
5360 0 : fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
5361 0 : fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
5362 0 : (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
5363 0 : fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
5364 0 : fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
5365 0 : fprintf(ctx->fp, "Super Blob ID: ");
5366 0 : if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
5367 0 : fprintf(ctx->fp, "(None)\n");
5368 : } else {
5369 0 : fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
5370 : }
5371 0 : fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
5372 0 : fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
5373 0 : fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
5374 0 : fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
5375 0 : fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
5376 0 : fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
5377 0 : fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
5378 0 : fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
5379 0 : fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
5380 :
5381 0 : ctx->cur_page = 0;
5382 0 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
5383 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5384 0 : if (!ctx->page) {
5385 0 : bs_dump_finish(seq, ctx, -ENOMEM);
5386 0 : return;
5387 : }
5388 :
5389 0 : rc = bs_parse_super(ctx);
5390 0 : if (rc < 0) {
5391 0 : bs_load_ctx_fail(ctx, rc);
5392 0 : return;
5393 : }
5394 :
5395 0 : bs_load_read_used_pages(ctx);
5396 : }
5397 :
5398 : void
5399 0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
5400 : spdk_bs_op_complete cb_fn, void *cb_arg)
5401 : {
5402 0 : struct spdk_blob_store *bs;
5403 0 : struct spdk_bs_cpl cpl;
5404 0 : struct spdk_bs_load_ctx *ctx;
5405 0 : struct spdk_bs_opts opts = {};
5406 : int err;
5407 :
5408 0 : SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
5409 :
5410 0 : spdk_bs_opts_init(&opts, sizeof(opts));
5411 :
5412 0 : err = bs_alloc(dev, &opts, &bs, &ctx);
5413 0 : if (err) {
5414 0 : dev->destroy(dev);
5415 0 : cb_fn(cb_arg, err);
5416 0 : return;
5417 : }
5418 :
5419 0 : ctx->dumping = true;
5420 0 : ctx->fp = fp;
5421 0 : ctx->print_xattr_fn = print_xattr_fn;
5422 :
5423 0 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5424 0 : cpl.u.bs_basic.cb_fn = cb_fn;
5425 0 : cpl.u.bs_basic.cb_arg = cb_arg;
5426 :
5427 0 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5428 0 : if (!ctx->seq) {
5429 0 : spdk_free(ctx->super);
5430 0 : free(ctx);
5431 0 : bs_free(bs);
5432 0 : cb_fn(cb_arg, -ENOMEM);
5433 0 : return;
5434 : }
5435 :
5436 : /* Read the super block */
5437 0 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5438 0 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5439 : bs_dump_super_cpl, ctx);
5440 : }
5441 :
5442 : /* END spdk_bs_dump */
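:
: /*
:  * Usage sketch (editor's addition): dumping a blobstore to a stream.
:  * The print_xattr callback receives the arguments shown at the call site
:  * in bs_dump_print_xattr() above (stream, bstype, xattr name, value and
:  * value length) so the application can decode bstype-specific values;
:  * consult spdk/blob.h for the exact typedef. Illustratively:
:  *
:  *	spdk_bs_dump(dev, stdout, my_print_xattr, dump_done_cb, NULL);
:  *
:  * Like load and init, dump consumes the bs_dev: it is destroyed before
:  * the completion callback runs.
:  */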
5443 :
5444 : /* START spdk_bs_init */
5445 :
5446 : static void
5447 472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5448 : {
5449 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5450 :
5451 472 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
5452 472 : spdk_free(ctx->super);
5453 472 : free(ctx);
5454 :
5455 472 : bs_sequence_finish(seq, bserrno);
5456 472 : }
5457 :
5458 : static void
5459 472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5460 : {
5461 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5462 :
5463 : /* Write super block */
5464 472 : bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
5465 472 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
5466 : bs_init_persist_super_cpl, ctx);
5467 472 : }
5468 :
5469 : void
5470 488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5471 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5472 : {
5473 488 : struct spdk_bs_load_ctx *ctx;
5474 488 : struct spdk_blob_store *bs;
5475 488 : struct spdk_bs_cpl cpl;
5476 : spdk_bs_sequence_t *seq;
5477 : spdk_bs_batch_t *batch;
5478 : uint64_t num_md_lba;
5479 : uint64_t num_md_pages;
5480 : uint64_t num_md_clusters;
5481 : uint64_t max_used_cluster_mask_len;
5482 : uint32_t i;
5483 488 : struct spdk_bs_opts opts = {};
5484 : int rc;
5485 : uint64_t lba, lba_count;
5486 :
5487 488 : SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
5488 :
5489 488 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5490 4 : SPDK_ERRLOG("unsupported dev block length of %d\n",
5491 : dev->blocklen);
5492 4 : dev->destroy(dev);
5493 4 : cb_fn(cb_arg, NULL, -EINVAL);
5494 4 : return;
5495 : }
5496 :
5497 484 : spdk_bs_opts_init(&opts, sizeof(opts));
5498 484 : if (o) {
5499 182 : if (bs_opts_copy(o, &opts)) {
: dev->destroy(dev);
: cb_fn(cb_arg, NULL, -EINVAL);
5500 0 : return;
5501 : }
5502 : }
5503 :
5504 484 : if (bs_opts_verify(&opts) != 0) {
5505 4 : dev->destroy(dev);
5506 4 : cb_fn(cb_arg, NULL, -EINVAL);
5507 4 : return;
5508 : }
5509 :
5510 480 : rc = bs_alloc(dev, &opts, &bs, &ctx);
5511 480 : if (rc) {
5512 4 : dev->destroy(dev);
5513 4 : cb_fn(cb_arg, NULL, rc);
5514 4 : return;
5515 : }
5516 :
5517 476 : if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
5518 : /* By default, allocate 1 metadata page per cluster.
5519 : * Technically this over-allocates, because reserving
5520 : * pages for metadata itself reduces the number of
5521 : * clusters left for data. Tighter math could account
5522 : * for this in the future.
5523 : */
5524 468 : bs->md_len = bs->total_clusters;
5525 : } else {
5526 8 : bs->md_len = opts.num_md_pages;
5527 : }
5528 476 : rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
5529 476 : if (rc < 0) {
5530 0 : spdk_free(ctx->super);
5531 0 : free(ctx);
5532 0 : bs_free(bs);
5533 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5534 0 : return;
5535 : }
5536 :
5537 476 : rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
5538 476 : if (rc < 0) {
5539 0 : spdk_free(ctx->super);
5540 0 : free(ctx);
5541 0 : bs_free(bs);
5542 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5543 0 : return;
5544 : }
5545 :
5546 476 : rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
5547 476 : if (rc < 0) {
5548 0 : spdk_free(ctx->super);
5549 0 : free(ctx);
5550 0 : bs_free(bs);
5551 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5552 0 : return;
5553 : }
5554 :
5555 476 : memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5556 : sizeof(ctx->super->signature));
5557 476 : ctx->super->version = SPDK_BS_VERSION;
5558 476 : ctx->super->length = sizeof(*ctx->super);
5559 476 : ctx->super->super_blob = bs->super_blob;
5560 476 : ctx->super->clean = 0;
5561 476 : ctx->super->cluster_size = bs->cluster_sz;
5562 476 : ctx->super->io_unit_size = bs->io_unit_size;
5563 476 : memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
5564 :
5565 : /* Calculate how many pages the metadata consumes at the front
5566 : * of the disk.
5567 : */
5568 :
5569 : /* The super block uses 1 page */
5570 476 : num_md_pages = 1;
5571 :
5572 : /* The used_md_pages mask requires 1 bit per metadata page, rounded
5573 : * up to the nearest page, plus a header.
5574 : */
5575 476 : ctx->super->used_page_mask_start = num_md_pages;
5576 476 : ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5577 476 : spdk_divide_round_up(bs->md_len, 8),
5578 : SPDK_BS_PAGE_SIZE);
5579 476 : num_md_pages += ctx->super->used_page_mask_len;
5580 :
5581 : /* The used_clusters mask requires 1 bit per cluster, rounded
5582 : * up to the nearest page, plus a header.
5583 : */
5584 476 : ctx->super->used_cluster_mask_start = num_md_pages;
5585 476 : ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5586 476 : spdk_divide_round_up(bs->total_clusters, 8),
5587 : SPDK_BS_PAGE_SIZE);
5588 : /* If the blobstore is later grown, the used_cluster bitmap will need more space.
5589 : * Reserve enough mask pages to track up to bs->md_len clusters, the maximum
5590 : * number of clusters this metadata region can ever support.
5591 : */
5592 476 : max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5593 476 : spdk_divide_round_up(bs->md_len, 8),
5594 : SPDK_BS_PAGE_SIZE);
5595 476 : max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
5596 : ctx->super->used_cluster_mask_len);
5597 476 : num_md_pages += max_used_cluster_mask_len;
5598 :
5599 : /* The used_blobids mask requires 1 bit per metadata page, rounded
5600 : * up to the nearest page, plus a header.
5601 : */
5602 476 : ctx->super->used_blobid_mask_start = num_md_pages;
5603 476 : ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5604 476 : spdk_divide_round_up(bs->md_len, 8),
5605 : SPDK_BS_PAGE_SIZE);
5606 476 : num_md_pages += ctx->super->used_blobid_mask_len;
5607 :
5608 : /* The metadata region size was chosen above */
5609 476 : ctx->super->md_start = bs->md_start = num_md_pages;
5610 476 : ctx->super->md_len = bs->md_len;
5611 476 : num_md_pages += bs->md_len;
5612 :
5613 476 : num_md_lba = bs_page_to_lba(bs, num_md_pages);
5614 :
5615 476 : ctx->super->size = dev->blockcnt * dev->blocklen;
5616 :
5617 476 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
5618 :
5619 476 : num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
5620 476 : if (num_md_clusters > bs->total_clusters) {
5621 4 : SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
5622 : "please decrease number of pages reserved for metadata "
5623 : "or increase cluster size.\n");
5624 4 : spdk_free(ctx->super);
5625 4 : spdk_bit_array_free(&ctx->used_clusters);
5626 4 : free(ctx);
5627 4 : bs_free(bs);
5628 4 : cb_fn(cb_arg, NULL, -ENOMEM);
5629 4 : return;
5630 : }
5631 : /* Claim all of the clusters used by the metadata */
5632 75700 : for (i = 0; i < num_md_clusters; i++) {
5633 75228 : spdk_bit_array_set(ctx->used_clusters, i);
5634 : }
5635 :
5636 472 : bs->num_free_clusters -= num_md_clusters;
5637 472 : bs->total_data_clusters = bs->num_free_clusters;
5638 :
5639 472 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5640 472 : cpl.u.bs_handle.cb_fn = cb_fn;
5641 472 : cpl.u.bs_handle.cb_arg = cb_arg;
5642 472 : cpl.u.bs_handle.bs = bs;
5643 :
5644 472 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5645 472 : if (!seq) {
5646 0 : spdk_free(ctx->super);
5647 0 : free(ctx);
5648 0 : bs_free(bs);
5649 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5650 0 : return;
5651 : }
5652 :
5653 472 : batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
5654 :
5655 : /* Clear metadata space */
5656 472 : bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
5657 :
5658 472 : lba = num_md_lba;
5659 472 : lba_count = ctx->bs->dev->blockcnt - lba;
5660 472 : switch (opts.clear_method) {
5661 456 : case BS_CLEAR_WITH_UNMAP:
5662 : /* Trim data clusters */
5663 456 : bs_batch_unmap_dev(batch, lba, lba_count);
5664 456 : break;
5665 0 : case BS_CLEAR_WITH_WRITE_ZEROES:
5666 : /* Write_zeroes to data clusters */
5667 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
5668 0 : break;
5669 16 : case BS_CLEAR_WITH_NONE:
5670 : default:
5671 16 : break;
5672 : }
5673 :
5674 472 : bs_batch_close(batch);
5675 : }
5676 :
5677 : /* END spdk_bs_init */
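:
: /*
:  * Usage sketch (editor's addition): initializing a fresh blobstore,
:  * reusing example_load_done() from the load sketch above. Names are
:  * illustrative. BS_CLEAR_WITH_WRITE_ZEROES suits devices without unmap
:  * support; BS_CLEAR_WITH_NONE skips clearing the data clusters entirely.
:  */
: static void
: example_init(struct spdk_bs_dev *dev)
: {
: 	struct spdk_bs_opts opts;
:
: 	spdk_bs_opts_init(&opts, sizeof(opts));
: 	opts.cluster_sz = 1024 * 1024;
: 	opts.clear_method = BS_CLEAR_WITH_WRITE_ZEROES;
: 	spdk_bs_init(dev, &opts, example_load_done, NULL);
: }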
5678 :
5679 : /* START spdk_bs_destroy */
5680 :
5681 : static void
5682 4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5683 : {
5684 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
5685 4 : struct spdk_blob_store *bs = ctx->bs;
5686 :
5687 : /*
5688 : * We need to defer calling bs_call_cpl() until after
5689 : * dev destruction, so tuck these away for later use.
5690 : */
5691 4 : bs->unload_err = bserrno;
5692 4 : memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5693 4 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5694 :
5695 4 : bs_sequence_finish(seq, bserrno);
5696 :
5697 4 : bs_free(bs);
5698 4 : free(ctx);
5699 4 : }
5700 :
5701 : void
5702 4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
5703 : void *cb_arg)
5704 : {
5705 4 : struct spdk_bs_cpl cpl;
5706 : spdk_bs_sequence_t *seq;
5707 : struct spdk_bs_load_ctx *ctx;
5708 :
5709 4 : SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
5710 :
5711 4 : if (!RB_EMPTY(&bs->open_blobs)) {
5712 0 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5713 0 : cb_fn(cb_arg, -EBUSY);
5714 0 : return;
5715 : }
5716 :
5717 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5718 4 : cpl.u.bs_basic.cb_fn = cb_fn;
5719 4 : cpl.u.bs_basic.cb_arg = cb_arg;
5720 :
5721 4 : ctx = calloc(1, sizeof(*ctx));
5722 4 : if (!ctx) {
5723 0 : cb_fn(cb_arg, -ENOMEM);
5724 0 : return;
5725 : }
5726 :
5727 4 : ctx->bs = bs;
5728 :
5729 4 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5730 4 : if (!seq) {
5731 0 : free(ctx);
5732 0 : cb_fn(cb_arg, -ENOMEM);
5733 0 : return;
5734 : }
5735 :
5736 : /* Write zeroes to the super block */
5737 4 : bs_sequence_write_zeroes_dev(seq,
5738 : bs_page_to_lba(bs, 0),
5739 : bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
5740 : bs_destroy_trim_cpl, ctx);
5741 : }
5742 :
5743 : /* END spdk_bs_destroy */
5744 :
5745 : /* START spdk_bs_unload */
5746 :
5747 : static void
5748 654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
5749 : {
5750 654 : spdk_bs_sequence_t *seq = ctx->seq;
5751 :
5752 654 : spdk_free(ctx->super);
5753 :
5754 : /*
5755 : * We need to defer calling bs_call_cpl() until after
5756 : * dev destruction, so tuck these away for later use.
5757 : */
5758 654 : ctx->bs->unload_err = bserrno;
5759 654 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5760 654 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5761 :
5762 654 : bs_sequence_finish(seq, bserrno);
5763 :
5764 654 : bs_free(ctx->bs);
5765 654 : free(ctx);
5766 654 : }
5767 :
5768 : static void
5769 654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5770 : {
5771 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5772 :
5773 654 : bs_unload_finish(ctx, bserrno);
5774 654 : }
5775 :
5776 : static void
5777 654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5778 : {
5779 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5780 :
5781 654 : spdk_free(ctx->mask);
5782 :
5783 654 : if (bserrno != 0) {
5784 0 : bs_unload_finish(ctx, bserrno);
5785 0 : return;
5786 : }
5787 :
5788 654 : ctx->super->clean = 1;
5789 :
5790 654 : bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
5791 : }
5792 :
5793 : static void
5794 654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5795 : {
5796 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5797 :
5798 654 : spdk_free(ctx->mask);
5799 654 : ctx->mask = NULL;
5800 :
5801 654 : if (bserrno != 0) {
5802 0 : bs_unload_finish(ctx, bserrno);
5803 0 : return;
5804 : }
5805 :
5806 654 : bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
5807 : }
5808 :
5809 : static void
5810 654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5811 : {
5812 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5813 :
5814 654 : spdk_free(ctx->mask);
5815 654 : ctx->mask = NULL;
5816 :
5817 654 : if (bserrno != 0) {
5818 0 : bs_unload_finish(ctx, bserrno);
5819 0 : return;
5820 : }
5821 :
5822 654 : bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
5823 : }
5824 :
5825 : static void
5826 654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5827 : {
5828 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5829 : int rc;
5830 :
5831 654 : if (bserrno != 0) {
5832 0 : bs_unload_finish(ctx, bserrno);
5833 0 : return;
5834 : }
5835 :
5836 654 : rc = bs_super_validate(ctx->super, ctx->bs);
5837 654 : if (rc != 0) {
5838 0 : bs_unload_finish(ctx, rc);
5839 0 : return;
5840 : }
5841 :
5842 654 : bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
5843 : }
5844 :
5845 : void
5846 662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
5847 : {
5848 662 : struct spdk_bs_cpl cpl;
5849 : struct spdk_bs_load_ctx *ctx;
5850 :
5851 662 : SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
5852 :
5853 : /*
5854 : * If external snapshot channels are being destroyed while the blobstore is unloaded, the
5855 : * unload is deferred until after the channel destruction completes.
5856 : */
5857 662 : if (bs->esnap_channels_unloading != 0) {
5858 4 : if (bs->esnap_unload_cb_fn != NULL) {
5859 0 : SPDK_ERRLOG("Blobstore unload in progress\n");
5860 0 : cb_fn(cb_arg, -EBUSY);
5861 0 : return;
5862 : }
5863 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
5864 : " esnap clones are unloading\n", bs->esnap_channels_unloading);
5865 4 : bs->esnap_unload_cb_fn = cb_fn;
5866 4 : bs->esnap_unload_cb_arg = cb_arg;
5867 4 : return;
5868 : }
5869 658 : if (bs->esnap_unload_cb_fn != NULL) {
5870 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
5871 4 : assert(bs->esnap_unload_cb_fn == cb_fn);
5872 4 : assert(bs->esnap_unload_cb_arg == cb_arg);
5873 4 : bs->esnap_unload_cb_fn = NULL;
5874 4 : bs->esnap_unload_cb_arg = NULL;
5875 : }
5876 :
5877 658 : if (!RB_EMPTY(&bs->open_blobs)) {
5878 4 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5879 4 : cb_fn(cb_arg, -EBUSY);
5880 4 : return;
5881 : }
5882 :
5883 654 : ctx = calloc(1, sizeof(*ctx));
5884 654 : if (!ctx) {
5885 0 : cb_fn(cb_arg, -ENOMEM);
5886 0 : return;
5887 : }
5888 :
5889 654 : ctx->bs = bs;
5890 :
5891 654 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
5892 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5893 654 : if (!ctx->super) {
5894 0 : free(ctx);
5895 0 : cb_fn(cb_arg, -ENOMEM);
5896 0 : return;
5897 : }
5898 :
5899 654 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5900 654 : cpl.u.bs_basic.cb_fn = cb_fn;
5901 654 : cpl.u.bs_basic.cb_arg = cb_arg;
5902 :
5903 654 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5904 654 : if (!ctx->seq) {
5905 0 : spdk_free(ctx->super);
5906 0 : free(ctx);
5907 0 : cb_fn(cb_arg, -ENOMEM);
5908 0 : return;
5909 : }
5910 :
5911 : /* Read super block */
5912 654 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5913 654 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5914 : bs_unload_read_super_cpl, ctx);
5915 : }
5916 :
5917 : /* END spdk_bs_unload */
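:
: /*
:  * Editor's note on the unload ordering above: the used_md_pages,
:  * used_blobids and used_clusters masks are persisted before the
:  * superblock, and clean = 1 is written only in the final superblock
:  * update. A crash anywhere mid-unload therefore leaves clean == 0 on
:  * disk, which forces bs_recover() to replay the metadata on the next
:  * spdk_bs_load().
:  */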
5918 :
5919 : /* START spdk_bs_set_super */
5920 :
5921 : struct spdk_bs_set_super_ctx {
5922 : struct spdk_blob_store *bs;
5923 : struct spdk_bs_super_block *super;
5924 : };
5925 :
5926 : static void
5927 8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5928 : {
5929 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5930 :
5931 8 : if (bserrno != 0) {
5932 0 : SPDK_ERRLOG("Unable to write to super block of blobstore\n");
5933 : }
5934 :
5935 8 : spdk_free(ctx->super);
5936 :
5937 8 : bs_sequence_finish(seq, bserrno);
5938 :
5939 8 : free(ctx);
5940 8 : }
5941 :
5942 : static void
5943 8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5944 : {
5945 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5946 : int rc;
5947 :
5948 8 : if (bserrno != 0) {
5949 0 : SPDK_ERRLOG("Unable to read super block of blobstore\n");
5950 0 : spdk_free(ctx->super);
5951 0 : bs_sequence_finish(seq, bserrno);
5952 0 : free(ctx);
5953 0 : return;
5954 : }
5955 :
5956 8 : rc = bs_super_validate(ctx->super, ctx->bs);
5957 8 : if (rc != 0) {
5958 0 : SPDK_ERRLOG("Not a valid super block\n");
5959 0 : spdk_free(ctx->super);
5960 0 : bs_sequence_finish(seq, rc);
5961 0 : free(ctx);
5962 0 : return;
5963 : }
5964 :
5965 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
5966 : }
5967 :
5968 : void
5969 8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
5970 : spdk_bs_op_complete cb_fn, void *cb_arg)
5971 : {
5972 8 : struct spdk_bs_cpl cpl;
5973 : spdk_bs_sequence_t *seq;
5974 : struct spdk_bs_set_super_ctx *ctx;
5975 :
5976 8 : SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
5977 :
5978 8 : ctx = calloc(1, sizeof(*ctx));
5979 8 : if (!ctx) {
5980 0 : cb_fn(cb_arg, -ENOMEM);
5981 0 : return;
5982 : }
5983 :
5984 8 : ctx->bs = bs;
5985 :
5986 8 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
5987 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5988 8 : if (!ctx->super) {
5989 0 : free(ctx);
5990 0 : cb_fn(cb_arg, -ENOMEM);
5991 0 : return;
5992 : }
5993 :
5994 8 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5995 8 : cpl.u.bs_basic.cb_fn = cb_fn;
5996 8 : cpl.u.bs_basic.cb_arg = cb_arg;
5997 :
5998 8 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5999 8 : if (!seq) {
6000 0 : spdk_free(ctx->super);
6001 0 : free(ctx);
6002 0 : cb_fn(cb_arg, -ENOMEM);
6003 0 : return;
6004 : }
6005 :
6006 8 : bs->super_blob = blobid;
6007 :
6008 : /* Read super block */
6009 8 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
6010 8 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
6011 : bs_set_super_read_cpl, ctx);
6012 : }
6013 :
6014 : /* END spdk_bs_set_super */
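:
: /*
:  * Usage sketch (editor's addition): the super blob id is a single
:  * well-known blob id persisted in the superblock, typically used to
:  * bootstrap application metadata. The callback name is illustrative;
:  * it would be passed as spdk_bs_get_super(bs, example_get_super_done, NULL).
:  */
: static void
: example_get_super_done(void *cb_arg, spdk_blob_id blobid, int bserrno)
: {
: 	if (bserrno == -ENOENT) {
: 		SPDK_NOTICELOG("no super blob has been set\n");
: 		return;
: 	}
: 	SPDK_NOTICELOG("super blob is 0x%" PRIx64 "\n", blobid);
: }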
6015 :
6016 : void
6017 12 : spdk_bs_get_super(struct spdk_blob_store *bs,
6018 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6019 : {
6020 12 : if (bs->super_blob == SPDK_BLOBID_INVALID) {
6021 4 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
6022 : } else {
6023 8 : cb_fn(cb_arg, bs->super_blob, 0);
6024 : }
6025 12 : }
6026 :
6027 : uint64_t
6028 132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
6029 : {
6030 132 : return bs->cluster_sz;
6031 : }
6032 :
6033 : uint64_t
6034 68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
6035 : {
6036 68 : return SPDK_BS_PAGE_SIZE;
6037 : }
6038 :
6039 : uint64_t
6040 734 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
6041 : {
6042 734 : return bs->io_unit_size;
6043 : }
6044 :
6045 : uint64_t
6046 540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
6047 : {
6048 540 : return bs->num_free_clusters;
6049 : }
6050 :
6051 : uint64_t
6052 92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
6053 : {
6054 92 : return bs->total_data_clusters;
6055 : }
6056 :
6057 : static int
6058 780 : bs_register_md_thread(struct spdk_blob_store *bs)
6059 : {
6060 780 : bs->md_channel = spdk_get_io_channel(bs);
6061 780 : if (!bs->md_channel) {
6062 0 : SPDK_ERRLOG("Failed to get IO channel.\n");
6063 0 : return -1;
6064 : }
6065 :
6066 780 : return 0;
6067 : }
6068 :
6069 : static int
6070 780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
6071 : {
6072 780 : spdk_put_io_channel(bs->md_channel);
6073 :
6074 780 : return 0;
6075 : }
6076 :
6077 : spdk_blob_id
6078 562 : spdk_blob_get_id(struct spdk_blob *blob)
6079 : {
6080 562 : assert(blob != NULL);
6081 :
6082 562 : return blob->id;
6083 : }
6084 :
6085 : uint64_t
6086 24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
6087 : {
6088 24 : assert(blob != NULL);
6089 :
6090 24 : return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
6091 : }
6092 :
6093 : uint64_t
6094 24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
6095 : {
6096 24 : assert(blob != NULL);
6097 :
6098 24 : return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
6099 : }
6100 :
6101 : uint64_t
6102 565 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
6103 : {
6104 565 : assert(blob != NULL);
6105 :
6106 565 : return blob->active.num_clusters;
6107 : }
6108 :
6109 : uint64_t
6110 330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
6111 : {
6112 330 : assert(blob != NULL);
6113 :
6114 330 : return blob->active.num_allocated_clusters;
6115 : }
6116 :
6117 : static uint64_t
6118 24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
6119 : {
6120 24 : uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
6121 :
6122 44 : while (offset < blob_io_unit_num) {
6123 40 : if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
6124 20 : return offset;
6125 : }
6126 :
6127 20 : offset += bs_num_io_units_to_cluster_boundary(blob, offset);
6128 : }
6129 :
6130 4 : return UINT64_MAX;
6131 : }
6132 :
6133 : uint64_t
6134 12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6135 : {
6136 12 : return blob_find_io_unit(blob, offset, true);
6137 : }
6138 :
6139 : uint64_t
6140 12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6141 : {
6142 12 : return blob_find_io_unit(blob, offset, false);
6143 : }
6144 :
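: /*
:  * Usage sketch (editor's addition): walking the allocated regions of a
:  * thin-provisioned blob with the two helpers above. Offsets are in
:  * io_units; UINT64_MAX means no further region of the requested kind.
:  */
: static void
: example_walk_allocated(struct spdk_blob *blob)
: {
: 	uint64_t start = spdk_blob_get_next_allocated_io_unit(blob, 0);
:
: 	while (start != UINT64_MAX) {
: 		uint64_t end = spdk_blob_get_next_unallocated_io_unit(blob, start);
: 		uint64_t limit = (end == UINT64_MAX) ? spdk_blob_get_num_io_units(blob) : end;
:
: 		SPDK_NOTICELOG("allocated io_units [%" PRIu64 ", %" PRIu64 ")\n", start, limit);
: 		if (end == UINT64_MAX) {
: 			break;
: 		}
: 		start = spdk_blob_get_next_allocated_io_unit(blob, end);
: 	}
: }
: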
6145 : /* START spdk_bs_create_blob */
6146 :
6147 : static void
6148 1874 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
6149 : {
6150 1874 : struct spdk_blob *blob = cb_arg;
6151 1874 : uint32_t page_idx = bs_blobid_to_page(blob->id);
6152 :
6153 1874 : if (bserrno != 0) {
6154 0 : spdk_spin_lock(&blob->bs->used_lock);
6155 0 : spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
6156 0 : bs_release_md_page(blob->bs, page_idx);
6157 0 : spdk_spin_unlock(&blob->bs->used_lock);
6158 : }
6159 :
6160 1874 : blob_free(blob);
6161 :
6162 1874 : bs_sequence_finish(seq, bserrno);
6163 1874 : }
6164 :
6165 : static int
6166 3768 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
6167 : bool internal)
6168 : {
6169 : uint64_t i;
6170 3768 : size_t value_len = 0;
6171 : int rc;
6172 3768 : const void *value = NULL;
6173 3768 : if (xattrs->count > 0 && xattrs->get_value == NULL) {
6174 8 : return -EINVAL;
6175 : }
6176 4072 : for (i = 0; i < xattrs->count; i++) {
6177 316 : xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
6178 316 : if (value == NULL || value_len == 0) {
6179 4 : return -EINVAL;
6180 : }
6181 312 : rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
6182 312 : if (rc < 0) {
6183 0 : return rc;
6184 : }
6185 : }
6186 3756 : return 0;
6187 : }
6188 :
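: /*
:  * Usage sketch (editor's addition): supplying xattrs at blob creation.
:  * blob_set_xattrs() above pulls each value through the caller's get_value
:  * callback; a NULL value or zero length fails creation with -EINVAL.
:  * The "uuid" key and all names below are illustrative.
:  */
: static void
: example_get_xattr_value(void *ctx, const char *name, const void **value, size_t *value_len)
: {
: 	if (strcmp(name, "uuid") == 0) {
: 		*value = ctx;
: 		*value_len = strlen((const char *)ctx) + 1;
: 	}
: }
:
: static void
: example_create_with_xattrs(struct spdk_blob_store *bs, char *uuid_str,
: 			   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
: {
: 	static char *names[] = { "uuid" };
: 	struct spdk_blob_opts opts;
:
: 	spdk_blob_opts_init(&opts, sizeof(opts));
: 	opts.xattrs.count = SPDK_COUNTOF(names);
: 	opts.xattrs.names = names;
: 	opts.xattrs.ctx = uuid_str;
: 	opts.xattrs.get_value = example_get_xattr_value;
: 	spdk_bs_create_blob_ext(bs, &opts, cb_fn, cb_arg);
: }
: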
6189 : static void
6190 1858 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
6191 : {
6192 : #define FIELD_OK(field) \
6193 : offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
6194 :
6195 : #define SET_FIELD(field) \
6196 : if (FIELD_OK(field)) { \
6197 : dst->field = src->field; \
6198 : } \
6199 :
6200 1858 : SET_FIELD(num_clusters);
6201 1858 : SET_FIELD(thin_provision);
6202 1858 : SET_FIELD(clear_method);
6203 :
6204 1858 : if (FIELD_OK(xattrs)) {
6205 1858 : memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
6206 : }
6207 :
6208 1858 : SET_FIELD(use_extent_table);
6209 1858 : SET_FIELD(esnap_id);
6210 1858 : SET_FIELD(esnap_id_len);
6211 :
6212 1858 : dst->opts_size = src->opts_size;
6213 :
6214 : /* Do not remove this statement. When adding a new field, update the
6215 : * static assert below and add a corresponding SET_FIELD statement. */
6216 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
6217 :
6218 : #undef FIELD_OK
6219 : #undef SET_FIELD
6220 1858 : }
6221 :
6222 : static void
6223 1890 : bs_create_blob(struct spdk_blob_store *bs,
6224 : const struct spdk_blob_opts *opts,
6225 : const struct spdk_blob_xattr_opts *internal_xattrs,
6226 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6227 : {
6228 : struct spdk_blob *blob;
6229 : uint32_t page_idx;
6230 1890 : struct spdk_bs_cpl cpl;
6231 1890 : struct spdk_blob_opts opts_local;
6232 1890 : struct spdk_blob_xattr_opts internal_xattrs_default;
6233 : spdk_bs_sequence_t *seq;
6234 : spdk_blob_id id;
6235 : int rc;
6236 :
6237 1890 : assert(spdk_get_thread() == bs->md_thread);
6238 :
6239 1890 : spdk_spin_lock(&bs->used_lock);
6240 1890 : page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
6241 1890 : if (page_idx == UINT32_MAX) {
6242 0 : spdk_spin_unlock(&bs->used_lock);
6243 0 : cb_fn(cb_arg, 0, -ENOMEM);
6244 0 : return;
6245 : }
6246 1890 : spdk_bit_array_set(bs->used_blobids, page_idx);
6247 1890 : bs_claim_md_page(bs, page_idx);
6248 1890 : spdk_spin_unlock(&bs->used_lock);
6249 :
6250 1890 : id = bs_page_to_blobid(page_idx);
6251 :
6252 1890 : SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
6253 :
6254 1890 : spdk_blob_opts_init(&opts_local, sizeof(opts_local));
6255 1890 : if (opts) {
6256 1858 : blob_opts_copy(opts, &opts_local);
6257 : }
6258 :
6259 1890 : blob = blob_alloc(bs, id);
6260 1890 : if (!blob) {
6261 0 : rc = -ENOMEM;
6262 0 : goto error;
6263 : }
6264 :
6265 1890 : blob->use_extent_table = opts_local.use_extent_table;
6266 1890 : if (blob->use_extent_table) {
6267 966 : blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
6268 : }
6269 :
6270 1890 : if (!internal_xattrs) {
6271 1622 : blob_xattrs_init(&internal_xattrs_default);
6272 1622 : internal_xattrs = &internal_xattrs_default;
6273 : }
6274 :
6275 1890 : rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
6276 1890 : if (rc < 0) {
6277 12 : goto error;
6278 : }
6279 :
6280 1878 : rc = blob_set_xattrs(blob, internal_xattrs, true);
6281 1878 : if (rc < 0) {
6282 0 : goto error;
6283 : }
6284 :
6285 1878 : if (opts_local.thin_provision) {
6286 352 : blob_set_thin_provision(blob);
6287 : }
6288 :
6289 1878 : blob_set_clear_method(blob, opts_local.clear_method);
6290 :
6291 1878 : if (opts_local.esnap_id != NULL) {
6292 60 : if (opts_local.esnap_id_len > UINT16_MAX) {
6293 0 : SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
6294 : opts_local.esnap_id_len);
6295 0 : rc = -EINVAL;
6296 0 : goto error;
6298 : }
6299 60 : blob_set_thin_provision(blob);
6300 60 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6301 60 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
6302 60 : opts_local.esnap_id, opts_local.esnap_id_len, true);
6303 60 : if (rc != 0) {
6304 0 : goto error;
6305 : }
6306 : }
6307 :
6308 1878 : rc = blob_resize(blob, opts_local.num_clusters);
6309 1878 : if (rc < 0) {
6310 4 : goto error;
6311 : }
6312 1874 : cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6313 1874 : cpl.u.blobid.cb_fn = cb_fn;
6314 1874 : cpl.u.blobid.cb_arg = cb_arg;
6315 1874 : cpl.u.blobid.blobid = blob->id;
6316 :
6317 1874 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
6318 1874 : if (!seq) {
6319 0 : rc = -ENOMEM;
6320 0 : goto error;
6321 : }
6322 :
6323 1874 : blob_persist(seq, blob, bs_create_blob_cpl, blob);
6324 1874 : return;
6325 :
6326 16 : error:
6327 16 : SPDK_ERRLOG("Failed to create blob: %s, size in clusters: %" PRIu64 "\n",
6328 : spdk_strerror(-rc), opts_local.num_clusters);
6329 16 : if (blob != NULL) {
6330 16 : blob_free(blob);
6331 : }
6332 16 : spdk_spin_lock(&bs->used_lock);
6333 16 : spdk_bit_array_clear(bs->used_blobids, page_idx);
6334 16 : bs_release_md_page(bs, page_idx);
6335 16 : spdk_spin_unlock(&bs->used_lock);
6336 16 : cb_fn(cb_arg, 0, rc);
6337 : }
6338 :
6339 : void
6340 16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
6341 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6342 : {
6343 16 : bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
6344 16 : }
6345 :
6346 : void
6347 1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
6348 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6349 : {
6350 1598 : bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
6351 1598 : }
6352 :
6353 : /* END spdk_bs_create_blob */
6354 :
6355 : /* START blob_cleanup */
6356 :
6357 : struct spdk_clone_snapshot_ctx {
6358 : struct spdk_bs_cpl cpl;
6359 : int bserrno;
6360 : bool frozen;
6361 :
6362 : struct spdk_io_channel *channel;
6363 :
6364 : /* Current cluster for inflate operation */
6365 : uint64_t cluster;
6366 :
6367 : /* For inflation, force allocation of all unallocated clusters and remove
6368 : * thin-provisioning. Otherwise, only decouple the parent and keep the clone thin. */
6369 : bool allocate_all;
6370 :
6371 : struct {
6372 : spdk_blob_id id;
6373 : struct spdk_blob *blob;
6374 : bool md_ro;
6375 : } original;
6376 : struct {
6377 : spdk_blob_id id;
6378 : struct spdk_blob *blob;
6379 : } new;
6380 :
6381 : /* xattrs specified for the snapshot/clone only. They have no impact on
6382 : * the original blob's xattrs. */
6383 : const struct spdk_blob_xattr_opts *xattrs;
6384 : };
6385 :
6386 : static void
6387 338 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
6388 : {
6389 338 : struct spdk_clone_snapshot_ctx *ctx = cb_arg;
6390 338 : struct spdk_bs_cpl *cpl = &ctx->cpl;
6391 :
6392 338 : if (bserrno != 0) {
6393 6 : if (ctx->bserrno != 0) {
6394 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6395 : } else {
6396 6 : ctx->bserrno = bserrno;
6397 : }
6398 : }
6399 :
6400 338 : switch (cpl->type) {
6401 278 : case SPDK_BS_CPL_TYPE_BLOBID:
6402 278 : cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
6403 278 : break;
6404 60 : case SPDK_BS_CPL_TYPE_BLOB_BASIC:
6405 60 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
6406 60 : break;
6407 0 : default:
6408 0 : SPDK_UNREACHABLE();
6409 : break;
6410 : }
6411 :
6412 338 : free(ctx);
6413 338 : }
6414 :
6415 : static void
6416 324 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
6417 : {
6418 324 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6419 324 : struct spdk_blob *origblob = ctx->original.blob;
6420 :
6421 324 : if (bserrno != 0) {
6422 0 : if (ctx->bserrno != 0) {
6423 0 : SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
6424 : } else {
6425 0 : ctx->bserrno = bserrno;
6426 : }
6427 : }
6428 :
6429 324 : ctx->original.id = origblob->id;
6430 324 : origblob->locked_operation_in_progress = false;
6431 :
6432 : /* Revert md_ro to original state */
6433 324 : origblob->md_ro = ctx->original.md_ro;
6434 :
6435 324 : spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
6436 324 : }
6437 :
6438 : static void
6439 324 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
6440 : {
6441 324 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6442 324 : struct spdk_blob *origblob = ctx->original.blob;
6443 :
6444 324 : if (bserrno != 0) {
6445 24 : if (ctx->bserrno != 0) {
6446 4 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6447 : } else {
6448 20 : ctx->bserrno = bserrno;
6449 : }
6450 : }
6451 :
6452 324 : if (ctx->frozen) {
6453 : /* Unfreeze any outstanding I/O */
6454 208 : blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
6455 : } else {
6456 116 : bs_snapshot_unfreeze_cpl(ctx, 0);
6457 : }
6459 324 : }
6460 :
6461 : static void
6462 4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
6463 : {
6464 4 : struct spdk_blob *newblob = ctx->new.blob;
6465 :
6466 4 : if (bserrno != 0) {
6467 4 : if (ctx->bserrno != 0) {
6468 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6469 : } else {
6470 4 : ctx->bserrno = bserrno;
6471 : }
6472 : }
6473 :
6474 4 : ctx->new.id = newblob->id;
6475 4 : spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6476 4 : }
6477 :
6478 : /* END blob_cleanup */
6479 :
6480 : /* START spdk_bs_create_snapshot */
6481 :
6482 : static void
6483 216 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
6484 : {
6485 : uint64_t *cluster_temp;
6486 : uint64_t num_allocated_clusters_temp;
6487 : uint32_t *extent_page_temp;
6488 :
6489 216 : cluster_temp = blob1->active.clusters;
6490 216 : blob1->active.clusters = blob2->active.clusters;
6491 216 : blob2->active.clusters = cluster_temp;
6492 :
6493 216 : num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
6494 216 : blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
6495 216 : blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
6496 :
6497 216 : extent_page_temp = blob1->active.extent_pages;
6498 216 : blob1->active.extent_pages = blob2->active.extent_pages;
6499 216 : blob2->active.extent_pages = extent_page_temp;
6500 216 : }
6501 :
6502 : /* Copies an internal xattr */
6503 : static int
6504 20 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
6505 : {
6506 20 : const void *val = NULL;
6507 20 : size_t len;
6508 : int bserrno;
6509 :
6510 20 : bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
6511 20 : if (bserrno != 0) {
6512 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
6513 0 : return bserrno;
6514 : }
6515 :
6516 20 : bserrno = blob_set_xattr(toblob, name, val, len, true);
6517 20 : if (bserrno != 0) {
6518 0 : SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
6519 : name, toblob->id);
6520 0 : return bserrno;
6521 : }
6522 20 : return 0;
6523 : }
6524 :
6525 : static void
6526 204 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
6527 : {
6528 204 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6529 204 : struct spdk_blob *origblob = ctx->original.blob;
6530 204 : struct spdk_blob *newblob = ctx->new.blob;
6531 :
6532 204 : if (bserrno != 0) {
6533 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6534 4 : if (blob_is_esnap_clone(newblob)) {
6535 0 : bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6536 0 : origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6537 : }
6538 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6539 4 : return;
6540 : }
6541 :
6542 : /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
6543 200 : bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
6544 200 : if (bserrno != 0) {
6545 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6546 0 : return;
6547 : }
6548 :
6549 200 : bs_blob_list_add(ctx->original.blob);
6550 :
6551 200 : spdk_blob_set_read_only(newblob);
6552 :
6553 : /* sync snapshot metadata */
6554 200 : spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6555 : }
6556 :
6557 : static void
6558 208 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
6559 : {
6560 208 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6561 208 : struct spdk_blob *origblob = ctx->original.blob;
6562 208 : struct spdk_blob *newblob = ctx->new.blob;
6563 :
6564 208 : if (bserrno != 0) {
6565 : /* return cluster map back to original */
6566 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6567 :
6568 : /* Newblob md sync failed. Valid clusters are only present in origblob.
6569 : * Since I/O is frozen on origblob, no changes to the zeroed-out cluster map should have occurred.
6570 : * Newblob must be reverted to the thin-provisioned state it had at creation to close properly. */
6571 4 : blob_set_thin_provision(newblob);
6572 4 : assert(spdk_mem_all_zero(newblob->active.clusters,
6573 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6574 4 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6575 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6576 :
6577 4 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6578 4 : return;
6579 : }
6580 :
6581 : /* Set internal xattr for snapshot id */
6582 204 : bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
6583 204 : if (bserrno != 0) {
6584 : /* return cluster map back to original */
6585 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6586 0 : blob_set_thin_provision(newblob);
6587 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6588 0 : return;
6589 : }
6590 :
6591 : /* Create new back_bs_dev for snapshot */
6592 204 : origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
6593 204 : if (origblob->back_bs_dev == NULL) {
6594 : /* return cluster map back to original */
6595 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6596 0 : blob_set_thin_provision(newblob);
6597 0 : bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
6598 0 : return;
6599 : }
6600 :
6601 : /* Remove the xattr that references an external snapshot */
6602 204 : if (blob_is_esnap_clone(origblob)) {
6603 12 : origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
6604 12 : bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
6605 12 : if (bserrno != 0) {
6606 0 : if (bserrno == -ENOENT) {
6607 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
6608 : " xattr to remove\n", origblob->id);
6609 0 : assert(false);
6610 : } else {
6611 : /* return cluster map back to original */
6612 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6613 0 : blob_set_thin_provision(newblob);
6614 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6615 0 : return;
6616 : }
6617 : }
6618 : }
6619 :
6620 204 : bs_blob_list_remove(origblob);
6621 204 : origblob->parent_id = newblob->id;
6622 : /* set clone blob as thin provisioned */
6623 204 : blob_set_thin_provision(origblob);
6624 :
6625 204 : bs_blob_list_add(newblob);
6626 :
6627 : /* sync clone metadata */
6628 204 : spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
6629 : }
6630 :
6631 : static void
6632 208 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
6633 : {
6634 208 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6635 208 : struct spdk_blob *origblob = ctx->original.blob;
6636 208 : struct spdk_blob *newblob = ctx->new.blob;
6637 : int bserrno;
6638 :
6639 208 : if (rc != 0) {
6640 0 : bs_clone_snapshot_newblob_cleanup(ctx, rc);
6641 0 : return;
6642 : }
6643 :
6644 208 : ctx->frozen = true;
6645 :
6646 208 : if (blob_is_esnap_clone(origblob)) {
6647 :                 /* Clean up any channels associated with the original blob id, because future
6648 :                  * I/O will be performed using the snapshot blob_id.
6649 : */
6650 12 : blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
6651 : }
6652 208 : if (newblob->back_bs_dev) {
6653 208 : blob_back_bs_destroy(newblob);
6654 : }
6655 : /* set new back_bs_dev for snapshot */
6656 208 : newblob->back_bs_dev = origblob->back_bs_dev;
6657 : /* Set invalid flags from origblob */
6658 208 : newblob->invalid_flags = origblob->invalid_flags;
6659 :
6660 : /* inherit parent from original blob if set */
6661 208 : newblob->parent_id = origblob->parent_id;
6662 208 : switch (origblob->parent_id) {
6663 12 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
6664 12 : bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6665 12 : if (bserrno != 0) {
6666 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6667 0 : return;
6668 : }
6669 12 : break;
6670 144 : case SPDK_BLOBID_INVALID:
6671 144 : break;
6672 52 : default:
6673 : /* Set internal xattr for snapshot id */
6674 52 : bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
6675 52 : &origblob->parent_id, sizeof(spdk_blob_id), true);
6676 52 : if (bserrno != 0) {
6677 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6678 0 : return;
6679 : }
6680 : }
6681 :
6682 : /* swap cluster maps */
6683 208 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6684 :
6685 : /* Set the clear method on the new blob to match the original. */
6686 208 : blob_set_clear_method(newblob, origblob->clear_method);
6687 :
6688 : /* sync snapshot metadata */
6689 208 : spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
6690 : }
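     :
     : /* Descriptive note (added for clarity): the snapshot creation sequence above
     :  * freezes I/O on the original blob before handing its back_bs_dev, invalid
     :  * flags, parent id and cluster map over to the new snapshot blob. The
     :  * snapshot metadata is synced first; only then does
     :  * bs_snapshot_newblob_sync_cpl() re-point the original blob at the snapshot.
     :  * A failure in between is rolled back by swapping the cluster maps back. */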
6691 :
6692 : static void
6693 212 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6694 : {
6695 212 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6696 212 : struct spdk_blob *origblob = ctx->original.blob;
6697 212 : struct spdk_blob *newblob = _blob;
6698 :
6699 212 : if (bserrno != 0) {
6700 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6701 4 : return;
6702 : }
6703 :
6704 208 : ctx->new.blob = newblob;
6705 208 : assert(spdk_blob_is_thin_provisioned(newblob));
6706 208 : assert(spdk_mem_all_zero(newblob->active.clusters,
6707 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6708 208 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6709 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6710 :
6711 208 : blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
6712 : }
6713 :
6714 : static void
6715 216 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6716 : {
6717 216 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6718 216 : struct spdk_blob *origblob = ctx->original.blob;
6719 :
6720 216 : if (bserrno != 0) {
6721 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6722 4 : return;
6723 : }
6724 :
6725 212 : ctx->new.id = blobid;
6726 212 : ctx->cpl.u.blobid.blobid = blobid;
6727 :
6728 212 : spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
6729 : }
6730 :
6731 :
6732 : static void
6733 216 : bs_xattr_snapshot(void *arg, const char *name,
6734 : const void **value, size_t *value_len)
6735 : {
6736 216 : assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
6737 :
6738 216 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6739 216 : *value = &blob->id;
6740 216 : *value_len = sizeof(blob->id);
6741 216 : }
6742 :
6743 : static void
6744 226 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6745 : {
6746 226 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6747 226 : struct spdk_blob_opts opts;
6748 226 : struct spdk_blob_xattr_opts internal_xattrs;
6749 226 : char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
6750 :
6751 226 : if (bserrno != 0) {
6752 6 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6753 6 : return;
6754 : }
6755 :
6756 220 : ctx->original.blob = _blob;
6757 :
6758 220 : if (_blob->data_ro || _blob->md_ro) {
6759 4 : SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
6760 : PRIx64 "\n", _blob->id);
6761 4 : ctx->bserrno = -EINVAL;
6762 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6763 4 : return;
6764 : }
6765 :
6766 216 : if (_blob->locked_operation_in_progress) {
6767 0 : SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
6768 0 : ctx->bserrno = -EBUSY;
6769 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6770 0 : return;
6771 : }
6772 :
6773 216 : _blob->locked_operation_in_progress = true;
6774 :
6775 216 : spdk_blob_opts_init(&opts, sizeof(opts));
6776 216 : blob_xattrs_init(&internal_xattrs);
6777 :
6778 :         /* Make the new blob the same size as the original blob,
6779 :          * but do not allocate any clusters */
6780 216 : opts.thin_provision = true;
6781 216 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6782 216 : opts.use_extent_table = _blob->use_extent_table;
6783 :
6784 : /* If there are any xattrs specified for snapshot, set them now */
6785 216 : if (ctx->xattrs) {
6786 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6787 : }
6788 : /* Set internal xattr SNAPSHOT_IN_PROGRESS */
6789 216 : internal_xattrs.count = 1;
6790 216 : internal_xattrs.ctx = _blob;
6791 216 : internal_xattrs.names = xattrs_names;
6792 216 : internal_xattrs.get_value = bs_xattr_snapshot;
6793 :
6794 216 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6795 : bs_snapshot_newblob_create_cpl, ctx);
6796 : }
6797 :
6798 : void
6799 226 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
6800 : const struct spdk_blob_xattr_opts *snapshot_xattrs,
6801 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6802 : {
6803 226 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6804 :
6805 226 : if (!ctx) {
6806 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6807 0 : return;
6808 : }
6809 226 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6810 226 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6811 226 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6812 226 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6813 226 : ctx->bserrno = 0;
6814 226 : ctx->frozen = false;
6815 226 : ctx->original.id = blobid;
6816 226 : ctx->xattrs = snapshot_xattrs;
6817 :
6818 226 : spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
6819 : }
6820 : /* END spdk_bs_create_snapshot */
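     :
     : /* Illustrative usage sketch (not part of this file; identifiers below are
     :  * hypothetical): creating a snapshot of an existing writable blob. `bs`
     :  * must be a loaded blobstore. */
     : static void
     : example_snapshot_done(void *cb_arg, spdk_blob_id snapshot_id, int bserrno)
     : {
     : 	if (bserrno != 0) {
     : 		SPDK_ERRLOG("snapshot creation failed: %d\n", bserrno);
     : 		return;
     : 	}
     : 	SPDK_NOTICELOG("created snapshot 0x%" PRIx64 "\n", snapshot_id);
     : }
     :
     : static void
     : example_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid)
     : {
     : 	/* NULL xattrs; pass a struct spdk_blob_xattr_opts to tag the snapshot. */
     : 	spdk_bs_create_snapshot(bs, blobid, NULL, example_snapshot_done, NULL);
     : }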
6821 :
6822 : /* START spdk_bs_create_clone */
6823 :
6824 : static void
6825 48 : bs_xattr_clone(void *arg, const char *name,
6826 : const void **value, size_t *value_len)
6827 : {
6828 48 : assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
6829 :
6830 48 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6831 48 : *value = &blob->id;
6832 48 : *value_len = sizeof(blob->id);
6833 48 : }
6834 :
6835 : static void
6836 48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6837 : {
6838 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6839 48 : struct spdk_blob *clone = _blob;
6840 :
6841 48 : ctx->new.blob = clone;
6842 48 : bs_blob_list_add(clone);
6843 :
6844 48 : spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
6845 48 : }
6846 :
6847 : static void
6848 48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6849 : {
6850 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6851 :
6852 48 : ctx->cpl.u.blobid.blobid = blobid;
6853 48 : spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
6854 48 : }
6855 :
6856 : static void
6857 52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6858 : {
6859 52 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6860 52 : struct spdk_blob_opts opts;
6861 52 : struct spdk_blob_xattr_opts internal_xattrs;
6862 52 : char *xattr_names[] = { BLOB_SNAPSHOT };
6863 :
6864 52 : if (bserrno != 0) {
6865 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6866 0 : return;
6867 : }
6868 :
6869 52 : ctx->original.blob = _blob;
6870 52 : ctx->original.md_ro = _blob->md_ro;
6871 :
6872 52 : if (!_blob->data_ro || !_blob->md_ro) {
6873           4 :         SPDK_DEBUGLOG(blob, "Cannot create clone from a blob that is not read-only\n");
6874 4 : ctx->bserrno = -EINVAL;
6875 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6876 4 : return;
6877 : }
6878 :
6879 48 : if (_blob->locked_operation_in_progress) {
6880 0 : SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
6881 0 : ctx->bserrno = -EBUSY;
6882 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6883 0 : return;
6884 : }
6885 :
6886 48 : _blob->locked_operation_in_progress = true;
6887 :
6888 48 : spdk_blob_opts_init(&opts, sizeof(opts));
6889 48 : blob_xattrs_init(&internal_xattrs);
6890 :
6891 48 : opts.thin_provision = true;
6892 48 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6893 48 : opts.use_extent_table = _blob->use_extent_table;
6894 48 : if (ctx->xattrs) {
6895 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6896 : }
6897 :
6898 : /* Set internal xattr BLOB_SNAPSHOT */
6899 48 : internal_xattrs.count = 1;
6900 48 : internal_xattrs.ctx = _blob;
6901 48 : internal_xattrs.names = xattr_names;
6902 48 : internal_xattrs.get_value = bs_xattr_clone;
6903 :
6904 48 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6905 : bs_clone_newblob_create_cpl, ctx);
6906 : }
6907 :
6908 : void
6909 52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
6910 : const struct spdk_blob_xattr_opts *clone_xattrs,
6911 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6912 : {
6913 52 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6914 :
6915 52 : if (!ctx) {
6916 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6917 0 : return;
6918 : }
6919 :
6920 52 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6921 52 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6922 52 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6923 52 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6924 52 : ctx->bserrno = 0;
6925 52 : ctx->xattrs = clone_xattrs;
6926 52 : ctx->original.id = blobid;
6927 :
6928 52 : spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
6929 : }
6930 :
6931 : /* END spdk_bs_create_clone */
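     :
     : /* Illustrative usage sketch (hypothetical identifiers): cloning a snapshot.
     :  * The source must already be a read-only snapshot, otherwise the open
     :  * callback above fails with -EINVAL. */
     : static void
     : example_clone_done(void *cb_arg, spdk_blob_id clone_id, int bserrno)
     : {
     : 	if (bserrno == 0) {
     : 		SPDK_NOTICELOG("created clone 0x%" PRIx64 "\n", clone_id);
     : 	}
     : }
     :
     : static void
     : example_create_clone(struct spdk_blob_store *bs, spdk_blob_id snapshot_id)
     : {
     : 	spdk_bs_create_clone(bs, snapshot_id, NULL, example_clone_done, NULL);
     : }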
6932 :
6933 : /* START spdk_bs_inflate_blob */
6934 :
6935 : static void
6936 12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
6937 : {
6938 12 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6939 12 : struct spdk_blob *_blob = ctx->original.blob;
6940 :
6941 12 : if (bserrno != 0) {
6942 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6943 0 : return;
6944 : }
6945 :
6946 : /* Temporarily override md_ro flag for MD modification */
6947 12 : _blob->md_ro = false;
6948 :
6949 12 : bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
6950 12 : if (bserrno != 0) {
6951 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6952 0 : return;
6953 : }
6954 :
6955 12 : assert(_parent != NULL);
6956 :
6957 12 : bs_blob_list_remove(_blob);
6958 12 : _blob->parent_id = _parent->id;
6959 :
6960 12 : blob_back_bs_destroy(_blob);
6961 12 : _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
6962 12 : bs_blob_list_add(_blob);
6963 :
6964 12 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
6965 : }
6966 :
6967 : static void
6968 56 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
6969 : {
6970 56 : struct spdk_blob *_blob = ctx->original.blob;
6971 : struct spdk_blob *_parent;
6972 :
6973 56 : if (ctx->allocate_all) {
6974 : /* remove thin provisioning */
6975 32 : bs_blob_list_remove(_blob);
6976 32 : if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
6977 8 : blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
6978 8 : _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
6979 : } else {
6980 24 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
6981 : }
6982 32 : _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
6983 32 : blob_back_bs_destroy(_blob);
6984 32 : _blob->parent_id = SPDK_BLOBID_INVALID;
6985 : } else {
6986 : /* For now, esnap clones always have allocate_all set. */
6987 24 : assert(!blob_is_esnap_clone(_blob));
6988 :
6989 24 : _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
6990 24 : if (_parent->parent_id != SPDK_BLOBID_INVALID) {
6991 : /* We must change the parent of the inflated blob */
6992 12 : spdk_bs_open_blob(_blob->bs, _parent->parent_id,
6993 : bs_inflate_blob_set_parent_cpl, ctx);
6994 12 : return;
6995 : }
6996 :
6997 12 : bs_blob_list_remove(_blob);
6998 12 : _blob->parent_id = SPDK_BLOBID_INVALID;
6999 12 : blob_back_bs_destroy(_blob);
7000 12 : _blob->back_bs_dev = bs_create_zeroes_dev();
7001 : }
7002 :
7003 : /* Temporarily override md_ro flag for MD modification */
7004 44 : _blob->md_ro = false;
7005 44 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7006 44 : _blob->state = SPDK_BLOB_STATE_DIRTY;
7007 :
7008 44 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
7009 : }
7010 :
7011 : /* Check if cluster needs allocation */
7012 : static inline bool
7013 1200 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
7014 : {
7015 : struct spdk_blob_bs_dev *b;
7016 :
7017 1200 : assert(blob != NULL);
7018 :
7019 1200 : if (blob->active.clusters[cluster] != 0) {
7020 : /* Cluster is already allocated */
7021 32 : return false;
7022 : }
7023 :
7024 1168 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
7025 :                 /* Blob has no parent blob */
7026 80 : return allocate_all;
7027 : }
7028 :
7029 1088 : if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
7030 64 : return true;
7031 : }
7032 :
7033 1024 : b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
7034 1024 : return (allocate_all || b->blob->active.clusters[cluster] != 0);
7035 : }
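     :
     : /* Summary of the checks above (descriptive, derived from the code):
     :  *   cluster already allocated in the blob  -> false
     :  *   no parent (SPDK_BLOBID_INVALID)        -> allocate_all
     :  *   external snapshot parent               -> true
     :  *   blob parent                            -> allocate_all, or the parent
     :  *                                             has data for this cluster */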
7036 :
7037 : static void
7038 508 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
7039 : {
7040 508 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7041 508 : struct spdk_blob *_blob = ctx->original.blob;
7042 508 : struct spdk_bs_cpl cpl;
7043 : spdk_bs_user_op_t *op;
7044 : uint64_t offset;
7045 :
7046 508 : if (bserrno != 0) {
7047 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
7048 0 : return;
7049 : }
7050 :
7051 656 : for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
7052 600 : if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
7053 452 : break;
7054 : }
7055 : }
7056 :
7057 508 : if (ctx->cluster < _blob->active.num_clusters) {
7058 452 : offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
7059 :
7060 :                 /* We may safely increment the cluster index before copying */
7061 452 : ctx->cluster++;
7062 :
7063 : /* Use a dummy 0B read as a context for cluster copy */
7064 452 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7065 452 : cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
7066 452 : cpl.u.blob_basic.cb_arg = ctx;
7067 :
7068 452 : op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
7069 : NULL, 0, offset, 0);
7070 452 : if (!op) {
7071 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
7072 0 : return;
7073 : }
7074 :
7075 452 : bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
7076 : } else {
7077 56 : bs_inflate_blob_done(ctx);
7078 : }
7079 : }
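     :
     : /* Descriptive note: the 0-byte read above transfers no data. It only gives
     :  * bs_allocate_and_copy_cluster() a queued user op to complete once the
     :  * cluster has been allocated and copied; its completion callback re-enters
     :  * bs_inflate_blob_touch_next() for the next cluster. */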
7080 :
7081 : static void
7082 60 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7083 : {
7084 60 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7085 : uint64_t clusters_needed;
7086 : uint64_t i;
7087 :
7088 60 : if (bserrno != 0) {
7089 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
7090 0 : return;
7091 : }
7092 :
7093 60 : ctx->original.blob = _blob;
7094 60 : ctx->original.md_ro = _blob->md_ro;
7095 :
7096 60 : if (_blob->locked_operation_in_progress) {
7097 0 : SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
7098 0 : ctx->bserrno = -EBUSY;
7099 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
7100 0 : return;
7101 : }
7102 :
7103 60 : _blob->locked_operation_in_progress = true;
7104 :
7105 60 : switch (_blob->parent_id) {
7106 8 : case SPDK_BLOBID_INVALID:
7107 8 : if (!ctx->allocate_all) {
7108 : /* This blob has no parent, so we cannot decouple it. */
7109 4 : SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
7110 4 : bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
7111 4 : return;
7112 : }
7113 4 : break;
7114 8 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
7115 : /*
7116 :                  * It would be better to rely on back_bs_dev->is_zeroes() to determine which
7117 :                  * clusters require allocation. Until there is a blobstore consumer that
7118 :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes(), it is
7119 :                  * not worth the effort.
7120 : */
7121 8 : ctx->allocate_all = true;
7122 8 : break;
7123 44 : default:
7124 44 : break;
7125 : }
7126 :
7127 56 : if (spdk_blob_is_thin_provisioned(_blob) == false) {
7128 :                 /* This is not a thin-provisioned blob. No need to inflate. */
7129 0 : bs_clone_snapshot_origblob_cleanup(ctx, 0);
7130 0 : return;
7131 : }
7132 :
7133 : /* Do two passes - one to verify that we can obtain enough clusters
7134 : * and another to actually claim them.
7135 : */
7136 56 : clusters_needed = 0;
7137 656 : for (i = 0; i < _blob->active.num_clusters; i++) {
7138 600 : if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
7139 452 : clusters_needed++;
7140 : }
7141 : }
7142 :
7143 56 : if (clusters_needed > _blob->bs->num_free_clusters) {
7144 : /* Not enough free clusters. Cannot satisfy the request. */
7145 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
7146 0 : return;
7147 : }
7148 :
7149 56 : ctx->cluster = 0;
7150 56 : bs_inflate_blob_touch_next(ctx, 0);
7151 : }
7152 :
7153 : static void
7154 60 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7155 : spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
7156 : {
7157 60 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
7158 :
7159 60 : if (!ctx) {
7160 0 : cb_fn(cb_arg, -ENOMEM);
7161 0 : return;
7162 : }
7163 60 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7164 60 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7165 60 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7166 60 : ctx->bserrno = 0;
7167 60 : ctx->original.id = blobid;
7168 60 : ctx->channel = channel;
7169 60 : ctx->allocate_all = allocate_all;
7170 :
7171 60 : spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
7172 : }
7173 :
7174 : void
7175 28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7176 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7177 : {
7178 28 : bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
7179 28 : }
7180 :
7181 : void
7182 32 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7183 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7184 : {
7185 32 : bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
7186 32 : }
7187 : /* END spdk_bs_inflate_blob */
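     :
     : /* Illustrative sketch (hypothetical identifiers): both entry points funnel
     :  * into bs_inflate_blob(). Inflating allocates every cluster and drops thin
     :  * provisioning; decoupling copies only clusters backed by the immediate
     :  * parent, keeping the blob thin provisioned. */
     : static void
     : example_flatten(struct spdk_blob_store *bs, struct spdk_io_channel *ch,
     : 		spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
     : {
     : 	/* Fully allocate the blob: */
     : 	spdk_bs_inflate_blob(bs, ch, blobid, cb_fn, cb_arg);
     : 	/* Or, to only remove the dependency on the immediate parent:
     : 	 * spdk_bs_blob_decouple_parent(bs, ch, blobid, cb_fn, cb_arg); */
     : }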
7188 :
7189 : /* START spdk_bs_blob_shallow_copy */
7190 :
7191 : struct shallow_copy_ctx {
7192 : struct spdk_bs_cpl cpl;
7193 : int bserrno;
7194 :
7195 : /* Blob source for copy */
7196 : struct spdk_blob_store *bs;
7197 : spdk_blob_id blobid;
7198 : struct spdk_blob *blob;
7199 : struct spdk_io_channel *blob_channel;
7200 :
7201 : /* Destination device for copy */
7202 : struct spdk_bs_dev *ext_dev;
7203 : struct spdk_io_channel *ext_channel;
7204 :
7205 : /* Current cluster for copy operation */
7206 : uint64_t cluster;
7207 :
7208 : /* Buffer for blob reading */
7209 : uint8_t *read_buff;
7210 :
7211 : /* Struct for external device writing */
7212 : struct spdk_bs_dev_cb_args ext_args;
7213 :
7214 : /* Actual number of copied clusters */
7215 : uint64_t copied_clusters_count;
7216 :
7217 : /* Status callback for updates about the ongoing operation */
7218 : spdk_blob_shallow_copy_status status_cb;
7219 :
7220 : /* Argument passed to function status_cb */
7221 : void *status_cb_arg;
7222 : };
7223 :
7224 : static void
7225 16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
7226 : {
7227 16 : struct shallow_copy_ctx *ctx = cb_arg;
7228 16 : struct spdk_bs_cpl *cpl = &ctx->cpl;
7229 :
7230 16 : if (bserrno != 0) {
7231 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
7232 0 : ctx->bserrno = bserrno;
7233 : }
7234 :
7235 16 : ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
7236 16 : spdk_free(ctx->read_buff);
7237 :
7238 16 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
7239 :
7240 16 : free(ctx);
7241 16 : }
7242 :
7243 : static void
7244 8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
7245 : {
7246 8 : struct shallow_copy_ctx *ctx = cb_arg;
7247 8 : struct spdk_blob *_blob = ctx->blob;
7248 :
7249 8 : if (bserrno != 0) {
7250 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
7251 0 : ctx->bserrno = bserrno;
7252 0 : _blob->locked_operation_in_progress = false;
7253 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7254 0 : return;
7255 : }
7256 :
7257 8 : ctx->cluster++;
7258 8 : if (ctx->status_cb) {
7259 8 : ctx->copied_clusters_count++;
7260 8 : ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
7261 : }
7262 :
7263 8 : bs_shallow_copy_cluster_find_next(ctx);
7264 : }
7265 :
7266 : static void
7267 8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
7268 : {
7269 8 : struct shallow_copy_ctx *ctx = cb_arg;
7270 8 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7271 8 : struct spdk_blob *_blob = ctx->blob;
7272 :
7273 8 : if (bserrno != 0) {
7274 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
7275 0 : ctx->bserrno = bserrno;
7276 0 : _blob->locked_operation_in_progress = false;
7277 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7278 0 : return;
7279 : }
7280 :
7281 8 : ctx->ext_args.channel = ctx->ext_channel;
7282 8 : ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
7283 8 : ctx->ext_args.cb_arg = ctx;
7284 :
7285 8 : ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
7286 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7287 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7288 : &ctx->ext_args);
7289 : }
7290 :
7291 : static void
7292 12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
7293 : {
7294 12 : struct shallow_copy_ctx *ctx = cb_arg;
7295 12 : struct spdk_blob *_blob = ctx->blob;
7296 :
7297 20 : while (ctx->cluster < _blob->active.num_clusters) {
7298 16 : if (_blob->active.clusters[ctx->cluster] != 0) {
7299 8 : break;
7300 : }
7301 :
7302 8 : ctx->cluster++;
7303 : }
7304 :
7305 12 : if (ctx->cluster < _blob->active.num_clusters) {
7306 8 : blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
7307 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7308 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7309 : bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
7310 : } else {
7311 4 : _blob->locked_operation_in_progress = false;
7312 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7313 : }
7314 12 : }
7315 :
7316 : static void
7317 16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7318 : {
7319 16 : struct shallow_copy_ctx *ctx = cb_arg;
7320 16 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7321 : uint32_t blob_block_size;
7322 : uint64_t blob_total_size;
7323 :
7324 16 : if (bserrno != 0) {
7325 0 : SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
7326 0 : ctx->bserrno = bserrno;
7327 0 : bs_shallow_copy_cleanup_finish(ctx, 0);
7328 0 : return;
7329 : }
7330 :
7331 16 : if (!spdk_blob_is_read_only(_blob)) {
7332 4 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
7333 4 : ctx->bserrno = -EPERM;
7334 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7335 4 : return;
7336 : }
7337 :
7338 12 : blob_block_size = _blob->bs->dev->blocklen;
7339 12 : blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
7340 :
7341 12 : if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
7342           4 :         SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must be at least as large as the blob\n",
7343 : _blob->id);
7344 4 : ctx->bserrno = -EINVAL;
7345 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7346 4 : return;
7347 : }
7348 :
7349 8 : if (blob_block_size % ext_dev->blocklen != 0) {
7350           4 :         SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible "
7351             :                     "with blobstore block size\n", _blob->id);
7352 4 : ctx->bserrno = -EINVAL;
7353 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7354 4 : return;
7355 : }
7356 :
7357 4 : ctx->blob = _blob;
7358 :
7359 4 : if (_blob->locked_operation_in_progress) {
7360 0 : SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
7361 0 : ctx->bserrno = -EBUSY;
7362 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7363 0 : return;
7364 : }
7365 :
7366 4 : _blob->locked_operation_in_progress = true;
7367 :
7368 4 : ctx->cluster = 0;
7369 4 : bs_shallow_copy_cluster_find_next(ctx);
7370 : }
7371 :
7372 : int
7373 16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7374 : spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
7375 : spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
7376 : spdk_blob_op_complete cb_fn, void *cb_arg)
7377 : {
7378 : struct shallow_copy_ctx *ctx;
7379 : struct spdk_io_channel *ext_channel;
7380 :
7381 16 : ctx = calloc(1, sizeof(*ctx));
7382 16 : if (!ctx) {
7383 0 : return -ENOMEM;
7384 : }
7385 :
7386 16 : ctx->bs = bs;
7387 16 : ctx->blobid = blobid;
7388 16 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7389 16 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7390 16 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7391 16 : ctx->bserrno = 0;
7392 16 : ctx->blob_channel = channel;
7393 16 : ctx->status_cb = status_cb_fn;
7394 16 : ctx->status_cb_arg = status_cb_arg;
7395 16 : ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
7396 : SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
7397 16 : if (!ctx->read_buff) {
7398 0 : free(ctx);
7399 0 : return -ENOMEM;
7400 : }
7401 :
7402 16 : ext_channel = ext_dev->create_channel(ext_dev);
7403 16 : if (!ext_channel) {
7404 0 : spdk_free(ctx->read_buff);
7405 0 : free(ctx);
7406 0 : return -ENOMEM;
7407 : }
7408 16 : ctx->ext_dev = ext_dev;
7409 16 : ctx->ext_channel = ext_channel;
7410 :
7411 16 : spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
7412 :
7413 16 : return 0;
7414 : }
7415 : /* END spdk_bs_blob_shallow_copy */
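     :
     : /* Illustrative sketch (hypothetical identifiers): a shallow copy writes only
     :  * the clusters owned by the read-only source blob to ext_dev, reporting
     :  * per-cluster progress through the status callback. */
     : static void
     : example_copy_status(uint64_t copied_clusters, void *cb_arg)
     : {
     : 	SPDK_NOTICELOG("copied %" PRIu64 " clusters so far\n", copied_clusters);
     : }
     :
     : static int
     : example_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *ch,
     : 		     spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
     : 		     spdk_blob_op_complete cb_fn, void *cb_arg)
     : {
     : 	/* Returns -ENOMEM immediately on allocation failure; every other error
     : 	 * is reported asynchronously through cb_fn. */
     : 	return spdk_bs_blob_shallow_copy(bs, ch, blobid, ext_dev,
     : 					 example_copy_status, NULL, cb_fn, cb_arg);
     : }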
7416 :
7417 : /* START spdk_bs_blob_set_parent */
7418 :
7419 : struct set_parent_ctx {
7420 : struct spdk_blob_store *bs;
7421 : int bserrno;
7422 : spdk_bs_op_complete cb_fn;
7423 : void *cb_arg;
7424 :
7425 : struct spdk_blob *blob;
7426 : bool blob_md_ro;
7427 :
7428 : struct blob_parent parent;
7429 : };
7430 :
7431 : static void
7432 24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
7433 : {
7434 24 : struct set_parent_ctx *ctx = cb_arg;
7435 :
7436 24 : assert(ctx != NULL);
7437 :
7438 24 : if (bserrno != 0) {
7439 0 : SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
7440 0 : if (ctx->bserrno == 0) {
7441 0 : ctx->bserrno = bserrno;
7442 : }
7443 : }
7444 :
7445 24 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7446 :
7447 24 : free(ctx);
7448 24 : }
7449 :
7450 : static void
7451 20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
7452 : {
7453 20 : struct set_parent_ctx *ctx = cb_arg;
7454 :
7455 20 : if (ctx->bserrno != 0) {
7456 8 : spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
7457 8 : return;
7458 : }
7459 :
7460 12 : if (bserrno != 0) {
7461 0 : SPDK_ERRLOG("blob close error %d\n", bserrno);
7462 0 : ctx->bserrno = bserrno;
7463 : }
7464 :
7465 12 : bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
7466 : }
7467 :
7468 : static void
7469 12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
7470 : {
7471 12 : struct set_parent_ctx *ctx = cb_arg;
7472 12 : struct spdk_blob *blob = ctx->blob;
7473 12 : struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
7474 :
7475 12 : if (bserrno != 0 && ctx->bserrno == 0) {
7476 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7477 0 : ctx->bserrno = bserrno;
7478 : }
7479 :
7480 : /* Revert md_ro to original state */
7481 12 : blob->md_ro = ctx->blob_md_ro;
7482 :
7483 12 : blob->locked_operation_in_progress = false;
7484 12 : snapshot->locked_operation_in_progress = false;
7485 :
7486 12 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7487 12 : }
7488 :
7489 : static void
7490 12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
7491 : {
7492 12 : struct set_parent_ctx *ctx = cb_arg;
7493 12 : struct spdk_blob *blob = ctx->blob;
7494 :
7495 12 : if (bserrno != 0) {
7496 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7497 0 : ctx->bserrno = bserrno;
7498 0 : bs_set_parent_close_blob(ctx, bserrno);
7499 0 : return;
7500 : }
7501 :
7502 12 : spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
7503 : }
7504 :
7505 : static int
7506 12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7507 : {
7508 : int rc;
7509 :
7510 12 : bs_blob_list_remove(blob);
7511 :
7512 12 : rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
7513 12 : if (rc != 0) {
7514 0 : SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
7515 0 : return rc;
7516 : }
7517 12 : blob->parent_id = parent->u.snapshot.id;
7518 :
7519 12 : if (blob_is_esnap_clone(blob)) {
7520 : /* Remove the xattr that references the external snapshot */
7521 4 : blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
7522 4 : blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
7523 : }
7524 :
7525 12 : bs_blob_list_add(blob);
7526 :
7527 12 : return 0;
7528 : }
7529 :
7530 : static void
7531 20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
7532 : {
7533 20 : struct set_parent_ctx *ctx = cb_arg;
7534 20 : struct spdk_blob *blob = ctx->blob;
7535 : struct spdk_bs_dev *back_bs_dev;
7536 :
7537 20 : if (bserrno != 0) {
7538 0 : SPDK_ERRLOG("snapshot open error %d\n", bserrno);
7539 0 : ctx->bserrno = bserrno;
7540 0 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7541 0 : return;
7542 : }
7543 :
7544 20 : ctx->parent.u.snapshot.blob = snapshot;
7545 20 : ctx->parent.u.snapshot.id = snapshot->id;
7546 :
7547 20 : if (!spdk_blob_is_snapshot(snapshot)) {
7548 4 : SPDK_ERRLOG("parent blob is not a snapshot\n");
7549 4 : ctx->bserrno = -EINVAL;
7550 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7551 4 : return;
7552 : }
7553 :
7554 16 : if (blob->active.num_clusters != snapshot->active.num_clusters) {
7555           4 :                 SPDK_ERRLOG("parent blob's cluster count differs from the child's\n");
7556 4 : ctx->bserrno = -EINVAL;
7557 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7558 4 : return;
7559 : }
7560 :
7561 12 : if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
7562 0 : SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
7563 0 : ctx->bserrno = -EBUSY;
7564 0 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7565 0 : return;
7566 : }
7567 :
7568 12 : blob->locked_operation_in_progress = true;
7569 12 : snapshot->locked_operation_in_progress = true;
7570 :
7571 : /* Temporarily override md_ro flag for MD modification */
7572 12 : blob->md_ro = false;
7573 :
7574 12 : back_bs_dev = bs_create_blob_bs_dev(snapshot);
7575 :
7576 12 : blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
7577 : bs_set_parent_set_back_bs_dev_done,
7578 : ctx);
7579 : }
7580 :
7581 : static void
7582 24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7583 : {
7584 24 : struct set_parent_ctx *ctx = cb_arg;
7585 :
7586 24 : if (bserrno != 0) {
7587 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7588 0 : ctx->bserrno = bserrno;
7589 0 : bs_set_parent_cleanup_finish(ctx, 0);
7590 0 : return;
7591 : }
7592 :
7593 24 : if (!spdk_blob_is_thin_provisioned(blob)) {
7594 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7595 4 : ctx->bserrno = -EINVAL;
7596 4 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7597 4 : return;
7598 : }
7599 :
7600 20 : ctx->blob = blob;
7601 20 : ctx->blob_md_ro = blob->md_ro;
7602 :
7603 20 : spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
7604 : }
7605 :
7606 : void
7607 36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7608 : spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
7609 : {
7610 : struct set_parent_ctx *ctx;
7611 :
7612 36 : if (snapshot_id == SPDK_BLOBID_INVALID) {
7613 4 : SPDK_ERRLOG("snapshot id not valid\n");
7614 4 : cb_fn(cb_arg, -EINVAL);
7615 4 : return;
7616 : }
7617 :
7618 32 : if (blob_id == snapshot_id) {
7619 4 : SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
7620 4 : cb_fn(cb_arg, -EINVAL);
7621 4 : return;
7622 : }
7623 :
7624 28 : if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
7625 4 : SPDK_NOTICELOG("snapshot is already the parent of blob\n");
7626 4 : cb_fn(cb_arg, -EEXIST);
7627 4 : return;
7628 : }
7629 :
7630 24 : ctx = calloc(1, sizeof(*ctx));
7631 24 : if (!ctx) {
7632 0 : cb_fn(cb_arg, -ENOMEM);
7633 0 : return;
7634 : }
7635 :
7636 24 : ctx->bs = bs;
7637 24 : ctx->parent.u.snapshot.id = snapshot_id;
7638 24 : ctx->cb_fn = cb_fn;
7639 24 : ctx->cb_arg = cb_arg;
7640 24 : ctx->bserrno = 0;
7641 :
7642 24 : spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
7643 : }
7644 : /* END spdk_bs_blob_set_parent */
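     :
     : /* Illustrative sketch (hypothetical identifiers): re-parenting a
     :  * thin-provisioned clone onto another snapshot with the same cluster count.
     :  * Fails with -EINVAL for an invalid or self-referencing snapshot_id and
     :  * with -EEXIST when the snapshot is already the parent. */
     : static void
     : example_set_parent(struct spdk_blob_store *bs, spdk_blob_id clone_id,
     : 		   spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn,
     : 		   void *cb_arg)
     : {
     : 	spdk_bs_blob_set_parent(bs, clone_id, snapshot_id, cb_fn, cb_arg);
     : }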
7645 :
7646 : /* START spdk_bs_blob_set_external_parent */
7647 :
7648 : static void
7649 16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
7650 : {
7651 16 : struct set_parent_ctx *ctx = cb_arg;
7652 :
7653 16 : if (bserrno != 0) {
7654 0 : SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
7655 0 : if (ctx->bserrno == 0) {
7656 0 : ctx->bserrno = bserrno;
7657 : }
7658 : }
7659 :
7660 16 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7661 :
7662 16 : free(ctx->parent.u.esnap.id);
7663 16 : free(ctx);
7664 16 : }
7665 :
7666 : static void
7667 8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
7668 : {
7669 8 : struct set_parent_ctx *ctx = cb_arg;
7670 8 : struct spdk_blob *blob = ctx->blob;
7671 :
7672 8 : if (bserrno != 0 && ctx->bserrno == 0) {
7673 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7674 0 : ctx->bserrno = bserrno;
7675 : }
7676 :
7677 : /* Revert md_ro to original state */
7678 8 : blob->md_ro = ctx->blob_md_ro;
7679 :
7680 8 : blob->locked_operation_in_progress = false;
7681 :
7682 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7683 8 : }
7684 :
7685 : static void
7686 8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
7687 : {
7688 8 : struct set_parent_ctx *ctx = cb_arg;
7689 8 : struct spdk_blob *blob = ctx->blob;
7690 :
7691 8 : if (bserrno != 0) {
7692 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7693 0 : ctx->bserrno = bserrno;
7694 0 : bs_set_external_parent_close_blob(ctx, bserrno);
7695 0 : return;
7696 : }
7697 :
7698 8 : spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
7699 : }
7700 :
7701 : static int
7702 8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7703 : {
7704 : int rc;
7705 :
7706 8 : bs_blob_list_remove(blob);
7707 :
7708 8 : if (spdk_blob_is_clone(blob)) {
7709 : /* Remove the xattr that references the snapshot */
7710 0 : blob->parent_id = SPDK_BLOBID_INVALID;
7711 0 : blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
7712 : }
7713 :
7714 8 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
7715 8 : parent->u.esnap.id_len, true);
7716 8 : if (rc != 0) {
7717 0 : SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
7718 0 : return rc;
7719 : }
7720 8 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
7721 8 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
7722 :
7723 8 : bs_blob_list_add(blob);
7724 :
7725 8 : return 0;
7726 : }
7727 :
7728 : static void
7729 16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7730 : {
7731 16 : struct set_parent_ctx *ctx = cb_arg;
7732 16 : const void *esnap_id;
7733 16 : size_t esnap_id_len;
7734 : int rc;
7735 :
7736 16 : if (bserrno != 0) {
7737 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7738 0 : ctx->bserrno = bserrno;
7739 0 : bs_set_parent_cleanup_finish(ctx, 0);
7740 0 : return;
7741 : }
7742 :
7743 16 : ctx->blob = blob;
7744 16 : ctx->blob_md_ro = blob->md_ro;
7745 :
7746 16 : rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
7747 16 : if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
7748 4 : memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
7749 4 : SPDK_ERRLOG("external snapshot is already the parent of blob\n");
7750 4 : ctx->bserrno = -EEXIST;
7751 4 : goto error;
7752 : }
7753 :
7754 12 : if (!spdk_blob_is_thin_provisioned(blob)) {
7755 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7756 4 : ctx->bserrno = -EINVAL;
7757 4 : goto error;
7758 : }
7759 :
7760 8 : if (blob->locked_operation_in_progress) {
7761 0 : SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
7762 0 : ctx->bserrno = -EBUSY;
7763 0 : goto error;
7764 : }
7765 :
7766 8 : blob->locked_operation_in_progress = true;
7767 :
7768 : /* Temporarily override md_ro flag for MD modification */
7769 8 : blob->md_ro = false;
7770 :
7771 8 : blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
7772 : &ctx->parent, bs_set_external_parent_unfrozen, ctx);
7773 8 : return;
7774 :
7775 8 : error:
7776 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7777 : }
7778 :
7779 : void
7780 24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7781 : struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
7782 : uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
7783 : {
7784 : struct set_parent_ctx *ctx;
7785 : uint64_t esnap_dev_size, cluster_sz;
7786 :
7787 24 : if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
7788 4 : SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
7789 4 : cb_fn(cb_arg, -EINVAL);
7790 4 : return;
7791 : }
7792 :
7793 20 : esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
7794 20 : cluster_sz = spdk_bs_get_cluster_size(bs);
7795 20 : if ((esnap_dev_size % cluster_sz) != 0) {
7796 4 : SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
7797 : "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
7798 4 : cb_fn(cb_arg, -EINVAL);
7799 4 : return;
7800 : }
7801 :
7802 16 : ctx = calloc(1, sizeof(*ctx));
7803 16 : if (!ctx) {
7804 0 : cb_fn(cb_arg, -ENOMEM);
7805 0 : return;
7806 : }
7807 :
7808 16 : ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
7809 16 : if (!ctx->parent.u.esnap.id) {
7810 0 : free(ctx);
7811 0 : cb_fn(cb_arg, -ENOMEM);
7812 0 : return;
7813 : }
7814 :
7815 16 : ctx->bs = bs;
7816 16 : ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
7817 16 : memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
7818 16 : ctx->parent.u.esnap.id_len = esnap_id_len;
7819 16 : ctx->cb_fn = cb_fn;
7820 16 : ctx->cb_arg = cb_arg;
7821 16 : ctx->bserrno = 0;
7822 :
7823 16 : spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
7824 : }
7825 : /* END spdk_bs_blob_set_external_parent */
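     :
     : /* Illustrative sketch (hypothetical identifiers; assumes the esnap id is a
     :  * NUL-terminated string, which is a convention of the caller, not of the
     :  * blobstore): attaching an external snapshot device as the parent. The
     :  * device size must be a whole multiple of the blobstore cluster size. */
     : static void
     : example_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blobid,
     : 			    struct spdk_bs_dev *esnap_dev, const char *esnap_id,
     : 			    spdk_blob_op_complete cb_fn, void *cb_arg)
     : {
     : 	spdk_bs_blob_set_external_parent(bs, blobid, esnap_dev, esnap_id,
     : 					 strlen(esnap_id) + 1, cb_fn, cb_arg);
     : }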
7826 :
7827 : /* START spdk_blob_resize */
7828 : struct spdk_bs_resize_ctx {
7829 : spdk_blob_op_complete cb_fn;
7830 : void *cb_arg;
7831 : struct spdk_blob *blob;
7832 : uint64_t sz;
7833 : int rc;
7834 : };
7835 :
7836 : static void
7837 202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
7838 : {
7839 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7840 :
7841 202 : if (rc != 0) {
7842 0 : SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
7843 : }
7844 :
7845 202 : if (ctx->rc != 0) {
7846           4 :                 SPDK_ERRLOG("Resize failed, ctx->rc=%d\n", ctx->rc);
7847 4 : rc = ctx->rc;
7848 : }
7849 :
7850 202 : ctx->blob->locked_operation_in_progress = false;
7851 :
7852 202 : ctx->cb_fn(ctx->cb_arg, rc);
7853 202 : free(ctx);
7854 202 : }
7855 :
7856 : static void
7857 202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
7858 : {
7859 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7860 :
7861 202 : if (rc != 0) {
7862 0 : ctx->blob->locked_operation_in_progress = false;
7863 0 : ctx->cb_fn(ctx->cb_arg, rc);
7864 0 : free(ctx);
7865 0 : return;
7866 : }
7867 :
7868 202 : ctx->rc = blob_resize(ctx->blob, ctx->sz);
7869 :
7870 202 : blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
7871 : }
7872 :
7873 : void
7874 216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
7875 : {
7876 : struct spdk_bs_resize_ctx *ctx;
7877 :
7878 216 : blob_verify_md_op(blob);
7879 :
7880 216 : SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
7881 :
7882 216 : if (blob->md_ro) {
7883 4 : cb_fn(cb_arg, -EPERM);
7884 4 : return;
7885 : }
7886 :
7887 212 : if (sz == blob->active.num_clusters) {
7888 10 : cb_fn(cb_arg, 0);
7889 10 : return;
7890 : }
7891 :
7892 202 : if (blob->locked_operation_in_progress) {
7893 0 : cb_fn(cb_arg, -EBUSY);
7894 0 : return;
7895 : }
7896 :
7897 202 : ctx = calloc(1, sizeof(*ctx));
7898 202 : if (!ctx) {
7899 0 : cb_fn(cb_arg, -ENOMEM);
7900 0 : return;
7901 : }
7902 :
7903 202 : blob->locked_operation_in_progress = true;
7904 202 : ctx->cb_fn = cb_fn;
7905 202 : ctx->cb_arg = cb_arg;
7906 202 : ctx->blob = blob;
7907 202 : ctx->sz = sz;
7908 202 : blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
7909 : }
7910 :
7911 : /* END spdk_blob_resize */
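     :
     : /* Illustrative sketch (hypothetical identifiers): resizing an open blob to
     :  * num_clusters. The resize happens with I/O frozen, fails with -EPERM on a
     :  * metadata read-only blob, and completes immediately when the size is
     :  * unchanged. */
     : static void
     : example_resize(struct spdk_blob *blob, uint64_t num_clusters,
     : 	       spdk_blob_op_complete cb_fn, void *cb_arg)
     : {
     : 	spdk_blob_resize(blob, num_clusters, cb_fn, cb_arg);
     : }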
7912 :
7913 :
7914 : /* START spdk_bs_delete_blob */
7915 :
7916 : static void
7917 1488 : bs_delete_close_cpl(void *cb_arg, int bserrno)
7918 : {
7919 1488 : spdk_bs_sequence_t *seq = cb_arg;
7920 :
7921 1488 : bs_sequence_finish(seq, bserrno);
7922 1488 : }
7923 :
7924 : static void
7925 1488 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
7926 : {
7927 1488 : struct spdk_blob *blob = cb_arg;
7928 :
7929 1488 : if (bserrno != 0) {
7930 : /*
7931 : * We already removed this blob from the blobstore tailq, so
7932 : * we need to free it here since this is the last reference
7933 : * to it.
7934 : */
7935 0 : blob_free(blob);
7936 0 : bs_delete_close_cpl(seq, bserrno);
7937 0 : return;
7938 : }
7939 :
7940 : /*
7941 : * This will immediately decrement the ref_count and call
7942 : * the completion routine since the metadata state is clean.
7943 : * By calling spdk_blob_close, we reduce the number of call
7944 : * points into code that touches the blob->open_ref count
7945 : * and the blobstore's blob list.
7946 : */
7947 1488 : spdk_blob_close(blob, bs_delete_close_cpl, seq);
7948 : }
7949 :
7950 : struct delete_snapshot_ctx {
7951 : struct spdk_blob_list *parent_snapshot_entry;
7952 : struct spdk_blob *snapshot;
7953 : struct spdk_blob_md_page *page;
7954 : bool snapshot_md_ro;
7955 : struct spdk_blob *clone;
7956 : bool clone_md_ro;
7957 : spdk_blob_op_with_handle_complete cb_fn;
7958 : void *cb_arg;
7959 : int bserrno;
7960 : uint32_t next_extent_page;
7961 : };
7962 :
7963 : static void
7964 110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
7965 : {
7966 110 : struct delete_snapshot_ctx *ctx = cb_arg;
7967 :
7968 110 : if (bserrno != 0) {
7969 0 : SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
7970 : }
7971 :
7972 110 : assert(ctx != NULL);
7973 :
7974 110 : if (bserrno != 0 && ctx->bserrno == 0) {
7975 0 : ctx->bserrno = bserrno;
7976 : }
7977 :
7978 110 : ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
7979 110 : spdk_free(ctx->page);
7980 110 : free(ctx);
7981 110 : }
7982 :
7983 : static void
7984 22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
7985 : {
7986 22 : struct delete_snapshot_ctx *ctx = cb_arg;
7987 :
7988 22 : if (bserrno != 0) {
7989 0 : ctx->bserrno = bserrno;
7990 0 : SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
7991 : }
7992 :
7993 22 : if (ctx->bserrno != 0) {
7994 22 : assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
7995 22 : RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
7996 22 : spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
7997 : }
7998 :
7999 22 : ctx->snapshot->locked_operation_in_progress = false;
8000 22 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8001 :
8002 22 : spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
8003 22 : }
8004 :
8005 : static void
8006 12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
8007 : {
8008 12 : struct delete_snapshot_ctx *ctx = cb_arg;
8009 :
8010 12 : ctx->clone->locked_operation_in_progress = false;
8011 12 : ctx->clone->md_ro = ctx->clone_md_ro;
8012 :
8013 12 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8014 12 : }
8015 :
8016 : static void
8017 48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
8018 : {
8019 48 : struct delete_snapshot_ctx *ctx = cb_arg;
8020 :
8021 48 : if (bserrno) {
8022 0 : ctx->bserrno = bserrno;
8023 0 : delete_snapshot_cleanup_clone(ctx, 0);
8024 0 : return;
8025 : }
8026 :
8027 48 : ctx->clone->locked_operation_in_progress = false;
8028 48 : spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
8029 : }
8030 :
8031 : static void
8032 52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
8033 : {
8034 52 : struct delete_snapshot_ctx *ctx = cb_arg;
8035 52 : struct spdk_blob_list *parent_snapshot_entry = NULL;
8036 52 : struct spdk_blob_list *snapshot_entry = NULL;
8037 52 : struct spdk_blob_list *clone_entry = NULL;
8038 52 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8039 :
8040 52 : if (bserrno) {
8041 4 : SPDK_ERRLOG("Failed to sync MD on blob\n");
8042 4 : ctx->bserrno = bserrno;
8043 4 : delete_snapshot_cleanup_clone(ctx, 0);
8044 4 : return;
8045 : }
8046 :
8047 : /* Get snapshot entry for the snapshot we want to remove */
8048 48 : snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
8049 :
8050 48 : assert(snapshot_entry != NULL);
8051 :
8052 : /* Remove clone entry in this snapshot (at this point there can be only one clone) */
8053 48 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8054 48 : assert(clone_entry != NULL);
8055 48 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
8056 48 : snapshot_entry->clone_count--;
8057 48 : assert(TAILQ_EMPTY(&snapshot_entry->clones));
8058 :
8059 48 : switch (ctx->snapshot->parent_id) {
8060 40 : case SPDK_BLOBID_INVALID:
8061 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
8062 : /* No parent snapshot - just remove clone entry */
8063 40 : free(clone_entry);
8064 40 : break;
8065 8 : default:
8066 :                 /* This snapshot is also a clone of another snapshot - we need to
8067 :                  * update the parent snapshot (remove the current clone, add a new one
8068 :                  * inherited from the snapshot that is being removed) */
8069 :
8070 : /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
8071 : * snapshot that we are removing */
8072 8 : blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
8073 : &snapshot_clone_entry);
8074 :
8075 : /* Switch clone entry in parent snapshot */
8076 8 : TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
8077 8 : TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
8078 8 : free(snapshot_clone_entry);
8079 : }
8080 :
8081 : /* Restore md_ro flags */
8082 48 : ctx->clone->md_ro = ctx->clone_md_ro;
8083 48 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8084 :
8085 48 : blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
8086 : }
8087 :
8088 : static void
8089 56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
8090 : {
8091 56 : struct delete_snapshot_ctx *ctx = cb_arg;
8092 : uint64_t i;
8093 :
8094 56 : ctx->snapshot->md_ro = false;
8095 :
8096 56 : if (bserrno) {
8097 4 : SPDK_ERRLOG("Failed to sync MD on clone\n");
8098 4 : ctx->bserrno = bserrno;
8099 :
8100 : /* Restore snapshot to previous state */
8101 4 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8102 4 : if (bserrno != 0) {
8103 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8104 0 : return;
8105 : }
8106 :
8107 4 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8108 4 : return;
8109 : }
8110 :
8111 : /* Clear cluster map entries for snapshot */
8112 552 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8113 500 : if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
8114 492 : if (ctx->snapshot->active.clusters[i] != 0) {
8115 328 : ctx->snapshot->active.num_allocated_clusters--;
8116 : }
8117 492 : ctx->snapshot->active.clusters[i] = 0;
8118 : }
8119 : }
8120 78 : for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
8121 52 : i < ctx->clone->active.num_extent_pages; i++) {
8122 26 : if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
8123 24 : ctx->snapshot->active.extent_pages[i] = 0;
8124 : }
8125 : }
8126 :
8127 52 : blob_set_thin_provision(ctx->snapshot);
8128 52 : ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
8129 :
8130 52 : if (ctx->parent_snapshot_entry != NULL) {
8131 8 : ctx->snapshot->back_bs_dev = NULL;
8132 : }
8133 :
8134 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
8135 : }
8136 :
8137 : static void
8138 56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
8139 : {
8140 : int bserrno;
8141 :
8142 : /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
8143 56 : blob_back_bs_destroy(ctx->clone);
8144 :
8145 : /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
8146 56 : if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
8147 8 : bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
8148 : BLOB_EXTERNAL_SNAPSHOT_ID);
8149 8 : if (bserrno != 0) {
8150 0 : ctx->bserrno = bserrno;
8151 :
8152 : /* Restore snapshot to previous state */
8153 0 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8154 0 : if (bserrno != 0) {
8155 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8156 0 : return;
8157 : }
8158 :
8159 0 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8160 0 : return;
8161 : }
8162 8 : ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
8163 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8164 : /* Do not delete the external snapshot along with this snapshot */
8165 8 : ctx->snapshot->back_bs_dev = NULL;
8166 8 : ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
8167 48 : } else if (ctx->parent_snapshot_entry != NULL) {
8168 : /* ...to parent snapshot */
8169 8 : ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
8170 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8171 8 : blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
8172 : sizeof(spdk_blob_id),
8173 : true);
8174 : } else {
8175 : /* ...to blobid invalid and zeroes dev */
8176 40 : ctx->clone->parent_id = SPDK_BLOBID_INVALID;
8177 40 : ctx->clone->back_bs_dev = bs_create_zeroes_dev();
8178 40 : blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
8179 : }
8180 :
8181 56 : spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
8182 : }
8183 :
8184 : static void
8185 58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
8186 : {
8187 58 : struct delete_snapshot_ctx *ctx = cb_arg;
8188 : uint32_t *extent_page;
8189 : uint64_t i;
8190 :
8191 84 : for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
8192 54 : i < ctx->clone->active.num_extent_pages; i++) {
8193 28 : if (ctx->snapshot->active.extent_pages[i] == 0) {
8194 : /* No extent page to use from snapshot */
8195 8 : continue;
8196 : }
8197 :
8198 20 : extent_page = &ctx->clone->active.extent_pages[i];
8199 20 : if (*extent_page == 0) {
8200 : /* Copy extent page from snapshot when clone did not have a matching one */
8201 18 : *extent_page = ctx->snapshot->active.extent_pages[i];
8202 18 : continue;
8203 : }
8204 :
8205 : /* Clone and snapshot both contain partially filled matching extent pages.
8206 :                  * Update the clone extent page in place with a cluster map containing the mix of both. */
8207 2 : ctx->next_extent_page = i + 1;
8208 2 : memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
8209 :
8210 2 : blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
8211 : delete_snapshot_update_extent_pages, ctx);
8212 2 : return;
8213 : }
8214 56 : delete_snapshot_update_extent_pages_cpl(ctx);
8215 : }
8216 :
8217 : static void
8218 60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
8219 : {
8220 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8221 : uint64_t i;
8222 :
8223 : /* Temporarily override md_ro flag for clone for MD modification */
8224 60 : ctx->clone_md_ro = ctx->clone->md_ro;
8225 60 : ctx->clone->md_ro = false;
8226 :
8227 60 : if (bserrno) {
8228 4 : SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
8229 4 : ctx->bserrno = bserrno;
8230 4 : delete_snapshot_cleanup_clone(ctx, 0);
8231 4 : return;
8232 : }
8233 :
8234 : /* Copy snapshot map to clone map (only unallocated clusters in clone) */
8235 596 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8236 540 : if (ctx->clone->active.clusters[i] == 0) {
8237 532 : ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
8238 532 : if (ctx->clone->active.clusters[i] != 0) {
8239 368 : ctx->clone->active.num_allocated_clusters++;
8240 : }
8241 : }
8242 : }
8243 56 : ctx->next_extent_page = 0;
8244 56 : delete_snapshot_update_extent_pages(ctx, 0);
8245 : }
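     :
     : /* Worked example of the merge above (illustrative): if the snapshot's
     :  * cluster map is [A, B, 0, C] and the clone's is [0, B', 0, 0], the clone
     :  * becomes [A, B', 0, C]. Only entries the clone never allocated are
     :  * inherited, and each inherited non-zero entry increments the clone's
     :  * num_allocated_clusters. */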
8246 :
8247 : static void
8248 8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
8249 : {
8250 8 : struct delete_snapshot_ctx *ctx = cb_arg;
8251 :
8252 8 : if (bserrno != 0) {
8253 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
8254 : blob->id, bserrno);
8255 : /* That error should not stop us from syncing metadata. */
8256 : }
8257 :
8258 8 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8259 8 : }
8260 :
8261 : static void
8262 60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
8263 : {
8264 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8265 :
8266 60 : if (bserrno) {
8267 0 : SPDK_ERRLOG("Failed to freeze I/O on clone\n");
8268 0 : ctx->bserrno = bserrno;
8269 0 : delete_snapshot_cleanup_clone(ctx, 0);
8270 0 : return;
8271 : }
8272 :
8273 : /* Temporarily override md_ro flag for snapshot for MD modification */
8274 60 : ctx->snapshot_md_ro = ctx->snapshot->md_ro;
8275 60 : ctx->snapshot->md_ro = false;
8276 :
8277 : /* Mark the blob as pending removal for power-failure safety; store the clone id for recovery */
8278 60 : ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
8279 : sizeof(spdk_blob_id), true);
8280 60 : if (ctx->bserrno != 0) {
8281 0 : delete_snapshot_cleanup_clone(ctx, 0);
8282 0 : return;
8283 : }
8284 :
8285 60 : if (blob_is_esnap_clone(ctx->snapshot)) {
8286 8 : blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
8287 : delete_snapshot_esnap_channels_destroyed_cb,
8288 : ctx);
8289 8 : return;
8290 : }
8291 :
8292 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8293 : }
8294 :
8295 : static void
8296 70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
8297 : {
8298 70 : struct delete_snapshot_ctx *ctx = cb_arg;
8299 :
8300 70 : if (bserrno) {
8301 10 : SPDK_ERRLOG("Failed to open clone\n");
8302 10 : ctx->bserrno = bserrno;
8303 10 : delete_snapshot_cleanup_snapshot(ctx, 0);
8304 10 : return;
8305 : }
8306 :
8307 60 : ctx->clone = clone;
8308 :
8309 60 : if (clone->locked_operation_in_progress) {
8310 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
8311 0 : ctx->bserrno = -EBUSY;
8312 0 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8313 0 : return;
8314 : }
8315 :
8316 60 : clone->locked_operation_in_progress = true;
8317 :
8318 60 : blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
8319 : }
8320 :
8321 : static void
8322 70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
8323 : {
8324 70 : struct spdk_blob_list *snapshot_entry = NULL;
8325 70 : struct spdk_blob_list *clone_entry = NULL;
8326 70 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8327 :
8328 : /* Get snapshot entry for the snapshot we want to remove */
8329 70 : snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
8330 :
8331 70 : assert(snapshot_entry != NULL);
8332 :
8333 : /* Get clone of the snapshot (at this point there can be only one clone) */
8334 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8335 70 : assert(snapshot_entry->clone_count == 1);
8336 70 : assert(clone_entry != NULL);
8337 :
8338 : /* Get the snapshot entry for the parent snapshot, and the clone entry within
8339 : * that snapshot for the snapshot that we are removing */
8340 70 : blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
8341 : &snapshot_clone_entry);
8342 :
8343 70 : spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
8344 70 : }
8345 :
8346 : static void
8347 1550 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
8348 : {
8349 1550 : spdk_bs_sequence_t *seq = cb_arg;
8350 1550 : struct spdk_blob_list *snapshot_entry = NULL;
8351 : uint32_t page_num;
8352 :
8353 1550 : if (bserrno) {
8354 62 : SPDK_ERRLOG("Failed to remove blob\n");
8355 62 : bs_sequence_finish(seq, bserrno);
8356 62 : return;
8357 : }
8358 :
8359 : /* Remove snapshot from the list */
8360 1488 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8361 1488 : if (snapshot_entry != NULL) {
8362 140 : TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
8363 140 : free(snapshot_entry);
8364 : }
8365 :
8366 1488 : page_num = bs_blobid_to_page(blob->id);
8367 1488 : spdk_bit_array_clear(blob->bs->used_blobids, page_num);
8368 1488 : blob->state = SPDK_BLOB_STATE_DIRTY;
8369 1488 : blob->active.num_pages = 0;
8370 1488 : blob_resize(blob, 0);
8371 :
8372 1488 : blob_persist(seq, blob, bs_delete_persist_cpl, blob);
8373 : }
8374 :
8375 : static int
8376 1550 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
8377 : {
8378 1550 : struct spdk_blob_list *snapshot_entry = NULL;
8379 1550 : struct spdk_blob_list *clone_entry = NULL;
8380 1550 : struct spdk_blob *clone = NULL;
8381 1550 : bool has_one_clone = false;
8382 :
8383 : /* Check if this is a snapshot with clones */
8384 1550 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8385 1550 : if (snapshot_entry != NULL) {
8386 190 : if (snapshot_entry->clone_count > 1) {
8387 24 : SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
8388 24 : return -EBUSY;
8389 166 : } else if (snapshot_entry->clone_count == 1) {
8390 70 : has_one_clone = true;
8391 : }
8392 : }
8393 :
8394 : /* Check if someone has this blob open (besides this delete context):
8395 : * - open_ref == 1 - only this context opened the blob, so it is ok to remove it
8396 : * - open_ref <= 2 && has_one_clone == true - the clone is holding the snapshot,
8397 : * and that is ok, because we will update it accordingly */
8398 1526 : if (blob->open_ref <= 2 && has_one_clone) {
8399 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8400 70 : assert(clone_entry != NULL);
8401 70 : clone = blob_lookup(blob->bs, clone_entry->id);
8402 :
8403 70 : if (blob->open_ref == 2 && clone == NULL) {
8404 : /* Clone is closed and someone else opened this blob */
8405 0 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8406 0 : return -EBUSY;
8407 : }
8408 :
8409 70 : *update_clone = true;
8410 70 : return 0;
8411 : }
8412 :
8413 1456 : if (blob->open_ref > 1) {
8414 16 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8415 16 : return -EBUSY;
8416 : }
8417 :
8418 1440 : assert(has_one_clone == false);
8419 1440 : *update_clone = false;
8420 1440 : return 0;
8421 : }
8422 :
8423 : static void
8424 0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
8425 : {
8426 0 : spdk_bs_sequence_t *seq = cb_arg;
8427 :
8428 0 : bs_sequence_finish(seq, -ENOMEM);
8429 0 : }
8430 :
8431 : static void
8432 1560 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
8433 : {
8434 1560 : spdk_bs_sequence_t *seq = cb_arg;
8435 : struct delete_snapshot_ctx *ctx;
8436 1560 : bool update_clone = false;
8437 :
8438 1560 : if (bserrno != 0) {
8439 10 : bs_sequence_finish(seq, bserrno);
8440 10 : return;
8441 : }
8442 :
8443 1550 : blob_verify_md_op(blob);
8444 :
8445 1550 : ctx = calloc(1, sizeof(*ctx));
8446 1550 : if (ctx == NULL) {
8447 0 : spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
8448 0 : return;
8449 : }
8450 :
8451 1550 : ctx->snapshot = blob;
8452 1550 : ctx->cb_fn = bs_delete_blob_finish;
8453 1550 : ctx->cb_arg = seq;
8454 :
8455 : /* Check if the blob can be removed, and if it is a snapshot with a clone on top of it */
8456 1550 : ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
8457 1550 : if (ctx->bserrno) {
8458 40 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8459 40 : return;
8460 : }
8461 :
8462 1510 : if (blob->locked_operation_in_progress) {
8463 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
8464 0 : ctx->bserrno = -EBUSY;
8465 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8466 0 : return;
8467 : }
8468 :
8469 1510 : blob->locked_operation_in_progress = true;
8470 :
8471 : /*
8472 : * Remove the blob from the blob_store list now, to ensure it does not
8473 : * get returned after this point by blob_lookup().
8474 : */
8475 1510 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
8476 1510 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
8477 :
8478 1510 : if (update_clone) {
8479 70 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
8480 70 : if (!ctx->page) {
8481 0 : ctx->bserrno = -ENOMEM;
8482 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8483 0 : return;
8484 : }
8485 : /* This blob is a snapshot with active clone - update clone first */
8486 70 : update_clone_on_snapshot_deletion(blob, ctx);
8487 : } else {
8488 : /* This blob does not have any clones - just remove it */
8489 1440 : bs_blob_list_remove(blob);
8490 1440 : bs_delete_blob_finish(seq, blob, 0);
8491 1440 : free(ctx);
8492 : }
8493 : }
8494 :
8495 : void
8496 1560 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8497 : spdk_blob_op_complete cb_fn, void *cb_arg)
8498 : {
8499 1560 : struct spdk_bs_cpl cpl;
8500 : spdk_bs_sequence_t *seq;
8501 :
8502 1560 : SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
8503 :
8504 1560 : assert(spdk_get_thread() == bs->md_thread);
8505 :
8506 1560 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8507 1560 : cpl.u.blob_basic.cb_fn = cb_fn;
8508 1560 : cpl.u.blob_basic.cb_arg = cb_arg;
8509 :
8510 1560 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8511 1560 : if (!seq) {
8512 0 : cb_fn(cb_arg, -ENOMEM);
8513 0 : return;
8514 : }
8515 :
8516 1560 : spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
8517 : }
8518 :
8519 : /* END spdk_bs_delete_blob */
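 :
 : /*
 :  * Illustrative usage sketch (not part of the upstream source): deleting a
 :  * blob from the blobstore's metadata thread. `g_bs` and `g_blobid` are
 :  * hypothetical caller state. The delete fails with -EBUSY if the blob is
 :  * open elsewhere or is a snapshot with more than one clone (see
 :  * bs_is_blob_deletable() above).
 :  *
 :  *	static void
 :  *	example_delete_cpl(void *cb_arg, int bserrno)
 :  *	{
 :  *		if (bserrno != 0) {
 :  *			SPDK_ERRLOG("Blob deletion failed: %d\n", bserrno);
 :  *		}
 :  *	}
 :  *
 :  *	spdk_bs_delete_blob(g_bs, g_blobid, example_delete_cpl, NULL);
 :  */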
8520 :
8521 : /* START spdk_bs_open_blob */
8522 :
8523 : static void
8524 3466 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8525 : {
8526 3466 : struct spdk_blob *blob = cb_arg;
8527 : struct spdk_blob *existing;
8528 :
8529 3466 : if (bserrno != 0) {
8530 64 : blob_free(blob);
8531 64 : seq->cpl.u.blob_handle.blob = NULL;
8532 64 : bs_sequence_finish(seq, bserrno);
8533 64 : return;
8534 : }
8535 :
8536 3402 : existing = blob_lookup(blob->bs, blob->id);
8537 3402 : if (existing) {
8538 4 : blob_free(blob);
8539 4 : existing->open_ref++;
8540 4 : seq->cpl.u.blob_handle.blob = existing;
8541 4 : bs_sequence_finish(seq, 0);
8542 4 : return;
8543 : }
8544 :
8545 3398 : blob->open_ref++;
8546 :
8547 3398 : spdk_bit_array_set(blob->bs->open_blobids, blob->id);
8548 3398 : RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
8549 :
8550 3398 : bs_sequence_finish(seq, bserrno);
8551 : }
8552 :
8553 : static inline void
8554 4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
8555 : {
8556 : #define FIELD_OK(field) \
8557 : offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
8558 :
8559 : #define SET_FIELD(field) \
8560 : if (FIELD_OK(field)) { \
8561 : dst->field = src->field; \
8562 : } \
8563 :
8564 4 : SET_FIELD(clear_method);
8565 4 : SET_FIELD(esnap_ctx);
8566 :
8567 4 : dst->opts_size = src->opts_size;
8568 :
8569 : /* Do not remove this statement; if you add a new field, update the expected
8570 : * size in the assert and add a corresponding SET_FIELD statement as well */
8571 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
8572 :
8573 : #undef FIELD_OK
8574 : #undef SET_FIELD
8575 4 : }
8576 :
8577 : static void
8578 4263 : bs_open_blob(struct spdk_blob_store *bs,
8579 : spdk_blob_id blobid,
8580 : struct spdk_blob_open_opts *opts,
8581 : spdk_blob_op_with_handle_complete cb_fn,
8582 : void *cb_arg)
8583 : {
8584 : struct spdk_blob *blob;
8585 4263 : struct spdk_bs_cpl cpl;
8586 4263 : struct spdk_blob_open_opts opts_local;
8587 : spdk_bs_sequence_t *seq;
8588 : uint32_t page_num;
8589 :
8590 4263 : SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
8591 4263 : assert(spdk_get_thread() == bs->md_thread);
8592 :
8593 4263 : page_num = bs_blobid_to_page(blobid);
8594 4263 : if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
8595 : /* Invalid blobid */
8596 48 : cb_fn(cb_arg, NULL, -ENOENT);
8597 48 : return;
8598 : }
8599 :
8600 4215 : blob = blob_lookup(bs, blobid);
8601 4215 : if (blob) {
8602 749 : blob->open_ref++;
8603 749 : cb_fn(cb_arg, blob, 0);
8604 749 : return;
8605 : }
8606 :
8607 3466 : blob = blob_alloc(bs, blobid);
8608 3466 : if (!blob) {
8609 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8610 0 : return;
8611 : }
8612 :
8613 3466 : spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
8614 3466 : if (opts) {
8615 4 : blob_open_opts_copy(opts, &opts_local);
8616 : }
8617 :
8618 3466 : blob->clear_method = opts_local.clear_method;
8619 :
8620 3466 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
8621 3466 : cpl.u.blob_handle.cb_fn = cb_fn;
8622 3466 : cpl.u.blob_handle.cb_arg = cb_arg;
8623 3466 : cpl.u.blob_handle.blob = blob;
8624 3466 : cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
8625 :
8626 3466 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8627 3466 : if (!seq) {
8628 0 : blob_free(blob);
8629 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8630 0 : return;
8631 : }
8632 :
8633 3466 : blob_load(seq, blob, bs_open_blob_cpl, blob);
8634 : }
8635 :
8636 : void
8637 4259 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8638 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8639 : {
8640 4259 : bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
8641 4259 : }
8642 :
8643 : void
8644 4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
8645 : struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8646 : {
8647 4 : bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
8648 4 : }
8649 :
8650 : /* END spdk_bs_open_blob */
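 :
 : /*
 :  * A minimal usage sketch (assumed names `g_bs`, `g_blobid`, `open_cpl`):
 :  * spdk_blob_open_opts_init() records opts_size, which is what lets
 :  * blob_open_opts_copy() above copy only the fields the caller's binary
 :  * knows about, keeping older callers ABI-compatible.
 :  *
 :  *	struct spdk_blob_open_opts opts;
 :  *
 :  *	spdk_blob_open_opts_init(&opts, sizeof(opts));
 :  *	opts.clear_method = BLOB_CLEAR_WITH_NONE;
 :  *	spdk_bs_open_blob_ext(g_bs, g_blobid, &opts, open_cpl, NULL);
 :  */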
8651 :
8652 : /* START spdk_blob_set_read_only */
8653 : int
8654 232 : spdk_blob_set_read_only(struct spdk_blob *blob)
8655 : {
8656 232 : blob_verify_md_op(blob);
8657 :
8658 232 : blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
8659 :
8660 232 : blob->state = SPDK_BLOB_STATE_DIRTY;
8661 232 : return 0;
8662 : }
8663 : /* END spdk_blob_set_read_only */
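 :
 : /*
 :  * Note: spdk_blob_set_read_only() only sets the flag and marks the in-memory
 :  * metadata dirty; blob->data_ro and blob->md_ro take effect once the metadata
 :  * is persisted (see blob_sync_md_cpl() below). A hedged sketch, with
 :  * `sync_cpl` hypothetical:
 :  *
 :  *	spdk_blob_set_read_only(blob);
 :  *	spdk_blob_sync_md(blob, sync_cpl, NULL);
 :  */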
8664 :
8665 : /* START spdk_blob_sync_md */
8666 :
8667 : static void
8668 1591 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8669 : {
8670 1591 : struct spdk_blob *blob = cb_arg;
8671 :
8672 1591 : if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
8673 400 : blob->data_ro = true;
8674 400 : blob->md_ro = true;
8675 : }
8676 :
8677 1591 : bs_sequence_finish(seq, bserrno);
8678 1591 : }
8679 :
8680 : static void
8681 1591 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8682 : {
8683 1591 : struct spdk_bs_cpl cpl;
8684 : spdk_bs_sequence_t *seq;
8685 :
8686 1591 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8687 1591 : cpl.u.blob_basic.cb_fn = cb_fn;
8688 1591 : cpl.u.blob_basic.cb_arg = cb_arg;
8689 :
8690 1591 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8691 1591 : if (!seq) {
8692 0 : cb_fn(cb_arg, -ENOMEM);
8693 0 : return;
8694 : }
8695 :
8696 1591 : blob_persist(seq, blob, blob_sync_md_cpl, blob);
8697 : }
8698 :
8699 : void
8700 1081 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8701 : {
8702 1081 : blob_verify_md_op(blob);
8703 :
8704 1081 : SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
8705 :
8706 1081 : if (blob->md_ro) {
8707 4 : assert(blob->state == SPDK_BLOB_STATE_CLEAN);
8708 4 : cb_fn(cb_arg, 0);
8709 4 : return;
8710 : }
8711 :
8712 1077 : blob_sync_md(blob, cb_fn, cb_arg);
8713 : }
8714 :
8715 : /* END spdk_blob_sync_md */
8716 :
8717 : struct spdk_blob_cluster_op_ctx {
8718 : struct spdk_thread *thread;
8719 : struct spdk_blob *blob;
8720 : uint32_t cluster_num; /* cluster index in blob */
8721 : uint32_t cluster; /* cluster on disk */
8722 : uint32_t extent_page; /* extent page on disk */
8723 : struct spdk_blob_md_page *page; /* preallocated extent page */
8724 : int rc;
8725 : spdk_blob_op_complete cb_fn;
8726 : void *cb_arg;
8727 : };
8728 :
8729 : static void
8730 876 : blob_op_cluster_msg_cpl(void *arg)
8731 : {
8732 876 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8733 :
8734 876 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
8735 876 : free(ctx);
8736 876 : }
8737 :
8738 : static void
8739 846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
8740 : {
8741 846 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8742 :
8743 846 : ctx->rc = bserrno;
8744 846 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8745 846 : }
8746 :
8747 : static void
8748 82 : blob_insert_new_ep_cb(void *arg, int bserrno)
8749 : {
8750 82 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8751 : uint32_t *extent_page;
8752 :
8753 82 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8754 82 : *extent_page = ctx->extent_page;
8755 82 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8756 82 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8757 82 : }
8758 :
8759 : struct spdk_blob_write_extent_page_ctx {
8760 : struct spdk_blob_store *bs;
8761 :
8762 : uint32_t extent;
8763 : struct spdk_blob_md_page *page;
8764 : };
8765 :
8766 : static void
8767 26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
8768 : {
8769 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8770 :
8771 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8772 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
8773 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8774 :
8775 26 : ctx->rc = bserrno;
8776 26 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8777 26 : }
8778 :
8779 : static void
8780 26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
8781 : {
8782 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8783 :
8784 26 : if (bserrno != 0 || ctx->blob->bs->clean == 0) {
8785 26 : blob_free_cluster_msg_cb(ctx, bserrno);
8786 26 : return;
8787 : }
8788 :
8789 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8790 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8791 : }
8792 :
8793 : static void
8794 0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
8795 : {
8796 0 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8797 :
8798 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8799 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8800 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8801 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8802 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8803 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8804 0 : }
8805 :
8806 : static void
8807 434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8808 : {
8809 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8810 :
8811 434 : free(ctx);
8812 434 : bs_sequence_finish(seq, bserrno);
8813 434 : }
8814 :
8815 : static void
8816 434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8817 : {
8818 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8819 :
8820 434 : if (bserrno != 0) {
8821 0 : blob_persist_extent_page_cpl(seq, ctx, bserrno);
8822 0 : return;
8823 : }
8824 434 : bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
8825 434 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
8826 : blob_persist_extent_page_cpl, ctx);
8827 : }
8828 :
8829 : static void
8830 434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
8831 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
8832 : {
8833 : struct spdk_blob_write_extent_page_ctx *ctx;
8834 : spdk_bs_sequence_t *seq;
8835 434 : struct spdk_bs_cpl cpl;
8836 :
8837 434 : ctx = calloc(1, sizeof(*ctx));
8838 434 : if (!ctx) {
8839 0 : cb_fn(cb_arg, -ENOMEM);
8840 0 : return;
8841 : }
8842 434 : ctx->bs = blob->bs;
8843 434 : ctx->extent = extent;
8844 434 : ctx->page = page;
8845 :
8846 434 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8847 434 : cpl.u.blob_basic.cb_fn = cb_fn;
8848 434 : cpl.u.blob_basic.cb_arg = cb_arg;
8849 :
8850 434 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8851 434 : if (!seq) {
8852 0 : free(ctx);
8853 0 : cb_fn(cb_arg, -ENOMEM);
8854 0 : return;
8855 : }
8856 :
8857 434 : assert(page);
8858 434 : page->next = SPDK_INVALID_MD_PAGE;
8859 434 : page->id = blob->id;
8860 434 : page->sequence_num = 0;
8861 :
8862 434 : blob_serialize_extent_page(blob, cluster_num, page);
8863 :
8864 434 : page->crc = blob_md_page_calc_crc(page);
8865 :
8866 434 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
8867 :
8868 434 : bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
8869 : }
8870 :
8871 : static void
8872 816 : blob_insert_cluster_msg(void *arg)
8873 : {
8874 816 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8875 : uint32_t *extent_page;
8876 :
8877 816 : ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
8878 816 : if (ctx->rc != 0) {
8879 4 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8880 4 : return;
8881 : }
8882 :
8883 812 : if (ctx->blob->use_extent_table == false) {
8884 : /* Extent table is not used; proceed with a sync of md that will only use extents_rle. */
8885 406 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8886 406 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8887 406 : return;
8888 : }
8889 :
8890 406 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8891 406 : if (*extent_page == 0) {
8892 : /* Extent page requires allocation.
8893 : * It was already claimed in the used_md_pages map and placed in ctx. */
8894 82 : assert(ctx->extent_page != 0);
8895 82 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8896 82 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
8897 : blob_insert_new_ep_cb, ctx);
8898 : } else {
8899 : /* It is possible for the original thread to have allocated an extent page
8900 : * for a different cluster in the same extent page. In that case proceed with
8901 : * updating the existing extent page, but release the additional one. */
8902 324 : if (ctx->extent_page != 0) {
8903 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8904 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8905 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8906 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8907 0 : ctx->extent_page = 0;
8908 : }
8909 : /* Extent page already allocated.
8910 : * Every cluster allocation requires just an update of a single extent page. */
8911 324 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
8912 : blob_op_cluster_msg_cb, ctx);
8913 : }
8914 : }
8915 :
8916 : static void
8917 816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
8918 : uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
8919 : spdk_blob_op_complete cb_fn, void *cb_arg)
8920 : {
8921 : struct spdk_blob_cluster_op_ctx *ctx;
8922 :
8923 816 : ctx = calloc(1, sizeof(*ctx));
8924 816 : if (ctx == NULL) {
8925 0 : cb_fn(cb_arg, -ENOMEM);
8926 0 : return;
8927 : }
8928 :
8929 816 : ctx->thread = spdk_get_thread();
8930 816 : ctx->blob = blob;
8931 816 : ctx->cluster_num = cluster_num;
8932 816 : ctx->cluster = cluster;
8933 816 : ctx->extent_page = extent_page;
8934 816 : ctx->page = page;
8935 816 : ctx->cb_fn = cb_fn;
8936 816 : ctx->cb_arg = cb_arg;
8937 :
8938 816 : spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
8939 : }
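 :
 : /*
 :  * The cluster insert/free operations above follow a common SPDK round-trip
 :  * pattern: record the calling thread in the context, hop to bs->md_thread to
 :  * mutate metadata single-threaded, then message back to the original thread
 :  * to complete. A generic sketch of the md-thread side (assumed body):
 :  *
 :  *	static void
 :  *	example_msg(void *arg)	// runs on bs->md_thread
 :  *	{
 :  *		struct spdk_blob_cluster_op_ctx *ctx = arg;
 :  *
 :  *		ctx->rc = 0;	// ... mutate blobstore metadata here ...
 :  *		spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
 :  *	}
 :  */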
8940 :
8941 : static void
8942 60 : blob_free_cluster_msg(void *arg)
8943 : {
8944 60 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8945 : uint32_t *extent_page;
8946 : uint32_t start_cluster_idx;
8947 60 : bool free_extent_page = true;
8948 : size_t i;
8949 :
8950 60 : ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
8951 :
8952 : /* There were concurrent unmaps to the same cluster; only release the cluster on the first one */
8953 60 : if (ctx->cluster == 0) {
8954 8 : blob_op_cluster_msg_cb(ctx, 0);
8955 8 : return;
8956 : }
8957 :
8958 52 : ctx->blob->active.clusters[ctx->cluster_num] = 0;
8959 52 : if (ctx->cluster != 0) {
8960 52 : ctx->blob->active.num_allocated_clusters--;
8961 : }
8962 :
8963 52 : if (ctx->blob->use_extent_table == false) {
8964 : /* Extent table is not used; proceed with a sync of md that will only use extents_rle. */
8965 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8966 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
8967 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8968 26 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8969 26 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8970 26 : return;
8971 : }
8972 :
8973 26 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8974 :
8975 : /* There shouldn't be parallel release operations on same cluster */
8976 26 : assert(*extent_page == ctx->extent_page);
8977 :
8978 26 : start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
8979 48 : for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
8980 48 : if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
8981 26 : free_extent_page = false;
8982 26 : break;
8983 : }
8984 : }
8985 :
8986 26 : if (free_extent_page) {
8987 0 : assert(ctx->extent_page != 0);
8988 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8989 0 : ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
8990 0 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
8991 : blob_free_cluster_free_ep_cb, ctx);
8992 : } else {
8993 26 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
8994 : blob_free_cluster_update_ep_cb, ctx);
8995 : }
8996 : }
8997 :
8998 :
8999 : static void
9000 60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
9001 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
9002 : {
9003 : struct spdk_blob_cluster_op_ctx *ctx;
9004 :
9005 60 : ctx = calloc(1, sizeof(*ctx));
9006 60 : if (ctx == NULL) {
9007 0 : cb_fn(cb_arg, -ENOMEM);
9008 0 : return;
9009 : }
9010 :
9011 60 : ctx->thread = spdk_get_thread();
9012 60 : ctx->blob = blob;
9013 60 : ctx->cluster_num = cluster_num;
9014 60 : ctx->extent_page = extent_page;
9015 60 : ctx->page = page;
9016 60 : ctx->cb_fn = cb_fn;
9017 60 : ctx->cb_arg = cb_arg;
9018 :
9019 60 : spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
9020 : }
9021 :
9022 : /* START spdk_blob_close */
9023 :
9024 : static void
9025 4151 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9026 : {
9027 4151 : struct spdk_blob *blob = cb_arg;
9028 :
9029 4151 : if (bserrno == 0) {
9030 4151 : blob->open_ref--;
9031 4151 : if (blob->open_ref == 0) {
9032 : /*
9033 : * Blobs with active.num_pages == 0 are deleted blobs.
9034 : * These blobs are removed from the blob_store list
9035 : * when the deletion process starts, so don't try to
9036 : * remove them again.
9037 : */
9038 3398 : if (blob->active.num_pages > 0) {
9039 1910 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
9040 1910 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
9041 : }
9042 3398 : blob_free(blob);
9043 : }
9044 : }
9045 :
9046 4151 : bs_sequence_finish(seq, bserrno);
9047 4151 : }
9048 :
9049 : static void
9050 112 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
9051 : {
9052 112 : spdk_bs_sequence_t *seq = cb_arg;
9053 :
9054 112 : if (bserrno != 0) {
9055 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
9056 : blob->id, bserrno);
9057 0 : bs_sequence_finish(seq, bserrno);
9058 0 : return;
9059 : }
9060 :
9061 112 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
9062 : blob->id, spdk_thread_get_name(spdk_get_thread()));
9063 :
9064 : /* Sync metadata */
9065 112 : blob_persist(seq, blob, blob_close_cpl, blob);
9066 : }
9067 :
9068 : void
9069 4151 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
9070 : {
9071 4151 : struct spdk_bs_cpl cpl;
9072 : spdk_bs_sequence_t *seq;
9073 :
9074 4151 : blob_verify_md_op(blob);
9075 :
9076 4151 : SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
9077 :
9078 4151 : if (blob->open_ref == 0) {
9079 0 : cb_fn(cb_arg, -EBADF);
9080 0 : return;
9081 : }
9082 :
9083 4151 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
9084 4151 : cpl.u.blob_basic.cb_fn = cb_fn;
9085 4151 : cpl.u.blob_basic.cb_arg = cb_arg;
9086 :
9087 4151 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
9088 4151 : if (!seq) {
9089 0 : cb_fn(cb_arg, -ENOMEM);
9090 0 : return;
9091 : }
9092 :
9093 4151 : if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
9094 112 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
9095 112 : return;
9096 : }
9097 :
9098 : /* Sync metadata */
9099 4039 : blob_persist(seq, blob, blob_close_cpl, blob);
9100 : }
9101 :
9102 : /* END spdk_blob_close */
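 :
 : /*
 :  * Each successful open takes a reference; spdk_blob_close() drops one and the
 :  * blob is freed only when open_ref reaches zero (with esnap back-device
 :  * channels torn down on the last close). Hedged sketch, `close_cpl` assumed:
 :  *
 :  *	spdk_blob_close(blob, close_cpl, NULL);
 :  */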
9103 :
9104 229 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
9105 : {
9106 229 : return spdk_get_io_channel(bs);
9107 : }
9108 :
9109 : void
9110 229 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
9111 : {
9112 229 : blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
9113 229 : spdk_put_io_channel(channel);
9114 229 : }
9115 :
9116 : void
9117 108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
9118 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9119 : {
9120 108 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9121 : SPDK_BLOB_UNMAP);
9122 108 : }
9123 :
9124 : void
9125 48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
9126 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9127 : {
9128 48 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9129 : SPDK_BLOB_WRITE_ZEROES);
9130 48 : }
9131 :
9132 : void
9133 20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
9134 : void *payload, uint64_t offset, uint64_t length,
9135 : spdk_blob_op_complete cb_fn, void *cb_arg)
9136 : {
9137 20868 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9138 : SPDK_BLOB_WRITE);
9139 20868 : }
9140 :
9141 : void
9142 17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
9143 : void *payload, uint64_t offset, uint64_t length,
9144 : spdk_blob_op_complete cb_fn, void *cb_arg)
9145 : {
9146 17500 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9147 : SPDK_BLOB_READ);
9148 17500 : }
9149 :
9150 : void
9151 140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
9152 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9153 : spdk_blob_op_complete cb_fn, void *cb_arg)
9154 : {
9155 140 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
9156 140 : }
9157 :
9158 : void
9159 940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
9160 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9161 : spdk_blob_op_complete cb_fn, void *cb_arg)
9162 : {
9163 940 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
9164 940 : }
9165 :
9166 : void
9167 208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9168 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9169 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9170 : {
9171 208 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
9172 : io_opts);
9173 208 : }
9174 :
9175 : void
9176 1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9177 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9178 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9179 : {
9180 1300 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
9181 : io_opts);
9182 1300 : }
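 :
 : /*
 :  * Illustrative I/O sketch (not upstream code): offset and length are given in
 :  * io_units and the payload must be DMA-safe memory. `g_bs`, `blob` and
 :  * `write_cpl` are assumed to exist in the caller.
 :  *
 :  *	struct spdk_io_channel *ch = spdk_bs_alloc_io_channel(g_bs);
 :  *	void *buf = spdk_malloc(spdk_bs_get_io_unit_size(g_bs), 0x1000, NULL,
 :  *				SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
 :  *
 :  *	spdk_blob_io_write(blob, ch, buf, 0, 1, write_cpl, NULL);
 :  *	// ... once all I/O on this channel has completed:
 :  *	spdk_bs_free_io_channel(ch);
 :  */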
9183 :
9184 : struct spdk_bs_iter_ctx {
9185 : int64_t page_num;
9186 : struct spdk_blob_store *bs;
9187 :
9188 : spdk_blob_op_with_handle_complete cb_fn;
9189 : void *cb_arg;
9190 : };
9191 :
9192 : static void
9193 1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
9194 : {
9195 1164 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9196 1164 : struct spdk_blob_store *bs = ctx->bs;
9197 : spdk_blob_id id;
9198 :
9199 1164 : if (bserrno == 0) {
9200 444 : ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
9201 444 : free(ctx);
9202 444 : return;
9203 : }
9204 :
9205 720 : ctx->page_num++;
9206 720 : ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
9207 720 : if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
9208 268 : ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
9209 268 : free(ctx);
9210 268 : return;
9211 : }
9212 :
9213 452 : id = bs_page_to_blobid(ctx->page_num);
9214 :
9215 452 : spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
9216 : }
9217 :
9218 : void
9219 292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
9220 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9221 : {
9222 : struct spdk_bs_iter_ctx *ctx;
9223 :
9224 292 : ctx = calloc(1, sizeof(*ctx));
9225 292 : if (!ctx) {
9226 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9227 0 : return;
9228 : }
9229 :
9230 292 : ctx->page_num = -1;
9231 292 : ctx->bs = bs;
9232 292 : ctx->cb_fn = cb_fn;
9233 292 : ctx->cb_arg = cb_arg;
9234 :
9235 292 : bs_iter_cpl(ctx, NULL, -1);
9236 : }
9237 :
9238 : static void
9239 420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
9240 : {
9241 420 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9242 :
9243 420 : bs_iter_cpl(ctx, NULL, -1);
9244 420 : }
9245 :
9246 : void
9247 420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
9248 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9249 : {
9250 : struct spdk_bs_iter_ctx *ctx;
9251 :
9252 420 : assert(blob != NULL);
9253 :
9254 420 : ctx = calloc(1, sizeof(*ctx));
9255 420 : if (!ctx) {
9256 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9257 0 : return;
9258 : }
9259 :
9260 420 : ctx->page_num = bs_blobid_to_page(blob->id);
9261 420 : ctx->bs = bs;
9262 420 : ctx->cb_fn = cb_fn;
9263 420 : ctx->cb_arg = cb_arg;
9264 :
9265 : /* Close the existing blob */
9266 420 : spdk_blob_close(blob, bs_iter_close_cpl, ctx);
9267 : }
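 :
 : /*
 :  * A sketch of the iteration pattern (assumed names `g_bs`, `iter_cb`):
 :  * spdk_bs_iter_next() closes the blob passed to it, so the callback must not
 :  * keep using that handle. Iteration ends when the callback sees -ENOENT.
 :  *
 :  *	static void
 :  *	iter_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
 :  *	{
 :  *		if (bserrno != 0) {
 :  *			return;	// -ENOENT: no more blobs
 :  *		}
 :  *		SPDK_NOTICELOG("found blob 0x%" PRIx64 "\n", spdk_blob_get_id(blob));
 :  *		spdk_bs_iter_next(g_bs, blob, iter_cb, cb_arg);
 :  *	}
 :  *
 :  *	spdk_bs_iter_first(g_bs, iter_cb, NULL);
 :  */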
9268 :
9269 : static int
9270 943 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9271 : uint16_t value_len, bool internal)
9272 : {
9273 : struct spdk_xattr_tailq *xattrs;
9274 : struct spdk_xattr *xattr;
9275 : size_t desc_size;
9276 : void *tmp;
9277 :
9278 943 : blob_verify_md_op(blob);
9279 :
9280 943 : if (blob->md_ro) {
9281 4 : return -EPERM;
9282 : }
9283 :
9284 939 : desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
9285 939 : if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
9286 4 : SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
9287 : desc_size, SPDK_BS_MAX_DESC_SIZE);
9288 4 : return -ENOMEM;
9289 : }
9290 :
9291 935 : if (internal) {
9292 724 : xattrs = &blob->xattrs_internal;
9293 724 : blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
9294 : } else {
9295 211 : xattrs = &blob->xattrs;
9296 : }
9297 :
9298 1158 : TAILQ_FOREACH(xattr, xattrs, link) {
9299 332 : if (!strcmp(name, xattr->name)) {
9300 109 : tmp = malloc(value_len);
9301 109 : if (!tmp) {
9302 0 : return -ENOMEM;
9303 : }
9304 :
9305 109 : free(xattr->value);
9306 109 : xattr->value_len = value_len;
9307 109 : xattr->value = tmp;
9308 109 : memcpy(xattr->value, value, value_len);
9309 :
9310 109 : blob->state = SPDK_BLOB_STATE_DIRTY;
9311 :
9312 109 : return 0;
9313 : }
9314 : }
9315 :
9316 826 : xattr = calloc(1, sizeof(*xattr));
9317 826 : if (!xattr) {
9318 0 : return -ENOMEM;
9319 : }
9320 :
9321 826 : xattr->name = strdup(name);
9322 826 : if (!xattr->name) {
9323 0 : free(xattr);
9324 0 : return -ENOMEM;
9325 : }
9326 :
9327 826 : xattr->value_len = value_len;
9328 826 : xattr->value = malloc(value_len);
9329 826 : if (!xattr->value) {
9330 0 : free(xattr->name);
9331 0 : free(xattr);
9332 0 : return -ENOMEM;
9333 : }
9334 826 : memcpy(xattr->value, value, value_len);
9335 826 : TAILQ_INSERT_TAIL(xattrs, xattr, link);
9336 :
9337 826 : blob->state = SPDK_BLOB_STATE_DIRTY;
9338 :
9339 826 : return 0;
9340 : }
9341 :
9342 : int
9343 183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9344 : uint16_t value_len)
9345 : {
9346 183 : return blob_set_xattr(blob, name, value, value_len, false);
9347 : }
9348 :
9349 : static int
9350 404 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
9351 : {
9352 : struct spdk_xattr_tailq *xattrs;
9353 : struct spdk_xattr *xattr;
9354 :
9355 404 : blob_verify_md_op(blob);
9356 :
9357 404 : if (blob->md_ro) {
9358 4 : return -EPERM;
9359 : }
9360 400 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9361 :
9362 412 : TAILQ_FOREACH(xattr, xattrs, link) {
9363 360 : if (!strcmp(name, xattr->name)) {
9364 348 : TAILQ_REMOVE(xattrs, xattr, link);
9365 348 : free(xattr->value);
9366 348 : free(xattr->name);
9367 348 : free(xattr);
9368 :
9369 348 : if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
9370 240 : blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
9371 : }
9372 348 : blob->state = SPDK_BLOB_STATE_DIRTY;
9373 :
9374 348 : return 0;
9375 : }
9376 : }
9377 :
9378 52 : return -ENOENT;
9379 : }
9380 :
9381 : int
9382 36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
9383 : {
9384 36 : return blob_remove_xattr(blob, name, false);
9385 : }
9386 :
9387 : static int
9388 2268 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9389 : const void **value, size_t *value_len, bool internal)
9390 : {
9391 : struct spdk_xattr *xattr;
9392 : struct spdk_xattr_tailq *xattrs;
9393 :
9394 2268 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9395 :
9396 2890 : TAILQ_FOREACH(xattr, xattrs, link) {
9397 1372 : if (!strcmp(name, xattr->name)) {
9398 750 : *value = xattr->value;
9399 750 : *value_len = xattr->value_len;
9400 750 : return 0;
9401 : }
9402 : }
9403 1518 : return -ENOENT;
9404 : }
9405 :
9406 : int
9407 154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9408 : const void **value, size_t *value_len)
9409 : {
9410 154 : blob_verify_md_op(blob);
9411 :
9412 154 : return blob_get_xattr_value(blob, name, value, value_len, false);
9413 : }
9414 :
9415 : struct spdk_xattr_names {
9416 : uint32_t count;
9417 : const char *names[0];
9418 : };
9419 :
9420 : static int
9421 4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
9422 : {
9423 : struct spdk_xattr *xattr;
9424 4 : int count = 0;
9425 :
9426 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9427 8 : count++;
9428 : }
9429 :
9430 4 : *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
9431 4 : if (*names == NULL) {
9432 0 : return -ENOMEM;
9433 : }
9434 :
9435 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9436 8 : (*names)->names[(*names)->count++] = xattr->name;
9437 : }
9438 :
9439 4 : return 0;
9440 : }
9441 :
9442 : int
9443 4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
9444 : {
9445 4 : blob_verify_md_op(blob);
9446 :
9447 4 : return blob_get_xattr_names(&blob->xattrs, names);
9448 : }
9449 :
9450 : uint32_t
9451 4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
9452 : {
9453 4 : assert(names != NULL);
9454 :
9455 4 : return names->count;
9456 : }
9457 :
9458 : const char *
9459 8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
9460 : {
9461 8 : if (index >= names->count) {
9462 0 : return NULL;
9463 : }
9464 :
9465 8 : return names->names[index];
9466 : }
9467 :
9468 : void
9469 4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
9470 : {
9471 4 : free(names);
9472 4 : }
9473 :
9474 : struct spdk_bs_type
9475 2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
9476 : {
9477 2 : return bs->bstype;
9478 : }
9479 :
9480 : void
9481 0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
9482 : {
9483 0 : memcpy(&bs->bstype, &bstype, sizeof(bstype));
9484 0 : }
9485 :
9486 : bool
9487 48 : spdk_blob_is_read_only(struct spdk_blob *blob)
9488 : {
9489 48 : assert(blob != NULL);
9490 48 : return (blob->data_ro || blob->md_ro);
9491 : }
9492 :
9493 : bool
9494 52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
9495 : {
9496 : struct spdk_blob_list *snapshot_entry;
9497 :
9498 52 : assert(blob != NULL);
9499 :
9500 52 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
9501 52 : if (snapshot_entry == NULL) {
9502 28 : return false;
9503 : }
9504 :
9505 24 : return true;
9506 : }
9507 :
9508 : bool
9509 68 : spdk_blob_is_clone(struct spdk_blob *blob)
9510 : {
9511 68 : assert(blob != NULL);
9512 :
9513 68 : if (blob->parent_id != SPDK_BLOBID_INVALID &&
9514 52 : blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
9515 40 : assert(spdk_blob_is_thin_provisioned(blob));
9516 40 : return true;
9517 : }
9518 :
9519 28 : return false;
9520 : }
9521 :
9522 : bool
9523 36462 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
9524 : {
9525 36462 : assert(blob != NULL);
9526 36462 : return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
9527 : }
9528 :
9529 : bool
9530 40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
9531 : {
9532 40888 : return blob_is_esnap_clone(blob);
9533 : }
9534 :
9535 : static void
9536 3426 : blob_update_clear_method(struct spdk_blob *blob)
9537 : {
9538 : enum blob_clear_method stored_cm;
9539 :
9540 3426 : assert(blob != NULL);
9541 :
9542 : /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
9543 : * in metadata previously. If something other than the default was
9544 : * specified, ignore the stored value and use what was passed in.
9545 : */
9546 3426 : stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
9547 :
9548 3426 : if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
9549 3426 : blob->clear_method = stored_cm;
9550 0 : } else if (blob->clear_method != stored_cm) {
9551 0 : SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
9552 : blob->clear_method, stored_cm);
9553 : }
9554 3426 : }
9555 :
9556 : spdk_blob_id
9557 258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
9558 : {
9559 258 : struct spdk_blob_list *snapshot_entry = NULL;
9560 258 : struct spdk_blob_list *clone_entry = NULL;
9561 :
9562 494 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
9563 732 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9564 496 : if (clone_entry->id == blob_id) {
9565 168 : return snapshot_entry->id;
9566 : }
9567 : }
9568 : }
9569 :
9570 90 : return SPDK_BLOBID_INVALID;
9571 : }
9572 :
9573 : int
9574 196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
9575 : size_t *count)
9576 : {
9577 : struct spdk_blob_list *snapshot_entry, *clone_entry;
9578 : size_t n;
9579 :
9580 196 : snapshot_entry = bs_get_snapshot_entry(bs, blobid);
9581 196 : if (snapshot_entry == NULL) {
9582 28 : *count = 0;
9583 28 : return 0;
9584 : }
9585 :
9586 168 : if (ids == NULL || *count < snapshot_entry->clone_count) {
9587 8 : *count = snapshot_entry->clone_count;
9588 8 : return -ENOMEM;
9589 : }
9590 160 : *count = snapshot_entry->clone_count;
9591 :
9592 160 : n = 0;
9593 340 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9594 180 : ids[n++] = clone_entry->id;
9595 : }
9596 :
9597 160 : return 0;
9598 : }
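 :
 : /*
 :  * The usual two-call pattern for spdk_blob_get_clones() (sketch, names
 :  * assumed): probe for the count with ids == NULL, then fetch the ids.
 :  *
 :  *	size_t count = 0;
 :  *	spdk_blob_id *ids;
 :  *
 :  *	if (spdk_blob_get_clones(g_bs, snapshot_id, NULL, &count) == -ENOMEM) {
 :  *		ids = calloc(count, sizeof(*ids));
 :  *		spdk_blob_get_clones(g_bs, snapshot_id, ids, &count);
 :  *	}
 :  */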
9599 :
9600 : static void
9601 4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
9602 : {
9603 : int rc;
9604 :
9605 4 : if (ctx->super->size == 0) {
9606 0 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9607 : }
9608 :
9609 4 : if (ctx->super->io_unit_size == 0) {
9610 0 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
9611 : }
9612 :
9613 : /* Parse the super block */
9614 4 : ctx->bs->clean = 1;
9615 4 : ctx->bs->cluster_sz = ctx->super->cluster_size;
9616 4 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
9617 4 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
9618 4 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
9619 4 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
9620 : }
9621 4 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
9622 4 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
9623 4 : if (rc < 0) {
9624 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9625 0 : return;
9626 : }
9627 4 : ctx->bs->md_start = ctx->super->md_start;
9628 4 : ctx->bs->md_len = ctx->super->md_len;
9629 4 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
9630 4 : if (rc < 0) {
9631 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9632 0 : return;
9633 : }
9634 :
9635 8 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
9636 4 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
9637 4 : ctx->bs->super_blob = ctx->super->super_blob;
9638 4 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
9639 :
9640 4 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
9641 0 : SPDK_ERRLOG("Cannot grow an unclean blobstore; please load it normally to clean it.\n");
9642 0 : bs_load_ctx_fail(ctx, -EIO);
9643 0 : return;
9644 : } else {
9645 4 : bs_load_read_used_pages(ctx);
9646 : }
9647 : }
9648 :
9649 : static void
9650 4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9651 : {
9652 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9653 :
9654 4 : if (bserrno != 0) {
9655 0 : bs_load_ctx_fail(ctx, bserrno);
9656 0 : return;
9657 : }
9658 4 : bs_load_grow_continue(ctx);
9659 : }
9660 :
9661 : static void
9662 4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9663 : {
9664 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9665 :
9666 4 : if (bserrno != 0) {
9667 0 : bs_load_ctx_fail(ctx, bserrno);
9668 0 : return;
9669 : }
9670 :
9671 4 : spdk_free(ctx->mask);
9672 :
9673 4 : bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
9674 4 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
9675 : bs_load_grow_super_write_cpl, ctx);
9676 : }
9677 :
9678 : static void
9679 4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9680 : {
9681 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9682 : uint64_t lba, lba_count;
9683 : uint64_t dev_size;
9684 : uint64_t total_clusters;
9685 :
9686 4 : if (bserrno != 0) {
9687 0 : bs_load_ctx_fail(ctx, bserrno);
9688 0 : return;
9689 : }
9690 :
9691 : /* The type must be correct */
9692 4 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
9693 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
9694 4 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
9695 : struct spdk_blob_md_page) * 8));
9696 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9697 4 : total_clusters = dev_size / ctx->super->cluster_size;
9698 4 : ctx->mask->length = total_clusters;
9699 :
9700 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9701 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9702 4 : bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
9703 : bs_load_grow_used_clusters_write_cpl, ctx);
9704 : }
9705 :
9706 : static void
9707 4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
9708 : {
9709 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9710 : uint64_t lba, lba_count, mask_size;
9711 :
9712 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9713 4 : total_clusters = dev_size / ctx->super->cluster_size;
9714 4 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9715 4 : spdk_divide_round_up(total_clusters, 8),
9716 : SPDK_BS_PAGE_SIZE);
9717 4 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9718 : /* Not necessary to grow, or no space to grow */
9719 4 : if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
9720 0 : SPDK_DEBUGLOG(blob, "No grow\n");
9721 0 : bs_load_grow_continue(ctx);
9722 0 : return;
9723 : }
9724 :
9725 4 : SPDK_DEBUGLOG(blob, "Resize blobstore\n");
9726 :
9727 4 : ctx->super->size = dev_size;
9728 4 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9729 4 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
9730 :
9731 4 : mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
9732 4 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
9733 : SPDK_MALLOC_DMA);
9734 4 : if (!ctx->mask) {
9735 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9736 0 : return;
9737 : }
9738 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9739 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9740 4 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
9741 : bs_load_grow_used_clusters_read_cpl, ctx);
9742 : }
9743 :
9744 : static void
9745 4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9746 : {
9747 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9748 : int rc;
9749 :
9750 4 : rc = bs_super_validate(ctx->super, ctx->bs);
9751 4 : if (rc != 0) {
9752 0 : bs_load_ctx_fail(ctx, rc);
9753 0 : return;
9754 : }
9755 :
9756 4 : bs_load_try_to_grow(ctx);
9757 : }
9758 :
9759 : struct spdk_bs_grow_ctx {
9760 : struct spdk_blob_store *bs;
9761 : struct spdk_bs_super_block *super;
9762 :
9763 : struct spdk_bit_pool *new_used_clusters;
9764 : struct spdk_bs_md_mask *new_used_clusters_mask;
9765 :
9766 : spdk_bs_sequence_t *seq;
9767 : };
9768 :
9769 : static void
9770 32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
9771 : {
9772 32 : if (bserrno != 0) {
9773 8 : spdk_bit_pool_free(&ctx->new_used_clusters);
9774 : }
9775 :
9776 32 : bs_sequence_finish(ctx->seq, bserrno);
9777 32 : free(ctx->new_used_clusters_mask);
9778 32 : spdk_free(ctx->super);
9779 32 : free(ctx);
9780 32 : }
9781 :
9782 : static void
9783 8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9784 : {
9785 8 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9786 8 : struct spdk_blob_store *bs = ctx->bs;
9787 : uint64_t total_clusters;
9788 :
9789 8 : if (bserrno != 0) {
9790 0 : bs_grow_live_done(ctx, bserrno);
9791 0 : return;
9792 : }
9793 :
9794 : /*
9795 : * The blobstore is not clean until unload; for now only the super block is up to date.
9796 : * This is similar to the state right after blobstore init, before bs_write_used_md()
9797 : * has executed.
9798 : * When cleanly unloaded, the used md pages will be written out.
9799 : * In case of an unclean shutdown, loading the blobstore will go through the recovery
9800 : * path, correctly filling out used_clusters with the new size and writing it out.
9801 : */
9802 8 : bs->clean = 0;
9803 :
9804 : /* Reverting the super->size past this point is complex; avoid any error paths
9805 : * that would require doing so. */
9806 8 : spdk_spin_lock(&bs->used_lock);
9807 :
9808 8 : total_clusters = ctx->super->size / ctx->super->cluster_size;
9809 :
9810 8 : assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
9811 8 : spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
9812 :
9813 8 : assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
9814 8 : spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
9815 :
9816 8 : spdk_bit_pool_free(&bs->used_clusters);
9817 8 : bs->used_clusters = ctx->new_used_clusters;
9818 :
9819 8 : bs->total_clusters = total_clusters;
9820 16 : bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
9821 8 : bs->md_start + bs->md_len, bs->pages_per_cluster);
9822 :
9823 8 : bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
9824 8 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
9825 8 : spdk_spin_unlock(&bs->used_lock);
9826 :
9827 8 : bs_grow_live_done(ctx, 0);
9828 : }
9829 :
9830 : static void
9831 32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9832 : {
9833 32 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9834 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9835 : int rc;
9836 :
9837 32 : if (bserrno != 0) {
9838 0 : bs_grow_live_done(ctx, bserrno);
9839 0 : return;
9840 : }
9841 :
9842 32 : rc = bs_super_validate(ctx->super, ctx->bs);
9843 32 : if (rc != 0) {
9844 4 : bs_grow_live_done(ctx, rc);
9845 4 : return;
9846 : }
9847 :
9848 28 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9849 28 : total_clusters = dev_size / ctx->super->cluster_size;
9850 28 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9851 28 : spdk_divide_round_up(total_clusters, 8),
9852 : SPDK_BS_PAGE_SIZE);
9853 28 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9854 : /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
9855 28 : if (dev_size == ctx->super->size) {
9856 16 : SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
9857 16 : bs_grow_live_done(ctx, 0);
9858 16 : return;
9859 : }
9860 : /*
9861 : * Blobstore cannot be shrunk, so fail beforehand if:
9862 : * - the new size of the device is smaller than the size in the super block
9863 : * - the new total number of clusters is smaller than the used_clusters bit pool
9864 : * - there is not enough space in metadata for the used_cluster_mask to be written out
9865 : */
9866 12 : if (dev_size < ctx->super->size ||
9867 12 : total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
9868 : used_cluster_mask_len > max_used_cluster_mask) {
9869 4 : SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
9870 4 : bs_grow_live_done(ctx, -ENOSPC);
9871 4 : return;
9872 : }
9873 :
9874 8 : SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
9875 :
9876 8 : ctx->new_used_clusters_mask = calloc(1, total_clusters);
9877 8 : if (!ctx->new_used_clusters_mask) {
9878 0 : bs_grow_live_done(ctx, -ENOMEM);
9879 0 : return;
9880 : }
9881 8 : ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
9882 8 : if (!ctx->new_used_clusters) {
9883 0 : bs_grow_live_done(ctx, -ENOMEM);
9884 0 : return;
9885 : }
9886 :
9887 8 : ctx->super->clean = 0;
9888 8 : ctx->super->size = dev_size;
9889 8 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9890 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
9891 : }
9892 :
9893 : void
9894 32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
9895 : spdk_bs_op_complete cb_fn, void *cb_arg)
9896 : {
9897 32 : struct spdk_bs_cpl cpl;
9898 : struct spdk_bs_grow_ctx *ctx;
9899 :
9900 32 : assert(spdk_get_thread() == bs->md_thread);
9901 :
9902 32 : SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
9903 :
9904 32 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
9905 32 : cpl.u.bs_basic.cb_fn = cb_fn;
9906 32 : cpl.u.bs_basic.cb_arg = cb_arg;
9907 :
9908 32 : ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
9909 32 : if (!ctx) {
9910 0 : cb_fn(cb_arg, -ENOMEM);
9911 0 : return;
9912 : }
9913 32 : ctx->bs = bs;
9914 :
9915 32 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
9916 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
9917 32 : if (!ctx->super) {
9918 0 : free(ctx);
9919 0 : cb_fn(cb_arg, -ENOMEM);
9920 0 : return;
9921 : }
9922 :
9923 32 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
9924 32 : if (!ctx->seq) {
9925 0 : spdk_free(ctx->super);
9926 0 : free(ctx);
9927 0 : cb_fn(cb_arg, -ENOMEM);
9928 0 : return;
9929 : }
9930 :
9931 : /* Read the super block */
9932 32 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
9933 32 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
9934 : bs_grow_live_load_super_cpl, ctx);
9935 : }
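 :
 : /*
 :  * Hedged usage sketch: growing a loaded blobstore after the underlying device
 :  * was enlarged (e.g. a resized bdev). Must be called from the md thread; the
 :  * callback receives -ENOSPC when the metadata region cannot describe the
 :  * larger cluster count. `grow_cpl` is hypothetical.
 :  *
 :  *	spdk_bs_grow_live(g_bs, grow_cpl, NULL);
 :  */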
9936 :
9937 : void
9938 4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
9939 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
9940 : {
9941 4 : struct spdk_blob_store *bs;
9942 4 : struct spdk_bs_cpl cpl;
9943 4 : struct spdk_bs_load_ctx *ctx;
9944 4 : struct spdk_bs_opts opts = {};
9945 : int err;
9946 :
9947 4 : SPDK_DEBUGLOG(blob, "Loading and growing blobstore from dev %p\n", dev);
9948 :
9949 4 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
9950 0 : SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
9951 0 : dev->destroy(dev);
9952 0 : cb_fn(cb_arg, NULL, -EINVAL);
9953 0 : return;
9954 : }
9955 :
9956 4 : spdk_bs_opts_init(&opts, sizeof(opts));
9957 4 : if (o) {
9958 4 : if (bs_opts_copy(o, &opts)) {
9959 0 : return;
9960 : }
9961 : }
9962 :
9963 4 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
9964 0 : dev->destroy(dev);
9965 0 : cb_fn(cb_arg, NULL, -EINVAL);
9966 0 : return;
9967 : }
9968 :
9969 4 : err = bs_alloc(dev, &opts, &bs, &ctx);
9970 4 : if (err) {
9971 0 : dev->destroy(dev);
9972 0 : cb_fn(cb_arg, NULL, err);
9973 0 : return;
9974 : }
9975 :
9976 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
9977 4 : cpl.u.bs_handle.cb_fn = cb_fn;
9978 4 : cpl.u.bs_handle.cb_arg = cb_arg;
9979 4 : cpl.u.bs_handle.bs = bs;
9980 :
9981 4 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
9982 4 : if (!ctx->seq) {
9983 0 : spdk_free(ctx->super);
9984 0 : free(ctx);
9985 0 : bs_free(bs);
9986 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9987 0 : return;
9988 : }
9989 :
9990 : /* Read the super block */
9991 4 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
9992 4 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
9993 : bs_grow_load_super_cpl, ctx);
9994 : }
9995 :
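/*
 * Hedged usage sketch: loading a blobstore and growing it to fill the device
 * in a single step. Construction of the bs_dev is elided; `my_dev` stands for
 * any struct spdk_bs_dev the caller has created (e.g. via
 * spdk_bdev_create_bs_dev_ext()). Function names here are illustrative.
 */
static void
grow_load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
{
	if (bserrno == 0) {
		/* bs is now sized to the (possibly larger) device. */
	}
}

static void
load_and_grow(struct spdk_bs_dev *my_dev)
{
	struct spdk_bs_opts opts;

	spdk_bs_opts_init(&opts, sizeof(opts));
	spdk_bs_grow(my_dev, &opts, grow_load_done, NULL);
}
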
9996 : int
9997 24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
9998 : {
9999 24 : if (!blob_is_esnap_clone(blob)) {
10000 12 : return -EINVAL;
10001 : }
10002 :
10003 12 : return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
10004 : }
10005 :
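/*
 * Hedged usage sketch: reading back the external snapshot identifier stored
 * as an internal xattr. Based on the accessor above, the returned pointer is
 * assumed to reference blob-owned memory that the caller must not free.
 * log_esnap_id_len() is an illustrative caller-side name.
 */
static void
log_esnap_id_len(struct spdk_blob *blob)
{
	const void *id;
	size_t len;

	if (spdk_blob_get_esnap_id(blob, &id, &len) == 0) {
		SPDK_NOTICELOG("blob 0x%" PRIx64 ": esnap id is %zu bytes\n",
			       spdk_blob_get_id(blob), len);
	}
}
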
10006 : struct spdk_io_channel *
10007 8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
10008 : {
10009 8840 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
10010 8840 : struct spdk_bs_dev *bs_dev = blob->back_bs_dev;
10011 8840 : struct blob_esnap_channel find = {};
10012 : struct blob_esnap_channel *esnap_channel, *existing;
10013 :
10014 8840 : find.blob_id = blob->id;
10015 8840 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10016 8840 : if (spdk_likely(esnap_channel != NULL)) {
10017 8796 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
10018 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10019 8796 : return esnap_channel->channel;
10020 : }
10021 :
10022 44 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
10023 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10024 :
10025 44 : esnap_channel = calloc(1, sizeof(*esnap_channel));
10026 44 : if (esnap_channel == NULL) {
10027 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
10028 : find.blob_id);
10029 0 : return NULL;
10030 : }
10031 44 : esnap_channel->channel = bs_dev->create_channel(bs_dev);
10032 44 : if (esnap_channel->channel == NULL) {
10033 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
10034 0 : free(esnap_channel);
10035 0 : return NULL;
10036 : }
10037 44 : esnap_channel->blob_id = find.blob_id;
10038 44 : existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10039 44 : if (spdk_unlikely(existing != NULL)) {
10040 : /*
10041 : * This should be unreachable: all modifications to this tree happen on this thread.
10042 : */
10043 0 : SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
10044 0 : assert(false);
10045 :
10046 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10047 : free(esnap_channel);
10048 :
10049 : return existing->channel;
10050 : }
10051 :
10052 44 : return esnap_channel->channel;
10053 : }
10054 :
10055 : static int
10056 8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
10057 : {
10058 8816 : return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
10059 : }
10060 :
10061 : struct blob_esnap_destroy_ctx {
10062 : spdk_blob_op_with_handle_complete cb_fn;
10063 : void *cb_arg;
10064 : struct spdk_blob *blob;
10065 : struct spdk_bs_dev *back_bs_dev;
10066 : bool abort_io;
10067 : };
10068 :
10069 : static void
10070 136 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
10071 : {
10072 136 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10073 136 : struct spdk_blob *blob = ctx->blob;
10074 136 : struct spdk_blob_store *bs = blob->bs;
10075 :
10076 136 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
10077 : blob->id);
10078 :
10079 136 : if (ctx->cb_fn != NULL) {
10080 124 : ctx->cb_fn(ctx->cb_arg, blob, status);
10081 : }
10082 136 : free(ctx);
10083 :
10084 136 : bs->esnap_channels_unloading--;
10085 136 : if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
10086 4 : spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
10087 : }
10088 136 : }
10089 :
10090 : static void
10091 144 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
10092 : {
10093 144 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10094 144 : struct spdk_blob *blob = ctx->blob;
10095 144 : struct spdk_bs_dev *bs_dev = ctx->back_bs_dev;
10096 144 : struct spdk_io_channel *channel = spdk_io_channel_iter_get_channel(i);
10097 144 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(channel);
10098 : struct blob_esnap_channel *esnap_channel;
10099 144 : struct blob_esnap_channel find = {};
10100 :
10101 144 : assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
10102 :
10103 144 : find.blob_id = blob->id;
10104 144 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10105 144 : if (esnap_channel != NULL) {
10106 12 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
10107 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10108 12 : RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10109 :
10110 12 : if (ctx->abort_io) {
10111 : spdk_bs_user_op_t *op, *tmp;
10112 :
10113 8 : TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
10114 0 : if (op->back_channel == esnap_channel->channel) {
10115 0 : TAILQ_REMOVE(&bs_channel->queued_io, op, link);
10116 0 : bs_user_op_abort(op, -EIO);
10117 : }
10118 : }
10119 : }
10120 :
10121 12 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10122 12 : free(esnap_channel);
10123 : }
10124 :
10125 144 : spdk_for_each_channel_continue(i, 0);
10126 144 : }
10127 :
10128 : /*
10129 : * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
10130 : * used when closing an esnap clone blob and after decoupling from the parent.
10131 : */
10132 : static void
10133 480 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
10134 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
10135 : {
10136 : struct blob_esnap_destroy_ctx *ctx;
10137 :
10138 480 : if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
10139 344 : if (cb_fn != NULL) {
10140 344 : cb_fn(cb_arg, blob, 0);
10141 : }
10142 344 : return;
10143 : }
10144 :
10145 136 : ctx = calloc(1, sizeof(*ctx));
10146 136 : if (ctx == NULL) {
10147 0 : if (cb_fn != NULL) {
10148 0 : cb_fn(cb_arg, blob, -ENOMEM);
10149 : }
10150 0 : return;
10151 : }
10152 136 : ctx->cb_fn = cb_fn;
10153 136 : ctx->cb_arg = cb_arg;
10154 136 : ctx->blob = blob;
10155 136 : ctx->back_bs_dev = blob->back_bs_dev;
10156 136 : ctx->abort_io = abort_io;
10157 :
10158 136 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
10159 : blob->id);
10160 :
10161 136 : blob->bs->esnap_channels_unloading++;
10162 136 : spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
10163 : blob_esnap_destroy_channels_done);
10164 : }
10165 :
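/*
 * Hedged illustration of the spdk_for_each_channel() idiom relied on above:
 * the per-channel callback runs once on every thread that owns a channel for
 * the io_device, and the completion callback runs only after each of them has
 * called spdk_for_each_channel_continue(). All names below are illustrative.
 */
static void
visit_one_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);

	/* Per-thread teardown work happens here, on the channel's own thread. */
	(void)ch;
	spdk_for_each_channel_continue(i, 0);
}

static void
visit_done(struct spdk_io_channel_iter *i, int status)
{
	/* Runs exactly once, after every visit_one_channel() has continued. */
}

static void
sweep_all_channels(void *io_device, void *ctx)
{
	spdk_for_each_channel(io_device, visit_one_channel, ctx, visit_done);
}
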
10166 : /*
10167 : * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
10168 : * bs_channel is destroyed.
10169 : */
10170 : static void
10171 1025 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
10172 : {
10173 : struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
10174 :
10175 1025 : assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
10176 :
10177 1025 : SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
10178 : spdk_thread_get_name(spdk_get_thread()));
10179 1057 : RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
10180 : esnap_channel_tmp) {
10181 32 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
10182 : ": destroying one channel in thread %s\n",
10183 : esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
10184 32 : RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
10185 32 : spdk_put_io_channel(esnap_channel->channel);
10186 32 : free(esnap_channel);
10187 : }
10188 1025 : SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
10189 : spdk_thread_get_name(spdk_get_thread()));
10190 1025 : }
10191 :
10192 : static void
10193 28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
10194 : {
10195 28 : struct set_bs_dev_ctx *ctx = _ctx;
10196 :
10197 28 : if (bserrno != 0) {
10198 : /* Even though the unfreeze failed, the update may have succeeded. */
10199 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
10200 : bserrno);
10201 : }
10202 28 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
10203 28 : free(ctx);
10204 28 : }
10205 :
10206 : static void
10207 28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
10208 : {
10209 28 : struct set_bs_dev_ctx *ctx = _ctx;
10210 : int rc;
10211 :
10212 28 : if (bserrno != 0) {
10213 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
10214 : blob->id, bserrno);
10215 0 : ctx->bserrno = bserrno;
10216 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10217 0 : return;
10218 : }
10219 :
10220 28 : if (blob->back_bs_dev != NULL) {
10221 28 : blob->back_bs_dev->destroy(blob->back_bs_dev);
10222 28 : blob->back_bs_dev = NULL;
10223 : }
10224 :
10225 28 : if (ctx->parent_refs_cb_fn) {
10226 20 : rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
10227 20 : if (rc != 0) {
10228 0 : ctx->bserrno = rc;
10229 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10230 0 : return;
10231 : }
10232 : }
10233 :
10234 28 : SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
10235 28 : blob->back_bs_dev = ctx->back_bs_dev;
10236 28 : ctx->bserrno = 0;
10237 :
10238 28 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10239 : }
10240 :
10241 : static void
10242 28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
10243 : {
10244 28 : struct set_bs_dev_ctx *ctx = _ctx;
10245 28 : struct spdk_blob *blob = ctx->blob;
10246 :
10247 28 : if (bserrno != 0) {
10248 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
10249 : bserrno);
10250 0 : ctx->cb_fn(ctx->cb_arg, bserrno);
10251 0 : free(ctx);
10252 0 : return;
10253 : }
10254 :
10255 : /*
10256 : * This does not prevent future reads from the esnap device because any future IO will
10257 : * lazily create a new esnap IO channel.
10258 : */
10259 28 : blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
10260 : }
10261 :
10262 : void
10263 8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
10264 : spdk_blob_op_complete cb_fn, void *cb_arg)
10265 : {
10266 8 : if (!blob_is_esnap_clone(blob)) {
10267 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10268 0 : cb_fn(cb_arg, -EINVAL);
10269 0 : return;
10270 : }
10271 :
10272 8 : blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
10273 : }
10274 :
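/*
 * Hedged usage sketch: hot-swapping the external snapshot device of an esnap
 * clone, e.g. after a missing base bdev reappears. `replacement` is assumed
 * to expose the same data as the original external snapshot; the wrapper and
 * callback names are hypothetical.
 */
static void
esnap_swap_done(void *cb_arg, int bserrno)
{
	if (bserrno != 0) {
		SPDK_ERRLOG("esnap hotplug failed: %d\n", bserrno);
	}
}

static void
hotplug_esnap_dev(struct spdk_blob *blob, struct spdk_bs_dev *replacement)
{
	spdk_blob_set_esnap_bs_dev(blob, replacement, esnap_swap_done, NULL);
}
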
10275 : struct spdk_bs_dev *
10276 4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
10277 : {
10278 4 : if (!blob_is_esnap_clone(blob)) {
10279 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10280 0 : return NULL;
10281 : }
10282 :
10283 4 : return blob->back_bs_dev;
10284 : }
10285 :
10286 : bool
10287 28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
10288 : {
10289 28 : if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
10290 4 : return true;
10291 : }
10292 24 : if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
10293 12 : return false;
10294 : }
10295 :
10296 12 : return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
10297 : }
10298 :
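/*
 * Hedged usage sketch: a layer above the blobstore might gate new work on the
 * degraded state before advertising a blob as healthy. blob_is_usable() is an
 * illustrative caller-side helper.
 */
static bool
blob_is_usable(const struct spdk_blob *blob)
{
	/* Degraded means a backing device (base or esnap) is unavailable. */
	return !spdk_blob_is_degraded(blob);
}
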
10299 3 : SPDK_LOG_REGISTER_COMPONENT(blob)
10300 3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)