Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2017 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/blob.h"
10 : #include "spdk/crc32.h"
11 : #include "spdk/env.h"
12 : #include "spdk/queue.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/bit_array.h"
15 : #include "spdk/bit_pool.h"
16 : #include "spdk/likely.h"
17 : #include "spdk/util.h"
18 : #include "spdk/string.h"
19 :
20 : #include "spdk_internal/assert.h"
21 : #include "spdk/log.h"
22 :
23 : #include "blobstore.h"
24 :
25 : #define BLOB_CRC32C_INITIAL 0xffffffffUL
26 :
27 : static int bs_register_md_thread(struct spdk_blob_store *bs);
28 : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
29 : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
30 : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
31 : uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
32 : spdk_blob_op_complete cb_fn, void *cb_arg);
33 : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
34 : uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
35 :
36 : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
37 : uint16_t value_len, bool internal);
38 : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
39 : const void **value, size_t *value_len, bool internal);
40 : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
41 :
42 : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
43 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
44 : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
45 :
46 : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
47 :
48 : /*
49 : * External snapshots require a channel per thread per esnap bdev. The tree
50 : * is populated lazily as blob IOs are handled by the back_bs_dev. When this
51 : * channel is destroyed, all the channels in the tree are destroyed.
52 : */
53 :
54 : struct blob_esnap_channel {
55 : RB_ENTRY(blob_esnap_channel) node;
56 : spdk_blob_id blob_id;
57 : struct spdk_io_channel *channel;
58 : };
59 :
60 : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
61 : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
62 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
63 : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
64 : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
65 10245 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
66 :
67 : static inline bool
68 49582 : blob_is_esnap_clone(const struct spdk_blob *blob)
69 : {
70 49582 : assert(blob != NULL);
71 49582 : return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
72 : }
73 :
74 : static int
75 2289 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
76 : {
77 2289 : assert(blob1 != NULL && blob2 != NULL);
78 2289 : return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
79 : }
80 :
81 14741 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
82 :
83 : static void
84 37031 : blob_verify_md_op(struct spdk_blob *blob)
85 : {
86 37031 : assert(blob != NULL);
87 37031 : assert(spdk_get_thread() == blob->bs->md_thread);
88 37031 : assert(blob->state != SPDK_BLOB_STATE_LOADING);
89 37031 : }
90 :
91 : static struct spdk_blob_list *
92 3828 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
93 : {
94 3828 : struct spdk_blob_list *snapshot_entry = NULL;
95 :
96 4808 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
97 1756 : if (snapshot_entry->id == blobid) {
98 776 : break;
99 : }
100 : }
101 :
102 3828 : return snapshot_entry;
103 : }
104 :
105 : static void
106 2904 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
107 : {
108 2904 : assert(spdk_spin_held(&bs->used_lock));
109 2904 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
110 2904 : assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
111 :
112 2904 : spdk_bit_array_set(bs->used_md_pages, page);
113 2904 : }
114 :
115 : static void
116 2200 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
117 : {
118 2200 : assert(spdk_spin_held(&bs->used_lock));
119 2200 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
120 2200 : assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
121 :
122 2200 : spdk_bit_array_clear(bs->used_md_pages, page);
123 2200 : }
124 :
125 : static uint32_t
126 8220 : bs_claim_cluster(struct spdk_blob_store *bs)
127 : {
128 : uint32_t cluster_num;
129 :
130 8220 : assert(spdk_spin_held(&bs->used_lock));
131 :
132 8220 : cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
133 8220 : if (cluster_num == UINT32_MAX) {
134 0 : return UINT32_MAX;
135 : }
136 :
137 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
138 8220 : bs->num_free_clusters--;
139 :
140 8220 : return cluster_num;
141 : }
142 :
143 : static void
144 2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
145 : {
146 2399 : assert(spdk_spin_held(&bs->used_lock));
147 2399 : assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
148 2399 : assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
149 2399 : assert(bs->num_free_clusters < bs->total_clusters);
150 :
151 2399 : SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
152 :
153 2399 : spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
154 2399 : bs->num_free_clusters++;
155 2399 : }
156 :
157 : static int
158 8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
159 : {
160 8220 : uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
161 :
162 8220 : blob_verify_md_op(blob);
163 :
164 8220 : if (*cluster_lba != 0) {
165 4 : return -EEXIST;
166 : }
167 :
168 8216 : *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
169 8216 : blob->active.num_allocated_clusters++;
170 :
171 8216 : return 0;
172 : }
173 :
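 : /*
 :  * Allocate a cluster for the given blob. The caller must hold
 :  * bs->used_lock. For extent table blobs this may also claim a metadata
 :  * page (never page 0) for a not-yet-allocated extent page; if no md page
 :  * is free, the just-claimed cluster is released and -ENOSPC is returned.
 :  */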
174 : static int
175 8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
176 : uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
177 : {
178 8220 : uint32_t *extent_page = 0;
179 :
180 8220 : assert(spdk_spin_held(&blob->bs->used_lock));
181 :
182 8220 : *cluster = bs_claim_cluster(blob->bs);
183 8220 : if (*cluster == UINT32_MAX) {
184 : /* No more free clusters. Cannot satisfy the request */
185 0 : return -ENOSPC;
186 : }
187 :
188 8220 : if (blob->use_extent_table) {
189 4168 : extent_page = bs_cluster_to_extent_page(blob, cluster_num);
190 4168 : if (*extent_page == 0) {
191 : /* An extent page may never occupy md page 0, so start the search from 1 */
192 728 : if (*lowest_free_md_page == 0) {
193 726 : *lowest_free_md_page = 1;
194 : }
195 : /* No extent_page is allocated for the cluster */
196 728 : *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
197 : *lowest_free_md_page);
198 728 : if (*lowest_free_md_page == UINT32_MAX) {
199 : /* No more free md pages. Cannot satisfy the request */
200 0 : bs_release_cluster(blob->bs, *cluster);
201 0 : return -ENOSPC;
202 : }
203 728 : bs_claim_md_page(blob->bs, *lowest_free_md_page);
204 : }
205 : }
206 :
207 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
208 : blob->id);
209 :
210 8220 : if (update_map) {
211 7404 : blob_insert_cluster(blob, cluster_num, *cluster);
212 7404 : if (blob->use_extent_table && *extent_page == 0) {
213 644 : *extent_page = *lowest_free_md_page;
214 : }
215 : }
216 :
217 8220 : return 0;
218 : }
219 :
220 : static void
221 5582 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
222 : {
223 5582 : xattrs->count = 0;
224 5582 : xattrs->names = NULL;
225 5582 : xattrs->ctx = NULL;
226 5582 : xattrs->get_value = NULL;
227 5582 : }
228 :
229 : void
230 3688 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
231 : {
232 3688 : if (!opts) {
233 0 : SPDK_ERRLOG("opts should not be NULL\n");
234 0 : return;
235 : }
236 :
237 3688 : if (!opts_size) {
238 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
239 0 : return;
240 : }
241 :
242 3688 : memset(opts, 0, opts_size);
243 3688 : opts->opts_size = opts_size;
244 :
245 : #define FIELD_OK(field) \
246 : offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
247 :
248 : #define SET_FIELD(field, value) \
249 : if (FIELD_OK(field)) { \
250 : opts->field = value; \
251 : } \
252 :
253 3688 : SET_FIELD(num_clusters, 0);
254 3688 : SET_FIELD(thin_provision, false);
255 3688 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
256 :
257 3688 : if (FIELD_OK(xattrs)) {
258 3688 : blob_xattrs_init(&opts->xattrs);
259 : }
260 :
261 3688 : SET_FIELD(use_extent_table, true);
262 :
263 : #undef FIELD_OK
264 : #undef SET_FIELD
265 : }
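 : /*
 :  * The FIELD_OK()/SET_FIELD() pattern above keeps this function safe across
 :  * ABI versions: a caller built against an older, smaller struct passes a
 :  * smaller opts_size, and only the fields that fit inside it are touched.
 :  * A minimal usage sketch (hypothetical caller code; 'bs', 'blob_create_cb'
 :  * and 'cb_arg' are assumed to exist in the caller):
 :  *
 :  *	struct spdk_blob_opts opts;
 :  *
 :  *	spdk_blob_opts_init(&opts, sizeof(opts));
 :  *	opts.num_clusters = 8;
 :  *	opts.thin_provision = true;
 :  *	spdk_bs_create_blob_ext(bs, &opts, blob_create_cb, cb_arg);
 :  */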
266 :
267 : void
268 3478 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
269 : {
270 3478 : if (!opts) {
271 0 : SPDK_ERRLOG("opts should not be NULL\n");
272 0 : return;
273 : }
274 :
275 3478 : if (!opts_size) {
276 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
277 0 : return;
278 : }
279 :
280 3478 : memset(opts, 0, opts_size);
281 3478 : opts->opts_size = opts_size;
282 :
283 : #define FIELD_OK(field) \
284 : offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
285 :
286 : #define SET_FIELD(field, value) \
287 : if (FIELD_OK(field)) { \
288 : opts->field = value; \
289 : } \
290 :
291 3478 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
292 :
293 : #undef FIELD_OK
294 : #undef SET_FIELD
295 : }
296 :
297 : static struct spdk_blob *
298 5368 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
299 : {
300 : struct spdk_blob *blob;
301 :
302 5368 : blob = calloc(1, sizeof(*blob));
303 5368 : if (!blob) {
304 0 : return NULL;
305 : }
306 :
307 5368 : blob->id = id;
308 5368 : blob->bs = bs;
309 :
310 5368 : blob->parent_id = SPDK_BLOBID_INVALID;
311 :
312 5368 : blob->state = SPDK_BLOB_STATE_DIRTY;
313 5368 : blob->extent_rle_found = false;
314 5368 : blob->extent_table_found = false;
315 5368 : blob->active.num_pages = 1;
316 5368 : blob->active.pages = calloc(1, sizeof(*blob->active.pages));
317 5368 : if (!blob->active.pages) {
318 0 : free(blob);
319 0 : return NULL;
320 : }
321 :
322 5368 : blob->active.pages[0] = bs_blobid_to_page(id);
323 :
324 5368 : TAILQ_INIT(&blob->xattrs);
325 5368 : TAILQ_INIT(&blob->xattrs_internal);
326 5368 : TAILQ_INIT(&blob->pending_persists);
327 5368 : TAILQ_INIT(&blob->persists_to_complete);
328 :
329 5368 : return blob;
330 : }
331 :
332 : static void
333 10736 : xattrs_free(struct spdk_xattr_tailq *xattrs)
334 : {
335 : struct spdk_xattr *xattr, *xattr_tmp;
336 :
337 12502 : TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
338 1766 : TAILQ_REMOVE(xattrs, xattr, link);
339 1766 : free(xattr->name);
340 1766 : free(xattr->value);
341 1766 : free(xattr);
342 : }
343 10736 : }
344 :
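 : /*
 :  * Drop this blob's reference to its back_bs_dev. Blobs that share a
 :  * back_bs_dev are linked through back_bs_dev_link; the device itself is
 :  * destroyed only when the last linked blob lets go of it.
 :  */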
345 : static void
346 1116 : blob_back_bs_dev_unref(struct spdk_blob *blob)
347 : {
348 1116 : struct spdk_blob **le_prev = blob->back_bs_dev_link.le_prev;
349 1116 : struct spdk_blob *le_next = blob->back_bs_dev_link.le_next;
350 :
351 1116 : if (!le_next && !le_prev) {
352 : /* If this is the last reference to the back_bs_dev, destroy it. */
353 1112 : blob->back_bs_dev->destroy(blob->back_bs_dev);
354 : } else {
355 : /* Remove the reference to back_bs_dev. */
356 4 : if (le_prev) {
357 0 : *le_prev = le_next;
358 : }
359 :
360 4 : if (le_next) {
361 4 : le_next->back_bs_dev_link.le_prev = le_prev;
362 : }
363 : }
364 :
365 1116 : blob->back_bs_dev = NULL;
366 1116 : }
367 :
368 : static void
369 5368 : blob_free(struct spdk_blob *blob)
370 : {
371 5368 : assert(blob != NULL);
372 5368 : assert(TAILQ_EMPTY(&blob->pending_persists));
373 5368 : assert(TAILQ_EMPTY(&blob->persists_to_complete));
374 :
375 5368 : free(blob->active.extent_pages);
376 5368 : free(blob->clean.extent_pages);
377 5368 : free(blob->active.clusters);
378 5368 : free(blob->clean.clusters);
379 5368 : free(blob->active.pages);
380 5368 : free(blob->clean.pages);
381 :
382 5368 : xattrs_free(&blob->xattrs);
383 5368 : xattrs_free(&blob->xattrs_internal);
384 :
385 5368 : if (blob->back_bs_dev) {
386 1088 : blob_back_bs_dev_unref(blob);
387 : }
388 :
389 5368 : free(blob);
390 5368 : }
391 :
392 : static void
393 328 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
394 : {
395 328 : struct spdk_bs_dev *bs_dev = ctx;
396 :
397 328 : if (bserrno != 0) {
398 : /*
399 : * This is probably due to a memory allocation failure when creating the
400 : * blob_esnap_destroy_ctx before iterating threads.
401 : */
402 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
403 : blob->id, bserrno);
404 0 : assert(false);
405 : }
406 :
407 328 : if (bs_dev == NULL) {
408 : /*
409 : * This check exists to make scan-build happy.
410 : *
411 : * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
412 : * the blobstore is being loaded. It could also be NULL if there was an error
413 : * opening the esnap device. In each of these cases, no channels could have been
414 : * created because back_bs_dev->create_channel() would have led to a NULL pointer
415 : * deref.
416 : */
417 0 : assert(false);
418 : return;
419 : }
420 :
421 328 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
422 328 : bs_dev->destroy(bs_dev);
423 : }
424 :
425 : static void
426 328 : blob_back_bs_destroy(struct spdk_blob *blob)
427 : {
428 328 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
429 : blob->id);
430 :
431 328 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
432 328 : blob->back_bs_dev);
433 328 : blob->back_bs_dev = NULL;
434 328 : }
435 :
436 : struct blob_parent {
437 : union {
438 : struct {
439 : spdk_blob_id id;
440 : struct spdk_blob *blob;
441 : } snapshot;
442 :
443 : struct {
444 : void *id;
445 : uint32_t id_len;
446 : struct spdk_bs_dev *back_bs_dev;
447 : } esnap;
448 : } u;
449 : };
450 :
451 : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
452 :
453 : struct set_bs_dev_ctx {
454 : struct spdk_blob *blob;
455 : struct spdk_bs_dev *back_bs_dev;
456 :
457 : /*
458 : * This callback is used during a set parent operation to change the references
459 : * to the parent of the blob.
460 : */
461 : set_parent_refs_cb parent_refs_cb_fn;
462 : struct blob_parent *parent_refs_cb_arg;
463 :
464 : spdk_blob_op_complete cb_fn;
465 : void *cb_arg;
466 : int bserrno;
467 : };
468 :
469 : static void
470 28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
471 : set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
472 : spdk_blob_op_complete cb_fn, void *cb_arg)
473 : {
474 : struct set_bs_dev_ctx *ctx;
475 :
476 28 : ctx = calloc(1, sizeof(*ctx));
477 28 : if (ctx == NULL) {
478 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
479 : blob->id);
480 0 : cb_fn(cb_arg, -ENOMEM);
481 0 : return;
482 : }
483 :
484 28 : ctx->parent_refs_cb_fn = parent_refs_cb_fn;
485 28 : ctx->parent_refs_cb_arg = parent_refs_cb_arg;
486 28 : ctx->cb_fn = cb_fn;
487 28 : ctx->cb_arg = cb_arg;
488 28 : ctx->back_bs_dev = back_bs_dev;
489 28 : ctx->blob = blob;
490 :
491 28 : blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
492 : }
493 :
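 : /*
 :  * Freeze/unfreeze protocol: blob_freeze_io() bumps frozen_refcnt on the
 :  * md thread and then sweeps every channel with spdk_for_each_channel().
 :  * blob_io_sync() does no per-channel work; the sweep acts as a barrier
 :  * guaranteeing all threads have observed the new refcount before the
 :  * completion runs. While frozen, submitted I/O is parked on
 :  * ch->queued_io; blob_unfreeze_io() drops the refcount and replays that
 :  * queue via blob_execute_queued_io().
 :  */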
494 : struct freeze_io_ctx {
495 : struct spdk_bs_cpl cpl;
496 : struct spdk_blob *blob;
497 : };
498 :
499 : static void
500 530 : blob_io_sync(struct spdk_io_channel_iter *i)
501 : {
502 530 : spdk_for_each_channel_continue(i, 0);
503 530 : }
504 :
505 : static void
506 518 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
507 : {
508 518 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
509 518 : struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
510 518 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
511 : struct spdk_bs_request_set *set;
512 : struct spdk_bs_user_op_args *args;
513 : spdk_bs_user_op_t *op, *tmp;
514 :
515 522 : TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
516 4 : set = (struct spdk_bs_request_set *)op;
517 4 : args = &set->u.user_op;
518 :
519 4 : if (args->blob == ctx->blob) {
520 4 : TAILQ_REMOVE(&ch->queued_io, op, link);
521 4 : bs_user_op_execute(op);
522 : }
523 : }
524 :
525 518 : spdk_for_each_channel_continue(i, 0);
526 518 : }
527 :
528 : static void
529 1016 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
530 : {
531 1016 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
532 :
533 1016 : ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
534 :
535 1016 : free(ctx);
536 1016 : }
537 :
538 : static void
539 514 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
540 : {
541 : struct freeze_io_ctx *ctx;
542 :
543 514 : blob_verify_md_op(blob);
544 :
545 514 : ctx = calloc(1, sizeof(*ctx));
546 514 : if (!ctx) {
547 0 : cb_fn(cb_arg, -ENOMEM);
548 0 : return;
549 : }
550 :
551 514 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
552 514 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
553 514 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
554 514 : ctx->blob = blob;
555 :
556 : /* Freeze I/O on blob */
557 514 : blob->frozen_refcnt++;
558 :
559 514 : spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
560 : }
561 :
562 : static void
563 502 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
564 : {
565 : struct freeze_io_ctx *ctx;
566 :
567 502 : blob_verify_md_op(blob);
568 :
569 502 : ctx = calloc(1, sizeof(*ctx));
570 502 : if (!ctx) {
571 0 : cb_fn(cb_arg, -ENOMEM);
572 0 : return;
573 : }
574 :
575 502 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
576 502 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
577 502 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
578 502 : ctx->blob = blob;
579 :
580 502 : assert(blob->frozen_refcnt > 0);
581 :
582 502 : blob->frozen_refcnt--;
583 :
584 502 : spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
585 : }
586 :
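 : /*
 :  * Record the just-persisted 'active' metadata as 'clean'. The active
 :  * arrays are handed over to 'clean' and 'active' receives freshly
 :  * allocated copies, so subsequent in-memory changes cannot disturb the
 :  * clean (on-disk) view.
 :  */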
587 : static int
588 8474 : blob_mark_clean(struct spdk_blob *blob)
589 : {
590 8474 : uint32_t *extent_pages = NULL;
591 8474 : uint64_t *clusters = NULL;
592 8474 : uint32_t *pages = NULL;
593 :
594 8474 : assert(blob != NULL);
595 :
596 8474 : if (blob->active.num_extent_pages) {
597 2859 : assert(blob->active.extent_pages);
598 2859 : extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
599 2859 : if (!extent_pages) {
600 0 : return -ENOMEM;
601 : }
602 2859 : memcpy(extent_pages, blob->active.extent_pages,
603 2859 : blob->active.num_extent_pages * sizeof(*extent_pages));
604 : }
605 :
606 8474 : if (blob->active.num_clusters) {
607 5946 : assert(blob->active.clusters);
608 5946 : clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
609 5946 : if (!clusters) {
610 0 : free(extent_pages);
611 0 : return -ENOMEM;
612 : }
613 5946 : memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
614 : }
615 :
616 8474 : if (blob->active.num_pages) {
617 6986 : assert(blob->active.pages);
618 6986 : pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
619 6986 : if (!pages) {
620 0 : free(extent_pages);
621 0 : free(clusters);
622 0 : return -ENOMEM;
623 : }
624 6986 : memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
625 : }
626 :
627 8474 : free(blob->clean.extent_pages);
628 8474 : free(blob->clean.clusters);
629 8474 : free(blob->clean.pages);
630 :
631 8474 : blob->clean.num_extent_pages = blob->active.num_extent_pages;
632 8474 : blob->clean.extent_pages = blob->active.extent_pages;
633 8474 : blob->clean.num_clusters = blob->active.num_clusters;
634 8474 : blob->clean.clusters = blob->active.clusters;
635 8474 : blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
636 8474 : blob->clean.num_pages = blob->active.num_pages;
637 8474 : blob->clean.pages = blob->active.pages;
638 :
639 8474 : blob->active.extent_pages = extent_pages;
640 8474 : blob->active.clusters = clusters;
641 8474 : blob->active.pages = pages;
642 :
643 : /* If the metadata was dirtied again while the metadata was being written to disk,
644 : * we do not want to revert the DIRTY state back to CLEAN here.
645 : */
646 8474 : if (blob->state == SPDK_BLOB_STATE_LOADING) {
647 3410 : blob->state = SPDK_BLOB_STATE_CLEAN;
648 : }
649 :
650 8474 : return 0;
651 : }
652 :
653 : static int
654 1284 : blob_deserialize_xattr(struct spdk_blob *blob,
655 : struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
656 : {
657 : struct spdk_xattr *xattr;
658 :
659 1284 : if (desc_xattr->length != sizeof(desc_xattr->name_length) +
660 : sizeof(desc_xattr->value_length) +
661 1284 : desc_xattr->name_length + desc_xattr->value_length) {
662 0 : return -EINVAL;
663 : }
664 :
665 1284 : xattr = calloc(1, sizeof(*xattr));
666 1284 : if (xattr == NULL) {
667 0 : return -ENOMEM;
668 : }
669 :
670 1284 : xattr->name = malloc(desc_xattr->name_length + 1);
671 1284 : if (xattr->name == NULL) {
672 0 : free(xattr);
673 0 : return -ENOMEM;
674 : }
675 :
676 1284 : xattr->value = malloc(desc_xattr->value_length);
677 1284 : if (xattr->value == NULL) {
678 0 : free(xattr->name);
679 0 : free(xattr);
680 0 : return -ENOMEM;
681 : }
682 :
683 1284 : memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
684 1284 : xattr->name[desc_xattr->name_length] = '\0';
685 1284 : xattr->value_len = desc_xattr->value_length;
686 1284 : memcpy(xattr->value,
687 1284 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
688 1284 : desc_xattr->value_length);
689 :
690 1284 : TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
691 :
692 1284 : return 0;
693 : }
694 :
695 :
696 : static int
697 4588 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
698 : {
699 : struct spdk_blob_md_descriptor *desc;
700 4588 : size_t cur_desc = 0;
701 : void *tmp;
702 :
703 4588 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
704 13476 : while (cur_desc < sizeof(page->descriptors)) {
705 13476 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
706 4540 : if (desc->length == 0) {
707 : /* If padding and length are 0, this terminates the page */
708 4540 : break;
709 : }
710 8936 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
711 : struct spdk_blob_md_descriptor_flags *desc_flags;
712 :
713 3442 : desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
714 :
715 3442 : if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
716 0 : return -EINVAL;
717 : }
718 :
719 3442 : if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
720 : SPDK_BLOB_INVALID_FLAGS_MASK) {
721 8 : return -EINVAL;
722 : }
723 :
724 3434 : if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
725 : SPDK_BLOB_DATA_RO_FLAGS_MASK) {
726 12 : blob->data_ro = true;
727 12 : blob->md_ro = true;
728 : }
729 :
730 3434 : if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
731 : SPDK_BLOB_MD_RO_FLAGS_MASK) {
732 12 : blob->md_ro = true;
733 : }
734 :
735 3434 : if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
736 566 : blob->data_ro = true;
737 566 : blob->md_ro = true;
738 : }
739 :
740 3434 : blob->invalid_flags = desc_flags->invalid_flags;
741 3434 : blob->data_ro_flags = desc_flags->data_ro_flags;
742 3434 : blob->md_ro_flags = desc_flags->md_ro_flags;
743 :
744 5494 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
745 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
746 : unsigned int i, j;
747 1396 : unsigned int cluster_count = blob->active.num_clusters;
748 :
749 1396 : if (blob->extent_table_found) {
750 : /* An Extent Table descriptor is already present in the md;
751 : * the two descriptor types must never coexist. */
752 0 : return -EINVAL;
753 : }
754 1396 : blob->extent_rle_found = true;
755 :
756 1396 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
757 :
758 1396 : if (desc_extent_rle->length == 0 ||
759 1396 : (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
760 0 : return -EINVAL;
761 : }
762 :
763 2970 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
764 21282 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
765 19708 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
766 6692 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
767 6692 : desc_extent_rle->extents[i].cluster_idx + j)) {
768 0 : return -EINVAL;
769 : }
770 : }
771 19708 : cluster_count++;
772 : }
773 : }
774 :
775 1396 : if (cluster_count == 0) {
776 0 : return -EINVAL;
777 : }
778 1396 : tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
779 1396 : if (tmp == NULL) {
780 0 : return -ENOMEM;
781 : }
782 1396 : blob->active.clusters = tmp;
783 1396 : blob->active.cluster_array_size = cluster_count;
784 :
785 2970 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
786 21282 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
787 19708 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
788 13384 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
789 6692 : desc_extent_rle->extents[i].cluster_idx + j);
790 6692 : blob->active.num_allocated_clusters++;
791 13016 : } else if (spdk_blob_is_thin_provisioned(blob)) {
792 13016 : blob->active.clusters[blob->active.num_clusters++] = 0;
793 : } else {
794 0 : return -EINVAL;
795 : }
796 : }
797 : }
798 4098 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
799 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
800 1768 : uint32_t num_extent_pages = blob->active.num_extent_pages;
801 : uint32_t i, j;
802 : size_t extent_pages_length;
803 :
804 1768 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
805 1768 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
806 :
807 1768 : if (blob->extent_rle_found) {
808 : /* Extent RLE descriptors are already present in the md;
809 : * the two descriptor types must never coexist. */
810 0 : return -EINVAL;
811 1768 : } else if (blob->extent_table_found &&
812 0 : desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
813 : /* The number of clusters in this ET does not match the number
814 : * from a previously read EXTENT_TABLE descriptor. */
815 0 : return -EINVAL;
816 : }
817 :
818 1768 : if (desc_extent_table->length == 0 ||
819 1768 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
820 0 : return -EINVAL;
821 : }
822 :
823 1768 : blob->extent_table_found = true;
824 :
825 3246 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
826 1478 : num_extent_pages += desc_extent_table->extent_page[i].num_pages;
827 : }
828 :
829 1768 : if (num_extent_pages > 0) {
830 1462 : tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
831 1462 : if (tmp == NULL) {
832 0 : return -ENOMEM;
833 : }
834 1462 : blob->active.extent_pages = tmp;
835 : }
836 1768 : blob->active.extent_pages_array_size = num_extent_pages;
837 :
838 1768 : blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
839 :
840 : /* Extent table entries contain md page numbers for extent pages.
841 : * Zeroes represent unallocated extent pages; those are run-length-encoded.
842 : */
843 3246 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
844 1478 : if (desc_extent_table->extent_page[i].page_idx != 0) {
845 1052 : assert(desc_extent_table->extent_page[i].num_pages == 1);
846 1052 : blob->active.extent_pages[blob->active.num_extent_pages++] =
847 1052 : desc_extent_table->extent_page[i].page_idx;
848 426 : } else if (spdk_blob_is_thin_provisioned(blob)) {
849 852 : for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
850 426 : blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
851 : }
852 : } else {
853 0 : return -EINVAL;
854 : }
855 : }
856 2330 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
857 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
858 : unsigned int i;
859 1046 : unsigned int cluster_count = 0;
860 : size_t cluster_idx_length;
861 :
862 1046 : if (blob->extent_rle_found) {
863 : /* Extent RLE descriptors are already present in the md;
864 : * the two descriptor types must never coexist. */
865 0 : return -EINVAL;
866 : }
867 :
868 1046 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
869 1046 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
870 :
871 1046 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
872 1046 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
873 0 : return -EINVAL;
874 : }
875 :
876 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
877 15298 : if (desc_extent->cluster_idx[i] != 0) {
878 6962 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
879 0 : return -EINVAL;
880 : }
881 : }
882 15298 : cluster_count++;
883 : }
884 :
885 1046 : if (cluster_count == 0) {
886 0 : return -EINVAL;
887 : }
888 :
889 : /* When reading extent pages sequentially, the starting cluster idx should
890 : * match the current size of the blob.
891 : * If this is ever changed to batch reading, this check must be removed. */
892 1046 : if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
893 0 : return -EINVAL;
894 : }
895 :
896 1046 : tmp = realloc(blob->active.clusters,
897 1046 : (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
898 1046 : if (tmp == NULL) {
899 0 : return -ENOMEM;
900 : }
901 1046 : blob->active.clusters = tmp;
902 1046 : blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
903 :
904 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
905 15298 : if (desc_extent->cluster_idx[i] != 0) {
906 6962 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
907 : desc_extent->cluster_idx[i]);
908 6962 : blob->active.num_allocated_clusters++;
909 8336 : } else if (spdk_blob_is_thin_provisioned(blob)) {
910 8336 : blob->active.clusters[blob->active.num_clusters++] = 0;
911 : } else {
912 0 : return -EINVAL;
913 : }
914 : }
915 1046 : assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
916 1046 : assert(blob->remaining_clusters_in_et >= cluster_count);
917 1046 : blob->remaining_clusters_in_et -= cluster_count;
918 1284 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
919 : int rc;
920 :
921 394 : rc = blob_deserialize_xattr(blob,
922 : (struct spdk_blob_md_descriptor_xattr *) desc, false);
923 394 : if (rc != 0) {
924 0 : return rc;
925 : }
926 890 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
927 : int rc;
928 :
929 890 : rc = blob_deserialize_xattr(blob,
930 : (struct spdk_blob_md_descriptor_xattr *) desc, true);
931 890 : if (rc != 0) {
932 0 : return rc;
933 : }
934 : } else {
935 : /* Unrecognized descriptor type. Do not fail - just continue to the
936 : * next descriptor. If this descriptor is associated with some feature
937 : * defined in a newer version of blobstore, that version of blobstore
938 : * should create and set an associated feature flag to specify if this
939 : * blob can be loaded or not.
940 : */
941 : }
942 :
943 : /* Advance to the next descriptor */
944 8928 : cur_desc += sizeof(*desc) + desc->length;
945 8928 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
946 40 : break;
947 : }
948 8888 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
949 : }
950 :
951 4580 : return 0;
952 : }
953 :
954 : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
955 :
956 : static int
957 1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
958 : {
959 1046 : assert(blob != NULL);
960 1046 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
961 :
962 1046 : if (bs_load_cur_extent_page_valid(extent_page) == false) {
963 0 : return -ENOENT;
964 : }
965 :
966 1046 : return blob_parse_page(extent_page, blob);
967 : }
968 :
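 : /*
 :  * Parse a chain of metadata pages into the in-memory blob. pages[0] must
 :  * be the blob's first page (sequence_num == 0); subsequent pages are
 :  * located via each page's 'next' link and recorded in active.pages.
 :  */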
969 : static int
970 3446 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
971 : struct spdk_blob *blob)
972 : {
973 : const struct spdk_blob_md_page *page;
974 : uint32_t i;
975 : int rc;
976 : void *tmp;
977 :
978 3446 : assert(page_count > 0);
979 3446 : assert(pages[0].sequence_num == 0);
980 3446 : assert(blob != NULL);
981 3446 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
982 3446 : assert(blob->active.clusters == NULL);
983 :
984 : /* The blobid provided doesn't match what's in the MD; this can
985 : * happen, for example, if a bogus blobid is passed in through open.
986 : */
987 3446 : if (blob->id != pages[0].id) {
988 4 : SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
989 : "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
990 4 : return -ENOENT;
991 : }
992 :
993 3442 : tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
994 3442 : if (!tmp) {
995 0 : return -ENOMEM;
996 : }
997 3442 : blob->active.pages = tmp;
998 :
999 3442 : blob->active.pages[0] = pages[0].id;
1000 :
1001 3542 : for (i = 1; i < page_count; i++) {
1002 100 : assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
1003 100 : blob->active.pages[i] = pages[i - 1].next;
1004 : }
1005 3442 : blob->active.num_pages = page_count;
1006 :
1007 6976 : for (i = 0; i < page_count; i++) {
1008 3542 : page = &pages[i];
1009 :
1010 3542 : assert(page->id == blob->id);
1011 3542 : assert(page->sequence_num == i);
1012 :
1013 3542 : rc = blob_parse_page(page, blob);
1014 3542 : if (rc != 0) {
1015 8 : return rc;
1016 : }
1017 : }
1018 :
1019 3434 : return 0;
1020 : }
1021 :
1022 : static int
1023 4370 : blob_serialize_add_page(const struct spdk_blob *blob,
1024 : struct spdk_blob_md_page **pages,
1025 : uint32_t *page_count,
1026 : struct spdk_blob_md_page **last_page)
1027 : {
1028 : struct spdk_blob_md_page *page, *tmp_pages;
1029 :
1030 4370 : assert(pages != NULL);
1031 4370 : assert(page_count != NULL);
1032 :
1033 4370 : *last_page = NULL;
1034 4370 : if (*page_count == 0) {
1035 4282 : assert(*pages == NULL);
1036 4282 : *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
1037 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1038 4282 : if (*pages == NULL) {
1039 0 : return -ENOMEM;
1040 : }
1041 4282 : *page_count = 1;
1042 : } else {
1043 88 : assert(*pages != NULL);
1044 88 : tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
1045 88 : if (tmp_pages == NULL) {
1046 0 : return -ENOMEM;
1047 : }
1048 88 : (*page_count)++;
1049 88 : *pages = tmp_pages;
1050 : }
1051 :
1052 4370 : page = &(*pages)[*page_count - 1];
1053 4370 : memset(page, 0, sizeof(*page));
1054 4370 : page->id = blob->id;
1055 4370 : page->sequence_num = *page_count - 1;
1056 4370 : page->next = SPDK_INVALID_MD_PAGE;
1057 4370 : *last_page = page;
1058 :
1059 4370 : return 0;
1060 : }
1061 :
1062 : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
1063 : * Update required_sz on both success and failure.
1064 : *
1065 : */
1066 : static int
1067 1795 : blob_serialize_xattr(const struct spdk_xattr *xattr,
1068 : uint8_t *buf, size_t buf_sz,
1069 : size_t *required_sz, bool internal)
1070 : {
1071 : struct spdk_blob_md_descriptor_xattr *desc;
1072 :
1073 1795 : *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
1074 1795 : strlen(xattr->name) +
1075 1795 : xattr->value_len;
1076 :
1077 1795 : if (buf_sz < *required_sz) {
1078 48 : return -1;
1079 : }
1080 :
1081 1747 : desc = (struct spdk_blob_md_descriptor_xattr *)buf;
1082 :
1083 1747 : desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
1084 1747 : desc->length = sizeof(desc->name_length) +
1085 : sizeof(desc->value_length) +
1086 1747 : strlen(xattr->name) +
1087 1747 : xattr->value_len;
1088 1747 : desc->name_length = strlen(xattr->name);
1089 1747 : desc->value_length = xattr->value_len;
1090 :
1091 1747 : memcpy(desc->name, xattr->name, desc->name_length);
1092 1747 : memcpy((void *)((uintptr_t)desc->name + desc->name_length),
1093 1747 : xattr->value,
1094 1747 : desc->value_length);
1095 :
1096 1747 : return 0;
1097 : }
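 : /*
 :  * Resulting on-disk layout of a serialized xattr descriptor (field sizes
 :  * per struct spdk_blob_md_descriptor_xattr in blobstore.h):
 :  *
 :  *	[type][length][name_length][value_length][name bytes][value bytes]
 :  *
 :  * 'length' counts everything after the common descriptor header, i.e. the
 :  * name_length and value_length fields plus the name and value payloads.
 :  */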
1098 :
1099 : static void
1100 1695 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
1101 : uint64_t start_ep, uint64_t *next_ep,
1102 : uint8_t **buf, size_t *remaining_sz)
1103 : {
1104 : struct spdk_blob_md_descriptor_extent_table *desc;
1105 : size_t cur_sz;
1106 : uint64_t i, et_idx;
1107 : uint32_t extent_page, ep_len;
1108 :
1109 : /* The buffer must have room for at least the num_clusters entry */
1110 1695 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
1111 1695 : if (*remaining_sz < cur_sz) {
1112 20 : *next_ep = start_ep;
1113 20 : return;
1114 : }
1115 :
1116 1675 : desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
1117 1675 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
1118 :
1119 1675 : desc->num_clusters = blob->active.num_clusters;
1120 :
1121 1675 : ep_len = 1;
1122 1675 : et_idx = 0;
1123 4256 : for (i = start_ep; i < blob->active.num_extent_pages; i++) {
1124 2581 : if (*remaining_sz < cur_sz + sizeof(desc->extent_page[0])) {
1125 : /* If we ran out of buffer space, return */
1126 0 : break;
1127 : }
1128 :
1129 2581 : extent_page = blob->active.extent_pages[i];
1130 : /* Verify that next extent_page is unallocated */
1131 2581 : if (extent_page == 0 &&
1132 1528 : (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
1133 1078 : ep_len++;
1134 1078 : continue;
1135 : }
1136 1503 : desc->extent_page[et_idx].page_idx = extent_page;
1137 1503 : desc->extent_page[et_idx].num_pages = ep_len;
1138 1503 : et_idx++;
1139 :
1140 1503 : ep_len = 1;
1141 1503 : cur_sz += sizeof(desc->extent_page[et_idx]);
1142 : }
1143 1675 : *next_ep = i;
1144 :
1145 1675 : desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
1146 1675 : *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
1147 1675 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
1148 : }
1149 :
1150 : static int
1151 1677 : blob_serialize_extent_table(const struct spdk_blob *blob,
1152 : struct spdk_blob_md_page **pages,
1153 : struct spdk_blob_md_page *cur_page,
1154 : uint32_t *page_count, uint8_t **buf,
1155 : size_t *remaining_sz)
1156 : {
1157 1677 : uint64_t last_extent_page;
1158 : int rc;
1159 :
1160 1677 : last_extent_page = 0;
1161 : /* At least a single extent table entry always has to be persisted.
1162 : * This case occurs when num_extent_pages == 0. */
1163 1695 : while (last_extent_page <= blob->active.num_extent_pages) {
1164 1695 : blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
1165 : remaining_sz);
1166 :
1167 1695 : if (last_extent_page == blob->active.num_extent_pages) {
1168 1677 : break;
1169 : }
1170 :
1171 18 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1172 18 : if (rc < 0) {
1173 0 : return rc;
1174 : }
1175 :
1176 18 : *buf = (uint8_t *)cur_page->descriptors;
1177 18 : *remaining_sz = sizeof(cur_page->descriptors);
1178 : }
1179 :
1180 1677 : return 0;
1181 : }
1182 :
1183 : static void
1184 1747 : blob_serialize_extent_rle(const struct spdk_blob *blob,
1185 : uint64_t start_cluster, uint64_t *next_cluster,
1186 : uint8_t **buf, size_t *buf_sz)
1187 : {
1188 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
1189 : size_t cur_sz;
1190 : uint64_t i, extent_idx;
1191 : uint64_t lba, lba_per_cluster, lba_count;
1192 :
1193 : /* The buffer must have room for at least one extent */
1194 1747 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
1195 1747 : if (*buf_sz < cur_sz) {
1196 18 : *next_cluster = start_cluster;
1197 18 : return;
1198 : }
1199 :
1200 1729 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
1201 1729 : desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
1202 :
1203 1729 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1204 : /* Assert for scan-build false positive */
1205 1729 : assert(lba_per_cluster > 0);
1206 :
1207 1729 : lba = blob->active.clusters[start_cluster];
1208 1729 : lba_count = lba_per_cluster;
1209 1729 : extent_idx = 0;
1210 810550 : for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
1211 808825 : if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
1212 : /* Run-length encode sequential non-zero LBA */
1213 7276 : lba_count += lba_per_cluster;
1214 7276 : continue;
1215 801549 : } else if (lba == 0 && blob->active.clusters[i] == 0) {
1216 : /* Run-length encode unallocated clusters */
1217 800356 : lba_count += lba_per_cluster;
1218 800356 : continue;
1219 : }
1220 1193 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1221 1193 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1222 1193 : extent_idx++;
1223 :
1224 1193 : cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
1225 :
1226 1193 : if (*buf_sz < cur_sz) {
1227 : /* If we ran out of buffer space, return */
1228 4 : *next_cluster = i;
1229 4 : break;
1230 : }
1231 :
1232 1189 : lba = blob->active.clusters[i];
1233 1189 : lba_count = lba_per_cluster;
1234 : }
1235 :
1236 1729 : if (*buf_sz >= cur_sz) {
1237 1725 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1238 1725 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1239 1725 : extent_idx++;
1240 :
1241 1725 : *next_cluster = blob->active.num_clusters;
1242 : }
1243 :
1244 1729 : desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
1245 1729 : *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1246 1729 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1247 : }
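 : /*
 :  * Worked example (illustrative numbers, assuming lba_per_cluster == 8):
 :  * with active.clusters == { 8, 16, 0, 0, 40 }, the loop above emits three
 :  * extents: { cluster_idx = 1, length = 2 } for the two sequential
 :  * allocated clusters, { cluster_idx = 0, length = 2 } for the
 :  * unallocated run, and { cluster_idx = 5, length = 1 } for LBA 40.
 :  */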
1248 :
1249 : static int
1250 1939 : blob_serialize_extents_rle(const struct spdk_blob *blob,
1251 : struct spdk_blob_md_page **pages,
1252 : struct spdk_blob_md_page *cur_page,
1253 : uint32_t *page_count, uint8_t **buf,
1254 : size_t *remaining_sz)
1255 : {
1256 1939 : uint64_t last_cluster;
1257 : int rc;
1258 :
1259 1939 : last_cluster = 0;
1260 1961 : while (last_cluster < blob->active.num_clusters) {
1261 1747 : blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
1262 :
1263 1747 : if (last_cluster == blob->active.num_clusters) {
1264 1725 : break;
1265 : }
1266 :
1267 22 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1268 22 : if (rc < 0) {
1269 0 : return rc;
1270 : }
1271 :
1272 22 : *buf = (uint8_t *)cur_page->descriptors;
1273 22 : *remaining_sz = sizeof(cur_page->descriptors);
1274 : }
1275 :
1276 1939 : return 0;
1277 : }
1278 :
1279 : static void
1280 1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
1281 : uint64_t cluster, struct spdk_blob_md_page *page)
1282 : {
1283 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
1284 : uint64_t i, extent_idx;
1285 : uint64_t lba, lba_per_cluster;
1286 1100 : uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
1287 :
1288 1100 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
1289 1100 : desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
1290 :
1291 1100 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1292 :
1293 1100 : desc_extent->start_cluster_idx = start_cluster_idx;
1294 1100 : extent_idx = 0;
1295 42406 : for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
1296 41372 : lba = blob->active.clusters[i];
1297 41372 : desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
1298 41372 : if (extent_idx >= SPDK_EXTENTS_PER_EP) {
1299 66 : break;
1300 : }
1301 : }
1302 1100 : desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
1303 : sizeof(desc_extent->cluster_idx[0]) * extent_idx;
1304 1100 : }
1305 :
1306 : static void
1307 3616 : blob_serialize_flags(const struct spdk_blob *blob,
1308 : uint8_t *buf, size_t *buf_sz)
1309 : {
1310 : struct spdk_blob_md_descriptor_flags *desc;
1311 :
1312 : /*
1313 : * Flags get serialized first, so we should always have room for the flags
1314 : * descriptor.
1315 : */
1316 3616 : assert(*buf_sz >= sizeof(*desc));
1317 :
1318 3616 : desc = (struct spdk_blob_md_descriptor_flags *)buf;
1319 3616 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
1320 3616 : desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
1321 3616 : desc->invalid_flags = blob->invalid_flags;
1322 3616 : desc->data_ro_flags = blob->data_ro_flags;
1323 3616 : desc->md_ro_flags = blob->md_ro_flags;
1324 :
1325 3616 : *buf_sz -= sizeof(*desc);
1326 3616 : }
1327 :
1328 : static int
1329 7232 : blob_serialize_xattrs(const struct spdk_blob *blob,
1330 : const struct spdk_xattr_tailq *xattrs, bool internal,
1331 : struct spdk_blob_md_page **pages,
1332 : struct spdk_blob_md_page *cur_page,
1333 : uint32_t *page_count, uint8_t **buf,
1334 : size_t *remaining_sz)
1335 : {
1336 : const struct spdk_xattr *xattr;
1337 : int rc;
1338 :
1339 8979 : TAILQ_FOREACH(xattr, xattrs, link) {
1340 1747 : size_t required_sz = 0;
1341 :
1342 1747 : rc = blob_serialize_xattr(xattr,
1343 : *buf, *remaining_sz,
1344 : &required_sz, internal);
1345 1747 : if (rc < 0) {
1346 : /* Need to add a new page to the chain */
1347 48 : rc = blob_serialize_add_page(blob, pages, page_count,
1348 : &cur_page);
1349 48 : if (rc < 0) {
1350 0 : spdk_free(*pages);
1351 0 : *pages = NULL;
1352 0 : *page_count = 0;
1353 0 : return rc;
1354 : }
1355 :
1356 48 : *buf = (uint8_t *)cur_page->descriptors;
1357 48 : *remaining_sz = sizeof(cur_page->descriptors);
1358 :
1359 : /* Try again */
1360 48 : required_sz = 0;
1361 48 : rc = blob_serialize_xattr(xattr,
1362 : *buf, *remaining_sz,
1363 : &required_sz, internal);
1364 :
1365 48 : if (rc < 0) {
1366 0 : spdk_free(*pages);
1367 0 : *pages = NULL;
1368 0 : *page_count = 0;
1369 0 : return rc;
1370 : }
1371 : }
1372 :
1373 1747 : *remaining_sz -= required_sz;
1374 1747 : *buf += required_sz;
1375 : }
1376 :
1377 7232 : return 0;
1378 : }
1379 :
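 : /*
 :  * Serialize the blob's in-memory metadata into one or more md pages.
 :  * Descriptor order within the chain: flags first (fixed size, always
 :  * fits), then user xattrs, then internal xattrs, and finally either the
 :  * extent table or extent RLE descriptors; extra pages are chained on as
 :  * space runs out.
 :  */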
1380 : static int
1381 3616 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
1382 : uint32_t *page_count)
1383 : {
1384 3616 : struct spdk_blob_md_page *cur_page;
1385 : int rc;
1386 3616 : uint8_t *buf;
1387 3616 : size_t remaining_sz;
1388 :
1389 3616 : assert(pages != NULL);
1390 3616 : assert(page_count != NULL);
1391 3616 : assert(blob != NULL);
1392 3616 : assert(blob->state == SPDK_BLOB_STATE_DIRTY);
1393 :
1394 3616 : *pages = NULL;
1395 3616 : *page_count = 0;
1396 :
1397 : /* A blob always has at least 1 page, even if it has no descriptors */
1398 3616 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1399 3616 : if (rc < 0) {
1400 0 : return rc;
1401 : }
1402 :
1403 3616 : buf = (uint8_t *)cur_page->descriptors;
1404 3616 : remaining_sz = sizeof(cur_page->descriptors);
1405 :
1406 : /* Serialize flags */
1407 3616 : blob_serialize_flags(blob, buf, &remaining_sz);
1408 3616 : buf += sizeof(struct spdk_blob_md_descriptor_flags);
1409 :
1410 : /* Serialize xattrs */
1411 3616 : rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
1412 : pages, cur_page, page_count, &buf, &remaining_sz);
1413 3616 : if (rc < 0) {
1414 0 : return rc;
1415 : }
1416 :
1417 : /* Serialize internal xattrs */
1418 3616 : rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
1419 : pages, cur_page, page_count, &buf, &remaining_sz);
1420 3616 : if (rc < 0) {
1421 0 : return rc;
1422 : }
1423 :
1424 3616 : if (blob->use_extent_table) {
1425 : /* Serialize extent table */
1426 1677 : rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1427 : } else {
1428 : /* Serialize extents */
1429 1939 : rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1430 : }
1431 :
1432 3616 : return rc;
1433 : }
1434 :
1435 : struct spdk_blob_load_ctx {
1436 : struct spdk_blob *blob;
1437 :
1438 : struct spdk_blob_md_page *pages;
1439 : uint32_t num_pages;
1440 : uint32_t next_extent_page;
1441 : spdk_bs_sequence_t *seq;
1442 :
1443 : spdk_bs_sequence_cpl cb_fn;
1444 : void *cb_arg;
1445 : };
1446 :
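 : /*
 :  * The CRC of a metadata page covers the full page except its last 4
 :  * bytes, which hold the stored CRC itself. The 0xffffffff pre- and
 :  * post-XOR matches the standard CRC-32C presentation.
 :  */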
1447 : static uint32_t
1448 19958 : blob_md_page_calc_crc(void *page)
1449 : {
1450 : uint32_t crc;
1451 :
1452 19958 : crc = BLOB_CRC32C_INITIAL;
1453 19958 : crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
1454 19958 : crc ^= BLOB_CRC32C_INITIAL;
1455 :
1456 19958 : return crc;
1457 :
1458 : }
1459 :
1460 : static void
1461 3474 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
1462 : {
1463 3474 : struct spdk_blob *blob = ctx->blob;
1464 :
1465 3474 : if (bserrno == 0) {
1466 3410 : blob_mark_clean(blob);
1467 : }
1468 :
1469 3474 : ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
1470 :
1471 : /* Free the memory */
1472 3474 : spdk_free(ctx->pages);
1473 3474 : free(ctx);
1474 3474 : }
1475 :
1476 : static void
1477 454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
1478 : {
1479 454 : struct spdk_blob_load_ctx *ctx = cb_arg;
1480 454 : struct spdk_blob *blob = ctx->blob;
1481 :
1482 454 : if (bserrno == 0) {
1483 448 : blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
1484 448 : if (blob->back_bs_dev == NULL) {
1485 0 : bserrno = -ENOMEM;
1486 : }
1487 : }
1488 454 : if (bserrno != 0) {
1489 6 : SPDK_ERRLOG("Snapshot fail\n");
1490 : }
1491 :
1492 454 : blob_load_final(ctx, bserrno);
1493 454 : }
1494 :
1495 : static void blob_update_clear_method(struct spdk_blob *blob);
1496 :
1497 : static int
1498 124 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
1499 : {
1500 124 : struct spdk_blob_store *bs = blob->bs;
1501 124 : struct spdk_bs_dev *bs_dev = NULL;
1502 124 : const void *esnap_id = NULL;
1503 124 : size_t id_len = 0;
1504 : int rc;
1505 :
1506 124 : if (bs->esnap_bs_dev_create == NULL) {
1507 8 : SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
1508 : "without support for esnap clones\n", blob->id);
1509 8 : return -ENOTSUP;
1510 : }
1511 116 : assert(blob->back_bs_dev == NULL);
1512 :
1513 116 : rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
1514 116 : if (rc != 0) {
1515 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
1516 0 : return -EINVAL;
1517 : }
1518 116 : assert(id_len > 0 && id_len < UINT32_MAX);
1519 :
1520 116 : SPDK_INFOLOG(blob, "Creating external snapshot device\n");
1521 :
1522 116 : rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
1523 : &bs_dev);
1524 116 : if (rc != 0) {
1525 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
1526 : "with error %d\n", blob->id, rc);
1527 0 : return rc;
1528 : }
1529 :
1530 : /*
1531 : * Note: bs_dev might be NULL if the consumer chose not to open the external snapshot.
1532 : * This can happen, in particular, during spdk_bs_load() iteration.
1533 : */
1534 116 : if (bs_dev != NULL) {
1535 116 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
1536 116 : if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
1537 4 : SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
1538 : "is not compatible with blobstore block size %u\n",
1539 : blob->id, bs_dev->blocklen, bs->io_unit_size);
1540 4 : bs_dev->destroy(bs_dev);
1541 4 : return -EINVAL;
1542 : }
1543 : }
1544 :
1545 112 : blob->back_bs_dev = bs_dev;
1546 112 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
1547 :
1548 112 : return 0;
1549 : }
1550 :
1551 : static void
1552 3428 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
1553 : {
1554 3428 : struct spdk_blob_load_ctx *ctx = cb_arg;
1555 3428 : struct spdk_blob *blob = ctx->blob;
1556 3428 : const void *value;
1557 3428 : size_t len;
1558 : int rc;
1559 :
1560 3428 : if (blob_is_esnap_clone(blob)) {
1561 124 : rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
1562 124 : blob_load_final(ctx, rc);
1563 124 : return;
1564 : }
1565 :
1566 3304 : if (spdk_blob_is_thin_provisioned(blob)) {
1567 1034 : rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
1568 1034 : if (rc == 0) {
1569 454 : if (len != sizeof(spdk_blob_id)) {
1570 0 : blob_load_final(ctx, -EINVAL);
1571 0 : return;
1572 : }
1573 : /* open snapshot blob and continue in the callback function */
1574 454 : blob->parent_id = *(spdk_blob_id *)value;
1575 454 : spdk_bs_open_blob(blob->bs, blob->parent_id,
1576 : blob_load_snapshot_cpl, ctx);
1577 454 : return;
1578 : } else {
1579 : /* add zeroes_dev for thin provisioned blob */
1580 580 : blob->back_bs_dev = bs_create_zeroes_dev();
1581 : }
1582 : } else {
1583 : /* standard blob */
1584 2270 : blob->back_bs_dev = NULL;
1585 : }
1586 2850 : blob_load_final(ctx, 0);
1587 : }
1588 :
1589 : static void
1590 2820 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1591 : {
1592 2820 : struct spdk_blob_load_ctx *ctx = cb_arg;
1593 2820 : struct spdk_blob *blob = ctx->blob;
1594 : struct spdk_blob_md_page *page;
1595 : uint64_t i;
1596 : uint32_t crc;
1597 : uint64_t lba;
1598 : void *tmp;
1599 : uint64_t sz;
1600 :
1601 2820 : if (bserrno) {
1602 6 : SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
1603 6 : blob_load_final(ctx, bserrno);
1604 6 : return;
1605 : }
1606 :
1607 2814 : if (ctx->pages == NULL) {
1608 : /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
1609 1768 : ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
1610 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1611 1768 : if (!ctx->pages) {
1612 0 : blob_load_final(ctx, -ENOMEM);
1613 0 : return;
1614 : }
1615 1768 : ctx->num_pages = 1;
1616 1768 : ctx->next_extent_page = 0;
1617 : } else {
1618 1046 : page = &ctx->pages[0];
1619 1046 : crc = blob_md_page_calc_crc(page);
1620 1046 : if (crc != page->crc) {
1621 0 : blob_load_final(ctx, -EINVAL);
1622 0 : return;
1623 : }
1624 :
1625 1046 : if (page->next != SPDK_INVALID_MD_PAGE) {
1626 0 : blob_load_final(ctx, -EINVAL);
1627 0 : return;
1628 : }
1629 :
1630 1046 : bserrno = blob_parse_extent_page(page, blob);
1631 1046 : if (bserrno) {
1632 0 : blob_load_final(ctx, bserrno);
1633 0 : return;
1634 : }
1635 : }
1636 :
1637 3240 : for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
1638 1478 : if (blob->active.extent_pages[i] != 0) {
1639 : /* Extent page was allocated, read and parse it. */
1640 1052 : lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
1641 1052 : ctx->next_extent_page = i + 1;
1642 :
1643 1052 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1644 1052 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
1645 : blob_load_cpl_extents_cpl, ctx);
1646 1052 : return;
1647 : } else {
1648 : /* Thin provisioned blobs can point to unallocated extent pages.
1649 : * In this case the blob size is increased by up to the amount left in remaining_clusters_in_et. */
1650 :
1651 426 : sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
1652 426 : blob->active.num_clusters += sz;
1653 426 : blob->remaining_clusters_in_et -= sz;
1654 :
1655 426 : assert(spdk_blob_is_thin_provisioned(blob));
1656 426 : assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
1657 :
1658 426 : tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
1659 426 : if (tmp == NULL) {
1660 0 : blob_load_final(ctx, -ENOMEM);
1661 0 : return;
1662 : }
1663 426 : memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
1664 426 : sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
1665 426 : blob->active.clusters = tmp;
1666 426 : blob->active.cluster_array_size = blob->active.num_clusters;
1667 : }
1668 : }
1669 :
1670 1762 : blob_load_backing_dev(seq, ctx);
1671 : }
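 :
 : /* Worked example for the unallocated-extent-page branch above (illustrative;
 :  * assumes SPDK_EXTENTS_PER_EP were 512): a thin-provisioned blob with
 :  * remaining_clusters_in_et == 700 and two unallocated extent pages grows by
 :  * min(700, 512) == 512 clusters at the first page and by the remaining 188
 :  * at the second, leaving remaining_clusters_in_et == 0 as the assert expects. */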
1672 :
1673 : static void
1674 3574 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1675 : {
1676 3574 : struct spdk_blob_load_ctx *ctx = cb_arg;
1677 3574 : struct spdk_blob *blob = ctx->blob;
1678 : struct spdk_blob_md_page *page;
1679 : int rc;
1680 : uint32_t crc;
1681 : uint32_t current_page;
1682 :
1683 3574 : if (ctx->num_pages == 1) {
1684 3474 : current_page = bs_blobid_to_page(blob->id);
1685 : } else {
1686 100 : assert(ctx->num_pages != 0);
1687 100 : page = &ctx->pages[ctx->num_pages - 2];
1688 100 : current_page = page->next;
1689 : }
1690 :
1691 3574 : if (bserrno) {
1692 20 : SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
1693 : current_page, blob->id, bserrno);
1694 20 : blob_load_final(ctx, bserrno);
1695 20 : return;
1696 : }
1697 :
1698 3554 : page = &ctx->pages[ctx->num_pages - 1];
1699 3554 : crc = blob_md_page_calc_crc(page);
1700 3554 : if (crc != page->crc) {
1701 8 : SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
1702 : current_page, blob->id);
1703 8 : blob_load_final(ctx, -EINVAL);
1704 8 : return;
1705 : }
1706 :
1707 3546 : if (page->next != SPDK_INVALID_MD_PAGE) {
1708 : struct spdk_blob_md_page *tmp_pages;
1709 100 : uint32_t next_page = page->next;
1710 100 : uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
1711 :
1712 : /* Read the next page */
1713 100 : tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
1714 100 : if (tmp_pages == NULL) {
1715 0 : blob_load_final(ctx, -ENOMEM);
1716 0 : return;
1717 : }
1718 100 : ctx->num_pages++;
1719 100 : ctx->pages = tmp_pages;
1720 :
1721 100 : bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
1722 : next_lba,
1723 100 : bs_byte_to_lba(blob->bs, sizeof(*page)),
1724 : blob_load_cpl, ctx);
1725 100 : return;
1726 : }
1727 :
1728 : /* Parse the pages */
1729 3446 : rc = blob_parse(ctx->pages, ctx->num_pages, blob);
1730 3446 : if (rc) {
1731 12 : blob_load_final(ctx, rc);
1732 12 : return;
1733 : }
1734 :
1735 3434 : if (blob->extent_table_found == true) {
1736 : /* If EXTENT_TABLE was found, that means support for it should be enabled. */
1737 1768 : assert(blob->extent_rle_found == false);
1738 1768 : blob->use_extent_table = true;
1739 : } else {
1740 : /* If EXTENT_RLE or no extent_* descriptor was found, disable support
1741 : * for the extent table. No extent_* descriptors means that the blob has a length of 0
1742 : * and no extent_rle descriptors were persisted for it.
1743 : * EXTENT_TABLE, if used, is always present in the metadata regardless of length. */
1744 1666 : blob->use_extent_table = false;
1745 : }
1746 :
1747 : /* Check the clear_method stored in metadata vs what may have been passed
1748 : * via spdk_bs_open_blob_ext() and update accordingly.
1749 : */
1750 3434 : blob_update_clear_method(blob);
1751 :
1752 3434 : spdk_free(ctx->pages);
1753 3434 : ctx->pages = NULL;
1754 :
1755 3434 : if (blob->extent_table_found) {
1756 1768 : blob_load_cpl_extents_cpl(seq, ctx, 0);
1757 : } else {
1758 1666 : blob_load_backing_dev(seq, ctx);
1759 : }
1760 : }
1761 :
1762 : /* Load a blob from disk given a blobid */
1763 : static void
1764 3474 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
1765 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
1766 : {
1767 : struct spdk_blob_load_ctx *ctx;
1768 : struct spdk_blob_store *bs;
1769 : uint32_t page_num;
1770 : uint64_t lba;
1771 :
1772 3474 : blob_verify_md_op(blob);
1773 :
1774 3474 : bs = blob->bs;
1775 :
1776 3474 : ctx = calloc(1, sizeof(*ctx));
1777 3474 : if (!ctx) {
1778 0 : cb_fn(seq, cb_arg, -ENOMEM);
1779 0 : return;
1780 : }
1781 :
1782 3474 : ctx->blob = blob;
1783 3474 : ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
1784 3474 : if (!ctx->pages) {
1785 0 : free(ctx);
1786 0 : cb_fn(seq, cb_arg, -ENOMEM);
1787 0 : return;
1788 : }
1789 3474 : ctx->num_pages = 1;
1790 3474 : ctx->cb_fn = cb_fn;
1791 3474 : ctx->cb_arg = cb_arg;
1792 3474 : ctx->seq = seq;
1793 :
1794 3474 : page_num = bs_blobid_to_page(blob->id);
1795 3474 : lba = bs_md_page_to_lba(blob->bs, page_num);
1796 :
1797 3474 : blob->state = SPDK_BLOB_STATE_LOADING;
1798 :
1799 3474 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1800 3474 : bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
1801 : blob_load_cpl, ctx);
1802 : }
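 :
 : /* Load path summary: the root metadata page, located by the blobid, is read
 :  * first; blob_load_cpl then follows the page->next chain, parses the
 :  * descriptors, optionally walks the extent pages via
 :  * blob_load_cpl_extents_cpl, and finally attaches a backing device in
 :  * blob_load_backing_dev before completing through blob_load_final. */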
1803 :
1804 : struct spdk_blob_persist_ctx {
1805 : struct spdk_blob *blob;
1806 :
1807 : struct spdk_blob_md_page *pages;
1808 : uint32_t next_extent_page;
1809 : struct spdk_blob_md_page *extent_page;
1810 :
1811 : spdk_bs_sequence_t *seq;
1812 : spdk_bs_sequence_cpl cb_fn;
1813 : void *cb_arg;
1814 : TAILQ_ENTRY(spdk_blob_persist_ctx) link;
1815 : };
1816 :
1817 : static void
1818 1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
1819 : uint64_t lba_count)
1820 : {
1821 1262 : switch (blob->clear_method) {
1822 1262 : case BLOB_CLEAR_WITH_DEFAULT:
1823 : case BLOB_CLEAR_WITH_UNMAP:
1824 1262 : bs_batch_unmap_dev(batch, lba, lba_count);
1825 1262 : break;
1826 0 : case BLOB_CLEAR_WITH_WRITE_ZEROES:
1827 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1828 0 : break;
1829 0 : case BLOB_CLEAR_WITH_NONE:
1830 : default:
1831 0 : break;
1832 : }
1833 1262 : }
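 :
 : /* A minimal usage sketch (not part of this file) of how a caller selects the
 :  * clear_method consumed by bs_batch_clear_dev() above, assuming the public
 :  * spdk/blob.h API; blob_create_cb and cb_arg are hypothetical caller names:
 :  *
 :  * struct spdk_blob_opts opts;
 :  *
 :  * spdk_blob_opts_init(&opts, sizeof(opts));
 :  * opts.clear_method = BLOB_CLEAR_WITH_WRITE_ZEROES; // instead of unmap
 :  * spdk_bs_create_blob_ext(bs, &opts, blob_create_cb, cb_arg);
 :  */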
1834 :
1835 : static int
1836 1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
1837 : {
1838 : uint32_t crc;
1839 : static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
1840 :
1841 1152 : if (super->version > SPDK_BS_VERSION ||
1842 1148 : super->version < SPDK_BS_INITIAL_VERSION) {
1843 8 : return -EILSEQ;
1844 : }
1845 :
1846 1144 : if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
1847 : sizeof(super->signature)) != 0) {
1848 0 : return -EILSEQ;
1849 : }
1850 :
1851 1144 : crc = blob_md_page_calc_crc(super);
1852 1144 : if (crc != super->crc) {
1853 4 : return -EILSEQ;
1854 : }
1855 :
1856 1140 : if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1857 1126 : SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
1858 14 : } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1859 6 : SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
1860 : } else {
1861 8 : SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
1862 8 : SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1863 8 : SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1864 8 : return -ENXIO;
1865 : }
1866 :
1867 1132 : if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
1868 8 : SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
1869 : bs->dev->blockcnt * bs->dev->blocklen, super->size);
1870 8 : return -EILSEQ;
1871 : }
1872 :
1873 1124 : return 0;
1874 : }
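 :
 : /* bs_super_validate() checks, in order: version bounds, signature, page CRC,
 :  * bstype (exact match, or an all-zero wildcard that matches anything), and
 :  * that the recorded blobstore size still fits on the device. All failures
 :  * return -EILSEQ except a bstype mismatch, which returns -ENXIO. */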
1875 :
1876 : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
1877 : spdk_bs_sequence_cpl cb_fn, void *cb_arg);
1878 :
1879 : static void
1880 5116 : blob_persist_complete_cb(void *arg)
1881 : {
1882 5116 : struct spdk_blob_persist_ctx *ctx = arg;
1883 :
1884 : /* Call user callback */
1885 5116 : ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
1886 :
1887 : /* Free the memory */
1888 5116 : spdk_free(ctx->pages);
1889 5116 : free(ctx);
1890 5116 : }
1891 :
1892 : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
1893 :
1894 : static void
1895 5116 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
1896 : {
1897 : struct spdk_blob_persist_ctx *next_persist, *tmp;
1898 5116 : struct spdk_blob *blob = ctx->blob;
1899 :
1900 5116 : if (bserrno == 0) {
1901 5064 : blob_mark_clean(blob);
1902 : }
1903 :
1904 5116 : assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
1905 :
1906 : /* Complete all persists that were pending when the current persist started */
1907 10232 : TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
1908 5116 : TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
1909 5116 : spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
1910 : }
1911 :
1912 5116 : if (TAILQ_EMPTY(&blob->pending_persists)) {
1913 5093 : return;
1914 : }
1915 :
1916 : /* Queue up all pending persists for completion and start blob persist with first one */
1917 23 : TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
1918 23 : next_persist = TAILQ_FIRST(&blob->persists_to_complete);
1919 :
1920 23 : blob->state = SPDK_BLOB_STATE_DIRTY;
1921 23 : bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
1922 : }
1923 :
1924 : static void
1925 5064 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1926 : {
1927 5064 : struct spdk_blob_persist_ctx *ctx = cb_arg;
1928 5064 : struct spdk_blob *blob = ctx->blob;
1929 5064 : struct spdk_blob_store *bs = blob->bs;
1930 : size_t i;
1931 :
1932 5064 : if (bserrno != 0) {
1933 0 : blob_persist_complete(seq, ctx, bserrno);
1934 0 : return;
1935 : }
1936 :
1937 5064 : spdk_spin_lock(&bs->used_lock);
1938 :
1939 : /* Release all extent_pages that were truncated */
1940 6800 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1941 : /* Nothing to release if it was not allocated */
1942 1736 : if (blob->active.extent_pages[i] != 0) {
1943 626 : bs_release_md_page(bs, blob->active.extent_pages[i]);
1944 : }
1945 : }
1946 :
1947 5064 : spdk_spin_unlock(&bs->used_lock);
1948 :
1949 5064 : if (blob->active.num_extent_pages == 0) {
1950 3651 : free(blob->active.extent_pages);
1951 3651 : blob->active.extent_pages = NULL;
1952 3651 : blob->active.extent_pages_array_size = 0;
1953 1413 : } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
1954 : #ifndef __clang_analyzer__
1955 : void *tmp;
1956 :
1957 : /* scan-build really can't figure out reallocs; work around it */
1958 2 : tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
1959 2 : assert(tmp != NULL);
1960 2 : blob->active.extent_pages = tmp;
1961 : #endif
1962 2 : blob->active.extent_pages_array_size = blob->active.num_extent_pages;
1963 : }
1964 :
1965 5064 : blob_persist_complete(seq, ctx, bserrno);
1966 : }
1967 :
1968 : static void
1969 5064 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
1970 : {
1971 5064 : struct spdk_blob *blob = ctx->blob;
1972 5064 : struct spdk_blob_store *bs = blob->bs;
1973 : size_t i;
1974 : uint64_t lba;
1975 : uint64_t lba_count;
1976 : spdk_bs_batch_t *batch;
1977 :
1978 5064 : batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
1979 5064 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
1980 :
1981 : /* Clear all extent_pages that were truncated */
1982 6800 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1983 : /* Nothing to clear if it was not allocated */
1984 1736 : if (blob->active.extent_pages[i] != 0) {
1985 626 : lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
1986 626 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1987 : }
1988 : }
1989 :
1990 5064 : bs_batch_close(batch);
1991 5064 : }
1992 :
1993 : static void
1994 5064 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1995 : {
1996 5064 : struct spdk_blob_persist_ctx *ctx = cb_arg;
1997 5064 : struct spdk_blob *blob = ctx->blob;
1998 5064 : struct spdk_blob_store *bs = blob->bs;
1999 : size_t i;
2000 :
2001 5064 : if (bserrno != 0) {
2002 0 : blob_persist_complete(seq, ctx, bserrno);
2003 0 : return;
2004 : }
2005 :
2006 5064 : spdk_spin_lock(&bs->used_lock);
2007 : /* Release all clusters that were truncated */
2008 1074111 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
2009 1069047 : uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
2010 :
2011 : /* Nothing to release if it was not allocated */
2012 1069047 : if (blob->active.clusters[i] != 0) {
2013 2343 : bs_release_cluster(bs, cluster_num);
2014 : }
2015 : }
2016 5064 : spdk_spin_unlock(&bs->used_lock);
2017 :
2018 5064 : if (blob->active.num_clusters == 0) {
2019 1944 : free(blob->active.clusters);
2020 1944 : blob->active.clusters = NULL;
2021 1944 : blob->active.cluster_array_size = 0;
2022 3120 : } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
2023 : #ifndef __clang_analyzer__
2024 : void *tmp;
2025 :
2026 : /* scan-build really can't figure out reallocs; work around it */
2027 14 : tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
2028 14 : assert(tmp != NULL);
2029 14 : blob->active.clusters = tmp;
2030 :
2031 : #endif
2032 14 : blob->active.cluster_array_size = blob->active.num_clusters;
2033 : }
2034 :
2035 : /* Move on to clearing extent pages */
2036 5064 : blob_persist_clear_extents(seq, ctx);
2037 : }
2038 :
2039 : static void
2040 5064 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2041 : {
2042 5064 : struct spdk_blob *blob = ctx->blob;
2043 5064 : struct spdk_blob_store *bs = blob->bs;
2044 : spdk_bs_batch_t *batch;
2045 : size_t i;
2046 : uint64_t lba;
2047 : uint64_t lba_count;
2048 :
2049 : /* Clusters don't move around in blobs. The list shrinks or grows
2050 : * at the end, but no changes ever occur in the middle of the list.
2051 : */
2052 :
2053 5064 : batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
2054 :
2055 : /* Clear all clusters that were truncated */
2056 5064 : lba = 0;
2057 5064 : lba_count = 0;
2058 1074111 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
2059 1069047 : uint64_t next_lba = blob->active.clusters[i];
2060 1069047 : uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
2061 :
2062 1069047 : if (next_lba > 0 && (lba + lba_count) == next_lba) {
2063 : /* This cluster is contiguous with the previous one. */
2064 1085 : lba_count += next_lba_count;
2065 1085 : continue;
2066 1067962 : } else if (next_lba == 0) {
2067 1066704 : continue;
2068 : }
2069 :
2070 : /* This cluster is not contiguous with the previous one. */
2071 :
2072 : /* If a run of LBAs was previously accumulated, clear it now */
2073 1258 : if (lba_count > 0) {
2074 36 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2075 : }
2076 :
2077 : /* Start building the next batch */
2078 1258 : lba = next_lba;
2079 1258 : if (next_lba > 0) {
2080 1258 : lba_count = next_lba_count;
2081 : } else {
2082 0 : lba_count = 0;
2083 : }
2084 : }
2085 :
2086 : /* If we ended with a contiguous set of LBAs, clear them now */
2087 5064 : if (lba_count > 0) {
2088 1222 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2089 : }
2090 :
2091 5064 : bs_batch_close(batch);
2092 5064 : }
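 :
 : /* Run coalescing above, illustrated with hypothetical LBAs: two truncated
 :  * clusters at LBA 1024 and 1280 with 256 LBAs per cluster merge into a single
 :  * 512-LBA clear; an unallocated entry (LBA 0) is skipped without breaking the
 :  * run, while a jump to a non-adjacent LBA flushes the pending run via
 :  * bs_batch_clear_dev() and starts a new one. */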
2093 :
2094 : static void
2095 5068 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2096 : {
2097 5068 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2098 5068 : struct spdk_blob *blob = ctx->blob;
2099 5068 : struct spdk_blob_store *bs = blob->bs;
2100 : size_t i;
2101 :
2102 5068 : if (bserrno != 0) {
2103 4 : blob_persist_complete(seq, ctx, bserrno);
2104 4 : return;
2105 : }
2106 :
2107 5064 : spdk_spin_lock(&bs->used_lock);
2108 :
2109 : /* This loop starts at 1 because the first page is special and handled
2110 : * below. The pages (except the first) are never written in place, so any
2111 : * pages in the clean list, already zeroed on disk, can now be released.
2112 : */
2113 5132 : for (i = 1; i < blob->clean.num_pages; i++) {
2114 68 : bs_release_md_page(bs, blob->clean.pages[i]);
2115 : }
2116 :
2117 5064 : if (blob->active.num_pages == 0) {
2118 : uint32_t page_num;
2119 :
2120 1488 : page_num = bs_blobid_to_page(blob->id);
2121 1488 : bs_release_md_page(bs, page_num);
2122 : }
2123 :
2124 5064 : spdk_spin_unlock(&bs->used_lock);
2125 :
2126 : /* Move on to clearing clusters */
2127 5064 : blob_persist_clear_clusters(seq, ctx);
2128 : }
2129 :
2130 : static void
2131 5108 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2132 : {
2133 5108 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2134 5108 : struct spdk_blob *blob = ctx->blob;
2135 5108 : struct spdk_blob_store *bs = blob->bs;
2136 : uint64_t lba;
2137 : uint64_t lba_count;
2138 : spdk_bs_batch_t *batch;
2139 : size_t i;
2140 :
2141 5108 : if (bserrno != 0) {
2142 40 : blob_persist_complete(seq, ctx, bserrno);
2143 40 : return;
2144 : }
2145 :
2146 5068 : batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
2147 :
2148 5068 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
2149 :
2150 : /* This loop starts at 1 because the first page is special and handled
2151 : * below. The pages (except the first) are never written in place,
2152 : * so any pages in the clean list must be zeroed.
2153 : */
2154 5136 : for (i = 1; i < blob->clean.num_pages; i++) {
2155 68 : lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
2156 :
2157 68 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2158 : }
2159 :
2160 : /* The first page will only be zeroed if this is a delete. */
2161 5068 : if (blob->active.num_pages == 0) {
2162 : uint32_t page_num;
2163 :
2164 : /* The first page in the metadata goes where the blobid indicates */
2165 1492 : page_num = bs_blobid_to_page(blob->id);
2166 1492 : lba = bs_md_page_to_lba(bs, page_num);
2167 :
2168 1492 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2169 : }
2170 :
2171 5068 : bs_batch_close(batch);
2172 : }
2173 :
2174 : static void
2175 3616 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2176 : {
2177 3616 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2178 3616 : struct spdk_blob *blob = ctx->blob;
2179 3616 : struct spdk_blob_store *bs = blob->bs;
2180 : uint64_t lba;
2181 : uint32_t lba_count;
2182 : struct spdk_blob_md_page *page;
2183 :
2184 3616 : if (bserrno != 0) {
2185 0 : blob_persist_complete(seq, ctx, bserrno);
2186 0 : return;
2187 : }
2188 :
2189 3616 : if (blob->active.num_pages == 0) {
2190 : /* Move on to the next step */
2191 0 : blob_persist_zero_pages(seq, ctx, 0);
2192 0 : return;
2193 : }
2194 :
2195 3616 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2196 :
2197 3616 : page = &ctx->pages[0];
2198 : /* The first page in the metadata goes where the blobid indicates */
2199 3616 : lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
2200 :
2201 3616 : bs_sequence_write_dev(seq, page, lba, lba_count,
2202 : blob_persist_zero_pages, ctx);
2203 : }
2204 :
2205 : static void
2206 3616 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2207 : {
2208 3616 : struct spdk_blob *blob = ctx->blob;
2209 3616 : struct spdk_blob_store *bs = blob->bs;
2210 : uint64_t lba;
2211 : uint32_t lba_count;
2212 : struct spdk_blob_md_page *page;
2213 : spdk_bs_batch_t *batch;
2214 : size_t i;
2215 :
2216 : /* The chained metadata pages are written here; the root page is written
2217 : * last, from blob_persist_write_page_root, once these are all on disk.
2218 : */
2219 :
2220 3616 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2221 :
2222 3616 : batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
2223 :
2224 : /* This starts at 1. The root page is not written until
2225 : * all of the others are finished
2226 : */
2227 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2228 88 : page = &ctx->pages[i];
2229 88 : assert(page->sequence_num == i);
2230 :
2231 88 : lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
2232 :
2233 88 : bs_batch_write_dev(batch, page, lba, lba_count);
2234 : }
2235 :
2236 3616 : bs_batch_close(batch);
2237 3616 : }
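 :
 : /* Ordering note: the root page is written only after every chained page has
 :  * landed (blob_persist_write_page_root runs as this batch's completion), so a
 :  * crash mid-persist leaves the old root intact and the new chain merely
 :  * unreferenced; the previous metadata generation stays readable. */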
2238 :
2239 : static int
2240 3576 : blob_resize(struct spdk_blob *blob, uint64_t sz)
2241 : {
2242 : uint64_t i;
2243 : uint64_t *tmp;
2244 3576 : uint64_t cluster;
2245 3576 : uint32_t lfmd; /* lowest free md page */
2246 : uint64_t num_clusters;
2247 : uint32_t *ep_tmp;
2248 3576 : uint64_t new_num_ep = 0, current_num_ep = 0;
2249 : struct spdk_blob_store *bs;
2250 : int rc;
2251 :
2252 3576 : bs = blob->bs;
2253 :
2254 3576 : blob_verify_md_op(blob);
2255 :
2256 3576 : if (blob->active.num_clusters == sz) {
2257 456 : return 0;
2258 : }
2259 :
2260 3120 : if (blob->active.num_clusters < blob->active.cluster_array_size) {
2261 : /* If this blob was resized to be larger, then smaller, then
2262 : * larger without syncing, then the cluster array already
2263 : * contains spare assigned clusters we can use.
2264 : */
2265 0 : num_clusters = spdk_min(blob->active.cluster_array_size,
2266 : sz);
2267 : } else {
2268 3120 : num_clusters = blob->active.num_clusters;
2269 : }
2270 :
2271 3120 : if (blob->use_extent_table) {
2272 : /* Round up, since every cluster beyond the current extent table size
2273 : * requires a new extent page. */
2274 1582 : new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
2275 1582 : current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
2276 : }
2277 :
2278 3120 : assert(!spdk_spin_held(&bs->used_lock));
2279 :
2280 : /* Check first that we have enough clusters and md pages before we start claiming them.
2281 : * bs->used_lock is held to ensure that clusters we think are free are still free when we go
2282 : * to claim them later in this function.
2283 : */
2284 3120 : if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
2285 1302 : spdk_spin_lock(&bs->used_lock);
2286 1302 : if ((sz - num_clusters) > bs->num_free_clusters) {
2287 8 : rc = -ENOSPC;
2288 8 : goto out;
2289 : }
2290 1294 : lfmd = 0;
2291 1938 : for (i = current_num_ep; i < new_num_ep ; i++) {
2292 644 : lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
2293 644 : if (lfmd == UINT32_MAX) {
2294 : /* No more free md pages. Cannot satisfy the request */
2295 0 : rc = -ENOSPC;
2296 0 : goto out;
2297 : }
2298 : }
2299 : }
2300 :
2301 3112 : if (sz > num_clusters) {
2302 : /* Expand the cluster array if necessary.
2303 : * We only shrink the array when persisting.
2304 : */
2305 1706 : tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
2306 1706 : if (sz > 0 && tmp == NULL) {
2307 0 : rc = -ENOMEM;
2308 0 : goto out;
2309 : }
2310 1706 : memset(tmp + blob->active.cluster_array_size, 0,
2311 1706 : sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
2312 1706 : blob->active.clusters = tmp;
2313 1706 : blob->active.cluster_array_size = sz;
2314 :
2315 : /* Expand the extents table, only if enough clusters were added */
2316 1706 : if (new_num_ep > current_num_ep && blob->use_extent_table) {
2317 842 : ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
2318 842 : if (new_num_ep > 0 && ep_tmp == NULL) {
2319 0 : rc = -ENOMEM;
2320 0 : goto out;
2321 : }
2322 842 : memset(ep_tmp + blob->active.extent_pages_array_size, 0,
2323 842 : sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
2324 842 : blob->active.extent_pages = ep_tmp;
2325 842 : blob->active.extent_pages_array_size = new_num_ep;
2326 : }
2327 : }
2328 :
2329 3112 : blob->state = SPDK_BLOB_STATE_DIRTY;
2330 :
2331 3112 : if (spdk_blob_is_thin_provisioned(blob) == false) {
2332 2428 : cluster = 0;
2333 2428 : lfmd = 0;
2334 9832 : for (i = num_clusters; i < sz; i++) {
2335 7404 : bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
2336 : /* Do not increment lfmd here. lfmd will get updated
2337 : * to the md_page allocated (if any) when a new extent
2338 : * page is needed. Just pass that value again,
2339 : * bs_allocate_cluster will just start at that index
2340 : * to find the next free md_page when needed.
2341 : */
2342 : }
2343 : }
2344 :
2345 : /* If we are shrinking the blob, we must adjust num_allocated_clusters */
2346 1072199 : for (i = sz; i < num_clusters; i++) {
2347 1069087 : if (blob->active.clusters[i] != 0) {
2348 2343 : blob->active.num_allocated_clusters--;
2349 : }
2350 : }
2351 :
2352 3112 : blob->active.num_clusters = sz;
2353 3112 : blob->active.num_extent_pages = new_num_ep;
2354 :
2355 3112 : rc = 0;
2356 3120 : out:
2357 3120 : if (spdk_spin_held(&bs->used_lock)) {
2358 1302 : spdk_spin_unlock(&bs->used_lock);
2359 : }
2360 :
2361 3120 : return rc;
2362 : }
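 :
 : /* Hedged usage sketch (not part of this file), assuming the public spdk/blob.h
 :  * API: blob_resize() above is driven by spdk_blob_resize(), whose sz argument
 :  * is a cluster count, and the new size only reaches disk on the next md sync.
 :  * resize_done, sync_done, and cb_arg are hypothetical caller names.
 :  *
 :  * spdk_blob_resize(blob, 64, resize_done, cb_arg); // grow to 64 clusters
 :  * // then, from resize_done(): spdk_blob_sync_md(blob, sync_done, cb_arg);
 :  */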
2363 :
2364 : static void
2365 3616 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
2366 : {
2367 3616 : spdk_bs_sequence_t *seq = ctx->seq;
2368 3616 : struct spdk_blob *blob = ctx->blob;
2369 3616 : struct spdk_blob_store *bs = blob->bs;
2370 : uint64_t i;
2371 : uint32_t page_num;
2372 : void *tmp;
2373 : int rc;
2374 :
2375 : /* Generate the new metadata */
2376 3616 : rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
2377 3616 : if (rc < 0) {
2378 0 : blob_persist_complete(seq, ctx, rc);
2379 0 : return;
2380 : }
2381 :
2382 3616 : assert(blob->active.num_pages >= 1);
2383 :
2384 : /* Resize the cache of page indices */
2385 3616 : tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
2386 3616 : if (!tmp) {
2387 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2388 0 : return;
2389 : }
2390 3616 : blob->active.pages = tmp;
2391 :
2392 : /* Assign this metadata to pages. This requires two passes - one to verify that there are
2393 : * enough pages and a second to actually claim them. The used_lock is held across
2394 : * both passes to ensure things don't change in the middle.
2395 : */
2396 3616 : spdk_spin_lock(&bs->used_lock);
2397 3616 : page_num = 0;
2398 : /* Note that this loop starts at one. The first page location is fixed by the blobid. */
2399 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2400 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2401 88 : if (page_num == UINT32_MAX) {
2402 0 : spdk_spin_unlock(&bs->used_lock);
2403 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2404 0 : return;
2405 : }
2406 88 : page_num++;
2407 : }
2408 :
2409 3616 : page_num = 0;
2410 3616 : blob->active.pages[0] = bs_blobid_to_page(blob->id);
2411 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2412 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2413 88 : ctx->pages[i - 1].next = page_num;
2414 : /* Now that previous metadata page is complete, calculate the crc for it. */
2415 88 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2416 88 : blob->active.pages[i] = page_num;
2417 88 : bs_claim_md_page(bs, page_num);
2418 88 : SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
2419 : blob->id);
2420 88 : page_num++;
2421 : }
2422 3616 : spdk_spin_unlock(&bs->used_lock);
2423 3616 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2424 : /* Start writing the metadata from last page to first */
2425 3616 : blob->state = SPDK_BLOB_STATE_CLEAN;
2426 3616 : blob_persist_write_page_chain(seq, ctx);
2427 : }
2428 :
2429 : static void
2430 2358 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2431 : {
2432 2358 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2433 2358 : struct spdk_blob *blob = ctx->blob;
2434 : size_t i;
2435 : uint32_t extent_page_id;
2436 2358 : uint32_t page_count = 0;
2437 : int rc;
2438 :
2439 2358 : if (ctx->extent_page != NULL) {
2440 666 : spdk_free(ctx->extent_page);
2441 666 : ctx->extent_page = NULL;
2442 : }
2443 :
2444 2358 : if (bserrno != 0) {
2445 0 : blob_persist_complete(seq, ctx, bserrno);
2446 0 : return;
2447 : }
2448 :
2449 : /* Only write out Extent Pages when the blob was resized. */
2450 4614 : for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
2451 2922 : extent_page_id = blob->active.extent_pages[i];
2452 2922 : if (extent_page_id == 0) {
2453 : /* No Extent Page to persist */
2454 2256 : assert(spdk_blob_is_thin_provisioned(blob));
2455 2256 : continue;
2456 : }
2457 666 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
2458 666 : ctx->next_extent_page = i + 1;
2459 666 : rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
2460 666 : if (rc < 0) {
2461 0 : blob_persist_complete(seq, ctx, rc);
2462 0 : return;
2463 : }
2464 :
2465 666 : blob->state = SPDK_BLOB_STATE_DIRTY;
2466 666 : blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
2467 :
2468 666 : ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
2469 :
2470 666 : bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
2471 666 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
2472 : blob_persist_write_extent_pages, ctx);
2473 666 : return;
2474 : }
2475 :
2476 1692 : blob_persist_generate_new_md(ctx);
2477 : }
2478 :
2479 : static void
2480 5116 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2481 : {
2482 5116 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2483 5116 : struct spdk_blob *blob = ctx->blob;
2484 :
2485 5116 : if (bserrno != 0) {
2486 8 : blob_persist_complete(seq, ctx, bserrno);
2487 8 : return;
2488 : }
2489 :
2490 5108 : if (blob->active.num_pages == 0) {
2491 : /* This is the signal that the blob should be deleted.
2492 : * Immediately jump to the cleanup routine. */
2493 1492 : assert(blob->clean.num_pages > 0);
2494 1492 : blob->state = SPDK_BLOB_STATE_CLEAN;
2495 1492 : blob_persist_zero_pages(seq, ctx, 0);
2496 1492 : return;
2497 :
2498 : }
2499 :
2500 3616 : if (blob->clean.num_clusters < blob->active.num_clusters) {
2501 : /* Blob was resized up */
2502 1678 : assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
2503 1678 : ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
2504 1938 : } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
2505 : /* Blob was resized down */
2506 14 : assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
2507 14 : ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
2508 : } else {
2509 : /* No change in size occurred */
2510 1924 : blob_persist_generate_new_md(ctx);
2511 1924 : return;
2512 : }
2513 :
2514 1692 : blob_persist_write_extent_pages(seq, ctx, 0);
2515 : }
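 :
 : /* Persist pipeline, as wired by the callbacks above: bs_mark_dirty ->
 :  * blob_persist_start -> blob_persist_write_extent_pages (when resized) ->
 :  * blob_persist_generate_new_md -> blob_persist_write_page_chain ->
 :  * blob_persist_write_page_root -> blob_persist_zero_pages ->
 :  * blob_persist_clear_clusters -> blob_persist_clear_extents ->
 :  * blob_persist_complete. A delete (active.num_pages == 0) short-circuits
 :  * straight to blob_persist_zero_pages. */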
2516 :
2517 : struct spdk_bs_mark_dirty {
2518 : struct spdk_blob_store *bs;
2519 : struct spdk_bs_super_block *super;
2520 : spdk_bs_sequence_cpl cb_fn;
2521 : void *cb_arg;
2522 : };
2523 :
2524 : static void
2525 158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2526 : {
2527 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2528 :
2529 158 : if (bserrno == 0) {
2530 150 : ctx->bs->clean = 0;
2531 : }
2532 :
2533 158 : ctx->cb_fn(seq, ctx->cb_arg, bserrno);
2534 :
2535 158 : spdk_free(ctx->super);
2536 158 : free(ctx);
2537 158 : }
2538 :
2539 : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2540 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
2541 :
2542 :
2543 : static void
2544 158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2545 : {
2546 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2547 : int rc;
2548 :
2549 158 : if (bserrno != 0) {
2550 4 : bs_mark_dirty_write_cpl(seq, ctx, bserrno);
2551 4 : return;
2552 : }
2553 :
2554 154 : rc = bs_super_validate(ctx->super, ctx->bs);
2555 154 : if (rc != 0) {
2556 0 : bs_mark_dirty_write_cpl(seq, ctx, rc);
2557 0 : return;
2558 : }
2559 :
2560 154 : ctx->super->clean = 0;
2561 154 : if (ctx->super->size == 0) {
2562 4 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
2563 : }
2564 :
2565 154 : bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
2566 : }
2567 :
2568 : static void
2569 5550 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2570 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2571 : {
2572 : struct spdk_bs_mark_dirty *ctx;
2573 :
2574 : /* Blobstore is already marked dirty */
2575 5550 : if (bs->clean == 0) {
2576 5392 : cb_fn(seq, cb_arg, 0);
2577 5392 : return;
2578 : }
2579 :
2580 158 : ctx = calloc(1, sizeof(*ctx));
2581 158 : if (!ctx) {
2582 0 : cb_fn(seq, cb_arg, -ENOMEM);
2583 0 : return;
2584 : }
2585 158 : ctx->bs = bs;
2586 158 : ctx->cb_fn = cb_fn;
2587 158 : ctx->cb_arg = cb_arg;
2588 :
2589 158 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
2590 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2591 158 : if (!ctx->super) {
2592 0 : free(ctx);
2593 0 : cb_fn(seq, cb_arg, -ENOMEM);
2594 0 : return;
2595 : }
2596 :
2597 158 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
2598 158 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
2599 : bs_mark_dirty_write, ctx);
2600 : }
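 :
 : /* bs_mark_dirty() is a no-op on an already-dirty blobstore. Only the first
 :  * persist after a clean load re-reads, validates, and re-writes the super
 :  * block with clean = 0, backfilling super->size when an older superblock
 :  * recorded it as 0. */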
2601 :
2602 : /* Write a blob to disk */
2603 : static void
2604 9144 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
2605 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2606 : {
2607 : struct spdk_blob_persist_ctx *ctx;
2608 :
2609 9144 : blob_verify_md_op(blob);
2610 :
2611 9144 : if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
2612 4028 : cb_fn(seq, cb_arg, 0);
2613 4028 : return;
2614 : }
2615 :
2616 5116 : ctx = calloc(1, sizeof(*ctx));
2617 5116 : if (!ctx) {
2618 0 : cb_fn(seq, cb_arg, -ENOMEM);
2619 0 : return;
2620 : }
2621 5116 : ctx->blob = blob;
2622 5116 : ctx->seq = seq;
2623 5116 : ctx->cb_fn = cb_fn;
2624 5116 : ctx->cb_arg = cb_arg;
2625 :
2626 : /* Multiple blob persists can affect one another, via blob->state or
2627 : * blob mutable data changes. To prevent this, queue up the persists. */
2628 5116 : if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
2629 23 : TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
2630 23 : return;
2631 : }
2632 5093 : TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
2633 :
2634 5093 : bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
2635 : }
2636 :
2637 : struct spdk_blob_copy_cluster_ctx {
2638 : struct spdk_blob *blob;
2639 : uint8_t *buf;
2640 : uint64_t page;
2641 : uint64_t new_cluster;
2642 : uint32_t new_extent_page;
2643 : spdk_bs_sequence_t *seq;
2644 : struct spdk_blob_md_page *new_cluster_page;
2645 : };
2646 :
2647 : struct spdk_blob_free_cluster_ctx {
2648 : struct spdk_blob *blob;
2649 : uint64_t page;
2650 : struct spdk_blob_md_page *md_page;
2651 : uint64_t cluster_num;
2652 : uint32_t extent_page;
2653 : spdk_bs_sequence_t *seq;
2654 : };
2655 :
2656 : static void
2657 812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
2658 : {
2659 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2660 812 : struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
2661 812 : TAILQ_HEAD(, spdk_bs_request_set) requests;
2662 : spdk_bs_user_op_t *op;
2663 :
2664 812 : TAILQ_INIT(&requests);
2665 812 : TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
2666 :
2667 1624 : while (!TAILQ_EMPTY(&requests)) {
2668 812 : op = TAILQ_FIRST(&requests);
2669 812 : TAILQ_REMOVE(&requests, op, link);
2670 812 : if (bserrno == 0) {
2671 812 : bs_user_op_execute(op);
2672 : } else {
2673 0 : bs_user_op_abort(op, bserrno);
2674 : }
2675 : }
2676 :
2677 812 : spdk_free(ctx->buf);
2678 812 : free(ctx);
2679 812 : }
2680 :
2681 : static void
2682 60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
2683 : {
2684 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
2685 60 : spdk_bs_sequence_t *seq = ctx->seq;
2686 :
2687 60 : bs_sequence_finish(seq, bserrno);
2688 :
2689 60 : free(ctx);
2690 60 : }
2691 :
2692 : static void
2693 4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
2694 : {
2695 4 : spdk_spin_lock(&ctx->blob->bs->used_lock);
2696 4 : bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
2697 4 : if (ctx->new_extent_page != 0) {
2698 2 : bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
2699 : }
2700 4 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
2701 4 : }
2702 :
2703 : static void
2704 4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
2705 : {
2706 4 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2707 :
2708 4 : if (bserrno) {
2709 0 : SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
2710 : }
2711 :
2712 4 : blob_insert_cluster_revert(ctx);
2713 4 : bs_sequence_finish(ctx->seq, bserrno);
2714 4 : }
2715 :
2716 : static void
2717 4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
2718 : {
2719 4 : struct spdk_bs_cpl cpl;
2720 : spdk_bs_batch_t *batch;
2721 4 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
2722 :
2723 : /*
2724 : * We allocated a cluster and copied data into it, but it turned out we don't need
2725 : * the cluster after all and want to release it. Before doing so, we must clear the
2726 : * data on the cluster.
2727 : * The cluster may later be re-allocated, by a thick-provisioned blob for example, and
2728 : * reads from that blob issued before any writes must return zeroes.
2729 : */
2730 :
2731 4 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2732 4 : cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
2733 4 : cpl.u.blob_basic.cb_arg = ctx;
2734 :
2735 4 : batch = bs_batch_open(ch, &cpl, ctx->blob);
2736 4 : if (!batch) {
2737 0 : blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
2738 0 : return;
2739 : }
2740 :
2741 4 : bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2742 4 : bs_cluster_to_lba(ctx->blob->bs, 1));
2743 4 : bs_batch_close(batch);
2744 : }
2745 :
2746 : static void
2747 812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
2748 : {
2749 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2750 :
2751 812 : if (bserrno) {
2752 4 : if (bserrno == -EEXIST) {
2753 : /* The metadata insert failed because another thread
2754 : * allocated the cluster first. Clear and free our cluster
2755 : * but continue without error. */
2756 4 : blob_insert_cluster_clear(ctx);
2757 4 : return;
2758 : }
2759 :
2760 0 : blob_insert_cluster_revert(ctx);
2761 : }
2762 :
2763 808 : bs_sequence_finish(ctx->seq, bserrno);
2764 : }
2765 :
2766 : static void
2767 408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2768 : {
2769 408 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2770 : uint32_t cluster_number;
2771 :
2772 408 : if (bserrno) {
2773 : /* The write failed, so jump to the final completion handler */
2774 0 : bs_sequence_finish(seq, bserrno);
2775 0 : return;
2776 : }
2777 :
2778 408 : cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
2779 :
2780 408 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2781 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2782 : }
2783 :
2784 : static void
2785 280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2786 : {
2787 280 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2788 :
2789 280 : if (bserrno != 0) {
2790 : /* The read failed, so jump to the final completion handler */
2791 0 : bs_sequence_finish(seq, bserrno);
2792 0 : return;
2793 : }
2794 :
2795 : /* Write whole cluster */
2796 280 : bs_sequence_write_dev(seq, ctx->buf,
2797 280 : bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2798 280 : bs_cluster_to_lba(ctx->blob->bs, 1),
2799 : blob_write_copy_cpl, ctx);
2800 : }
2801 :
2802 : static bool
2803 796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
2804 : {
2805 796 : uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
2806 :
2807 1146 : return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
2808 350 : blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
2809 : }
2810 :
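 :
 : /* Copy offload is usable only when the blob is not an esnap clone, the
 :  * blobstore device implements ->copy, and the backing dev can translate the
 :  * cluster's LBA to an address on that same device via translate_lba(). */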
2811 : static void
2812 128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
2813 : {
2814 128 : struct spdk_blob *blob = ctx->blob;
2815 128 : uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
2816 :
2817 128 : bs_sequence_copy_dev(ctx->seq,
2818 128 : bs_cluster_to_lba(blob->bs, ctx->new_cluster),
2819 : src_lba,
2820 : lba_count,
2821 : blob_write_copy_cpl, ctx);
2822 128 : }
2823 :
2824 : static void
2825 812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
2826 : struct spdk_io_channel *_ch,
2827 : uint64_t io_unit, spdk_bs_user_op_t *op)
2828 : {
2829 812 : struct spdk_bs_cpl cpl;
2830 : struct spdk_bs_channel *ch;
2831 : struct spdk_blob_copy_cluster_ctx *ctx;
2832 : uint32_t cluster_start_page;
2833 : uint32_t cluster_number;
2834 : bool is_zeroes;
2835 : bool can_copy;
2836 : bool is_valid_range;
2837 812 : uint64_t copy_src_lba;
2838 : int rc;
2839 :
2840 812 : ch = spdk_io_channel_get_ctx(_ch);
2841 :
2842 812 : if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
2843 : /* There are already operations pending. Queue this user op
2844 : * and return because it will be re-executed when the outstanding
2845 : * cluster allocation completes. */
2846 0 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2847 0 : return;
2848 : }
2849 :
2850 : /* Round the io_unit offset down to the first page in the cluster */
2851 812 : cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
2852 :
2853 : /* Calculate which index in the metadata cluster array the corresponding
2854 : * cluster is supposed to be at. */
2855 812 : cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
2856 :
2857 812 : ctx = calloc(1, sizeof(*ctx));
2858 812 : if (!ctx) {
2859 0 : bs_user_op_abort(op, -ENOMEM);
2860 0 : return;
2861 : }
2862 :
2863 812 : assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
2864 :
2865 812 : ctx->blob = blob;
2866 812 : ctx->page = cluster_start_page;
2867 812 : ctx->new_cluster_page = ch->new_cluster_page;
2868 812 : memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
2869 :
2870 : /* Check if the cluster that we intend to do CoW for is valid for
2871 : * the backing dev. For a zeroes backing dev, it is always valid.
2872 : * For other backing devs, e.g. a snapshot, it could be invalid if
2873 : * the blob was resized after the snapshot was taken. */
2874 812 : is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
2875 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2876 812 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2877 :
2878 812 : can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
2879 :
2880 1608 : is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
2881 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2882 796 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2883 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
2884 280 : ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
2885 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2886 280 : if (!ctx->buf) {
2887 0 : SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
2888 : blob->bs->cluster_sz);
2889 0 : free(ctx);
2890 0 : bs_user_op_abort(op, -ENOMEM);
2891 0 : return;
2892 : }
2893 : }
2894 :
2895 812 : spdk_spin_lock(&blob->bs->used_lock);
2896 812 : rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
2897 : false);
2898 812 : spdk_spin_unlock(&blob->bs->used_lock);
2899 812 : if (rc != 0) {
2900 0 : spdk_free(ctx->buf);
2901 0 : free(ctx);
2902 0 : bs_user_op_abort(op, rc);
2903 0 : return;
2904 : }
2905 :
2906 812 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2907 812 : cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
2908 812 : cpl.u.blob_basic.cb_arg = ctx;
2909 :
2910 812 : ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
2911 812 : if (!ctx->seq) {
2912 0 : spdk_spin_lock(&blob->bs->used_lock);
2913 0 : bs_release_cluster(blob->bs, ctx->new_cluster);
2914 0 : spdk_spin_unlock(&blob->bs->used_lock);
2915 0 : spdk_free(ctx->buf);
2916 0 : free(ctx);
2917 0 : bs_user_op_abort(op, -ENOMEM);
2918 0 : return;
2919 : }
2920 :
2921 : /* Queue the user op to block other incoming operations */
2922 812 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2923 :
2924 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
2925 408 : if (can_copy) {
2926 128 : blob_copy(ctx, op, copy_src_lba);
2927 : } else {
2928 : /* Read cluster from backing device */
2929 280 : bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
2930 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2931 280 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
2932 : blob_write_copy, ctx);
2933 : }
2934 :
2935 : } else {
2936 404 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2937 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2938 : }
2939 : }
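 :
 : /* CoW paths above, in short: if the parent range is all zeroes (or invalid
 :  * for the backing dev), the freshly allocated cluster is inserted into the
 :  * metadata directly; if copy offload is available, bs_sequence_copy_dev()
 :  * moves the data device-side; otherwise the cluster is staged through a DMA
 :  * buffer with a read from back_bs_dev followed by a full-cluster write. */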
2940 :
2941 : static inline bool
2942 40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
2943 : uint64_t *lba, uint64_t *lba_count)
2944 : {
2945 40206 : *lba_count = length;
2946 :
2947 40206 : if (!bs_io_unit_is_allocated(blob, io_unit)) {
2948 2992 : assert(blob->back_bs_dev != NULL);
2949 2992 : *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
2950 2992 : *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
2951 2992 : return false;
2952 : } else {
2953 37214 : *lba = bs_blob_io_unit_to_lba(blob, io_unit);
2954 37214 : return true;
2955 : }
2956 : }
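 :
 : /* Illustrative example (hypothetical layout): for a clone whose cluster 0 is
 :  * still unallocated, blob_calculate_lba_and_lba_count() returns false and
 :  * maps the io_unit onto back_bs_dev LBAs, so the caller routes the read to
 :  * the parent; once the cluster is allocated it returns true with LBAs on the
 :  * blobstore device itself. */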
2957 :
2958 : struct op_split_ctx {
2959 : struct spdk_blob *blob;
2960 : struct spdk_io_channel *channel;
2961 : uint64_t io_unit_offset;
2962 : uint64_t io_units_remaining;
2963 : void *curr_payload;
2964 : enum spdk_blob_op_type op_type;
2965 : spdk_bs_sequence_t *seq;
2966 : bool in_submit_ctx;
2967 : bool completed_in_submit_ctx;
2968 : bool done;
2969 : };
2970 :
2971 : static void
2972 774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
2973 : {
2974 774 : struct op_split_ctx *ctx = cb_arg;
2975 774 : struct spdk_blob *blob = ctx->blob;
2976 774 : struct spdk_io_channel *ch = ctx->channel;
2977 774 : enum spdk_blob_op_type op_type = ctx->op_type;
2978 : uint8_t *buf;
2979 : uint64_t offset;
2980 : uint64_t length;
2981 : uint64_t op_length;
2982 :
2983 774 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
2984 178 : bs_sequence_finish(ctx->seq, bserrno);
2985 178 : if (ctx->in_submit_ctx) {
2986 : /* Defer freeing of the ctx object, since it will be
2987 : * accessed when this unwinds back to the submission
2988 : * context.
2989 : */
2990 40 : ctx->done = true;
2991 : } else {
2992 138 : free(ctx);
2993 : }
2994 178 : return;
2995 : }
2996 :
2997 596 : if (ctx->in_submit_ctx) {
2998 : /* If this split operation completed in the context
2999 : * of its submission, mark the flag and return immediately
3000 : * to avoid recursion.
3001 : */
3002 68 : ctx->completed_in_submit_ctx = true;
3003 68 : return;
3004 : }
3005 :
3006 : while (true) {
3007 596 : ctx->completed_in_submit_ctx = false;
3008 :
3009 596 : offset = ctx->io_unit_offset;
3010 596 : length = ctx->io_units_remaining;
3011 596 : buf = ctx->curr_payload;
3012 596 : op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
3013 : offset));
3014 :
3015 : /* Update length and payload for next operation */
3016 596 : ctx->io_units_remaining -= op_length;
3017 596 : ctx->io_unit_offset += op_length;
3018 596 : if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
3019 528 : ctx->curr_payload += op_length * blob->bs->io_unit_size;
3020 : }
3021 :
3022 596 : assert(!ctx->in_submit_ctx);
3023 596 : ctx->in_submit_ctx = true;
3024 :
3025 596 : switch (op_type) {
3026 418 : case SPDK_BLOB_READ:
3027 418 : spdk_blob_io_read(blob, ch, buf, offset, op_length,
3028 : blob_request_submit_op_split_next, ctx);
3029 418 : break;
3030 110 : case SPDK_BLOB_WRITE:
3031 110 : spdk_blob_io_write(blob, ch, buf, offset, op_length,
3032 : blob_request_submit_op_split_next, ctx);
3033 110 : break;
3034 36 : case SPDK_BLOB_UNMAP:
3035 36 : spdk_blob_io_unmap(blob, ch, offset, op_length,
3036 : blob_request_submit_op_split_next, ctx);
3037 36 : break;
3038 32 : case SPDK_BLOB_WRITE_ZEROES:
3039 32 : spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
3040 : blob_request_submit_op_split_next, ctx);
3041 32 : break;
3042 0 : case SPDK_BLOB_READV:
3043 : case SPDK_BLOB_WRITEV:
3044 0 : SPDK_ERRLOG("readv/writev not valid\n");
3045 0 : bs_sequence_finish(ctx->seq, -EINVAL);
3046 0 : free(ctx);
3047 0 : return;
3048 : }
3049 :
3050 : #ifndef __clang_analyzer__
3051 : /* scan-build reports a false positive around accessing the ctx here. It
3052 : * forms a path that recursively calls this function, but then says
3053 : * "assuming ctx->in_submit_ctx is false", when that isn't possible.
3054 : * This path does free(ctx), returns to here, and reports a use-after-free
3055 : * bug. Wrapping this bit of code so that scan-build doesn't see it
3056 : * works around the scan-build bug.
3057 : */
3058 596 : assert(ctx->in_submit_ctx);
3059 596 : ctx->in_submit_ctx = false;
3060 :
3061 : /* If the operation completed immediately, loop back and submit the
3062 : * next operation. Otherwise we can return and the next split
3063 : * operation will get submitted when this current operation is
3064 : * later completed asynchronously.
3065 : */
3066 596 : if (ctx->completed_in_submit_ctx) {
3067 68 : continue;
3068 528 : } else if (ctx->done) {
3069 40 : free(ctx);
3070 : }
3071 : #endif
3072 528 : break;
3073 : }
3074 : }
3075 :
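 :
 : /* The in_submit_ctx / completed_in_submit_ctx flags above form a trampoline:
 :  * when a sub-operation completes synchronously, its callback only sets a flag
 :  * and returns, and the submitting frame loops to the next chunk. This bounds
 :  * the stack depth at one level instead of recursing once per cluster. */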
3076 : static void
3077 178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
3078 : void *payload, uint64_t offset, uint64_t length,
3079 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3080 : {
3081 : struct op_split_ctx *ctx;
3082 : spdk_bs_sequence_t *seq;
3083 178 : struct spdk_bs_cpl cpl;
3084 :
3085 178 : assert(blob != NULL);
3086 :
3087 178 : ctx = calloc(1, sizeof(struct op_split_ctx));
3088 178 : if (ctx == NULL) {
3089 0 : cb_fn(cb_arg, -ENOMEM);
3090 0 : return;
3091 : }
3092 :
3093 178 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3094 178 : cpl.u.blob_basic.cb_fn = cb_fn;
3095 178 : cpl.u.blob_basic.cb_arg = cb_arg;
3096 :
3097 178 : seq = bs_sequence_start_blob(ch, &cpl, blob);
3098 178 : if (!seq) {
3099 0 : free(ctx);
3100 0 : cb_fn(cb_arg, -ENOMEM);
3101 0 : return;
3102 : }
3103 :
3104 178 : ctx->blob = blob;
3105 178 : ctx->channel = ch;
3106 178 : ctx->curr_payload = payload;
3107 178 : ctx->io_unit_offset = offset;
3108 178 : ctx->io_units_remaining = length;
3109 178 : ctx->op_type = op_type;
3110 178 : ctx->seq = seq;
3111 :
3112 178 : blob_request_submit_op_split_next(ctx, 0);
3113 : }
3114 :
3115 : static void
3116 60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
3117 : {
3118 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
3119 :
3120 60 : if (bserrno) {
3121 0 : bs_sequence_finish(ctx->seq, bserrno);
3122 0 : free(ctx);
3123 0 : return;
3124 : }
3125 :
3126 60 : blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
3127 : ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
3128 : }
3129 :
3130 : static void
3131 37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
3132 : void *payload, uint64_t offset, uint64_t length,
3133 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3134 : {
3135 37834 : struct spdk_bs_cpl cpl;
3136 37834 : uint64_t lba;
3137 37834 : uint64_t lba_count;
3138 : bool is_allocated;
3139 :
3140 37834 : assert(blob != NULL);
3141 :
3142 37834 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3143 37834 : cpl.u.blob_basic.cb_fn = cb_fn;
3144 37834 : cpl.u.blob_basic.cb_arg = cb_arg;
3145 :
3146 37834 : if (blob->frozen_refcnt) {
3147 : /* This blob I/O is frozen */
3148 : spdk_bs_user_op_t *op;
3149 4 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3150 :
3151 4 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3152 4 : if (!op) {
3153 0 : cb_fn(cb_arg, -ENOMEM);
3154 0 : return;
3155 : }
3156 :
3157 4 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3158 :
3159 4 : return;
3160 : }
3161 :
3162 37830 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3163 :
3164 37830 : switch (op_type) {
3165 16887 : case SPDK_BLOB_READ: {
3166 : spdk_bs_batch_t *batch;
3167 :
3168 16887 : batch = bs_batch_open(_ch, &cpl, blob);
3169 16887 : if (!batch) {
3170 0 : cb_fn(cb_arg, -ENOMEM);
3171 0 : return;
3172 : }
3173 :
3174 16887 : if (is_allocated) {
3175 : /* Read from the blob */
3176 15799 : bs_batch_read_dev(batch, payload, lba, lba_count);
3177 : } else {
3178 : /* Read from the backing block device */
3179 1088 : bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
3180 : }
3181 :
3182 16887 : bs_batch_close(batch);
3183 16887 : break;
3184 : }
3185 20851 : case SPDK_BLOB_WRITE:
3186 : case SPDK_BLOB_WRITE_ZEROES: {
3187 20851 : if (is_allocated) {
3188 : /* Write to the blob */
3189 : spdk_bs_batch_t *batch;
3190 :
3191 20507 : if (lba_count == 0) {
3192 0 : cb_fn(cb_arg, 0);
3193 0 : return;
3194 : }
3195 :
3196 20507 : batch = bs_batch_open(_ch, &cpl, blob);
3197 20507 : if (!batch) {
3198 0 : cb_fn(cb_arg, -ENOMEM);
3199 0 : return;
3200 : }
3201 :
3202 20507 : if (op_type == SPDK_BLOB_WRITE) {
3203 20475 : bs_batch_write_dev(batch, payload, lba, lba_count);
3204 : } else {
3205 32 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
3206 : }
3207 :
3208 20507 : bs_batch_close(batch);
3209 : } else {
3210 : /* Queue this operation and allocate the cluster */
3211 : spdk_bs_user_op_t *op;
3212 :
3213 344 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3214 344 : if (!op) {
3215 0 : cb_fn(cb_arg, -ENOMEM);
3216 0 : return;
3217 : }
3218 :
3219 344 : bs_allocate_and_copy_cluster(blob, _ch, offset, op);
3220 : }
3221 20851 : break;
3222 : }
3223 92 : case SPDK_BLOB_UNMAP: {
3224 92 : struct spdk_blob_free_cluster_ctx *ctx = NULL;
3225 : spdk_bs_batch_t *batch;
3226 :
3227 : /* If the unmap covers exactly one whole, aligned cluster, release the cluster */
3228 160 : if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
3229 68 : bs_io_units_per_cluster(blob) == length) {
3230 60 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3231 : uint32_t cluster_start_page;
3232 : uint32_t cluster_number;
3233 :
3234 60 : assert(offset % bs_io_units_per_cluster(blob) == 0);
3235 :
3236 : /* Round the io_unit offset down to the first page in the cluster */
3237 60 : cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
3238 :
3239 : /* Calculate which index in the metadata cluster array the corresponding
3240 : * cluster is supposed to be at. */
3241 60 : cluster_number = bs_io_unit_to_cluster_number(blob, offset);
3242 :
3243 60 : ctx = calloc(1, sizeof(*ctx));
3244 60 : if (!ctx) {
3245 0 : cb_fn(cb_arg, -ENOMEM);
3246 0 : return;
3247 : }
3248 : /* When freeing a cluster the flow should be (in order):
3249 : * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
3250 : * old data)
3251 : * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
3252 : * cluster), update and sync metadata freeing the cluster
3253 : * 3. Once metadata update is done, complete the user unmap request
3254 : */
3255 60 : ctx->blob = blob;
3256 60 : ctx->page = cluster_start_page;
3257 60 : ctx->cluster_num = cluster_number;
3258 60 : ctx->md_page = bs_channel->new_cluster_page;
3259 60 : ctx->seq = bs_sequence_start_bs(_ch, &cpl);
3260 60 : if (!ctx->seq) {
3261 0 : free(ctx);
3262 0 : cb_fn(cb_arg, -ENOMEM);
3263 0 : return;
3264 : }
3265 :
3266 60 : if (blob->use_extent_table) {
3267 30 : ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
3268 : }
3269 :
3270 60 : cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
3271 60 : cpl.u.blob_basic.cb_arg = ctx;
3272 : }
3273 :
3274 92 : batch = bs_batch_open(_ch, &cpl, blob);
3275 92 : if (!batch) {
3276 0 : free(ctx);
3277 0 : cb_fn(cb_arg, -ENOMEM);
3278 0 : return;
3279 : }
3280 :
3281 92 : if (is_allocated) {
3282 92 : bs_batch_unmap_dev(batch, lba, lba_count);
3283 : }
3284 :
3285 92 : bs_batch_close(batch);
3286 92 : break;
3287 : }
3288 0 : case SPDK_BLOB_READV:
3289 : case SPDK_BLOB_WRITEV:
3290 0 : SPDK_ERRLOG("readv/writev not valid\n");
3291 0 : cb_fn(cb_arg, -EINVAL);
3292 0 : break;
3293 : }
3294 : }
3295 :
3296 : static void
3297 38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3298 : void *payload, uint64_t offset, uint64_t length,
3299 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3300 : {
3301 38524 : assert(blob != NULL);
3302 :
3303 38524 : if (blob->data_ro && op_type != SPDK_BLOB_READ) {
3304 4 : cb_fn(cb_arg, -EPERM);
3305 4 : return;
3306 : }
3307 :
3308 38520 : if (length == 0) {
3309 492 : cb_fn(cb_arg, 0);
3310 492 : return;
3311 : }
3312 :
3313 38028 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3314 24 : cb_fn(cb_arg, -EINVAL);
3315 24 : return;
3316 : }
3317 38004 : if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
3318 37826 : blob_request_submit_op_single(_channel, blob, payload, offset, length,
3319 : cb_fn, cb_arg, op_type);
3320 : } else {
3321 178 : blob_request_submit_op_split(_channel, blob, payload, offset, length,
3322 : cb_fn, cb_arg, op_type);
3323 : }
3324 : }
3325 :
3326 : struct rw_iov_ctx {
3327 : struct spdk_blob *blob;
3328 : struct spdk_io_channel *channel;
3329 : spdk_blob_op_complete cb_fn;
3330 : void *cb_arg;
3331 : bool read;
3332 : int iovcnt;
3333 : struct iovec *orig_iov;
3334 : uint64_t io_unit_offset;
3335 : uint64_t io_units_remaining;
3336 : uint64_t io_units_done;
3337 : struct spdk_blob_ext_io_opts *ext_io_opts;
3338 : struct iovec iov[0];
3339 : };
3340 :
3341 : static void
3342 2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
3343 : {
3344 2360 : assert(cb_arg == NULL);
3345 2360 : bs_sequence_finish(seq, bserrno);
3346 2360 : }
3347 :
3348 : static void
3349 744 : rw_iov_split_next(void *cb_arg, int bserrno)
3350 : {
3351 744 : struct rw_iov_ctx *ctx = cb_arg;
3352 744 : struct spdk_blob *blob = ctx->blob;
3353 : struct iovec *iov, *orig_iov;
3354 : int iovcnt;
3355 : size_t orig_iovoff;
3356 : uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
3357 : uint64_t byte_count;
3358 :
3359 744 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
3360 204 : ctx->cb_fn(ctx->cb_arg, bserrno);
3361 204 : free(ctx);
3362 204 : return;
3363 : }
3364 :
3365 540 : io_unit_offset = ctx->io_unit_offset;
3366 540 : io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
3367 540 : io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
3368 : /*
3369 : * Get index and offset into the original iov array for our current position in the I/O sequence.
3370 : 	 * byte_count tracks how many bytes remain until orig_iov and orig_iovoff
3371 : 	 * point to the current position in the I/O sequence.
3372 : */
3373 540 : byte_count = ctx->io_units_done * blob->bs->io_unit_size;
3374 540 : orig_iov = &ctx->orig_iov[0];
3375 540 : orig_iovoff = 0;
3376 1148 : while (byte_count > 0) {
3377 608 : if (byte_count >= orig_iov->iov_len) {
3378 352 : byte_count -= orig_iov->iov_len;
3379 352 : orig_iov++;
3380 : } else {
3381 256 : orig_iovoff = byte_count;
3382 256 : byte_count = 0;
3383 : }
3384 : }
3385 :
3386 : /*
3387 : * Build an iov array for the next I/O in the sequence. byte_count will keep track of how many
3388 : * bytes of this next I/O remain to be accounted for in the new iov array.
3389 : */
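 : 	/*
 : 	 * Illustrative example (hypothetical values): with two 4 KiB buffers in
 : 	 * orig_iov, 5 KiB of the request already done, and a 2 KiB sub-I/O next,
 : 	 * the loop above leaves orig_iov at the second buffer with
 : 	 * orig_iovoff = 1 KiB, and the loop below emits a single 2 KiB iov
 : 	 * starting at that offset.
 : 	 */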
3390 540 : byte_count = io_units_count * blob->bs->io_unit_size;
3391 540 : iov = &ctx->iov[0];
3392 540 : iovcnt = 0;
3393 1380 : while (byte_count > 0) {
3394 840 : assert(iovcnt < ctx->iovcnt);
3395 840 : iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
3396 840 : iov->iov_base = orig_iov->iov_base + orig_iovoff;
3397 840 : byte_count -= iov->iov_len;
3398 840 : orig_iovoff = 0;
3399 840 : orig_iov++;
3400 840 : iov++;
3401 840 : iovcnt++;
3402 : }
3403 :
3404 540 : ctx->io_unit_offset += io_units_count;
3405 540 : ctx->io_units_remaining -= io_units_count;
3406 540 : ctx->io_units_done += io_units_count;
3407 540 : iov = &ctx->iov[0];
3408 :
3409 540 : if (ctx->read) {
3410 408 : spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3411 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3412 : } else {
3413 132 : spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3414 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3415 : }
3416 : }
3417 :
3418 : static void
3419 2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3420 : struct iovec *iov, int iovcnt,
3421 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
3422 : struct spdk_blob_ext_io_opts *ext_io_opts)
3423 : {
3424 2588 : struct spdk_bs_cpl cpl;
3425 :
3426 2588 : assert(blob != NULL);
3427 :
3428 2588 : if (!read && blob->data_ro) {
3429 4 : cb_fn(cb_arg, -EPERM);
3430 4 : return;
3431 : }
3432 :
3433 2584 : if (length == 0) {
3434 0 : cb_fn(cb_arg, 0);
3435 0 : return;
3436 : }
3437 :
3438 2584 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3439 0 : cb_fn(cb_arg, -EINVAL);
3440 0 : return;
3441 : }
3442 :
3443 : /*
3444 : * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
3445 : * to split a request that spans a cluster boundary. For I/O that do not span a cluster boundary,
3446 : * there will be no noticeable difference compared to using a batch. For I/O that do span a cluster
3447 : * boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
3448 : * to allocate a separate iov array and split the I/O such that none of the resulting
3449 : * smaller I/O cross a cluster boundary. These smaller I/O will be issued in sequence (not in parallel)
3450 : * but since this case happens very infrequently, any performance impact will be negligible.
3451 : *
3452 : * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
3453 : * for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
3454 : * in a batch. That would also require creating an intermediate spdk_bs_cpl that would get called
3455 : * when the batch was completed, to allow for freeing the memory for the iov arrays.
3456 : */
3457 2584 : if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
3458 2376 : uint64_t lba_count;
3459 2376 : uint64_t lba;
3460 : bool is_allocated;
3461 :
3462 2376 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3463 2376 : cpl.u.blob_basic.cb_fn = cb_fn;
3464 2376 : cpl.u.blob_basic.cb_arg = cb_arg;
3465 :
3466 2376 : if (blob->frozen_refcnt) {
3467 : /* This blob I/O is frozen */
3468 : enum spdk_blob_op_type op_type;
3469 : spdk_bs_user_op_t *op;
3470 0 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
3471 :
3472 0 : op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
3473 0 : op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
3474 0 : if (!op) {
3475 0 : cb_fn(cb_arg, -ENOMEM);
3476 0 : return;
3477 : }
3478 :
3479 0 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3480 :
3481 0 : return;
3482 : }
3483 :
3484 2376 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3485 :
3486 2376 : if (read) {
3487 : spdk_bs_sequence_t *seq;
3488 :
3489 2084 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3490 2084 : if (!seq) {
3491 0 : cb_fn(cb_arg, -ENOMEM);
3492 0 : return;
3493 : }
3494 :
3495 2084 : seq->ext_io_opts = ext_io_opts;
3496 :
3497 2084 : if (is_allocated) {
3498 540 : bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3499 : } else {
3500 1544 : bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
3501 : rw_iov_done, NULL);
3502 : }
3503 : } else {
3504 292 : if (is_allocated) {
3505 : spdk_bs_sequence_t *seq;
3506 :
3507 276 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3508 276 : if (!seq) {
3509 0 : cb_fn(cb_arg, -ENOMEM);
3510 0 : return;
3511 : }
3512 :
3513 276 : seq->ext_io_opts = ext_io_opts;
3514 :
3515 276 : bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3516 : } else {
3517 : /* Queue this operation and allocate the cluster */
3518 : spdk_bs_user_op_t *op;
3519 :
3520 16 : op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
3521 : length);
3522 16 : if (!op) {
3523 0 : cb_fn(cb_arg, -ENOMEM);
3524 0 : return;
3525 : }
3526 :
3527 16 : op->ext_io_opts = ext_io_opts;
3528 :
3529 16 : bs_allocate_and_copy_cluster(blob, _channel, offset, op);
3530 : }
3531 : }
3532 : } else {
3533 : struct rw_iov_ctx *ctx;
3534 :
3535 208 : ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
3536 208 : if (ctx == NULL) {
3537 4 : cb_fn(cb_arg, -ENOMEM);
3538 4 : return;
3539 : }
3540 :
3541 204 : ctx->blob = blob;
3542 204 : ctx->channel = _channel;
3543 204 : ctx->cb_fn = cb_fn;
3544 204 : ctx->cb_arg = cb_arg;
3545 204 : ctx->read = read;
3546 204 : ctx->orig_iov = iov;
3547 204 : ctx->iovcnt = iovcnt;
3548 204 : ctx->io_unit_offset = offset;
3549 204 : ctx->io_units_remaining = length;
3550 204 : ctx->io_units_done = 0;
3551 204 : ctx->ext_io_opts = ext_io_opts;
3552 :
3553 204 : rw_iov_split_next(ctx, 0);
3554 : }
3555 : }
3556 :
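 : /*
 :  * Look up an open blob by id. The open_blobids bit array acts as a fast
 :  * negative check, so the common "blob not open" case avoids an RB-tree
 :  * search.
 :  */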
3557 : static struct spdk_blob *
3558 7733 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
3559 : {
3560 7733 : struct spdk_blob find;
3561 :
3562 7733 : if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
3563 6948 : return NULL;
3564 : }
3565 :
3566 785 : find.id = blobid;
3567 785 : return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
3568 : }
3569 :
3570 : static void
3571 1810 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
3572 : struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
3573 : {
3574 1810 : assert(blob != NULL);
3575 1810 : *snapshot_entry = NULL;
3576 1810 : *clone_entry = NULL;
3577 :
3578 1810 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
3579 1518 : return;
3580 : }
3581 :
3582 440 : TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
3583 380 : if ((*snapshot_entry)->id == blob->parent_id) {
3584 232 : break;
3585 : }
3586 : }
3587 :
3588 292 : if (*snapshot_entry != NULL) {
3589 276 : TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
3590 276 : if ((*clone_entry)->id == blob->id) {
3591 232 : break;
3592 : }
3593 : }
3594 :
3595 232 : assert(*clone_entry != NULL);
3596 : }
3597 : }
3598 :
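 : /*
 :  * Per-thread channel constructor: preallocate max_channel_ops request sets,
 :  * create the underlying device channel, and allocate a DMA-able page used
 :  * for metadata updates when clusters are allocated or freed.
 :  */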
3599 : static int
3600 796 : bs_channel_create(void *io_device, void *ctx_buf)
3601 : {
3602 796 : struct spdk_blob_store *bs = io_device;
3603 796 : struct spdk_bs_channel *channel = ctx_buf;
3604 : struct spdk_bs_dev *dev;
3605 796 : uint32_t max_ops = bs->max_channel_ops;
3606 : uint32_t i;
3607 :
3608 796 : dev = bs->dev;
3609 :
3610 796 : channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
3611 796 : if (!channel->req_mem) {
3612 0 : return -1;
3613 : }
3614 :
3615 796 : TAILQ_INIT(&channel->reqs);
3616 :
3617 408348 : for (i = 0; i < max_ops; i++) {
3618 407552 : TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
3619 : }
3620 :
3621 796 : channel->bs = bs;
3622 796 : channel->dev = dev;
3623 796 : channel->dev_channel = dev->create_channel(dev);
3624 :
3625 796 : if (!channel->dev_channel) {
3626 0 : SPDK_ERRLOG("Failed to create device channel.\n");
3627 0 : free(channel->req_mem);
3628 0 : return -1;
3629 : }
3630 :
3631 796 : channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
3632 : SPDK_MALLOC_DMA);
3633 796 : if (!channel->new_cluster_page) {
3634 0 : SPDK_ERRLOG("Failed to allocate new cluster page\n");
3635 0 : free(channel->req_mem);
3636 0 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3637 0 : return -1;
3638 : }
3639 :
3640 796 : TAILQ_INIT(&channel->need_cluster_alloc);
3641 796 : TAILQ_INIT(&channel->queued_io);
3642 796 : RB_INIT(&channel->esnap_channels);
3643 :
3644 796 : return 0;
3645 : }
3646 :
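 : /*
 :  * Per-thread channel destructor: abort any user ops still queued for
 :  * cluster allocation or frozen I/O, then tear down the esnap channels and
 :  * the device channel.
 :  */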
3647 : static void
3648 796 : bs_channel_destroy(void *io_device, void *ctx_buf)
3649 : {
3650 796 : struct spdk_bs_channel *channel = ctx_buf;
3651 : spdk_bs_user_op_t *op;
3652 :
3653 796 : while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
3654 0 : op = TAILQ_FIRST(&channel->need_cluster_alloc);
3655 0 : TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
3656 0 : bs_user_op_abort(op, -EIO);
3657 : }
3658 :
3659 796 : while (!TAILQ_EMPTY(&channel->queued_io)) {
3660 0 : op = TAILQ_FIRST(&channel->queued_io);
3661 0 : TAILQ_REMOVE(&channel->queued_io, op, link);
3662 0 : bs_user_op_abort(op, -EIO);
3663 : }
3664 :
3665 796 : blob_esnap_destroy_bs_channel(channel);
3666 :
3667 796 : free(channel->req_mem);
3668 796 : spdk_free(channel->new_cluster_page);
3669 796 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3670 796 : }
3671 :
3672 : static void
3673 780 : bs_dev_destroy(void *io_device)
3674 : {
3675 780 : struct spdk_blob_store *bs = io_device;
3676 : struct spdk_blob *blob, *blob_tmp;
3677 :
3678 780 : bs->dev->destroy(bs->dev);
3679 :
3680 780 : RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
3681 0 : RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
3682 0 : spdk_bit_array_clear(bs->open_blobids, blob->id);
3683 0 : blob_free(blob);
3684 : }
3685 :
3686 780 : spdk_spin_destroy(&bs->used_lock);
3687 :
3688 780 : spdk_bit_array_free(&bs->open_blobids);
3689 780 : spdk_bit_array_free(&bs->used_blobids);
3690 780 : spdk_bit_array_free(&bs->used_md_pages);
3691 780 : spdk_bit_pool_free(&bs->used_clusters);
3692 : /*
3693 : * If this function is called for any reason except a successful unload,
3694 : * the unload_cpl type will be NONE and this will be a nop.
3695 : */
3696 780 : bs_call_cpl(&bs->unload_cpl, bs->unload_err);
3697 :
3698 780 : free(bs);
3699 780 : }
3700 :
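 : /*
 :  * Register a blob in its parent snapshot's clone list, creating the
 :  * snapshot entry on first use. Blobs without a parent, or backed by an
 :  * external snapshot, are not tracked here.
 :  */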
3701 : static int
3702 908 : bs_blob_list_add(struct spdk_blob *blob)
3703 : {
3704 : spdk_blob_id snapshot_id;
3705 908 : struct spdk_blob_list *snapshot_entry = NULL;
3706 908 : struct spdk_blob_list *clone_entry = NULL;
3707 :
3708 908 : assert(blob != NULL);
3709 :
3710 908 : snapshot_id = blob->parent_id;
3711 908 : if (snapshot_id == SPDK_BLOBID_INVALID ||
3712 : snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
3713 492 : return 0;
3714 : }
3715 :
3716 416 : snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
3717 416 : if (snapshot_entry == NULL) {
3718 : /* Snapshot not found */
3719 288 : snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
3720 288 : if (snapshot_entry == NULL) {
3721 0 : return -ENOMEM;
3722 : }
3723 288 : snapshot_entry->id = snapshot_id;
3724 288 : TAILQ_INIT(&snapshot_entry->clones);
3725 288 : TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
3726 : } else {
3727 204 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
3728 76 : if (clone_entry->id == blob->id) {
3729 0 : break;
3730 : }
3731 : }
3732 : }
3733 :
3734 416 : if (clone_entry == NULL) {
3735 : /* Clone not found */
3736 416 : clone_entry = calloc(1, sizeof(struct spdk_blob_list));
3737 416 : if (clone_entry == NULL) {
3738 0 : return -ENOMEM;
3739 : }
3740 416 : clone_entry->id = blob->id;
3741 416 : TAILQ_INIT(&clone_entry->clones);
3742 416 : TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
3743 416 : snapshot_entry->clone_count++;
3744 : }
3745 :
3746 416 : return 0;
3747 : }
3748 :
3749 : static void
3750 1732 : bs_blob_list_remove(struct spdk_blob *blob)
3751 : {
3752 1732 : struct spdk_blob_list *snapshot_entry = NULL;
3753 1732 : struct spdk_blob_list *clone_entry = NULL;
3754 :
3755 1732 : blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
3756 :
3757 1732 : if (snapshot_entry == NULL) {
3758 1516 : return;
3759 : }
3760 :
3761 216 : blob->parent_id = SPDK_BLOBID_INVALID;
3762 216 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3763 216 : free(clone_entry);
3764 :
3765 216 : snapshot_entry->clone_count--;
3766 : }
3767 :
3768 : static int
3769 780 : bs_blob_list_free(struct spdk_blob_store *bs)
3770 : {
3771 : struct spdk_blob_list *snapshot_entry;
3772 : struct spdk_blob_list *snapshot_entry_tmp;
3773 : struct spdk_blob_list *clone_entry;
3774 : struct spdk_blob_list *clone_entry_tmp;
3775 :
3776 924 : TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
3777 296 : TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
3778 152 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3779 152 : free(clone_entry);
3780 : }
3781 144 : TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
3782 144 : free(snapshot_entry);
3783 : }
3784 :
3785 780 : return 0;
3786 : }
3787 :
3788 : static void
3789 780 : bs_free(struct spdk_blob_store *bs)
3790 : {
3791 780 : bs_blob_list_free(bs);
3792 :
3793 780 : bs_unregister_md_thread(bs);
3794 780 : spdk_io_device_unregister(bs, bs_dev_destroy);
3795 780 : }
3796 :
3797 : void
3798 1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
3799 : {
3800 :
3801 1048 : if (!opts) {
3802 0 : SPDK_ERRLOG("opts should not be NULL\n");
3803 0 : return;
3804 : }
3805 :
3806 1048 : if (!opts_size) {
3807 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
3808 0 : return;
3809 : }
3810 :
3811 1048 : memset(opts, 0, opts_size);
3812 1048 : opts->opts_size = opts_size;
3813 :
3814 : #define FIELD_OK(field) \
3815 : offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
3816 :
3817 : #define SET_FIELD(field, value) \
3818 : if (FIELD_OK(field)) { \
3819 : opts->field = value; \
3820 : } \
3821 :
3822 1048 : SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
3823 1048 : SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3824 1048 : SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3825 1048 : SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
3826 1048 : SET_FIELD(clear_method, BS_CLEAR_WITH_UNMAP);
3827 :
3828 1048 : if (FIELD_OK(bstype)) {
3829 1048 : memset(&opts->bstype, 0, sizeof(opts->bstype));
3830 : }
3831 :
3832 1048 : SET_FIELD(iter_cb_fn, NULL);
3833 1048 : SET_FIELD(iter_cb_arg, NULL);
3834 1048 : SET_FIELD(force_recover, false);
3835 1048 : SET_FIELD(esnap_bs_dev_create, NULL);
3836 1048 : SET_FIELD(esnap_ctx, NULL);
3837 :
3838 : #undef FIELD_OK
3839 : #undef SET_FIELD
3840 : }
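 :
 : /*
 :  * Typical usage (a minimal sketch; bs_dev and bs_init_done_cb are
 :  * application-provided and shown here only as placeholders):
 :  *
 :  *	struct spdk_bs_opts opts;
 :  *
 :  *	spdk_bs_opts_init(&opts, sizeof(opts));
 :  *	opts.cluster_sz = 4 * 1024 * 1024;
 :  *	spdk_bs_init(bs_dev, &opts, bs_init_done_cb, cb_arg);
 :  */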
3841 :
3842 : static int
3843 484 : bs_opts_verify(struct spdk_bs_opts *opts)
3844 : {
3845 484 : if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
3846 480 : opts->max_channel_ops == 0) {
3847 4 : SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
3848 4 : return -1;
3849 : }
3850 :
3851 480 : return 0;
3852 : }
3853 :
3854 : /* START spdk_bs_load */
3855 :
3856 : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
3857 :
3858 : struct spdk_bs_load_ctx {
3859 : struct spdk_blob_store *bs;
3860 : struct spdk_bs_super_block *super;
3861 :
3862 : struct spdk_bs_md_mask *mask;
3863 : bool in_page_chain;
3864 : uint32_t page_index;
3865 : uint32_t cur_page;
3866 : struct spdk_blob_md_page *page;
3867 :
3868 : uint64_t num_extent_pages;
3869 : uint32_t *extent_page_num;
3870 : struct spdk_blob_md_page *extent_pages;
3871 : struct spdk_bit_array *used_clusters;
3872 :
3873 : spdk_bs_sequence_t *seq;
3874 : spdk_blob_op_with_handle_complete iter_cb_fn;
3875 : void *iter_cb_arg;
3876 : struct spdk_blob *blob;
3877 : spdk_blob_id blobid;
3878 :
3879 : bool force_recover;
3880 :
3881 : /* These fields are used in the spdk_bs_dump path. */
3882 : bool dumping;
3883 : FILE *fp;
3884 : spdk_bs_dump_print_xattr print_xattr_fn;
3885 : char xattr_name[4096];
3886 : };
3887 :
3888 : static int
3889 784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
3890 : struct spdk_bs_load_ctx **_ctx)
3891 : {
3892 : struct spdk_blob_store *bs;
3893 : struct spdk_bs_load_ctx *ctx;
3894 : uint64_t dev_size;
3895 : int rc;
3896 :
3897 784 : dev_size = dev->blocklen * dev->blockcnt;
3898 784 : if (dev_size < opts->cluster_sz) {
3899 : /* Device size cannot be smaller than cluster size of blobstore */
3900 0 : SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
3901 : dev_size, opts->cluster_sz);
3902 0 : return -ENOSPC;
3903 : }
3904 784 : if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
3905 : /* Cluster size cannot be smaller than page size */
3906 4 : SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
3907 : opts->cluster_sz, SPDK_BS_PAGE_SIZE);
3908 4 : return -EINVAL;
3909 : }
3910 780 : bs = calloc(1, sizeof(struct spdk_blob_store));
3911 780 : if (!bs) {
3912 0 : return -ENOMEM;
3913 : }
3914 :
3915 780 : ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
3916 780 : if (!ctx) {
3917 0 : free(bs);
3918 0 : return -ENOMEM;
3919 : }
3920 :
3921 780 : ctx->bs = bs;
3922 780 : ctx->iter_cb_fn = opts->iter_cb_fn;
3923 780 : ctx->iter_cb_arg = opts->iter_cb_arg;
3924 780 : ctx->force_recover = opts->force_recover;
3925 :
3926 780 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
3927 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
3928 780 : if (!ctx->super) {
3929 0 : free(ctx);
3930 0 : free(bs);
3931 0 : return -ENOMEM;
3932 : }
3933 :
3934 780 : RB_INIT(&bs->open_blobs);
3935 780 : TAILQ_INIT(&bs->snapshots);
3936 780 : bs->dev = dev;
3937 780 : bs->md_thread = spdk_get_thread();
3938 780 : assert(bs->md_thread != NULL);
3939 :
3940 : /*
3941 : * Do not use bs_lba_to_cluster() here since blockcnt may not be an
3942 : * even multiple of the cluster size.
3943 : */
3944 780 : bs->cluster_sz = opts->cluster_sz;
3945 780 : bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
3946 780 : ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
3947 780 : if (!ctx->used_clusters) {
3948 0 : spdk_free(ctx->super);
3949 0 : free(ctx);
3950 0 : free(bs);
3951 0 : return -ENOMEM;
3952 : }
3953 :
3954 780 : bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
3955 780 : if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
3956 780 : bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
3957 : }
3958 780 : bs->num_free_clusters = bs->total_clusters;
3959 780 : bs->io_unit_size = dev->blocklen;
3960 :
3961 780 : bs->max_channel_ops = opts->max_channel_ops;
3962 780 : bs->super_blob = SPDK_BLOBID_INVALID;
3963 780 : memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
3964 780 : bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
3965 780 : bs->esnap_ctx = opts->esnap_ctx;
3966 :
3967 : /* The metadata is assumed to be at least 1 page */
3968 780 : bs->used_md_pages = spdk_bit_array_create(1);
3969 780 : bs->used_blobids = spdk_bit_array_create(0);
3970 780 : bs->open_blobids = spdk_bit_array_create(0);
3971 :
3972 780 : spdk_spin_init(&bs->used_lock);
3973 :
3974 780 : spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
3975 : sizeof(struct spdk_bs_channel), "blobstore");
3976 780 : rc = bs_register_md_thread(bs);
3977 780 : if (rc == -1) {
3978 0 : spdk_io_device_unregister(bs, NULL);
3979 0 : spdk_spin_destroy(&bs->used_lock);
3980 0 : spdk_bit_array_free(&bs->open_blobids);
3981 0 : spdk_bit_array_free(&bs->used_blobids);
3982 0 : spdk_bit_array_free(&bs->used_md_pages);
3983 0 : spdk_bit_array_free(&ctx->used_clusters);
3984 0 : spdk_free(ctx->super);
3985 0 : free(ctx);
3986 0 : free(bs);
3987 : 		/* FIXME: -ENOMEM is not necessarily accurate here, but we don't know how to get a proper error code */
3988 0 : return -ENOMEM;
3989 : }
3990 :
3991 780 : *_ctx = ctx;
3992 780 : *_bs = bs;
3993 780 : return 0;
3994 : }
3995 :
3996 : static void
3997 24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
3998 : {
3999 24 : assert(bserrno != 0);
4000 :
4001 24 : spdk_free(ctx->super);
4002 24 : bs_sequence_finish(ctx->seq, bserrno);
4003 24 : bs_free(ctx->bs);
4004 24 : spdk_bit_array_free(&ctx->used_clusters);
4005 24 : free(ctx);
4006 24 : }
4007 :
4008 : static void
4009 824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
4010 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
4011 : {
4012 : /* Update the values in the super block */
4013 824 : super->super_blob = bs->super_blob;
4014 824 : memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
4015 824 : super->crc = blob_md_page_calc_crc(super);
4016 824 : bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
4017 824 : bs_byte_to_lba(bs, sizeof(*super)),
4018 : cb_fn, cb_arg);
4019 824 : }
4020 :
4021 : static void
4022 760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4023 : {
4024 760 : struct spdk_bs_load_ctx *ctx = arg;
4025 : uint64_t mask_size, lba, lba_count;
4026 :
4027 : /* Write out the used clusters mask */
4028 760 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4029 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4030 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4031 760 : if (!ctx->mask) {
4032 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4033 0 : return;
4034 : }
4035 :
4036 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
4037 760 : ctx->mask->length = ctx->bs->total_clusters;
4038 : /* We could get here through the normal unload path, or through dirty
4039 : * shutdown recovery. For the normal unload path, we use the mask from
4040 : * the bit pool. For dirty shutdown recovery, we don't have a bit pool yet -
4041 : * only the bit array from the load ctx.
4042 : */
4043 760 : if (ctx->bs->used_clusters) {
4044 654 : assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
4045 654 : spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
4046 : } else {
4047 106 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
4048 106 : spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
4049 : }
4050 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4051 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4052 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4053 : }
4054 :
4055 : static void
4056 760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4057 : {
4058 760 : struct spdk_bs_load_ctx *ctx = arg;
4059 : uint64_t mask_size, lba, lba_count;
4060 :
4061 760 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4062 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4063 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4064 760 : if (!ctx->mask) {
4065 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4066 0 : return;
4067 : }
4068 :
4069 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
4070 760 : ctx->mask->length = ctx->super->md_len;
4071 760 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
4072 :
4073 760 : spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4074 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4075 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4076 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4077 : }
4078 :
4079 : static void
4080 760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4081 : {
4082 760 : struct spdk_bs_load_ctx *ctx = arg;
4083 : uint64_t mask_size, lba, lba_count;
4084 :
4085 760 : if (ctx->super->used_blobid_mask_len == 0) {
4086 : /*
4087 : * This is a pre-v3 on-disk format where the blobid mask does not get
4088 : * written to disk.
4089 : */
4090 24 : cb_fn(seq, arg, 0);
4091 24 : return;
4092 : }
4093 :
4094 736 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4095 736 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4096 : SPDK_MALLOC_DMA);
4097 736 : if (!ctx->mask) {
4098 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4099 0 : return;
4100 : }
4101 :
4102 736 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
4103 736 : ctx->mask->length = ctx->super->md_len;
4104 736 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
4105 :
4106 736 : spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
4107 736 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4108 736 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4109 736 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4110 : }
4111 :
4112 : static void
4113 704 : blob_set_thin_provision(struct spdk_blob *blob)
4114 : {
4115 704 : blob_verify_md_op(blob);
4116 704 : blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
4117 704 : blob->state = SPDK_BLOB_STATE_DIRTY;
4118 704 : }
4119 :
4120 : static void
4121 2094 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
4122 : {
4123 2094 : blob_verify_md_op(blob);
4124 2094 : blob->clear_method = clear_method;
4125 2094 : blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
4126 2094 : blob->state = SPDK_BLOB_STATE_DIRTY;
4127 2094 : }
4128 :
4129 : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
4130 :
4131 : static void
4132 24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
4133 : {
4134 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4135 : spdk_blob_id id;
4136 : int64_t page_num;
4137 :
4138 : 	/* Iterate to the next blob (we can't use the spdk_bs_iter_next function, as our
4139 : 	 * last blob has been removed) */
4140 24 : page_num = bs_blobid_to_page(ctx->blobid);
4141 24 : page_num++;
4142 24 : page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
4143 24 : if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
4144 24 : bs_load_iter(ctx, NULL, -ENOENT);
4145 24 : return;
4146 : }
4147 :
4148 0 : id = bs_page_to_blobid(page_num);
4149 :
4150 0 : spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
4151 : }
4152 :
4153 : static void
4154 24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
4155 : {
4156 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4157 :
4158 24 : if (bserrno != 0) {
4159 0 : SPDK_ERRLOG("Failed to close corrupted blob\n");
4160 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4161 0 : return;
4162 : }
4163 :
4164 24 : spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
4165 : }
4166 :
4167 : static void
4168 24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
4169 : {
4170 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4171 : uint64_t i;
4172 :
4173 24 : if (bserrno != 0) {
4174 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4175 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4176 0 : return;
4177 : }
4178 :
4179 : 	/* Snapshot and clone have the same copy of the cluster map and extent pages
4180 : 	 * at this point. Clear both for the snapshot now, so that they won't be
4181 : 	 * cleared for the clone later when we remove the snapshot. Also set thin
4182 : 	 * provisioning so the blob passes the data corruption check */
4183 264 : for (i = 0; i < ctx->blob->active.num_clusters; i++) {
4184 240 : ctx->blob->active.clusters[i] = 0;
4185 : }
4186 36 : for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
4187 12 : ctx->blob->active.extent_pages[i] = 0;
4188 : }
4189 :
4190 24 : ctx->blob->active.num_allocated_clusters = 0;
4191 :
4192 24 : ctx->blob->md_ro = false;
4193 :
4194 24 : blob_set_thin_provision(ctx->blob);
4195 :
4196 24 : ctx->blobid = ctx->blob->id;
4197 :
4198 24 : spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
4199 : }
4200 :
4201 : static void
4202 12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
4203 : {
4204 12 : struct spdk_bs_load_ctx *ctx = cb_arg;
4205 :
4206 12 : if (bserrno != 0) {
4207 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4208 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4209 0 : return;
4210 : }
4211 :
4212 12 : ctx->blob->md_ro = false;
4213 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
4214 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
4215 12 : spdk_blob_set_read_only(ctx->blob);
4216 :
4217 12 : if (ctx->iter_cb_fn) {
4218 0 : ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
4219 : }
4220 12 : bs_blob_list_add(ctx->blob);
4221 :
4222 12 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4223 : }
4224 :
4225 : static void
4226 36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
4227 : {
4228 36 : struct spdk_bs_load_ctx *ctx = cb_arg;
4229 :
4230 36 : if (bserrno != 0) {
4231 0 : SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
4232 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4233 0 : return;
4234 : }
4235 :
4236 36 : if (blob->parent_id == ctx->blob->id) {
4237 : /* Power failure occurred before updating clone (snapshot delete case)
4238 : * or after updating clone (creating snapshot case) - keep snapshot */
4239 12 : spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
4240 : } else {
4241 : /* Power failure occurred after updating clone (snapshot delete case)
4242 : * or before updating clone (creating snapshot case) - remove snapshot */
4243 24 : spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
4244 : }
4245 : }
4246 :
4247 : static void
4248 720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
4249 : {
4250 720 : struct spdk_bs_load_ctx *ctx = arg;
4251 720 : const void *value;
4252 720 : size_t len;
4253 720 : int rc = 0;
4254 :
4255 720 : if (bserrno == 0) {
4256 : /* Examine blob if it is corrupted after power failure. Fix
4257 : * the ones that can be fixed and remove any other corrupted
4258 : * ones. If it is not corrupted just process it */
4259 440 : rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
4260 440 : if (rc != 0) {
4261 420 : rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
4262 420 : if (rc != 0) {
4263 : /* Not corrupted - process it and continue with iterating through blobs */
4264 404 : if (ctx->iter_cb_fn) {
4265 34 : ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
4266 : }
4267 404 : bs_blob_list_add(blob);
4268 404 : spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
4269 404 : return;
4270 : }
4271 :
4272 : }
4273 :
4274 36 : assert(len == sizeof(spdk_blob_id));
4275 :
4276 36 : ctx->blob = blob;
4277 :
4278 : /* Open clone to check if we are able to fix this blob or should we remove it */
4279 36 : spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
4280 36 : return;
4281 280 : } else if (bserrno == -ENOENT) {
4282 280 : bserrno = 0;
4283 : } else {
4284 : /*
4285 : * This case needs to be looked at further. Same problem
4286 : * exists with applications that rely on explicit blob
4287 : * iteration. We should just skip the blob that failed
4288 : * to load and continue on to the next one.
4289 : */
4290 0 : SPDK_ERRLOG("Error in iterating blobs\n");
4291 : }
4292 :
4293 280 : ctx->iter_cb_fn = NULL;
4294 :
4295 280 : spdk_free(ctx->super);
4296 280 : spdk_free(ctx->mask);
4297 280 : bs_sequence_finish(ctx->seq, bserrno);
4298 280 : free(ctx);
4299 : }
4300 :
4301 : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
4302 :
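 : /*
 :  * Load finished: convert the used-cluster bit array into the runtime bit
 :  * pool, then either dump metadata pages (spdk_bs_dump path) or start
 :  * iterating blobs.
 :  */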
4303 : static void
4304 280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
4305 : {
4306 280 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
4307 280 : if (ctx->dumping) {
4308 0 : bs_dump_read_md_page(ctx->seq, ctx);
4309 0 : return;
4310 : }
4311 280 : spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
4312 : }
4313 :
4314 : static void
4315 174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4316 : {
4317 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4318 : int rc;
4319 :
4320 : /* The type must be correct */
4321 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
4322 :
4323 : /* The length of the mask (in bits) must not be greater than
4324 : * the length of the buffer (converted to bits) */
4325 174 : assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
4326 :
4327 : /* The length of the mask must be exactly equal to the size
4328 : * (in pages) of the metadata region */
4329 174 : assert(ctx->mask->length == ctx->super->md_len);
4330 :
4331 174 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
4332 174 : if (rc < 0) {
4333 0 : spdk_free(ctx->mask);
4334 0 : bs_load_ctx_fail(ctx, rc);
4335 0 : return;
4336 : }
4337 :
4338 174 : spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
4339 174 : bs_load_complete(ctx);
4340 : }
4341 :
4342 : static void
4343 174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4344 : {
4345 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4346 : uint64_t lba, lba_count, mask_size;
4347 : int rc;
4348 :
4349 174 : if (bserrno != 0) {
4350 0 : bs_load_ctx_fail(ctx, bserrno);
4351 0 : return;
4352 : }
4353 :
4354 : /* The type must be correct */
4355 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
4356 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4357 174 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
4358 : struct spdk_blob_md_page) * 8));
4359 : /*
4360 : * The length of the mask must be equal to or larger than the total number of clusters. It may be
4361 : 	 * larger than the total number of clusters due to a failed spdk_bs_grow.
4362 : */
4363 174 : assert(ctx->mask->length >= ctx->bs->total_clusters);
4364 174 : if (ctx->mask->length > ctx->bs->total_clusters) {
4365 4 : 		SPDK_WARNLOG("Shrinking the used_clusters mask length to total_clusters\n");
4366 4 : ctx->mask->length = ctx->bs->total_clusters;
4367 : }
4368 :
4369 174 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
4370 174 : if (rc < 0) {
4371 0 : spdk_free(ctx->mask);
4372 0 : bs_load_ctx_fail(ctx, rc);
4373 0 : return;
4374 : }
4375 :
4376 174 : spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
4377 174 : ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
4378 174 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
4379 :
4380 174 : spdk_free(ctx->mask);
4381 :
4382 : /* Read the used blobids mask */
4383 174 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4384 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4385 : SPDK_MALLOC_DMA);
4386 174 : if (!ctx->mask) {
4387 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4388 0 : return;
4389 : }
4390 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4391 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4392 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4393 : bs_load_used_blobids_cpl, ctx);
4394 : }
4395 :
4396 : static void
4397 174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4398 : {
4399 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4400 : uint64_t lba, lba_count, mask_size;
4401 : int rc;
4402 :
4403 174 : if (bserrno != 0) {
4404 0 : bs_load_ctx_fail(ctx, bserrno);
4405 0 : return;
4406 : }
4407 :
4408 : /* The type must be correct */
4409 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
4410 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4411 174 : assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
4412 : 8));
4413 : /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
4414 174 : if (ctx->mask->length != ctx->super->md_len) {
4415 0 : SPDK_ERRLOG("mismatched md_len in used_pages mask: "
4416 : "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
4417 : ctx->mask->length, ctx->super->md_len);
4418 0 : assert(false);
4419 : }
4420 :
4421 174 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
4422 174 : if (rc < 0) {
4423 0 : spdk_free(ctx->mask);
4424 0 : bs_load_ctx_fail(ctx, rc);
4425 0 : return;
4426 : }
4427 :
4428 174 : spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4429 174 : spdk_free(ctx->mask);
4430 :
4431 : /* Read the used clusters mask */
4432 174 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4433 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4434 : SPDK_MALLOC_DMA);
4435 174 : if (!ctx->mask) {
4436 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4437 0 : return;
4438 : }
4439 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4440 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4441 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4442 : bs_load_used_clusters_cpl, ctx);
4443 : }
4444 :
4445 : static void
4446 174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
4447 : {
4448 : uint64_t lba, lba_count, mask_size;
4449 :
4450 : /* Read the used pages mask */
4451 174 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4452 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4453 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4454 174 : if (!ctx->mask) {
4455 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4456 0 : return;
4457 : }
4458 :
4459 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4460 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4461 174 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
4462 : bs_load_used_pages_cpl, ctx);
4463 : }
4464 :
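 : /*
 :  * Recovery replay: walk the descriptors of one metadata page and rebuild
 :  * the in-memory allocation state. Clusters referenced by EXTENT_RLE and
 :  * EXTENT_PAGE descriptors are marked used; EXTENT_TABLE descriptors yield
 :  * the md page numbers of allocated extent pages, collected for a later read.
 :  */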
4465 : static int
4466 246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
4467 : {
4468 246 : struct spdk_blob_store *bs = ctx->bs;
4469 : struct spdk_blob_md_descriptor *desc;
4470 246 : size_t cur_desc = 0;
4471 :
4472 246 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4473 718 : while (cur_desc < sizeof(page->descriptors)) {
4474 718 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
4475 226 : if (desc->length == 0) {
4476 : /* If padding and length are 0, this terminates the page */
4477 226 : break;
4478 : }
4479 492 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
4480 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
4481 : unsigned int i, j;
4482 68 : unsigned int cluster_count = 0;
4483 : uint32_t cluster_idx;
4484 :
4485 68 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
4486 :
4487 136 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
4488 828 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
4489 760 : cluster_idx = desc_extent_rle->extents[i].cluster_idx;
4490 : /*
4491 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4492 : * in the used cluster map.
4493 : */
4494 760 : if (cluster_idx != 0) {
4495 540 : SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
4496 540 : spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
4497 540 : if (bs->num_free_clusters == 0) {
4498 0 : return -ENOSPC;
4499 : }
4500 540 : bs->num_free_clusters--;
4501 : }
4502 760 : cluster_count++;
4503 : }
4504 : }
4505 68 : if (cluster_count == 0) {
4506 0 : return -EINVAL;
4507 : }
4508 424 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4509 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
4510 : uint32_t i;
4511 52 : uint32_t cluster_count = 0;
4512 : uint32_t cluster_idx;
4513 : size_t cluster_idx_length;
4514 :
4515 52 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
4516 52 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
4517 :
4518 52 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
4519 52 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
4520 0 : return -EINVAL;
4521 : }
4522 :
4523 652 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
4524 600 : cluster_idx = desc_extent->cluster_idx[i];
4525 : /*
4526 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4527 : * in the used cluster map.
4528 : */
4529 600 : if (cluster_idx != 0) {
4530 600 : if (cluster_idx < desc_extent->start_cluster_idx &&
4531 0 : cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
4532 0 : return -EINVAL;
4533 : }
4534 600 : spdk_bit_array_set(ctx->used_clusters, cluster_idx);
4535 600 : if (bs->num_free_clusters == 0) {
4536 0 : return -ENOSPC;
4537 : }
4538 600 : bs->num_free_clusters--;
4539 : }
4540 600 : cluster_count++;
4541 : }
4542 :
4543 52 : if (cluster_count == 0) {
4544 0 : return -EINVAL;
4545 : }
4546 372 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
4547 : /* Skip this item */
4548 296 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
4549 : /* Skip this item */
4550 236 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
4551 : /* Skip this item */
4552 82 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
4553 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
4554 82 : uint32_t num_extent_pages = ctx->num_extent_pages;
4555 : uint32_t i;
4556 : size_t extent_pages_length;
4557 : void *tmp;
4558 :
4559 82 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
4560 82 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
4561 :
4562 82 : if (desc_extent_table->length == 0 ||
4563 82 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
4564 0 : return -EINVAL;
4565 : }
4566 :
4567 160 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4568 78 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4569 52 : if (desc_extent_table->extent_page[i].num_pages != 1) {
4570 0 : return -EINVAL;
4571 : }
4572 52 : num_extent_pages += 1;
4573 : }
4574 : }
4575 :
4576 82 : if (num_extent_pages > 0) {
4577 52 : tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
4578 52 : if (tmp == NULL) {
4579 0 : return -ENOMEM;
4580 : }
4581 52 : ctx->extent_page_num = tmp;
4582 :
4583 : /* Extent table entries contain md page numbers for extent pages.
4584 : 				 * Zeroes represent unallocated extent pages, which are run-length-encoded.
4585 : */
4586 104 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4587 52 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4588 52 : ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
4589 52 : ctx->num_extent_pages += 1;
4590 : }
4591 : }
4592 : }
4593 : } else {
4594 : /* Error */
4595 0 : return -EINVAL;
4596 : }
4597 : /* Advance to the next descriptor */
4598 492 : cur_desc += sizeof(*desc) + desc->length;
4599 492 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
4600 20 : break;
4601 : }
4602 472 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
4603 : }
4604 246 : return 0;
4605 : }
4606 :
4607 : static bool
4608 1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
4609 : {
4610 : uint32_t crc;
4611 1296 : struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4612 : size_t desc_len;
4613 :
4614 1296 : crc = blob_md_page_calc_crc(page);
4615 1296 : if (crc != page->crc) {
4616 0 : return false;
4617 : }
4618 :
4619 : /* Extent page should always be of sequence num 0. */
4620 1296 : if (page->sequence_num != 0) {
4621 44 : return false;
4622 : }
4623 :
4624 : /* Descriptor type must be EXTENT_PAGE. */
4625 1252 : if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4626 154 : return false;
4627 : }
4628 :
4629 : /* Descriptor length cannot exceed the page. */
4630 1098 : desc_len = sizeof(*desc) + desc->length;
4631 1098 : if (desc_len > sizeof(page->descriptors)) {
4632 0 : return false;
4633 : }
4634 :
4635 : /* It has to be the only descriptor in the page. */
4636 1098 : if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
4637 1098 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
4638 1098 : if (desc->length != 0) {
4639 0 : return false;
4640 : }
4641 : }
4642 :
4643 1098 : return true;
4644 : }
4645 :
4646 : static bool
4647 6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
4648 : {
4649 : uint32_t crc;
4650 6754 : struct spdk_blob_md_page *page = ctx->page;
4651 :
4652 6754 : crc = blob_md_page_calc_crc(page);
4653 6754 : if (crc != page->crc) {
4654 6538 : return false;
4655 : }
4656 :
4657 : /* First page of a sequence should match the blobid. */
4658 216 : if (page->sequence_num == 0 &&
4659 172 : bs_page_to_blobid(ctx->cur_page) != page->id) {
4660 18 : return false;
4661 : }
4662 198 : assert(bs_load_cur_extent_page_valid(page) == false);
4663 :
4664 198 : return true;
4665 : }
4666 :
4667 : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
4668 :
4669 : static void
4670 106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4671 : {
4672 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4673 :
4674 106 : if (bserrno != 0) {
4675 0 : bs_load_ctx_fail(ctx, bserrno);
4676 0 : return;
4677 : }
4678 :
4679 106 : bs_load_complete(ctx);
4680 : }
4681 :
4682 : static void
4683 106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4684 : {
4685 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4686 :
4687 106 : spdk_free(ctx->mask);
4688 106 : ctx->mask = NULL;
4689 :
4690 106 : if (bserrno != 0) {
4691 0 : bs_load_ctx_fail(ctx, bserrno);
4692 0 : return;
4693 : }
4694 :
4695 106 : bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
4696 : }
4697 :
4698 : static void
4699 106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4700 : {
4701 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4702 :
4703 106 : spdk_free(ctx->mask);
4704 106 : ctx->mask = NULL;
4705 :
4706 106 : if (bserrno != 0) {
4707 0 : bs_load_ctx_fail(ctx, bserrno);
4708 0 : return;
4709 : }
4710 :
4711 106 : bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
4712 : }
4713 :
4714 : static void
4715 106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
4716 : {
4717 106 : bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
4718 106 : }
4719 :
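 : /*
 :  * A chain of metadata pages has been fully replayed: advance to the next
 :  * md page that is not already claimed, or, once the whole md region has
 :  * been scanned, claim the clusters occupied by metadata and persist the
 :  * rebuilt masks.
 :  */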
4720 : static void
4721 6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
4722 : {
4723 : uint64_t num_md_clusters;
4724 : uint64_t i;
4725 :
4726 6714 : ctx->in_page_chain = false;
4727 :
4728 : do {
4729 6784 : ctx->page_index++;
4730 6784 : } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
4731 :
4732 6714 : if (ctx->page_index < ctx->super->md_len) {
4733 6608 : ctx->cur_page = ctx->page_index;
4734 6608 : bs_load_replay_cur_md_page(ctx);
4735 : } else {
4736 : /* Claim all of the clusters used by the metadata */
4737 106 : num_md_clusters = spdk_divide_round_up(
4738 106 : ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
4739 480 : for (i = 0; i < num_md_clusters; i++) {
4740 374 : spdk_bit_array_set(ctx->used_clusters, i);
4741 : }
4742 106 : ctx->bs->num_free_clusters -= num_md_clusters;
4743 106 : spdk_free(ctx->page);
4744 106 : bs_load_write_used_md(ctx);
4745 : }
4746 6714 : }
4747 :
4748 : static void
4749 52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4750 : {
4751 52 : struct spdk_bs_load_ctx *ctx = cb_arg;
4752 : uint32_t page_num;
4753 : uint64_t i;
4754 :
4755 52 : if (bserrno != 0) {
4756 0 : spdk_free(ctx->extent_pages);
4757 0 : bs_load_ctx_fail(ctx, bserrno);
4758 0 : return;
4759 : }
4760 :
4761 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4762 : 		/* Extent pages are only read when referenced from in-chain md.
4763 : 		 * The md integrity is broken if such a page is not a valid extent page. */
4764 52 : if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
4765 0 : spdk_free(ctx->extent_pages);
4766 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4767 0 : return;
4768 : }
4769 :
4770 52 : page_num = ctx->extent_page_num[i];
4771 52 : spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
4772 52 : if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
4773 0 : spdk_free(ctx->extent_pages);
4774 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4775 0 : return;
4776 : }
4777 : }
4778 :
4779 52 : spdk_free(ctx->extent_pages);
4780 52 : free(ctx->extent_page_num);
4781 52 : ctx->extent_page_num = NULL;
4782 52 : ctx->num_extent_pages = 0;
4783 :
4784 52 : bs_load_replay_md_chain_cpl(ctx);
4785 : }
4786 :
4787 : static void
4788 52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
4789 : {
4790 : spdk_bs_batch_t *batch;
4791 : uint32_t page;
4792 : uint64_t lba;
4793 : uint64_t i;
4794 :
4795 52 : ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
4796 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4797 52 : if (!ctx->extent_pages) {
4798 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4799 0 : return;
4800 : }
4801 :
4802 52 : batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
4803 :
4804 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4805 52 : page = ctx->extent_page_num[i];
4806 52 : assert(page < ctx->super->md_len);
4807 52 : lba = bs_md_page_to_lba(ctx->bs, page);
4808 52 : bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
4809 52 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
4810 : }
4811 :
4812 52 : bs_batch_close(batch);
4813 : }
4814 :
4815 : static void
4816 6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4817 : {
4818 6754 : struct spdk_bs_load_ctx *ctx = cb_arg;
4819 : uint32_t page_num;
4820 : struct spdk_blob_md_page *page;
4821 :
4822 6754 : if (bserrno != 0) {
4823 0 : bs_load_ctx_fail(ctx, bserrno);
4824 0 : return;
4825 : }
4826 :
4827 6754 : page_num = ctx->cur_page;
4828 6754 : page = ctx->page;
4829 6754 : if (bs_load_cur_md_page_valid(ctx) == true) {
4830 198 : if (page->sequence_num == 0 || ctx->in_page_chain == true) {
4831 194 : spdk_spin_lock(&ctx->bs->used_lock);
4832 194 : bs_claim_md_page(ctx->bs, page_num);
4833 194 : spdk_spin_unlock(&ctx->bs->used_lock);
4834 194 : if (page->sequence_num == 0) {
4835 154 : SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
4836 154 : spdk_bit_array_set(ctx->bs->used_blobids, page_num);
4837 : }
4838 194 : if (bs_load_replay_md_parse_page(ctx, page)) {
4839 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4840 0 : return;
4841 : }
4842 194 : if (page->next != SPDK_INVALID_MD_PAGE) {
4843 40 : ctx->in_page_chain = true;
4844 40 : ctx->cur_page = page->next;
4845 40 : bs_load_replay_cur_md_page(ctx);
4846 40 : return;
4847 : }
4848 154 : if (ctx->num_extent_pages != 0) {
4849 52 : bs_load_replay_extent_pages(ctx);
4850 52 : return;
4851 : }
4852 : }
4853 : }
4854 6662 : bs_load_replay_md_chain_cpl(ctx);
4855 : }
4856 :
4857 : static void
4858 6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
4859 : {
4860 : uint64_t lba;
4861 :
4862 6754 : assert(ctx->cur_page < ctx->super->md_len);
4863 6754 : lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
4864 6754 : bs_sequence_read_dev(ctx->seq, ctx->page, lba,
4865 6754 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
4866 : bs_load_replay_md_cpl, ctx);
4867 6754 : }
4868 :
4869 : static void
4870 106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
4871 : {
4872 106 : ctx->page_index = 0;
4873 106 : ctx->cur_page = 0;
4874 106 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
4875 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4876 106 : if (!ctx->page) {
4877 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4878 0 : return;
4879 : }
4880 106 : bs_load_replay_cur_md_page(ctx);
4881 : }
4882 :
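 : /*
 :  * Dirty-shutdown recovery: resize all allocation trackers to match the
 :  * super block, then rebuild them by replaying every metadata page on disk
 :  * instead of trusting the persisted masks.
 :  */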
4883 : static void
4884 106 : bs_recover(struct spdk_bs_load_ctx *ctx)
4885 : {
4886 : int rc;
4887 :
4888 106 : SPDK_NOTICELOG("Performing recovery on blobstore\n");
4889 106 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
4890 106 : if (rc < 0) {
4891 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4892 0 : return;
4893 : }
4894 :
4895 106 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
4896 106 : if (rc < 0) {
4897 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4898 0 : return;
4899 : }
4900 :
4901 106 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4902 106 : if (rc < 0) {
4903 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4904 0 : return;
4905 : }
4906 :
4907 106 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
4908 106 : if (rc < 0) {
4909 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4910 0 : return;
4911 : }
4912 :
4913 106 : ctx->bs->num_free_clusters = ctx->bs->total_clusters;
4914 106 : bs_load_replay_md(ctx);
4915 : }
4916 :
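 : /*
 :  * Copy the geometry of an already-validated super block into the in-memory
 :  * blobstore: cluster size, totals, metadata region location, and io_unit
 :  * size.
 :  */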
4917 : static int
4918 276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
4919 : {
4920 : int rc;
4921 :
4922 276 : if (ctx->super->size == 0) {
4923 8 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
4924 : }
4925 :
4926 276 : if (ctx->super->io_unit_size == 0) {
4927 8 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
4928 : }
4929 :
4930 276 : ctx->bs->clean = 1;
4931 276 : ctx->bs->cluster_sz = ctx->super->cluster_size;
4932 276 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
4933 276 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
4934 276 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
4935 276 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
4936 : }
4937 276 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
4938 276 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4939 276 : if (rc < 0) {
4940 0 : return -ENOMEM;
4941 : }
4942 276 : ctx->bs->md_start = ctx->super->md_start;
4943 276 : ctx->bs->md_len = ctx->super->md_len;
4944 276 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
4945 276 : if (rc < 0) {
4946 0 : return -ENOMEM;
4947 : }
4948 :
4949 552 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
4950 276 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
4951 276 : ctx->bs->super_blob = ctx->super->super_blob;
4952 276 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
4953 :
4954 276 : return 0;
4955 : }
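 :
 : /*
 :  * Worked example for the total_data_clusters math above (values assumed
 :  * for illustration): with a 1 MiB cluster there are 256 4 KiB pages per
 :  * cluster, so for md_start = 4 and md_len = 1024 the metadata occupies
 :  * ceil((4 + 1024) / 256) = 5 clusters and total_data_clusters ends up as
 :  * total_clusters - 5.
 :  */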
4956 :
4957 : static void
4958 300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4959 : {
4960 300 : struct spdk_bs_load_ctx *ctx = cb_arg;
4961 : int rc;
4962 :
4963 300 : rc = bs_super_validate(ctx->super, ctx->bs);
4964 300 : if (rc != 0) {
4965 24 : bs_load_ctx_fail(ctx, rc);
4966 24 : return;
4967 : }
4968 :
4969 276 : rc = bs_parse_super(ctx);
4970 276 : if (rc < 0) {
4971 0 : bs_load_ctx_fail(ctx, rc);
4972 0 : return;
4973 : }
4974 :
4975 276 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
4976 106 : bs_recover(ctx);
4977 : } else {
4978 170 : bs_load_read_used_pages(ctx);
4979 : }
4980 : }
4981 :
4982 : static inline int
4983 308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
4984 : {
4985 :
4986 308 : if (!src->opts_size) {
4987 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
4988 0 : return -1;
4989 : }
4990 :
4991 : #define FIELD_OK(field) \
4992 : offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
4993 :
4994 : #define SET_FIELD(field) \
4995 : if (FIELD_OK(field)) { \
4996 : dst->field = src->field; \
4997 : } \
4998 :
4999 308 : SET_FIELD(cluster_sz);
5000 308 : SET_FIELD(num_md_pages);
5001 308 : SET_FIELD(max_md_ops);
5002 308 : SET_FIELD(max_channel_ops);
5003 308 : SET_FIELD(clear_method);
5004 :
5005 308 : if (FIELD_OK(bstype)) {
5006 308 : memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
5007 : }
5008 308 : SET_FIELD(iter_cb_fn);
5009 308 : SET_FIELD(iter_cb_arg);
5010 308 : SET_FIELD(force_recover);
5011 308 : SET_FIELD(esnap_bs_dev_create);
5012 308 : SET_FIELD(esnap_ctx);
5013 :
5014 308 : dst->opts_size = src->opts_size;
5015 :
5016 : 	/* Do not remove this assert. When adding a new field, update the size it
5017 : 	 * checks and add a corresponding SET_FIELD statement above. */
5018 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
5019 :
5020 : #undef FIELD_OK
5021 : #undef SET_FIELD
5022 :
5023 308 : return 0;
5024 : }
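 :
 : /*
 :  * Illustrative sketch (not part of blobstore.c): the FIELD_OK/SET_FIELD
 :  * pattern above copies only the fields that fit within the caller-reported
 :  * opts_size, so callers compiled against an older, smaller struct
 :  * spdk_bs_opts remain compatible. A caller only has to let
 :  * spdk_bs_opts_init() record the size it was compiled with:
 :  *
 :  *	struct spdk_bs_opts opts;
 :  *
 :  *	spdk_bs_opts_init(&opts, sizeof(opts));	// records opts_size
 :  *	opts.cluster_sz = 4 * 1024 * 1024;	// fields beyond the caller's
 :  *						// opts_size stay at defaults
 :  */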
5025 :
5026 : void
5027 312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5028 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5029 : {
5030 312 : struct spdk_blob_store *bs;
5031 312 : struct spdk_bs_cpl cpl;
5032 312 : struct spdk_bs_load_ctx *ctx;
5033 312 : struct spdk_bs_opts opts = {};
5034 : int err;
5035 :
5036 312 : SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
5037 :
5038 312 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5039 4 : 		SPDK_DEBUGLOG(blob, "unsupported dev block length of %" PRIu32 "\n", dev->blocklen);
5040 4 : dev->destroy(dev);
5041 4 : cb_fn(cb_arg, NULL, -EINVAL);
5042 4 : return;
5043 : }
5044 :
5045 308 : spdk_bs_opts_init(&opts, sizeof(opts));
5046 308 : if (o) {
5047 122 : if (bs_opts_copy(o, &opts)) {
5048 0 : return;
5049 : }
5050 : }
5051 :
5052 308 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
5053 8 : dev->destroy(dev);
5054 8 : cb_fn(cb_arg, NULL, -EINVAL);
5055 8 : return;
5056 : }
5057 :
5058 300 : err = bs_alloc(dev, &opts, &bs, &ctx);
5059 300 : if (err) {
5060 0 : dev->destroy(dev);
5061 0 : cb_fn(cb_arg, NULL, err);
5062 0 : return;
5063 : }
5064 :
5065 300 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5066 300 : cpl.u.bs_handle.cb_fn = cb_fn;
5067 300 : cpl.u.bs_handle.cb_arg = cb_arg;
5068 300 : cpl.u.bs_handle.bs = bs;
5069 :
5070 300 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5071 300 : if (!ctx->seq) {
5072 0 : spdk_free(ctx->super);
5073 0 : free(ctx);
5074 0 : bs_free(bs);
5075 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5076 0 : return;
5077 : }
5078 :
5079 : /* Read the super block */
5080 300 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5081 300 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5082 : bs_load_super_cpl, ctx);
5083 : }
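 :
 : /*
 :  * Illustrative usage sketch (not part of blobstore.c). It assumes the
 :  * caller already created a struct spdk_bs_dev (for example with
 :  * spdk_bdev_create_bs_dev_ext()) and runs on an SPDK thread:
 :  *
 :  *	static void
 :  *	load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
 :  *	{
 :  *		if (bserrno != 0) {
 :  *			SPDK_ERRLOG("load failed: %s\n", spdk_strerror(-bserrno));
 :  *			return;
 :  *		}
 :  *		// bs is ready; the calling thread becomes the md thread
 :  *	}
 :  *
 :  *	struct spdk_bs_opts opts;
 :  *
 :  *	spdk_bs_opts_init(&opts, sizeof(opts));
 :  *	opts.force_recover = true;	// optional: replay md even if clean
 :  *	spdk_bs_load(bs_dev, &opts, load_done, NULL);
 :  */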
5084 :
5085 : /* END spdk_bs_load */
5086 :
5087 : /* START spdk_bs_dump */
5088 :
5089 : static void
5090 0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
5091 : {
5092 0 : spdk_free(ctx->super);
5093 :
5094 : /*
5095 : * We need to defer calling bs_call_cpl() until after
5096 : * dev destruction, so tuck these away for later use.
5097 : */
5098 0 : ctx->bs->unload_err = bserrno;
5099 0 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5100 0 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5101 :
5102 0 : bs_sequence_finish(seq, 0);
5103 0 : bs_free(ctx->bs);
5104 0 : free(ctx);
5105 0 : }
5106 :
5107 : static void
5108 0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5109 : {
5110 : struct spdk_blob_md_descriptor_xattr *desc_xattr;
5111 : uint32_t i;
5112 : const char *type;
5113 :
5114 0 : desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
5115 :
5116 0 : if (desc_xattr->length !=
5117 : sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
5118 0 : desc_xattr->name_length + desc_xattr->value_length) {
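 : 		/* A length mismatch in the descriptor is tolerated here; the
 : 		 * dump below is best-effort and prints whatever the descriptor
 : 		 * claims. */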
5119 : }
5120 :
5121 0 : memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
5122 0 : ctx->xattr_name[desc_xattr->name_length] = '\0';
5123 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5124 0 : type = "XATTR";
5125 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5126 0 : type = "XATTR_INTERNAL";
5127 : } else {
5128 0 : assert(false);
5129 : type = "XATTR_?";
5130 : }
5131 0 : fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
5132 0 : fprintf(ctx->fp, " value = \"");
5133 0 : ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
5134 0 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
5135 0 : desc_xattr->value_length);
5136 0 : fprintf(ctx->fp, "\"\n");
5137 0 : for (i = 0; i < desc_xattr->value_length; i++) {
5138 0 : if (i % 16 == 0) {
5139 0 : fprintf(ctx->fp, " ");
5140 : }
5141 0 : fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
5142 0 : if ((i + 1) % 16 == 0) {
5143 0 : fprintf(ctx->fp, "\n");
5144 : }
5145 : }
5146 0 : if (i % 16 != 0) {
5147 0 : fprintf(ctx->fp, "\n");
5148 : }
5149 0 : }
5150 :
5151 : struct type_flag_desc {
5152 : uint64_t mask;
5153 : uint64_t val;
5154 : const char *name;
5155 : };
5156 :
5157 : static void
5158 0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
5159 : struct type_flag_desc *desc, size_t numflags)
5160 : {
5161 0 : uint64_t covered = 0;
5162 : size_t i;
5163 :
5164 0 : for (i = 0; i < numflags; i++) {
5165 0 : if ((desc[i].mask & flags) != desc[i].val) {
5166 0 : continue;
5167 : }
5168 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
5169 0 : if (desc[i].mask != desc[i].val) {
5170 0 : fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
5171 0 : desc[i].mask, desc[i].val);
5172 : }
5173 0 : fprintf(ctx->fp, "\n");
5174 0 : covered |= desc[i].mask;
5175 : }
5176 0 : if ((flags & ~covered) != 0) {
5177 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
5178 : }
5179 0 : }
5180 :
5181 : static void
5182 0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5183 : {
5184 : struct spdk_blob_md_descriptor_flags *type_desc;
5185 : #define ADD_FLAG(f) { f, f, #f }
5186 : #define ADD_MASK_VAL(m, v) { m, v, #v }
5187 : static struct type_flag_desc invalid[] = {
5188 : ADD_FLAG(SPDK_BLOB_THIN_PROV),
5189 : ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
5190 : ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
5191 : };
5192 : static struct type_flag_desc data_ro[] = {
5193 : ADD_FLAG(SPDK_BLOB_READ_ONLY),
5194 : };
5195 : static struct type_flag_desc md_ro[] = {
5196 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
5197 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
5198 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
5199 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
5200 : };
5201 : #undef ADD_FLAG
5202 : #undef ADD_MASK_VAL
5203 :
5204 0 : type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
5205 0 : fprintf(ctx->fp, "Flags:\n");
5206 0 : fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
5207 0 : bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
5208 : SPDK_COUNTOF(invalid));
5209 0 : fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
5210 0 : bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
5211 : SPDK_COUNTOF(data_ro));
5212 0 : fprintf(ctx->fp, "\t md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
5213 0 : bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
5214 : SPDK_COUNTOF(md_ro));
5215 0 : }
5216 :
5217 : static void
5218 0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5219 : {
5220 : struct spdk_blob_md_descriptor_extent_table *et_desc;
5221 : uint64_t num_extent_pages;
5222 : uint32_t et_idx;
5223 :
5224 0 : et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
5225 0 : num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
5226 : sizeof(et_desc->extent_page[0]);
5227 :
5228 0 : fprintf(ctx->fp, "Extent table:\n");
5229 0 : for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
5230 0 : if (et_desc->extent_page[et_idx].page_idx == 0) {
5231 : /* Zeroes represent unallocated extent pages. */
5232 0 : continue;
5233 : }
5234 0 : fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
5235 : " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
5236 : et_desc->extent_page[et_idx].num_pages,
5237 : bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
5238 : }
5239 0 : }
5240 :
5241 : static void
5242 0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
5243 : {
5244 0 : uint32_t page_idx = ctx->cur_page;
5245 0 : struct spdk_blob_md_page *page = ctx->page;
5246 : struct spdk_blob_md_descriptor *desc;
5247 0 : size_t cur_desc = 0;
5248 : uint32_t crc;
5249 :
5250 0 : fprintf(ctx->fp, "=========\n");
5251 0 : fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
5252 0 : fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
5253 0 : fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
5254 0 : fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
5255 0 : if (page->next == SPDK_INVALID_MD_PAGE) {
5256 0 : fprintf(ctx->fp, "Next: None\n");
5257 : } else {
5258 0 : fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
5259 : }
5260 0 : fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
5261 0 : if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
5262 0 : fprintf(ctx->fp, " md");
5263 : }
5264 0 : if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
5265 0 : fprintf(ctx->fp, " blob");
5266 : }
5267 0 : fprintf(ctx->fp, "\n");
5268 :
5269 0 : crc = blob_md_page_calc_crc(page);
5270 0 : fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
5271 :
5272 0 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
5273 0 : while (cur_desc < sizeof(page->descriptors)) {
5274 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
5275 0 : if (desc->length == 0) {
5276 : /* If padding and length are 0, this terminates the page */
5277 0 : break;
5278 : }
5279 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
5280 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
5281 : unsigned int i;
5282 :
5283 0 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
5284 :
5285 0 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
5286 0 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
5287 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5288 : desc_extent_rle->extents[i].cluster_idx);
5289 : } else {
5290 0 : fprintf(ctx->fp, "Unallocated Extent - ");
5291 : }
5292 0 : fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
5293 0 : fprintf(ctx->fp, "\n");
5294 : }
5295 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
5296 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
5297 : unsigned int i;
5298 :
5299 0 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
5300 :
5301 0 : for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
5302 0 : if (desc_extent->cluster_idx[i] != 0) {
5303 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5304 : desc_extent->cluster_idx[i]);
5305 : } else {
5306 0 : fprintf(ctx->fp, "Unallocated Extent");
5307 : }
5308 0 : fprintf(ctx->fp, "\n");
5309 : }
5310 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5311 0 : bs_dump_print_xattr(ctx, desc);
5312 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5313 0 : bs_dump_print_xattr(ctx, desc);
5314 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
5315 0 : bs_dump_print_type_flags(ctx, desc);
5316 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
5317 0 : bs_dump_print_extent_table(ctx, desc);
5318 : } else {
5319 : /* Error */
5320 0 : fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
5321 : }
5322 : /* Advance to the next descriptor */
5323 0 : cur_desc += sizeof(*desc) + desc->length;
5324 0 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
5325 0 : break;
5326 : }
5327 0 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
5328 : }
5329 0 : }
5330 :
5331 : static void
5332 0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5333 : {
5334 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5335 :
5336 0 : if (bserrno != 0) {
5337 0 : bs_dump_finish(seq, ctx, bserrno);
5338 0 : return;
5339 : }
5340 :
5341 0 : if (ctx->page->id != 0) {
5342 0 : bs_dump_print_md_page(ctx);
5343 : }
5344 :
5345 0 : ctx->cur_page++;
5346 :
5347 0 : if (ctx->cur_page < ctx->super->md_len) {
5348 0 : bs_dump_read_md_page(seq, ctx);
5349 : } else {
5350 0 : spdk_free(ctx->page);
5351 0 : bs_dump_finish(seq, ctx, 0);
5352 : }
5353 : }
5354 :
5355 : static void
5356 0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
5357 : {
5358 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5359 : uint64_t lba;
5360 :
5361 0 : assert(ctx->cur_page < ctx->super->md_len);
5362 0 : lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
5363 0 : bs_sequence_read_dev(seq, ctx->page, lba,
5364 0 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
5365 : bs_dump_read_md_page_cpl, ctx);
5366 0 : }
5367 :
5368 : static void
5369 0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5370 : {
5371 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5372 : int rc;
5373 :
5374 0 : fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
5375 0 : if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5376 : sizeof(ctx->super->signature)) != 0) {
5377 0 : fprintf(ctx->fp, "(Mismatch)\n");
5378 0 : bs_dump_finish(seq, ctx, bserrno);
5379 0 : return;
5380 : } else {
5381 0 : fprintf(ctx->fp, "(OK)\n");
5382 : }
5383 0 : fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
5384 0 : fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
5385 0 : (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
5386 0 : fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
5387 0 : fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
5388 0 : fprintf(ctx->fp, "Super Blob ID: ");
5389 0 : if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
5390 0 : fprintf(ctx->fp, "(None)\n");
5391 : } else {
5392 0 : fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
5393 : }
5394 0 : fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
5395 0 : fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
5396 0 : fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
5397 0 : fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
5398 0 : fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
5399 0 : fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
5400 0 : fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
5401 0 : fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
5402 0 : fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
5403 :
5404 0 : ctx->cur_page = 0;
5405 0 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
5406 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5407 0 : if (!ctx->page) {
5408 0 : bs_dump_finish(seq, ctx, -ENOMEM);
5409 0 : return;
5410 : }
5411 :
5412 0 : rc = bs_parse_super(ctx);
5413 0 : if (rc < 0) {
5414 0 : bs_load_ctx_fail(ctx, rc);
5415 0 : return;
5416 : }
5417 :
5418 0 : bs_load_read_used_pages(ctx);
5419 : }
5420 :
5421 : void
5422 0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
5423 : spdk_bs_op_complete cb_fn, void *cb_arg)
5424 : {
5425 0 : struct spdk_blob_store *bs;
5426 0 : struct spdk_bs_cpl cpl;
5427 0 : struct spdk_bs_load_ctx *ctx;
5428 0 : struct spdk_bs_opts opts = {};
5429 : int err;
5430 :
5431 0 : SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
5432 :
5433 0 : spdk_bs_opts_init(&opts, sizeof(opts));
5434 :
5435 0 : err = bs_alloc(dev, &opts, &bs, &ctx);
5436 0 : if (err) {
5437 0 : dev->destroy(dev);
5438 0 : cb_fn(cb_arg, err);
5439 0 : return;
5440 : }
5441 :
5442 0 : ctx->dumping = true;
5443 0 : ctx->fp = fp;
5444 0 : ctx->print_xattr_fn = print_xattr_fn;
5445 :
5446 0 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5447 0 : cpl.u.bs_basic.cb_fn = cb_fn;
5448 0 : cpl.u.bs_basic.cb_arg = cb_arg;
5449 :
5450 0 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5451 0 : if (!ctx->seq) {
5452 0 : spdk_free(ctx->super);
5453 0 : free(ctx);
5454 0 : bs_free(bs);
5455 0 : cb_fn(cb_arg, -ENOMEM);
5456 0 : return;
5457 : }
5458 :
5459 : /* Read the super block */
5460 0 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5461 0 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5462 : bs_dump_super_cpl, ctx);
5463 : }
5464 :
5465 : /* END spdk_bs_dump */
5466 :
5467 : /* START spdk_bs_init */
5468 :
5469 : static void
5470 472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5471 : {
5472 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5473 :
5474 472 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
5475 472 : spdk_free(ctx->super);
5476 472 : free(ctx);
5477 :
5478 472 : bs_sequence_finish(seq, bserrno);
5479 472 : }
5480 :
5481 : static void
5482 472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5483 : {
5484 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5485 :
5486 : /* Write super block */
5487 472 : bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
5488 472 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
5489 : bs_init_persist_super_cpl, ctx);
5490 472 : }
5491 :
5492 : void
5493 488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5494 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5495 : {
5496 488 : struct spdk_bs_load_ctx *ctx;
5497 488 : struct spdk_blob_store *bs;
5498 488 : struct spdk_bs_cpl cpl;
5499 : spdk_bs_sequence_t *seq;
5500 : spdk_bs_batch_t *batch;
5501 : uint64_t num_md_lba;
5502 : uint64_t num_md_pages;
5503 : uint64_t num_md_clusters;
5504 : uint64_t max_used_cluster_mask_len;
5505 : uint32_t i;
5506 488 : struct spdk_bs_opts opts = {};
5507 : int rc;
5508 : uint64_t lba, lba_count;
5509 :
5510 488 : SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
5511 :
5512 488 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5513 4 : 		SPDK_ERRLOG("unsupported dev block length of %" PRIu32 "\n",
5514 : dev->blocklen);
5515 4 : dev->destroy(dev);
5516 4 : cb_fn(cb_arg, NULL, -EINVAL);
5517 4 : return;
5518 : }
5519 :
5520 484 : spdk_bs_opts_init(&opts, sizeof(opts));
5521 484 : if (o) {
5522 182 : if (bs_opts_copy(o, &opts)) {
5523 0 : return;
5524 : }
5525 : }
5526 :
5527 484 : if (bs_opts_verify(&opts) != 0) {
5528 4 : dev->destroy(dev);
5529 4 : cb_fn(cb_arg, NULL, -EINVAL);
5530 4 : return;
5531 : }
5532 :
5533 480 : rc = bs_alloc(dev, &opts, &bs, &ctx);
5534 480 : if (rc) {
5535 4 : dev->destroy(dev);
5536 4 : cb_fn(cb_arg, NULL, rc);
5537 4 : return;
5538 : }
5539 :
5540 476 : if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
5541 : /* By default, allocate 1 page per cluster.
5542 : * Technically, this over-allocates metadata
5543 : * because more metadata will reduce the number
5544 : * of usable clusters. This can be addressed with
5545 : * more complex math in the future.
5546 : */
5547 468 : bs->md_len = bs->total_clusters;
5548 : } else {
5549 8 : bs->md_len = opts.num_md_pages;
5550 : }
5551 476 : rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
5552 476 : if (rc < 0) {
5553 0 : spdk_free(ctx->super);
5554 0 : free(ctx);
5555 0 : bs_free(bs);
5556 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5557 0 : return;
5558 : }
5559 :
5560 476 : rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
5561 476 : if (rc < 0) {
5562 0 : spdk_free(ctx->super);
5563 0 : free(ctx);
5564 0 : bs_free(bs);
5565 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5566 0 : return;
5567 : }
5568 :
5569 476 : rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
5570 476 : if (rc < 0) {
5571 0 : spdk_free(ctx->super);
5572 0 : free(ctx);
5573 0 : bs_free(bs);
5574 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5575 0 : return;
5576 : }
5577 :
5578 476 : memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5579 : sizeof(ctx->super->signature));
5580 476 : ctx->super->version = SPDK_BS_VERSION;
5581 476 : ctx->super->length = sizeof(*ctx->super);
5582 476 : ctx->super->super_blob = bs->super_blob;
5583 476 : ctx->super->clean = 0;
5584 476 : ctx->super->cluster_size = bs->cluster_sz;
5585 476 : ctx->super->io_unit_size = bs->io_unit_size;
5586 476 : memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
5587 :
5588 : /* Calculate how many pages the metadata consumes at the front
5589 : * of the disk.
5590 : */
5591 :
5592 : /* The super block uses 1 page */
5593 476 : num_md_pages = 1;
5594 :
5595 : /* The used_md_pages mask requires 1 bit per metadata page, rounded
5596 : * up to the nearest page, plus a header.
5597 : */
5598 476 : ctx->super->used_page_mask_start = num_md_pages;
5599 476 : ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5600 476 : spdk_divide_round_up(bs->md_len, 8),
5601 : SPDK_BS_PAGE_SIZE);
5602 476 : num_md_pages += ctx->super->used_page_mask_len;
5603 :
5604 : /* The used_clusters mask requires 1 bit per cluster, rounded
5605 : * up to the nearest page, plus a header.
5606 : */
5607 476 : ctx->super->used_cluster_mask_start = num_md_pages;
5608 476 : ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5609 476 : spdk_divide_round_up(bs->total_clusters, 8),
5610 : SPDK_BS_PAGE_SIZE);
5611 : 	/* The blobstore might be extended later, in which case the used_cluster bitmap
5612 : 	 * will need more space. Here we calculate the maximum mask length we can support
5613 : 	 * according to num_md_pages (bs->md_len).
5614 : */
5615 476 : max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5616 476 : spdk_divide_round_up(bs->md_len, 8),
5617 : SPDK_BS_PAGE_SIZE);
5618 476 : max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
5619 : ctx->super->used_cluster_mask_len);
5620 476 : num_md_pages += max_used_cluster_mask_len;
5621 :
5622 : /* The used_blobids mask requires 1 bit per metadata page, rounded
5623 : * up to the nearest page, plus a header.
5624 : */
5625 476 : ctx->super->used_blobid_mask_start = num_md_pages;
5626 476 : ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5627 476 : spdk_divide_round_up(bs->md_len, 8),
5628 : SPDK_BS_PAGE_SIZE);
5629 476 : num_md_pages += ctx->super->used_blobid_mask_len;
5630 :
5631 : /* The metadata region size was chosen above */
5632 476 : ctx->super->md_start = bs->md_start = num_md_pages;
5633 476 : ctx->super->md_len = bs->md_len;
5634 476 : num_md_pages += bs->md_len;
5635 :
5636 476 : num_md_lba = bs_page_to_lba(bs, num_md_pages);
5637 :
5638 476 : ctx->super->size = dev->blockcnt * dev->blocklen;
5639 :
5640 476 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
5641 :
5642 476 : num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
5643 476 : if (num_md_clusters > bs->total_clusters) {
5644 4 : 		SPDK_ERRLOG("Blobstore metadata cannot use more clusters than are available; "
5645 : 			    "please decrease the number of pages reserved for metadata "
5646 : "or increase cluster size.\n");
5647 4 : spdk_free(ctx->super);
5648 4 : spdk_bit_array_free(&ctx->used_clusters);
5649 4 : free(ctx);
5650 4 : bs_free(bs);
5651 4 : cb_fn(cb_arg, NULL, -ENOMEM);
5652 4 : return;
5653 : }
5654 : /* Claim all of the clusters used by the metadata */
5655 75700 : for (i = 0; i < num_md_clusters; i++) {
5656 75228 : spdk_bit_array_set(ctx->used_clusters, i);
5657 : }
5658 :
5659 472 : bs->num_free_clusters -= num_md_clusters;
5660 472 : bs->total_data_clusters = bs->num_free_clusters;
5661 :
5662 472 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5663 472 : cpl.u.bs_handle.cb_fn = cb_fn;
5664 472 : cpl.u.bs_handle.cb_arg = cb_arg;
5665 472 : cpl.u.bs_handle.bs = bs;
5666 :
5667 472 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5668 472 : if (!seq) {
5669 0 : spdk_free(ctx->super);
5670 0 : free(ctx);
5671 0 : bs_free(bs);
5672 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5673 0 : return;
5674 : }
5675 :
5676 472 : batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
5677 :
5678 : /* Clear metadata space */
5679 472 : bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
5680 :
5681 472 : lba = num_md_lba;
5682 472 : lba_count = ctx->bs->dev->blockcnt - lba;
5683 472 : switch (opts.clear_method) {
5684 456 : case BS_CLEAR_WITH_UNMAP:
5685 : /* Trim data clusters */
5686 456 : bs_batch_unmap_dev(batch, lba, lba_count);
5687 456 : break;
5688 0 : case BS_CLEAR_WITH_WRITE_ZEROES:
5689 : /* Write_zeroes to data clusters */
5690 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
5691 0 : break;
5692 16 : case BS_CLEAR_WITH_NONE:
5693 : default:
5694 16 : break;
5695 : }
5696 :
5697 472 : bs_batch_close(batch);
5698 : }
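 :
 : /*
 :  * Worked example of the layout computed above (illustrative numbers): with
 :  * the default 4 KiB metadata page, a 1 MiB cluster and md_len = 1024 pages,
 :  * each bitmap mask (a small header plus 1024/8 = 128 bytes of bits) rounds
 :  * up to a single page, giving:
 :  *
 :  *	page 0		super block
 :  *	page 1		used_page_mask
 :  *	page 2		used_cluster_mask (reserved at least as large as
 :  *			max_used_cluster_mask_len, to allow future growth)
 :  *	page 3		used_blobid_mask
 :  *	pages 4..1027	metadata region (md_start = 4)
 :  *
 :  * Everything up to num_md_lba is zeroed, then the data region is unmapped,
 :  * zeroed, or left untouched according to opts.clear_method.
 :  */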
5699 :
5700 : /* END spdk_bs_init */
5701 :
5702 : /* START spdk_bs_destroy */
5703 :
5704 : static void
5705 4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5706 : {
5707 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
5708 4 : struct spdk_blob_store *bs = ctx->bs;
5709 :
5710 : /*
5711 : * We need to defer calling bs_call_cpl() until after
5712 : * dev destruction, so tuck these away for later use.
5713 : */
5714 4 : bs->unload_err = bserrno;
5715 4 : memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5716 4 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5717 :
5718 4 : bs_sequence_finish(seq, bserrno);
5719 :
5720 4 : bs_free(bs);
5721 4 : free(ctx);
5722 4 : }
5723 :
5724 : void
5725 4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
5726 : void *cb_arg)
5727 : {
5728 4 : struct spdk_bs_cpl cpl;
5729 : spdk_bs_sequence_t *seq;
5730 : struct spdk_bs_load_ctx *ctx;
5731 :
5732 4 : SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
5733 :
5734 4 : if (!RB_EMPTY(&bs->open_blobs)) {
5735 0 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5736 0 : cb_fn(cb_arg, -EBUSY);
5737 0 : return;
5738 : }
5739 :
5740 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5741 4 : cpl.u.bs_basic.cb_fn = cb_fn;
5742 4 : cpl.u.bs_basic.cb_arg = cb_arg;
5743 :
5744 4 : ctx = calloc(1, sizeof(*ctx));
5745 4 : if (!ctx) {
5746 0 : cb_fn(cb_arg, -ENOMEM);
5747 0 : return;
5748 : }
5749 :
5750 4 : ctx->bs = bs;
5751 :
5752 4 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5753 4 : if (!seq) {
5754 0 : free(ctx);
5755 0 : cb_fn(cb_arg, -ENOMEM);
5756 0 : return;
5757 : }
5758 :
5759 : /* Write zeroes to the super block */
5760 4 : bs_sequence_write_zeroes_dev(seq,
5761 : bs_page_to_lba(bs, 0),
5762 : bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
5763 : bs_destroy_trim_cpl, ctx);
5764 : }
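 :
 : /*
 :  * Note the contrast with spdk_bs_unload() below: destroy zeroes the super
 :  * block so the blobstore can never be loaded again, while unload persists
 :  * the in-memory masks and marks the super block clean before freeing it.
 :  */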
5765 :
5766 : /* END spdk_bs_destroy */
5767 :
5768 : /* START spdk_bs_unload */
5769 :
5770 : static void
5771 654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
5772 : {
5773 654 : spdk_bs_sequence_t *seq = ctx->seq;
5774 :
5775 654 : spdk_free(ctx->super);
5776 :
5777 : /*
5778 : * We need to defer calling bs_call_cpl() until after
5779 : * dev destruction, so tuck these away for later use.
5780 : */
5781 654 : ctx->bs->unload_err = bserrno;
5782 654 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5783 654 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5784 :
5785 654 : bs_sequence_finish(seq, bserrno);
5786 :
5787 654 : bs_free(ctx->bs);
5788 654 : free(ctx);
5789 654 : }
5790 :
5791 : static void
5792 654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5793 : {
5794 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5795 :
5796 654 : bs_unload_finish(ctx, bserrno);
5797 654 : }
5798 :
5799 : static void
5800 654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5801 : {
5802 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5803 :
5804 654 : spdk_free(ctx->mask);
5805 :
5806 654 : if (bserrno != 0) {
5807 0 : bs_unload_finish(ctx, bserrno);
5808 0 : return;
5809 : }
5810 :
5811 654 : ctx->super->clean = 1;
5812 :
5813 654 : bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
5814 : }
5815 :
5816 : static void
5817 654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5818 : {
5819 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5820 :
5821 654 : spdk_free(ctx->mask);
5822 654 : ctx->mask = NULL;
5823 :
5824 654 : if (bserrno != 0) {
5825 0 : bs_unload_finish(ctx, bserrno);
5826 0 : return;
5827 : }
5828 :
5829 654 : bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
5830 : }
5831 :
5832 : static void
5833 654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5834 : {
5835 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5836 :
5837 654 : spdk_free(ctx->mask);
5838 654 : ctx->mask = NULL;
5839 :
5840 654 : if (bserrno != 0) {
5841 0 : bs_unload_finish(ctx, bserrno);
5842 0 : return;
5843 : }
5844 :
5845 654 : bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
5846 : }
5847 :
5848 : static void
5849 654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5850 : {
5851 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5852 : int rc;
5853 :
5854 654 : if (bserrno != 0) {
5855 0 : bs_unload_finish(ctx, bserrno);
5856 0 : return;
5857 : }
5858 :
5859 654 : rc = bs_super_validate(ctx->super, ctx->bs);
5860 654 : if (rc != 0) {
5861 0 : bs_unload_finish(ctx, rc);
5862 0 : return;
5863 : }
5864 :
5865 654 : bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
5866 : }
5867 :
5868 : void
5869 662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
5870 : {
5871 662 : struct spdk_bs_cpl cpl;
5872 : struct spdk_bs_load_ctx *ctx;
5873 :
5874 662 : SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
5875 :
5876 : /*
5877 : * If external snapshot channels are being destroyed while the blobstore is unloaded, the
5878 : * unload is deferred until after the channel destruction completes.
5879 : */
5880 662 : if (bs->esnap_channels_unloading != 0) {
5881 4 : if (bs->esnap_unload_cb_fn != NULL) {
5882 0 : SPDK_ERRLOG("Blobstore unload in progress\n");
5883 0 : cb_fn(cb_arg, -EBUSY);
5884 0 : return;
5885 : }
5886 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
5887 : " esnap clones are unloading\n", bs->esnap_channels_unloading);
5888 4 : bs->esnap_unload_cb_fn = cb_fn;
5889 4 : bs->esnap_unload_cb_arg = cb_arg;
5890 4 : return;
5891 : }
5892 658 : if (bs->esnap_unload_cb_fn != NULL) {
5893 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
5894 4 : assert(bs->esnap_unload_cb_fn == cb_fn);
5895 4 : assert(bs->esnap_unload_cb_arg == cb_arg);
5896 4 : bs->esnap_unload_cb_fn = NULL;
5897 4 : bs->esnap_unload_cb_arg = NULL;
5898 : }
5899 :
5900 658 : if (!RB_EMPTY(&bs->open_blobs)) {
5901 4 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5902 4 : cb_fn(cb_arg, -EBUSY);
5903 4 : return;
5904 : }
5905 :
5906 654 : ctx = calloc(1, sizeof(*ctx));
5907 654 : if (!ctx) {
5908 0 : cb_fn(cb_arg, -ENOMEM);
5909 0 : return;
5910 : }
5911 :
5912 654 : ctx->bs = bs;
5913 :
5914 654 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
5915 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5916 654 : if (!ctx->super) {
5917 0 : free(ctx);
5918 0 : cb_fn(cb_arg, -ENOMEM);
5919 0 : return;
5920 : }
5921 :
5922 654 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5923 654 : cpl.u.bs_basic.cb_fn = cb_fn;
5924 654 : cpl.u.bs_basic.cb_arg = cb_arg;
5925 :
5926 654 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5927 654 : if (!ctx->seq) {
5928 0 : spdk_free(ctx->super);
5929 0 : free(ctx);
5930 0 : cb_fn(cb_arg, -ENOMEM);
5931 0 : return;
5932 : }
5933 :
5934 : /* Read super block */
5935 654 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5936 654 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5937 : bs_unload_read_super_cpl, ctx);
5938 : }
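 :
 : /*
 :  * Illustrative usage sketch (not part of blobstore.c); all blobs must be
 :  * closed first or the call fails with -EBUSY:
 :  *
 :  *	static void
 :  *	unload_done(void *cb_arg, int bserrno)
 :  *	{
 :  *		if (bserrno != 0) {
 :  *			SPDK_ERRLOG("unload failed: %s\n", spdk_strerror(-bserrno));
 :  *		}
 :  *	}
 :  *
 :  *	spdk_bs_unload(bs, unload_done, NULL);
 :  */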
5939 :
5940 : /* END spdk_bs_unload */
5941 :
5942 : /* START spdk_bs_set_super */
5943 :
5944 : struct spdk_bs_set_super_ctx {
5945 : struct spdk_blob_store *bs;
5946 : struct spdk_bs_super_block *super;
5947 : };
5948 :
5949 : static void
5950 8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5951 : {
5952 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5953 :
5954 8 : if (bserrno != 0) {
5955 0 : SPDK_ERRLOG("Unable to write to super block of blobstore\n");
5956 : }
5957 :
5958 8 : spdk_free(ctx->super);
5959 :
5960 8 : bs_sequence_finish(seq, bserrno);
5961 :
5962 8 : free(ctx);
5963 8 : }
5964 :
5965 : static void
5966 8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5967 : {
5968 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5969 : int rc;
5970 :
5971 8 : if (bserrno != 0) {
5972 0 : SPDK_ERRLOG("Unable to read super block of blobstore\n");
5973 0 : spdk_free(ctx->super);
5974 0 : bs_sequence_finish(seq, bserrno);
5975 0 : free(ctx);
5976 0 : return;
5977 : }
5978 :
5979 8 : rc = bs_super_validate(ctx->super, ctx->bs);
5980 8 : if (rc != 0) {
5981 0 : SPDK_ERRLOG("Not a valid super block\n");
5982 0 : spdk_free(ctx->super);
5983 0 : bs_sequence_finish(seq, rc);
5984 0 : free(ctx);
5985 0 : return;
5986 : }
5987 :
5988 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
5989 : }
5990 :
5991 : void
5992 8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
5993 : spdk_bs_op_complete cb_fn, void *cb_arg)
5994 : {
5995 8 : struct spdk_bs_cpl cpl;
5996 : spdk_bs_sequence_t *seq;
5997 : struct spdk_bs_set_super_ctx *ctx;
5998 :
5999 8 : SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
6000 :
6001 8 : ctx = calloc(1, sizeof(*ctx));
6002 8 : if (!ctx) {
6003 0 : cb_fn(cb_arg, -ENOMEM);
6004 0 : return;
6005 : }
6006 :
6007 8 : ctx->bs = bs;
6008 :
6009 8 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
6010 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
6011 8 : if (!ctx->super) {
6012 0 : free(ctx);
6013 0 : cb_fn(cb_arg, -ENOMEM);
6014 0 : return;
6015 : }
6016 :
6017 8 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
6018 8 : cpl.u.bs_basic.cb_fn = cb_fn;
6019 8 : cpl.u.bs_basic.cb_arg = cb_arg;
6020 :
6021 8 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
6022 8 : if (!seq) {
6023 0 : spdk_free(ctx->super);
6024 0 : free(ctx);
6025 0 : cb_fn(cb_arg, -ENOMEM);
6026 0 : return;
6027 : }
6028 :
6029 8 : bs->super_blob = blobid;
6030 :
6031 : /* Read super block */
6032 8 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
6033 8 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
6034 : bs_set_super_read_cpl, ctx);
6035 : }
6036 :
6037 : /* END spdk_bs_set_super */
6038 :
6039 : void
6040 12 : spdk_bs_get_super(struct spdk_blob_store *bs,
6041 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6042 : {
6043 12 : if (bs->super_blob == SPDK_BLOBID_INVALID) {
6044 4 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
6045 : } else {
6046 8 : cb_fn(cb_arg, bs->super_blob, 0);
6047 : }
6048 12 : }
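 :
 : /*
 :  * Illustrative sketch (not part of blobstore.c): the super blob id is a
 :  * single, persisted "root object" pointer. An application typically sets
 :  * it once with spdk_bs_set_super() and reads it back after every load:
 :  *
 :  *	static void
 :  *	get_super_done(void *cb_arg, spdk_blob_id blobid, int bserrno)
 :  *	{
 :  *		if (bserrno == -ENOENT) {
 :  *			// no super blob was ever set on this blobstore
 :  *			return;
 :  *		}
 :  *		// open blobid with spdk_bs_open_blob() and proceed
 :  *	}
 :  *
 :  *	spdk_bs_get_super(bs, get_super_done, NULL);
 :  */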
6049 :
6050 : uint64_t
6051 132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
6052 : {
6053 132 : return bs->cluster_sz;
6054 : }
6055 :
6056 : uint64_t
6057 68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
6058 : {
6059 68 : return SPDK_BS_PAGE_SIZE;
6060 : }
6061 :
6062 : uint64_t
6063 738 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
6064 : {
6065 738 : return bs->io_unit_size;
6066 : }
6067 :
6068 : uint64_t
6069 540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
6070 : {
6071 540 : return bs->num_free_clusters;
6072 : }
6073 :
6074 : uint64_t
6075 92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
6076 : {
6077 92 : return bs->total_data_clusters;
6078 : }
6079 :
6080 : static int
6081 780 : bs_register_md_thread(struct spdk_blob_store *bs)
6082 : {
6083 780 : bs->md_channel = spdk_get_io_channel(bs);
6084 780 : if (!bs->md_channel) {
6085 0 : SPDK_ERRLOG("Failed to get IO channel.\n");
6086 0 : return -1;
6087 : }
6088 :
6089 780 : return 0;
6090 : }
6091 :
6092 : static int
6093 780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
6094 : {
6095 780 : spdk_put_io_channel(bs->md_channel);
6096 :
6097 780 : return 0;
6098 : }
6099 :
6100 : spdk_blob_id
6101 562 : spdk_blob_get_id(struct spdk_blob *blob)
6102 : {
6103 562 : assert(blob != NULL);
6104 :
6105 562 : return blob->id;
6106 : }
6107 :
6108 : uint64_t
6109 24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
6110 : {
6111 24 : assert(blob != NULL);
6112 :
6113 24 : return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
6114 : }
6115 :
6116 : uint64_t
6117 24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
6118 : {
6119 24 : assert(blob != NULL);
6120 :
6121 24 : return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
6122 : }
6123 :
6124 : uint64_t
6125 569 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
6126 : {
6127 569 : assert(blob != NULL);
6128 :
6129 569 : return blob->active.num_clusters;
6130 : }
6131 :
6132 : uint64_t
6133 330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
6134 : {
6135 330 : assert(blob != NULL);
6136 :
6137 330 : return blob->active.num_allocated_clusters;
6138 : }
6139 :
6140 : static uint64_t
6141 24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
6142 : {
6143 24 : uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
6144 :
6145 44 : while (offset < blob_io_unit_num) {
6146 40 : if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
6147 20 : return offset;
6148 : }
6149 :
6150 20 : offset += bs_num_io_units_to_cluster_boundary(blob, offset);
6151 : }
6152 :
6153 4 : return UINT64_MAX;
6154 : }
6155 :
6156 : uint64_t
6157 12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6158 : {
6159 12 : return blob_find_io_unit(blob, offset, true);
6160 : }
6161 :
6162 : uint64_t
6163 12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6164 : {
6165 12 : return blob_find_io_unit(blob, offset, false);
6166 : }
6167 :
6168 : /* START spdk_bs_create_blob */
6169 :
6170 : static void
6171 1878 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
6172 : {
6173 1878 : struct spdk_blob *blob = cb_arg;
6174 1878 : uint32_t page_idx = bs_blobid_to_page(blob->id);
6175 :
6176 1878 : if (bserrno != 0) {
6177 0 : spdk_spin_lock(&blob->bs->used_lock);
6178 0 : spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
6179 0 : bs_release_md_page(blob->bs, page_idx);
6180 0 : spdk_spin_unlock(&blob->bs->used_lock);
6181 : }
6182 :
6183 1878 : blob_free(blob);
6184 :
6185 1878 : bs_sequence_finish(seq, bserrno);
6186 1878 : }
6187 :
6188 : static int
6189 3776 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
6190 : bool internal)
6191 : {
6192 : uint64_t i;
6193 3776 : size_t value_len = 0;
6194 : int rc;
6195 3776 : const void *value = NULL;
6196 3776 : if (xattrs->count > 0 && xattrs->get_value == NULL) {
6197 8 : return -EINVAL;
6198 : }
6199 4084 : for (i = 0; i < xattrs->count; i++) {
6200 320 : xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
6201 320 : if (value == NULL || value_len == 0) {
6202 4 : return -EINVAL;
6203 : }
6204 316 : rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
6205 316 : if (rc < 0) {
6206 0 : return rc;
6207 : }
6208 : }
6209 3764 : return 0;
6210 : }
6211 :
6212 : static void
6213 1862 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
6214 : {
6215 : #define FIELD_OK(field) \
6216 : offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
6217 :
6218 : #define SET_FIELD(field) \
6219 : if (FIELD_OK(field)) { \
6220 : dst->field = src->field; \
6221 : } \
6222 :
6223 1862 : SET_FIELD(num_clusters);
6224 1862 : SET_FIELD(thin_provision);
6225 1862 : SET_FIELD(clear_method);
6226 :
6227 1862 : if (FIELD_OK(xattrs)) {
6228 1862 : memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
6229 : }
6230 :
6231 1862 : SET_FIELD(use_extent_table);
6232 1862 : SET_FIELD(esnap_id);
6233 1862 : SET_FIELD(esnap_id_len);
6234 :
6235 1862 : dst->opts_size = src->opts_size;
6236 :
6237 : 	/* Do not remove this assert. When adding a new field, update the size it
6238 : 	 * checks and add a corresponding SET_FIELD statement above. */
6239 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
6240 :
6241 : #undef FIELD_OK
6242 : #undef SET_FIELD
6243 1862 : }
6244 :
6245 : static void
6246 1894 : bs_create_blob(struct spdk_blob_store *bs,
6247 : const struct spdk_blob_opts *opts,
6248 : const struct spdk_blob_xattr_opts *internal_xattrs,
6249 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6250 : {
6251 : struct spdk_blob *blob;
6252 : uint32_t page_idx;
6253 1894 : struct spdk_bs_cpl cpl;
6254 1894 : struct spdk_blob_opts opts_local;
6255 1894 : struct spdk_blob_xattr_opts internal_xattrs_default;
6256 : spdk_bs_sequence_t *seq;
6257 : spdk_blob_id id;
6258 : int rc;
6259 :
6260 1894 : assert(spdk_get_thread() == bs->md_thread);
6261 :
6262 1894 : spdk_spin_lock(&bs->used_lock);
6263 1894 : page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
6264 1894 : if (page_idx == UINT32_MAX) {
6265 0 : spdk_spin_unlock(&bs->used_lock);
6266 0 : cb_fn(cb_arg, 0, -ENOMEM);
6267 0 : return;
6268 : }
6269 1894 : spdk_bit_array_set(bs->used_blobids, page_idx);
6270 1894 : bs_claim_md_page(bs, page_idx);
6271 1894 : spdk_spin_unlock(&bs->used_lock);
6272 :
6273 1894 : id = bs_page_to_blobid(page_idx);
6274 :
6275 1894 : SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
6276 :
6277 1894 : spdk_blob_opts_init(&opts_local, sizeof(opts_local));
6278 1894 : if (opts) {
6279 1862 : blob_opts_copy(opts, &opts_local);
6280 : }
6281 :
6282 1894 : blob = blob_alloc(bs, id);
6283 1894 : if (!blob) {
6284 0 : rc = -ENOMEM;
6285 0 : goto error;
6286 : }
6287 :
6288 1894 : blob->use_extent_table = opts_local.use_extent_table;
6289 1894 : if (blob->use_extent_table) {
6290 968 : blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
6291 : }
6292 :
6293 1894 : if (!internal_xattrs) {
6294 1622 : blob_xattrs_init(&internal_xattrs_default);
6295 1622 : internal_xattrs = &internal_xattrs_default;
6296 : }
6297 :
6298 1894 : rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
6299 1894 : if (rc < 0) {
6300 12 : goto error;
6301 : }
6302 :
6303 1882 : rc = blob_set_xattrs(blob, internal_xattrs, true);
6304 1882 : if (rc < 0) {
6305 0 : goto error;
6306 : }
6307 :
6308 1882 : if (opts_local.thin_provision) {
6309 356 : blob_set_thin_provision(blob);
6310 : }
6311 :
6312 1882 : blob_set_clear_method(blob, opts_local.clear_method);
6313 :
6314 1882 : if (opts_local.esnap_id != NULL) {
6315 60 : if (opts_local.esnap_id_len > UINT16_MAX) {
6316 0 : 			SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
6317 : opts_local.esnap_id_len);
6318 0 : rc = -EINVAL;
6319 0 : goto error;
6320 :
6321 : }
6322 60 : blob_set_thin_provision(blob);
6323 60 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6324 60 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
6325 60 : opts_local.esnap_id, opts_local.esnap_id_len, true);
6326 60 : if (rc != 0) {
6327 0 : goto error;
6328 : }
6329 : }
6330 :
6331 1882 : rc = blob_resize(blob, opts_local.num_clusters);
6332 1882 : if (rc < 0) {
6333 4 : goto error;
6334 : }
6335 1878 : cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6336 1878 : cpl.u.blobid.cb_fn = cb_fn;
6337 1878 : cpl.u.blobid.cb_arg = cb_arg;
6338 1878 : cpl.u.blobid.blobid = blob->id;
6339 :
6340 1878 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
6341 1878 : if (!seq) {
6342 0 : rc = -ENOMEM;
6343 0 : goto error;
6344 : }
6345 :
6346 1878 : blob_persist(seq, blob, bs_create_blob_cpl, blob);
6347 1878 : return;
6348 :
6349 16 : error:
6350 16 : 	SPDK_ERRLOG("Failed to create blob: %s, size: %" PRIu64 " clusters\n",
6351 : spdk_strerror(rc), opts_local.num_clusters);
6352 16 : if (blob != NULL) {
6353 16 : blob_free(blob);
6354 : }
6355 16 : spdk_spin_lock(&bs->used_lock);
6356 16 : spdk_bit_array_clear(bs->used_blobids, page_idx);
6357 16 : bs_release_md_page(bs, page_idx);
6358 16 : spdk_spin_unlock(&bs->used_lock);
6359 16 : cb_fn(cb_arg, 0, rc);
6360 : }
6361 :
6362 : void
6363 16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
6364 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6365 : {
6366 16 : bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
6367 16 : }
6368 :
6369 : void
6370 1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
6371 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6372 : {
6373 1598 : bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
6374 1598 : }
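 :
 : /*
 :  * Illustrative usage sketch (not part of blobstore.c): create a ten-cluster
 :  * thin-provisioned blob and open it from the completion callback (bs and
 :  * open_done are assumed to exist in the caller):
 :  *
 :  *	static void
 :  *	create_done(void *cb_arg, spdk_blob_id blobid, int bserrno)
 :  *	{
 :  *		struct spdk_blob_store *bs = cb_arg;
 :  *
 :  *		if (bserrno != 0) {
 :  *			SPDK_ERRLOG("create failed: %s\n", spdk_strerror(-bserrno));
 :  *			return;
 :  *		}
 :  *		spdk_bs_open_blob(bs, blobid, open_done, NULL);
 :  *	}
 :  *
 :  *	struct spdk_blob_opts opts;
 :  *
 :  *	spdk_blob_opts_init(&opts, sizeof(opts));
 :  *	opts.num_clusters = 10;
 :  *	opts.thin_provision = true;
 :  *	spdk_bs_create_blob_ext(bs, &opts, create_done, bs);
 :  */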
6375 :
6376 : /* END spdk_bs_create_blob */
6377 :
6378 : /* START blob_cleanup */
6379 :
6380 : struct spdk_clone_snapshot_ctx {
6381 : struct spdk_bs_cpl cpl;
6382 : int bserrno;
6383 : bool frozen;
6384 :
6385 : struct spdk_io_channel *channel;
6386 :
6387 : /* Current cluster for inflate operation */
6388 : uint64_t cluster;
6389 :
6390 : 	/* For inflation, force allocation of all unallocated clusters and remove
6391 : 	 * thin-provisioning. Otherwise, only decouple the parent and keep the clone thin. */
6392 : bool allocate_all;
6393 :
6394 : struct {
6395 : spdk_blob_id id;
6396 : struct spdk_blob *blob;
6397 : bool md_ro;
6398 : } original;
6399 : struct {
6400 : spdk_blob_id id;
6401 : struct spdk_blob *blob;
6402 : } new;
6403 :
6404 : 	/* xattrs specified for the snapshot/clone only. They have no impact on
6405 : 	 * the original blob's xattrs. */
6406 : const struct spdk_blob_xattr_opts *xattrs;
6407 : };
6408 :
6409 : static void
6410 346 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
6411 : {
6412 346 : struct spdk_clone_snapshot_ctx *ctx = cb_arg;
6413 346 : struct spdk_bs_cpl *cpl = &ctx->cpl;
6414 :
6415 346 : if (bserrno != 0) {
6416 6 : if (ctx->bserrno != 0) {
6417 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6418 : } else {
6419 6 : ctx->bserrno = bserrno;
6420 : }
6421 : }
6422 :
6423 346 : switch (cpl->type) {
6424 282 : case SPDK_BS_CPL_TYPE_BLOBID:
6425 282 : cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
6426 282 : break;
6427 64 : case SPDK_BS_CPL_TYPE_BLOB_BASIC:
6428 64 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
6429 64 : break;
6430 0 : default:
6431 0 : SPDK_UNREACHABLE();
6432 : break;
6433 : }
6434 :
6435 346 : free(ctx);
6436 346 : }
6437 :
6438 : static void
6439 332 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
6440 : {
6441 332 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6442 332 : struct spdk_blob *origblob = ctx->original.blob;
6443 :
6444 332 : if (bserrno != 0) {
6445 0 : if (ctx->bserrno != 0) {
6446 0 : SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
6447 : } else {
6448 0 : ctx->bserrno = bserrno;
6449 : }
6450 : }
6451 :
6452 332 : ctx->original.id = origblob->id;
6453 332 : origblob->locked_operation_in_progress = false;
6454 :
6455 : /* Revert md_ro to original state */
6456 332 : origblob->md_ro = ctx->original.md_ro;
6457 :
6458 332 : spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
6459 332 : }
6460 :
6461 : static void
6462 332 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
6463 : {
6464 332 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6465 332 : struct spdk_blob *origblob = ctx->original.blob;
6466 :
6467 332 : if (bserrno != 0) {
6468 24 : if (ctx->bserrno != 0) {
6469 4 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6470 : } else {
6471 20 : ctx->bserrno = bserrno;
6472 : }
6473 : }
6474 :
6475 332 : if (ctx->frozen) {
6476 : /* Unfreeze any outstanding I/O */
6477 212 : blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
6478 : } else {
6479 120 : bs_snapshot_unfreeze_cpl(ctx, 0);
6480 : }
6481 :
6482 332 : }
6483 :
6484 : static void
6485 4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
6486 : {
6487 4 : struct spdk_blob *newblob = ctx->new.blob;
6488 :
6489 4 : if (bserrno != 0) {
6490 4 : if (ctx->bserrno != 0) {
6491 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6492 : } else {
6493 4 : ctx->bserrno = bserrno;
6494 : }
6495 : }
6496 :
6497 4 : ctx->new.id = newblob->id;
6498 4 : spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6499 4 : }
6500 :
6501 : /* END blob_cleanup */
6502 :
6503 : /* START spdk_bs_create_snapshot */
6504 :
6505 : static void
6506 220 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
6507 : {
6508 : uint64_t *cluster_temp;
6509 : uint64_t num_allocated_clusters_temp;
6510 : uint32_t *extent_page_temp;
6511 :
6512 220 : cluster_temp = blob1->active.clusters;
6513 220 : blob1->active.clusters = blob2->active.clusters;
6514 220 : blob2->active.clusters = cluster_temp;
6515 :
6516 220 : num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
6517 220 : blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
6518 220 : blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
6519 :
6520 220 : extent_page_temp = blob1->active.extent_pages;
6521 220 : blob1->active.extent_pages = blob2->active.extent_pages;
6522 220 : blob2->active.extent_pages = extent_page_temp;
6523 220 : }
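 :
 : /*
 :  * The snapshot takes over the original blob's cluster map wholesale and the
 :  * original carries on as a thin clone with an empty map. Because the swap is
 :  * its own inverse, the error paths below (see bs_snapshot_newblob_sync_cpl
 :  * and bs_snapshot_origblob_sync_cpl) simply swap again to restore the maps.
 :  */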
6524 :
6525 : /* Copies an internal xattr */
6526 : static int
6527 28 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
6528 : {
6529 28 : const void *val = NULL;
6530 28 : size_t len;
6531 : int bserrno;
6532 :
6533 28 : bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
6534 28 : if (bserrno != 0) {
6535 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
6536 0 : return bserrno;
6537 : }
6538 :
6539 28 : bserrno = blob_set_xattr(toblob, name, val, len, true);
6540 28 : if (bserrno != 0) {
6541 0 : SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
6542 : name, toblob->id);
6543 0 : return bserrno;
6544 : }
6545 28 : return 0;
6546 : }
6547 :
6548 : static void
6549 208 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
6550 : {
6551 208 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6552 208 : struct spdk_blob *origblob = ctx->original.blob;
6553 208 : struct spdk_blob *newblob = ctx->new.blob;
6554 :
6555 208 : if (bserrno != 0) {
6556 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6557 4 : if (blob_is_esnap_clone(newblob)) {
6558 0 : bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6559 0 : origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6560 : }
6561 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6562 4 : return;
6563 : }
6564 :
6565 : /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
6566 204 : bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
6567 204 : if (bserrno != 0) {
6568 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6569 0 : return;
6570 : }
6571 :
6572 204 : bs_blob_list_add(ctx->original.blob);
6573 :
6574 204 : spdk_blob_set_read_only(newblob);
6575 :
6576 : /* sync snapshot metadata */
6577 204 : spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6578 : }
6579 :
6580 : static void
6581 212 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
6582 : {
6583 212 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6584 212 : struct spdk_blob *origblob = ctx->original.blob;
6585 212 : struct spdk_blob *newblob = ctx->new.blob;
6586 :
6587 212 : if (bserrno != 0) {
6588 : /* return cluster map back to original */
6589 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6590 :
6591 : /* Newblob md sync failed. Valid clusters are only present in origblob.
6592 : 		 * Since I/O is frozen on origblob, no changes to the zeroed-out cluster map should have occurred.
6593 : 		 * Newblob needs to be reverted to the thin-provisioned state it had at creation to close properly. */
6594 4 : blob_set_thin_provision(newblob);
6595 4 : assert(spdk_mem_all_zero(newblob->active.clusters,
6596 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6597 4 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6598 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6599 :
6600 4 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6601 4 : return;
6602 : }
6603 :
6604 : /* Set internal xattr for snapshot id */
6605 208 : bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
6606 208 : if (bserrno != 0) {
6607 : /* return cluster map back to original */
6608 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6609 0 : blob_set_thin_provision(newblob);
6610 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6611 0 : return;
6612 : }
6613 :
6614 : /* Create new back_bs_dev for snapshot */
6615 208 : origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
6616 208 : if (origblob->back_bs_dev == NULL) {
6617 : /* return cluster map back to original */
6618 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6619 0 : blob_set_thin_provision(newblob);
6620 0 : bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
6621 0 : return;
6622 : }
6623 :
6624 : /* Remove the xattr that references an external snapshot */
6625 208 : if (blob_is_esnap_clone(origblob)) {
6626 16 : origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
6627 16 : bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
6628 16 : if (bserrno != 0) {
6629 0 : if (bserrno == -ENOENT) {
6630 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
6631 : " xattr to remove\n", origblob->id);
6632 0 : assert(false);
6633 : } else {
6634 : /* return cluster map back to original */
6635 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6636 0 : blob_set_thin_provision(newblob);
6637 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6638 0 : return;
6639 : }
6640 : }
6641 : }
6642 :
6643 208 : bs_blob_list_remove(origblob);
6644 208 : origblob->parent_id = newblob->id;
6645 : /* set clone blob as thin provisioned */
6646 208 : blob_set_thin_provision(origblob);
6647 :
6648 208 : bs_blob_list_add(newblob);
6649 :
6650 : /* sync clone metadata */
6651 208 : spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
6652 : }
6653 :
6654 : static void
6655 212 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
6656 : {
6657 212 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6658 212 : struct spdk_blob *origblob = ctx->original.blob;
6659 212 : struct spdk_blob *newblob = ctx->new.blob;
6660 : int bserrno;
6661 :
6662 212 : if (rc != 0) {
6663 0 : bs_clone_snapshot_newblob_cleanup(ctx, rc);
6664 0 : return;
6665 : }
6666 :
6667 212 : ctx->frozen = true;
6668 :
6669 212 : if (blob_is_esnap_clone(origblob)) {
6670 :                 /* Clean up any channels associated with the original blob id because future I/O
6671 :                  * will be performed using the snapshot's blob id.
6672 :                  */
6673 16 : blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
6674 : }
6675 212 : if (newblob->back_bs_dev) {
6676 212 : blob_back_bs_destroy(newblob);
6677 : }
6678 : /* set new back_bs_dev for snapshot */
6679 212 : newblob->back_bs_dev = origblob->back_bs_dev;
6680 : /* Set invalid flags from origblob */
6681 212 : newblob->invalid_flags = origblob->invalid_flags;
6682 :
6683 : /* inherit parent from original blob if set */
6684 212 : newblob->parent_id = origblob->parent_id;
6685 212 : switch (origblob->parent_id) {
6686 16 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
6687 16 : bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6688 16 : if (bserrno != 0) {
6689 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6690 0 : return;
6691 : }
6692 16 : break;
6693 144 : case SPDK_BLOBID_INVALID:
6694 144 : break;
6695 52 : default:
6696 : /* Set internal xattr for snapshot id */
6697 52 : bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
6698 52 : &origblob->parent_id, sizeof(spdk_blob_id), true);
6699 52 : if (bserrno != 0) {
6700 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6701 0 : return;
6702 : }
6703 : }
6704 :
6705 : /* swap cluster maps */
6706 212 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6707 :
6708 : /* Set the clear method on the new blob to match the original. */
6709 212 : blob_set_clear_method(newblob, origblob->clear_method);
6710 :
6711 : /* sync snapshot metadata */
6712 212 : spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
6713 : }
6714 :
6715 : static void
6716 216 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6717 : {
6718 216 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6719 216 : struct spdk_blob *origblob = ctx->original.blob;
6720 216 : struct spdk_blob *newblob = _blob;
6721 :
6722 216 : if (bserrno != 0) {
6723 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6724 4 : return;
6725 : }
6726 :
6727 212 : ctx->new.blob = newblob;
6728 212 : assert(spdk_blob_is_thin_provisioned(newblob));
6729 212 : assert(spdk_mem_all_zero(newblob->active.clusters,
6730 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6731 212 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6732 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6733 :
6734 212 : blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
6735 : }
6736 :
6737 : static void
6738 220 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6739 : {
6740 220 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6741 220 : struct spdk_blob *origblob = ctx->original.blob;
6742 :
6743 220 : if (bserrno != 0) {
6744 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6745 4 : return;
6746 : }
6747 :
6748 216 : ctx->new.id = blobid;
6749 216 : ctx->cpl.u.blobid.blobid = blobid;
6750 :
6751 216 : spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
6752 : }
6753 :
6754 :
6755 : static void
6756 220 : bs_xattr_snapshot(void *arg, const char *name,
6757 : const void **value, size_t *value_len)
6758 : {
6759 220 : assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
6760 :
6761 220 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6762 220 : *value = &blob->id;
6763 220 : *value_len = sizeof(blob->id);
6764 220 : }
6765 :
6766 : static void
6767 230 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6768 : {
6769 230 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6770 230 : struct spdk_blob_opts opts;
6771 230 : struct spdk_blob_xattr_opts internal_xattrs;
6772 230 : char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
6773 :
6774 230 : if (bserrno != 0) {
6775 6 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6776 6 : return;
6777 : }
6778 :
6779 224 : ctx->original.blob = _blob;
6780 :
6781 224 : if (_blob->data_ro || _blob->md_ro) {
6782 4 : SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
6783 : PRIx64 "\n", _blob->id);
6784 4 : ctx->bserrno = -EINVAL;
6785 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6786 4 : return;
6787 : }
6788 :
6789 220 : if (_blob->locked_operation_in_progress) {
6790 0 : SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
6791 0 : ctx->bserrno = -EBUSY;
6792 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6793 0 : return;
6794 : }
6795 :
6796 220 : _blob->locked_operation_in_progress = true;
6797 :
6798 220 : spdk_blob_opts_init(&opts, sizeof(opts));
6799 220 : blob_xattrs_init(&internal_xattrs);
6800 :
6801 :         /* Make the new blob the same size as the original blob,
6802 :          * but do not allocate clusters */
6803 220 : opts.thin_provision = true;
6804 220 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6805 220 : opts.use_extent_table = _blob->use_extent_table;
6806 :
6807 : /* If there are any xattrs specified for snapshot, set them now */
6808 220 : if (ctx->xattrs) {
6809 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6810 : }
6811 : /* Set internal xattr SNAPSHOT_IN_PROGRESS */
6812 220 : internal_xattrs.count = 1;
6813 220 : internal_xattrs.ctx = _blob;
6814 220 : internal_xattrs.names = xattrs_names;
6815 220 : internal_xattrs.get_value = bs_xattr_snapshot;
6816 :
6817 220 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6818 : bs_snapshot_newblob_create_cpl, ctx);
6819 : }
6820 :
6821 : void
6822 230 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
6823 : const struct spdk_blob_xattr_opts *snapshot_xattrs,
6824 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6825 : {
6826 230 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6827 :
6828 230 : if (!ctx) {
6829 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6830 0 : return;
6831 : }
6832 230 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6833 230 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6834 230 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6835 230 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6836 230 : ctx->bserrno = 0;
6837 230 : ctx->frozen = false;
6838 230 : ctx->original.id = blobid;
6839 230 : ctx->xattrs = snapshot_xattrs;
6840 :
6841 230 : spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
6842 : }
6843 : /* END spdk_bs_create_snapshot */
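      :
      : /*
      :  * Illustrative usage sketch (not part of this file): taking a snapshot of an
      :  * existing writable blob. The g_* names are hypothetical; the callback signature
      :  * is spdk_blob_op_with_id_complete, as used above.
      :  */
      : extern struct spdk_blob_store *g_bs; /* hypothetical */
      : extern spdk_blob_id g_blobid;        /* hypothetical */
      :
      : static void
      : example_snapshot_cpl(void *cb_arg, spdk_blob_id snapshot_id, int bserrno)
      : {
      :         if (bserrno != 0) {
      :                 SPDK_ERRLOG("snapshot creation failed: %d\n", bserrno);
      :                 return;
      :         }
      :         SPDK_NOTICELOG("created snapshot 0x%" PRIx64 "\n", snapshot_id);
      : }
      :
      : static void
      : example_take_snapshot(void)
      : {
      :         /* NULL xattrs: no user xattrs are placed on the snapshot. */
      :         spdk_bs_create_snapshot(g_bs, g_blobid, NULL, example_snapshot_cpl, NULL);
      : }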
6844 :
6845 : /* START spdk_bs_create_clone */
6846 :
6847 : static void
6848 48 : bs_xattr_clone(void *arg, const char *name,
6849 : const void **value, size_t *value_len)
6850 : {
6851 48 : assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
6852 :
6853 48 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6854 48 : *value = &blob->id;
6855 48 : *value_len = sizeof(blob->id);
6856 48 : }
6857 :
6858 : static void
6859 48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6860 : {
6861 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6862 48 : struct spdk_blob *clone = _blob;
6863 :
6864 48 : ctx->new.blob = clone;
6865 48 : bs_blob_list_add(clone);
6866 :
6867 48 : spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
6868 48 : }
6869 :
6870 : static void
6871 48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6872 : {
6873 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6874 :
6875 48 : ctx->cpl.u.blobid.blobid = blobid;
6876 48 : spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
6877 48 : }
6878 :
6879 : static void
6880 52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6881 : {
6882 52 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6883 52 : struct spdk_blob_opts opts;
6884 52 : struct spdk_blob_xattr_opts internal_xattrs;
6885 52 : char *xattr_names[] = { BLOB_SNAPSHOT };
6886 :
6887 52 : if (bserrno != 0) {
6888 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6889 0 : return;
6890 : }
6891 :
6892 52 : ctx->original.blob = _blob;
6893 52 : ctx->original.md_ro = _blob->md_ro;
6894 :
6895 52 : if (!_blob->data_ro || !_blob->md_ro) {
6896 4 : SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
6897 4 : ctx->bserrno = -EINVAL;
6898 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6899 4 : return;
6900 : }
6901 :
6902 48 : if (_blob->locked_operation_in_progress) {
6903 0 : SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
6904 0 : ctx->bserrno = -EBUSY;
6905 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6906 0 : return;
6907 : }
6908 :
6909 48 : _blob->locked_operation_in_progress = true;
6910 :
6911 48 : spdk_blob_opts_init(&opts, sizeof(opts));
6912 48 : blob_xattrs_init(&internal_xattrs);
6913 :
6914 48 : opts.thin_provision = true;
6915 48 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6916 48 : opts.use_extent_table = _blob->use_extent_table;
6917 48 : if (ctx->xattrs) {
6918 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6919 : }
6920 :
6921 : /* Set internal xattr BLOB_SNAPSHOT */
6922 48 : internal_xattrs.count = 1;
6923 48 : internal_xattrs.ctx = _blob;
6924 48 : internal_xattrs.names = xattr_names;
6925 48 : internal_xattrs.get_value = bs_xattr_clone;
6926 :
6927 48 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6928 : bs_clone_newblob_create_cpl, ctx);
6929 : }
6930 :
6931 : void
6932 52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
6933 : const struct spdk_blob_xattr_opts *clone_xattrs,
6934 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6935 : {
6936 52 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6937 :
6938 52 : if (!ctx) {
6939 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6940 0 : return;
6941 : }
6942 :
6943 52 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6944 52 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6945 52 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6946 52 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6947 52 : ctx->bserrno = 0;
6948 52 : ctx->xattrs = clone_xattrs;
6949 52 : ctx->original.id = blobid;
6950 :
6951 52 : spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
6952 : }
6953 :
6954 : /* END spdk_bs_create_clone */
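      :
      : /*
      :  * Illustrative usage sketch (not part of this file): creating a writable,
      :  * thin-provisioned clone of a read-only snapshot. The g_* names are hypothetical.
      :  */
      : extern struct spdk_blob_store *g_bs; /* hypothetical */
      : extern spdk_blob_id g_snapshot_id;   /* hypothetical */
      :
      : static void
      : example_clone_cpl(void *cb_arg, spdk_blob_id clone_id, int bserrno)
      : {
      :         if (bserrno != 0) {
      :                 SPDK_ERRLOG("clone creation failed: %d\n", bserrno);
      :                 return;
      :         }
      :         SPDK_NOTICELOG("created clone 0x%" PRIx64 "\n", clone_id);
      : }
      :
      : static void
      : example_create_clone(void)
      : {
      :         /* The source blob must be a snapshot (data and metadata read-only). */
      :         spdk_bs_create_clone(g_bs, g_snapshot_id, NULL, example_clone_cpl, NULL);
      : }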
6955 :
6956 : /* START spdk_bs_inflate_blob */
6957 :
6958 : static void
6959 12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
6960 : {
6961 12 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6962 12 : struct spdk_blob *_blob = ctx->original.blob;
6963 :
6964 12 : if (bserrno != 0) {
6965 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6966 0 : return;
6967 : }
6968 :
6969 : /* Temporarily override md_ro flag for MD modification */
6970 12 : _blob->md_ro = false;
6971 :
6972 12 : bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
6973 12 : if (bserrno != 0) {
6974 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6975 0 : return;
6976 : }
6977 :
6978 12 : assert(_parent != NULL);
6979 :
6980 12 : bs_blob_list_remove(_blob);
6981 12 : _blob->parent_id = _parent->id;
6982 :
6983 12 : blob_back_bs_destroy(_blob);
6984 12 : _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
6985 12 : bs_blob_list_add(_blob);
6986 :
6987 12 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
6988 : }
6989 :
6990 : static void
6991 4 : bs_inflate_blob_set_esnap_refs(struct spdk_clone_snapshot_ctx *ctx)
6992 : {
6993 4 : struct spdk_blob *_blob = ctx->original.blob;
6994 4 : struct spdk_blob *_parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
6995 : int bserrno;
6996 :
6997 4 : assert(_parent != NULL);
6998 4 : assert(_parent->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT);
6999 :
7000 : /* Temporarily override md_ro flag for MD modification */
7001 4 : _blob->md_ro = false;
7002 :
7003 4 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7004 4 : bserrno = bs_snapshot_copy_xattr(_blob, _parent, BLOB_EXTERNAL_SNAPSHOT_ID);
7005 4 : if (bserrno != 0) {
7006 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
7007 0 : return;
7008 : }
7009 :
7010 4 : bs_blob_list_remove(_blob);
7011 :
7012 4 : _blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
7013 4 : _blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
7014 :
7015 4 : blob_back_bs_destroy(_blob);
7016 4 : _blob->back_bs_dev = _parent->back_bs_dev;
7017 :
7018 4 : LIST_INSERT_AFTER(_parent, _blob, back_bs_dev_link);
7019 :
7020 4 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
7021 : }
7022 :
7023 : static void
7024 60 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
7025 : {
7026 60 : struct spdk_blob *_blob = ctx->original.blob;
7027 : struct spdk_blob *_parent;
7028 :
7029 60 : if (ctx->allocate_all) {
7030 : /* remove thin provisioning */
7031 32 : bs_blob_list_remove(_blob);
7032 32 : if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
7033 8 : blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
7034 8 : _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
7035 : } else {
7036 24 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7037 : }
7038 32 : _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
7039 32 : blob_back_bs_destroy(_blob);
7040 32 : _blob->parent_id = SPDK_BLOBID_INVALID;
7041 : } else {
7042 : /* For now, esnap clones always have allocate_all set. */
7043 28 : assert(!blob_is_esnap_clone(_blob));
7044 :
7045 28 : _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
7046 28 : switch (_parent->parent_id) {
7047 12 : case SPDK_BLOBID_INVALID:
7048 12 : bs_blob_list_remove(_blob);
7049 12 : _blob->parent_id = SPDK_BLOBID_INVALID;
7050 12 : blob_back_bs_destroy(_blob);
7051 12 : _blob->back_bs_dev = bs_create_zeroes_dev();
7052 12 : break;
7053 4 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
7054 4 : bs_inflate_blob_set_esnap_refs(ctx);
7055 4 : return;
7056 12 : default:
7057 : /* We must change the parent of the inflated blob */
7058 12 : spdk_bs_open_blob(_blob->bs, _parent->parent_id,
7059 : bs_inflate_blob_set_parent_cpl, ctx);
7060 12 : return;
7061 : }
7062 : }
7063 :
7064 : /* Temporarily override md_ro flag for MD modification */
7065 44 : _blob->md_ro = false;
7066 44 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7067 44 : _blob->state = SPDK_BLOB_STATE_DIRTY;
7068 :
7069 44 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
7070 : }
7071 :
7072 : /* Check if cluster needs allocation */
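      : /*
      :  * A cluster needs allocation when it is not already allocated in this blob and
      :  * one of the following holds:
      :  * - allocate_all is set (full inflate), or
      :  * - the parent is an external snapshot, whose contents must always be copied, or
      :  * - the immediate parent blob has this cluster allocated (its data must be copied).
      :  */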
7073 : static inline bool
7074 1280 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
7075 : {
7076 : struct spdk_blob_bs_dev *b;
7077 :
7078 1280 : assert(blob != NULL);
7079 :
7080 1280 : if (blob->active.clusters[cluster] != 0) {
7081 : /* Cluster is already allocated */
7082 32 : return false;
7083 : }
7084 :
7085 1248 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
7086 :                 /* Blob has no parent blob */
7087 80 : return allocate_all;
7088 : }
7089 :
7090 1168 : if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
7091 64 : return true;
7092 : }
7093 :
7094 1104 : b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
7095 1104 : return (allocate_all || b->blob->active.clusters[cluster] != 0);
7096 : }
7097 :
7098 : static void
7099 512 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
7100 : {
7101 512 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7102 512 : struct spdk_blob *_blob = ctx->original.blob;
7103 512 : struct spdk_bs_cpl cpl;
7104 : spdk_bs_user_op_t *op;
7105 : uint64_t offset;
7106 :
7107 512 : if (bserrno != 0) {
7108 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
7109 0 : return;
7110 : }
7111 :
7112 700 : for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
7113 640 : if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
7114 452 : break;
7115 : }
7116 : }
7117 :
7118 512 : if (ctx->cluster < _blob->active.num_clusters) {
7119 452 : offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
7120 :
7121 :                 /* We can safely advance the cluster index now; the copy uses the already-computed offset */
7122 452 : ctx->cluster++;
7123 :
7124 : /* Use a dummy 0B read as a context for cluster copy */
7125 452 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7126 452 : cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
7127 452 : cpl.u.blob_basic.cb_arg = ctx;
7128 :
7129 452 : op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
7130 : NULL, 0, offset, 0);
7131 452 : if (!op) {
7132 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
7133 0 : return;
7134 : }
7135 :
7136 452 : bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
7137 : } else {
7138 60 : bs_inflate_blob_done(ctx);
7139 : }
7140 : }
7141 :
7142 : static void
7143 64 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7144 : {
7145 64 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7146 : uint64_t clusters_needed;
7147 : uint64_t i;
7148 :
7149 64 : if (bserrno != 0) {
7150 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
7151 0 : return;
7152 : }
7153 :
7154 64 : ctx->original.blob = _blob;
7155 64 : ctx->original.md_ro = _blob->md_ro;
7156 :
7157 64 : if (_blob->locked_operation_in_progress) {
7158 0 : SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
7159 0 : ctx->bserrno = -EBUSY;
7160 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
7161 0 : return;
7162 : }
7163 :
7164 64 : _blob->locked_operation_in_progress = true;
7165 :
7166 64 : switch (_blob->parent_id) {
7167 8 : case SPDK_BLOBID_INVALID:
7168 8 : if (!ctx->allocate_all) {
7169 : /* This blob has no parent, so we cannot decouple it. */
7170 4 : SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
7171 4 : bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
7172 4 : return;
7173 : }
7174 4 : break;
7175 8 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
7176 : /*
7177 :                 * It would be better to rely on back_bs_dev->is_zeroes() to determine which
7178 :                 * clusters require allocation. Until there is a blobstore consumer that
7179 :                 * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes(), it is
7180 :                 * not worth the effort.
7181 : */
7182 8 : ctx->allocate_all = true;
7183 8 : break;
7184 48 : default:
7185 48 : break;
7186 : }
7187 :
7188 60 : if (spdk_blob_is_thin_provisioned(_blob) == false) {
7189 :                 /* This is not a thin-provisioned blob. No need to inflate. */
7190 0 : bs_clone_snapshot_origblob_cleanup(ctx, 0);
7191 0 : return;
7192 : }
7193 :
7194 : /* Do two passes - one to verify that we can obtain enough clusters
7195 : * and another to actually claim them.
7196 : */
7197 60 : clusters_needed = 0;
7198 700 : for (i = 0; i < _blob->active.num_clusters; i++) {
7199 640 : if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
7200 452 : clusters_needed++;
7201 : }
7202 : }
7203 :
7204 60 : if (clusters_needed > _blob->bs->num_free_clusters) {
7205 : /* Not enough free clusters. Cannot satisfy the request. */
7206 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
7207 0 : return;
7208 : }
7209 :
7210 60 : ctx->cluster = 0;
7211 60 : bs_inflate_blob_touch_next(ctx, 0);
7212 : }
7213 :
7214 : static void
7215 64 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7216 : spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
7217 : {
7218 64 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
7219 :
7220 64 : if (!ctx) {
7221 0 : cb_fn(cb_arg, -ENOMEM);
7222 0 : return;
7223 : }
7224 64 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7225 64 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7226 64 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7227 64 : ctx->bserrno = 0;
7228 64 : ctx->original.id = blobid;
7229 64 : ctx->channel = channel;
7230 64 : ctx->allocate_all = allocate_all;
7231 :
7232 64 : spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
7233 : }
7234 :
7235 : void
7236 28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7237 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7238 : {
7239 28 : bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
7240 28 : }
7241 :
7242 : void
7243 36 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7244 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7245 : {
7246 36 : bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
7247 36 : }
7248 : /* END spdk_bs_inflate_blob */
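      :
      : /*
      :  * Illustrative usage sketch (not part of this file): the two entry points above
      :  * share bs_inflate_blob(). Inflate (allocate_all=true) copies every unallocated
      :  * cluster and drops the parent entirely; decouple_parent copies only clusters
      :  * backed by the immediate parent and re-parents to the grandparent, if any.
      :  * The g_* names are hypothetical.
      :  */
      : extern struct spdk_blob_store *g_bs;      /* hypothetical */
      : extern struct spdk_io_channel *g_channel; /* hypothetical */
      : extern spdk_blob_id g_blobid;             /* hypothetical */
      :
      : static void
      : example_inflate_cpl(void *cb_arg, int bserrno)
      : {
      :         SPDK_NOTICELOG("inflate finished: %d\n", bserrno);
      : }
      :
      : static void
      : example_inflate(void)
      : {
      :         /* To copy only the immediate parent's clusters instead, call
      :          * spdk_bs_blob_decouple_parent() with the same arguments. */
      :         spdk_bs_inflate_blob(g_bs, g_channel, g_blobid, example_inflate_cpl, NULL);
      : }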
7249 :
7250 : /* START spdk_bs_blob_shallow_copy */
7251 :
7252 : struct shallow_copy_ctx {
7253 : struct spdk_bs_cpl cpl;
7254 : int bserrno;
7255 :
7256 : /* Blob source for copy */
7257 : struct spdk_blob_store *bs;
7258 : spdk_blob_id blobid;
7259 : struct spdk_blob *blob;
7260 : struct spdk_io_channel *blob_channel;
7261 :
7262 : /* Destination device for copy */
7263 : struct spdk_bs_dev *ext_dev;
7264 : struct spdk_io_channel *ext_channel;
7265 :
7266 : /* Current cluster for copy operation */
7267 : uint64_t cluster;
7268 :
7269 : /* Buffer for blob reading */
7270 : uint8_t *read_buff;
7271 :
7272 : /* Struct for external device writing */
7273 : struct spdk_bs_dev_cb_args ext_args;
7274 :
7275 : /* Actual number of copied clusters */
7276 : uint64_t copied_clusters_count;
7277 :
7278 : /* Status callback for updates about the ongoing operation */
7279 : spdk_blob_shallow_copy_status status_cb;
7280 :
7281 : /* Argument passed to function status_cb */
7282 : void *status_cb_arg;
7283 : };
7284 :
7285 : static void
7286 16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
7287 : {
7288 16 : struct shallow_copy_ctx *ctx = cb_arg;
7289 16 : struct spdk_bs_cpl *cpl = &ctx->cpl;
7290 :
7291 16 : if (bserrno != 0) {
7292 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
7293 0 : ctx->bserrno = bserrno;
7294 : }
7295 :
7296 16 : ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
7297 16 : spdk_free(ctx->read_buff);
7298 :
7299 16 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
7300 :
7301 16 : free(ctx);
7302 16 : }
7303 :
7304 : static void
7305 8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
7306 : {
7307 8 : struct shallow_copy_ctx *ctx = cb_arg;
7308 8 : struct spdk_blob *_blob = ctx->blob;
7309 :
7310 8 : if (bserrno != 0) {
7311 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
7312 0 : ctx->bserrno = bserrno;
7313 0 : _blob->locked_operation_in_progress = false;
7314 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7315 0 : return;
7316 : }
7317 :
7318 8 : ctx->cluster++;
7319 8 : if (ctx->status_cb) {
7320 8 : ctx->copied_clusters_count++;
7321 8 : ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
7322 : }
7323 :
7324 8 : bs_shallow_copy_cluster_find_next(ctx);
7325 : }
7326 :
7327 : static void
7328 8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
7329 : {
7330 8 : struct shallow_copy_ctx *ctx = cb_arg;
7331 8 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7332 8 : struct spdk_blob *_blob = ctx->blob;
7333 :
7334 8 : if (bserrno != 0) {
7335 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
7336 0 : ctx->bserrno = bserrno;
7337 0 : _blob->locked_operation_in_progress = false;
7338 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7339 0 : return;
7340 : }
7341 :
7342 8 : ctx->ext_args.channel = ctx->ext_channel;
7343 8 : ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
7344 8 : ctx->ext_args.cb_arg = ctx;
7345 :
7346 8 : ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
7347 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7348 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7349 : &ctx->ext_args);
7350 : }
7351 :
7352 : static void
7353 12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
7354 : {
7355 12 : struct shallow_copy_ctx *ctx = cb_arg;
7356 12 : struct spdk_blob *_blob = ctx->blob;
7357 :
7358 20 : while (ctx->cluster < _blob->active.num_clusters) {
7359 16 : if (_blob->active.clusters[ctx->cluster] != 0) {
7360 8 : break;
7361 : }
7362 :
7363 8 : ctx->cluster++;
7364 : }
7365 :
7366 12 : if (ctx->cluster < _blob->active.num_clusters) {
7367 8 : blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
7368 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7369 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7370 : bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
7371 : } else {
7372 4 : _blob->locked_operation_in_progress = false;
7373 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7374 : }
7375 12 : }
7376 :
7377 : static void
7378 16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7379 : {
7380 16 : struct shallow_copy_ctx *ctx = cb_arg;
7381 16 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7382 : uint32_t blob_block_size;
7383 : uint64_t blob_total_size;
7384 :
7385 16 : if (bserrno != 0) {
7386 0 : SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
7387 0 : ctx->bserrno = bserrno;
7388 0 : bs_shallow_copy_cleanup_finish(ctx, 0);
7389 0 : return;
7390 : }
7391 :
7392 16 : if (!spdk_blob_is_read_only(_blob)) {
7393 4 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
7394 4 : ctx->bserrno = -EPERM;
7395 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7396 4 : return;
7397 : }
7398 :
7399 12 : blob_block_size = _blob->bs->dev->blocklen;
7400 12 : blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
7401 :
7402 12 : if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
7403 4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must be at least as large as the blob\n",
7404 :                             _blob->id);
7405 4 : ctx->bserrno = -EINVAL;
7406 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7407 4 : return;
7408 : }
7409 :
7410 8 : if (blob_block_size % ext_dev->blocklen != 0) {
7411 4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible "
7412 :                             "with blobstore block size\n", _blob->id);
7413 4 : ctx->bserrno = -EINVAL;
7414 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7415 4 : return;
7416 : }
7417 :
7418 4 : ctx->blob = _blob;
7419 :
7420 4 : if (_blob->locked_operation_in_progress) {
7421 0 : SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
7422 0 : ctx->bserrno = -EBUSY;
7423 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7424 0 : return;
7425 : }
7426 :
7427 4 : _blob->locked_operation_in_progress = true;
7428 :
7429 4 : ctx->cluster = 0;
7430 4 : bs_shallow_copy_cluster_find_next(ctx);
7431 : }
7432 :
7433 : int
7434 16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7435 : spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
7436 : spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
7437 : spdk_blob_op_complete cb_fn, void *cb_arg)
7438 : {
7439 : struct shallow_copy_ctx *ctx;
7440 : struct spdk_io_channel *ext_channel;
7441 :
7442 16 : ctx = calloc(1, sizeof(*ctx));
7443 16 : if (!ctx) {
7444 0 : return -ENOMEM;
7445 : }
7446 :
7447 16 : ctx->bs = bs;
7448 16 : ctx->blobid = blobid;
7449 16 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7450 16 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7451 16 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7452 16 : ctx->bserrno = 0;
7453 16 : ctx->blob_channel = channel;
7454 16 : ctx->status_cb = status_cb_fn;
7455 16 : ctx->status_cb_arg = status_cb_arg;
7456 16 : ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
7457 : SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
7458 16 : if (!ctx->read_buff) {
7459 0 : free(ctx);
7460 0 : return -ENOMEM;
7461 : }
7462 :
7463 16 : ext_channel = ext_dev->create_channel(ext_dev);
7464 16 : if (!ext_channel) {
7465 0 : spdk_free(ctx->read_buff);
7466 0 : free(ctx);
7467 0 : return -ENOMEM;
7468 : }
7469 16 : ctx->ext_dev = ext_dev;
7470 16 : ctx->ext_channel = ext_channel;
7471 :
7472 16 : spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
7473 :
7474 16 : return 0;
7475 : }
7476 : /* END spdk_bs_blob_shallow_copy */
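      :
      : /*
      :  * Illustrative usage sketch (not part of this file): shallow-copying a read-only
      :  * blob's allocated clusters to an external spdk_bs_dev, with per-cluster
      :  * progress reporting. The g_* names are hypothetical.
      :  */
      : extern struct spdk_blob_store *g_bs;      /* hypothetical */
      : extern struct spdk_io_channel *g_channel; /* hypothetical */
      : extern spdk_blob_id g_blobid;             /* hypothetical */
      : extern struct spdk_bs_dev *g_ext_dev;     /* hypothetical */
      :
      : static void
      : example_shallow_copy_status(uint64_t copied_clusters, void *cb_arg)
      : {
      :         SPDK_NOTICELOG("copied %" PRIu64 " clusters so far\n", copied_clusters);
      : }
      :
      : static void
      : example_shallow_copy_cpl(void *cb_arg, int bserrno)
      : {
      :         SPDK_NOTICELOG("shallow copy finished: %d\n", bserrno);
      : }
      :
      : static void
      : example_shallow_copy(void)
      : {
      :         int rc;
      :
      :         rc = spdk_bs_blob_shallow_copy(g_bs, g_channel, g_blobid, g_ext_dev,
      :                                        example_shallow_copy_status, NULL,
      :                                        example_shallow_copy_cpl, NULL);
      :         if (rc != 0) {
      :                 SPDK_ERRLOG("could not start shallow copy: %d\n", rc);
      :         }
      : }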
7477 :
7478 : /* START spdk_bs_blob_set_parent */
7479 :
7480 : struct set_parent_ctx {
7481 : struct spdk_blob_store *bs;
7482 : int bserrno;
7483 : spdk_bs_op_complete cb_fn;
7484 : void *cb_arg;
7485 :
7486 : struct spdk_blob *blob;
7487 : bool blob_md_ro;
7488 :
7489 : struct blob_parent parent;
7490 : };
7491 :
7492 : static void
7493 24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
7494 : {
7495 24 : struct set_parent_ctx *ctx = cb_arg;
7496 :
7497 24 : assert(ctx != NULL);
7498 :
7499 24 : if (bserrno != 0) {
7500 0 : SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
7501 0 : if (ctx->bserrno == 0) {
7502 0 : ctx->bserrno = bserrno;
7503 : }
7504 : }
7505 :
7506 24 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7507 :
7508 24 : free(ctx);
7509 24 : }
7510 :
7511 : static void
7512 20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
7513 : {
7514 20 : struct set_parent_ctx *ctx = cb_arg;
7515 :
7516 20 : if (ctx->bserrno != 0) {
7517 8 : spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
7518 8 : return;
7519 : }
7520 :
7521 12 : if (bserrno != 0) {
7522 0 : SPDK_ERRLOG("blob close error %d\n", bserrno);
7523 0 : ctx->bserrno = bserrno;
7524 : }
7525 :
7526 12 : bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
7527 : }
7528 :
7529 : static void
7530 12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
7531 : {
7532 12 : struct set_parent_ctx *ctx = cb_arg;
7533 12 : struct spdk_blob *blob = ctx->blob;
7534 12 : struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
7535 :
7536 12 : if (bserrno != 0 && ctx->bserrno == 0) {
7537 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7538 0 : ctx->bserrno = bserrno;
7539 : }
7540 :
7541 : /* Revert md_ro to original state */
7542 12 : blob->md_ro = ctx->blob_md_ro;
7543 :
7544 12 : blob->locked_operation_in_progress = false;
7545 12 : snapshot->locked_operation_in_progress = false;
7546 :
7547 12 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7548 12 : }
7549 :
7550 : static void
7551 12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
7552 : {
7553 12 : struct set_parent_ctx *ctx = cb_arg;
7554 12 : struct spdk_blob *blob = ctx->blob;
7555 :
7556 12 : if (bserrno != 0) {
7557 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7558 0 : ctx->bserrno = bserrno;
7559 0 : bs_set_parent_close_blob(ctx, bserrno);
7560 0 : return;
7561 : }
7562 :
7563 12 : spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
7564 : }
7565 :
7566 : static int
7567 12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7568 : {
7569 : int rc;
7570 :
7571 12 : bs_blob_list_remove(blob);
7572 :
7573 12 : rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
7574 12 : if (rc != 0) {
7575 0 : SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
7576 0 : return rc;
7577 : }
7578 12 : blob->parent_id = parent->u.snapshot.id;
7579 :
7580 12 : if (blob_is_esnap_clone(blob)) {
7581 : /* Remove the xattr that references the external snapshot */
7582 4 : blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
7583 4 : blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
7584 : }
7585 :
7586 12 : bs_blob_list_add(blob);
7587 :
7588 12 : return 0;
7589 : }
7590 :
7591 : static void
7592 20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
7593 : {
7594 20 : struct set_parent_ctx *ctx = cb_arg;
7595 20 : struct spdk_blob *blob = ctx->blob;
7596 : struct spdk_bs_dev *back_bs_dev;
7597 :
7598 20 : if (bserrno != 0) {
7599 0 : SPDK_ERRLOG("snapshot open error %d\n", bserrno);
7600 0 : ctx->bserrno = bserrno;
7601 0 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7602 0 : return;
7603 : }
7604 :
7605 20 : ctx->parent.u.snapshot.blob = snapshot;
7606 20 : ctx->parent.u.snapshot.id = snapshot->id;
7607 :
7608 20 : if (!spdk_blob_is_snapshot(snapshot)) {
7609 4 : SPDK_ERRLOG("parent blob is not a snapshot\n");
7610 4 : ctx->bserrno = -EINVAL;
7611 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7612 4 : return;
7613 : }
7614 :
7615 16 : if (blob->active.num_clusters != snapshot->active.num_clusters) {
7616 4 :                 SPDK_ERRLOG("parent blob's cluster count differs from the child's\n");
7617 4 : ctx->bserrno = -EINVAL;
7618 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7619 4 : return;
7620 : }
7621 :
7622 12 : if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
7623 0 : SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
7624 0 : ctx->bserrno = -EBUSY;
7625 0 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7626 0 : return;
7627 : }
7628 :
7629 12 : blob->locked_operation_in_progress = true;
7630 12 : snapshot->locked_operation_in_progress = true;
7631 :
7632 : /* Temporarily override md_ro flag for MD modification */
7633 12 : blob->md_ro = false;
7634 :
7635 12 : back_bs_dev = bs_create_blob_bs_dev(snapshot);
7636 :
7637 12 : blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
7638 : bs_set_parent_set_back_bs_dev_done,
7639 : ctx);
7640 : }
7641 :
7642 : static void
7643 24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7644 : {
7645 24 : struct set_parent_ctx *ctx = cb_arg;
7646 :
7647 24 : if (bserrno != 0) {
7648 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7649 0 : ctx->bserrno = bserrno;
7650 0 : bs_set_parent_cleanup_finish(ctx, 0);
7651 0 : return;
7652 : }
7653 :
7654 24 : if (!spdk_blob_is_thin_provisioned(blob)) {
7655 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7656 4 : ctx->bserrno = -EINVAL;
7657 4 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7658 4 : return;
7659 : }
7660 :
7661 20 : ctx->blob = blob;
7662 20 : ctx->blob_md_ro = blob->md_ro;
7663 :
7664 20 : spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
7665 : }
7666 :
7667 : void
7668 36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7669 : spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
7670 : {
7671 : struct set_parent_ctx *ctx;
7672 :
7673 36 : if (snapshot_id == SPDK_BLOBID_INVALID) {
7674 4 : SPDK_ERRLOG("snapshot id not valid\n");
7675 4 : cb_fn(cb_arg, -EINVAL);
7676 4 : return;
7677 : }
7678 :
7679 32 : if (blob_id == snapshot_id) {
7680 4 : SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
7681 4 : cb_fn(cb_arg, -EINVAL);
7682 4 : return;
7683 : }
7684 :
7685 28 : if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
7686 4 : SPDK_NOTICELOG("snapshot is already the parent of blob\n");
7687 4 : cb_fn(cb_arg, -EEXIST);
7688 4 : return;
7689 : }
7690 :
7691 24 : ctx = calloc(1, sizeof(*ctx));
7692 24 : if (!ctx) {
7693 0 : cb_fn(cb_arg, -ENOMEM);
7694 0 : return;
7695 : }
7696 :
7697 24 : ctx->bs = bs;
7698 24 : ctx->parent.u.snapshot.id = snapshot_id;
7699 24 : ctx->cb_fn = cb_fn;
7700 24 : ctx->cb_arg = cb_arg;
7701 24 : ctx->bserrno = 0;
7702 :
7703 24 : spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
7704 : }
7705 : /* END spdk_bs_blob_set_parent */
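      :
      : /*
      :  * Illustrative usage sketch (not part of this file): re-parenting a
      :  * thin-provisioned clone onto a snapshot with the same cluster count.
      :  * The g_* names are hypothetical.
      :  */
      : extern struct spdk_blob_store *g_bs; /* hypothetical */
      : extern spdk_blob_id g_blobid;        /* hypothetical */
      : extern spdk_blob_id g_snapshot_id;   /* hypothetical */
      :
      : static void
      : example_set_parent_cpl(void *cb_arg, int bserrno)
      : {
      :         SPDK_NOTICELOG("set parent finished: %d\n", bserrno);
      : }
      :
      : static void
      : example_set_parent(void)
      : {
      :         spdk_bs_blob_set_parent(g_bs, g_blobid, g_snapshot_id,
      :                                 example_set_parent_cpl, NULL);
      : }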
7706 :
7707 : /* START spdk_bs_blob_set_external_parent */
7708 :
7709 : static void
7710 16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
7711 : {
7712 16 : struct set_parent_ctx *ctx = cb_arg;
7713 :
7714 16 : if (bserrno != 0) {
7715 0 : SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
7716 0 : if (ctx->bserrno == 0) {
7717 0 : ctx->bserrno = bserrno;
7718 : }
7719 : }
7720 :
7721 16 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7722 :
7723 16 : free(ctx->parent.u.esnap.id);
7724 16 : free(ctx);
7725 16 : }
7726 :
7727 : static void
7728 8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
7729 : {
7730 8 : struct set_parent_ctx *ctx = cb_arg;
7731 8 : struct spdk_blob *blob = ctx->blob;
7732 :
7733 8 : if (bserrno != 0 && ctx->bserrno == 0) {
7734 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7735 0 : ctx->bserrno = bserrno;
7736 : }
7737 :
7738 : /* Revert md_ro to original state */
7739 8 : blob->md_ro = ctx->blob_md_ro;
7740 :
7741 8 : blob->locked_operation_in_progress = false;
7742 :
7743 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7744 8 : }
7745 :
7746 : static void
7747 8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
7748 : {
7749 8 : struct set_parent_ctx *ctx = cb_arg;
7750 8 : struct spdk_blob *blob = ctx->blob;
7751 :
7752 8 : if (bserrno != 0) {
7753 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7754 0 : ctx->bserrno = bserrno;
7755 0 : bs_set_external_parent_close_blob(ctx, bserrno);
7756 0 : return;
7757 : }
7758 :
7759 8 : spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
7760 : }
7761 :
7762 : static int
7763 8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7764 : {
7765 : int rc;
7766 :
7767 8 : bs_blob_list_remove(blob);
7768 :
7769 8 : if (spdk_blob_is_clone(blob)) {
7770 : /* Remove the xattr that references the snapshot */
7771 0 : blob->parent_id = SPDK_BLOBID_INVALID;
7772 0 : blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
7773 : }
7774 :
7775 8 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
7776 8 : parent->u.esnap.id_len, true);
7777 8 : if (rc != 0) {
7778 0 : SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
7779 0 : return rc;
7780 : }
7781 8 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
7782 8 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
7783 :
7784 8 : bs_blob_list_add(blob);
7785 :
7786 8 : return 0;
7787 : }
7788 :
7789 : static void
7790 16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7791 : {
7792 16 : struct set_parent_ctx *ctx = cb_arg;
7793 16 : const void *esnap_id;
7794 16 : size_t esnap_id_len;
7795 : int rc;
7796 :
7797 16 : if (bserrno != 0) {
7798 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7799 0 : ctx->bserrno = bserrno;
7800 0 : bs_set_parent_cleanup_finish(ctx, 0);
7801 0 : return;
7802 : }
7803 :
7804 16 : ctx->blob = blob;
7805 16 : ctx->blob_md_ro = blob->md_ro;
7806 :
7807 16 : rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
7808 16 : if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
7809 4 : memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
7810 4 : SPDK_ERRLOG("external snapshot is already the parent of blob\n");
7811 4 : ctx->bserrno = -EEXIST;
7812 4 : goto error;
7813 : }
7814 :
7815 12 : if (!spdk_blob_is_thin_provisioned(blob)) {
7816 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7817 4 : ctx->bserrno = -EINVAL;
7818 4 : goto error;
7819 : }
7820 :
7821 8 : if (blob->locked_operation_in_progress) {
7822 0 : SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
7823 0 : ctx->bserrno = -EBUSY;
7824 0 : goto error;
7825 : }
7826 :
7827 8 : blob->locked_operation_in_progress = true;
7828 :
7829 : /* Temporarily override md_ro flag for MD modification */
7830 8 : blob->md_ro = false;
7831 :
7832 8 : blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
7833 : &ctx->parent, bs_set_external_parent_unfrozen, ctx);
7834 8 : return;
7835 :
7836 8 : error:
7837 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7838 : }
7839 :
7840 : void
7841 24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7842 : struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
7843 : uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
7844 : {
7845 : struct set_parent_ctx *ctx;
7846 : uint64_t esnap_dev_size, cluster_sz;
7847 :
7848 24 : if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
7849 4 : SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
7850 4 : cb_fn(cb_arg, -EINVAL);
7851 4 : return;
7852 : }
7853 :
7854 20 : esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
7855 20 : cluster_sz = spdk_bs_get_cluster_size(bs);
7856 20 : if ((esnap_dev_size % cluster_sz) != 0) {
7857 4 : SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
7858 : "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
7859 4 : cb_fn(cb_arg, -EINVAL);
7860 4 : return;
7861 : }
7862 :
7863 16 : ctx = calloc(1, sizeof(*ctx));
7864 16 : if (!ctx) {
7865 0 : cb_fn(cb_arg, -ENOMEM);
7866 0 : return;
7867 : }
7868 :
7869 16 : ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
7870 16 : if (!ctx->parent.u.esnap.id) {
7871 0 : free(ctx);
7872 0 : cb_fn(cb_arg, -ENOMEM);
7873 0 : return;
7874 : }
7875 :
7876 16 : ctx->bs = bs;
7877 16 : ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
7878 16 : memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
7879 16 : ctx->parent.u.esnap.id_len = esnap_id_len;
7880 16 : ctx->cb_fn = cb_fn;
7881 16 : ctx->cb_arg = cb_arg;
7882 16 : ctx->bserrno = 0;
7883 :
7884 16 : spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
7885 : }
7886 : /* END spdk_bs_blob_set_external_parent */
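      :
      : /*
      :  * Illustrative usage sketch (not part of this file): turning a thin-provisioned
      :  * blob into an esnap clone of an external device. The esnap id is an opaque
      :  * byte string the blobstore consumer later uses to reopen the device; the id
      :  * shown here and the g_* names are hypothetical.
      :  */
      : extern struct spdk_blob_store *g_bs;    /* hypothetical */
      : extern spdk_blob_id g_blobid;           /* hypothetical */
      : extern struct spdk_bs_dev *g_esnap_dev; /* hypothetical */
      :
      : static void
      : example_set_esnap_parent_cpl(void *cb_arg, int bserrno)
      : {
      :         SPDK_NOTICELOG("set external parent finished: %d\n", bserrno);
      : }
      :
      : static void
      : example_set_external_parent(void)
      : {
      :         const char *esnap_id = "example-esnap-id";
      :
      :         spdk_bs_blob_set_external_parent(g_bs, g_blobid, g_esnap_dev, esnap_id,
      :                                          strlen(esnap_id) + 1,
      :                                          example_set_esnap_parent_cpl, NULL);
      : }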
7887 :
7888 : /* START spdk_blob_resize */
7889 : struct spdk_bs_resize_ctx {
7890 : spdk_blob_op_complete cb_fn;
7891 : void *cb_arg;
7892 : struct spdk_blob *blob;
7893 : uint64_t sz;
7894 : int rc;
7895 : };
7896 :
7897 : static void
7898 202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
7899 : {
7900 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7901 :
7902 202 : if (rc != 0) {
7903 0 : SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
7904 : }
7905 :
7906 202 : if (ctx->rc != 0) {
7907 4 :                 SPDK_ERRLOG("Resize failed, ctx->rc=%d\n", ctx->rc);
7908 4 : rc = ctx->rc;
7909 : }
7910 :
7911 202 : ctx->blob->locked_operation_in_progress = false;
7912 :
7913 202 : ctx->cb_fn(ctx->cb_arg, rc);
7914 202 : free(ctx);
7915 202 : }
7916 :
7917 : static void
7918 202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
7919 : {
7920 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7921 :
7922 202 : if (rc != 0) {
7923 0 : ctx->blob->locked_operation_in_progress = false;
7924 0 : ctx->cb_fn(ctx->cb_arg, rc);
7925 0 : free(ctx);
7926 0 : return;
7927 : }
7928 :
7929 202 : ctx->rc = blob_resize(ctx->blob, ctx->sz);
7930 :
7931 202 : blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
7932 : }
7933 :
7934 : void
7935 216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
7936 : {
7937 : struct spdk_bs_resize_ctx *ctx;
7938 :
7939 216 : blob_verify_md_op(blob);
7940 :
7941 216 : SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
7942 :
7943 216 : if (blob->md_ro) {
7944 4 : cb_fn(cb_arg, -EPERM);
7945 4 : return;
7946 : }
7947 :
7948 212 : if (sz == blob->active.num_clusters) {
7949 10 : cb_fn(cb_arg, 0);
7950 10 : return;
7951 : }
7952 :
7953 202 : if (blob->locked_operation_in_progress) {
7954 0 : cb_fn(cb_arg, -EBUSY);
7955 0 : return;
7956 : }
7957 :
7958 202 : ctx = calloc(1, sizeof(*ctx));
7959 202 : if (!ctx) {
7960 0 : cb_fn(cb_arg, -ENOMEM);
7961 0 : return;
7962 : }
7963 :
7964 202 : blob->locked_operation_in_progress = true;
7965 202 : ctx->cb_fn = cb_fn;
7966 202 : ctx->cb_arg = cb_arg;
7967 202 : ctx->blob = blob;
7968 202 : ctx->sz = sz;
7969 202 : blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
7970 : }
7971 :
7972 : /* END spdk_blob_resize */
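      :
      : /*
      :  * Illustrative usage sketch (not part of this file): resizing an open blob.
      :  * The size is given in clusters, not bytes, and I/O is frozen for the duration
      :  * of the metadata update, as shown above. g_blob is hypothetical.
      :  */
      : extern struct spdk_blob *g_blob; /* hypothetical */
      :
      : static void
      : example_resize_cpl(void *cb_arg, int bserrno)
      : {
      :         SPDK_NOTICELOG("resize finished: %d\n", bserrno);
      : }
      :
      : static void
      : example_resize(void)
      : {
      :         /* Grow (or shrink) the blob to 64 clusters. */
      :         spdk_blob_resize(g_blob, 64, example_resize_cpl, NULL);
      : }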
7973 :
7974 :
7975 : /* START spdk_bs_delete_blob */
7976 :
7977 : static void
7978 1492 : bs_delete_close_cpl(void *cb_arg, int bserrno)
7979 : {
7980 1492 : spdk_bs_sequence_t *seq = cb_arg;
7981 :
7982 1492 : bs_sequence_finish(seq, bserrno);
7983 1492 : }
7984 :
7985 : static void
7986 1492 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
7987 : {
7988 1492 : struct spdk_blob *blob = cb_arg;
7989 :
7990 1492 : if (bserrno != 0) {
7991 : /*
7992 :                 * We already removed this blob from the blobstore's blob tree, so
7993 : * we need to free it here since this is the last reference
7994 : * to it.
7995 : */
7996 0 : blob_free(blob);
7997 0 : bs_delete_close_cpl(seq, bserrno);
7998 0 : return;
7999 : }
8000 :
8001 : /*
8002 : * This will immediately decrement the ref_count and call
8003 : * the completion routine since the metadata state is clean.
8004 : * By calling spdk_blob_close, we reduce the number of call
8005 : * points into code that touches the blob->open_ref count
8006 : * and the blobstore's blob list.
8007 : */
8008 1492 : spdk_blob_close(blob, bs_delete_close_cpl, seq);
8009 : }
8010 :
8011 : struct delete_snapshot_ctx {
8012 : struct spdk_blob_list *parent_snapshot_entry;
8013 : struct spdk_blob *snapshot;
8014 : struct spdk_blob_md_page *page;
8015 : bool snapshot_md_ro;
8016 : struct spdk_blob *clone;
8017 : bool clone_md_ro;
8018 : spdk_blob_op_with_handle_complete cb_fn;
8019 : void *cb_arg;
8020 : int bserrno;
8021 : uint32_t next_extent_page;
8022 : };
8023 :
8024 : static void
8025 110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
8026 : {
8027 110 : struct delete_snapshot_ctx *ctx = cb_arg;
8028 :
8029 110 : if (bserrno != 0) {
8030 0 : SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
8031 : }
8032 :
8033 110 : assert(ctx != NULL);
8034 :
8035 110 : if (bserrno != 0 && ctx->bserrno == 0) {
8036 0 : ctx->bserrno = bserrno;
8037 : }
8038 :
8039 110 : ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
8040 110 : spdk_free(ctx->page);
8041 110 : free(ctx);
8042 110 : }
8043 :
8044 : static void
8045 22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
8046 : {
8047 22 : struct delete_snapshot_ctx *ctx = cb_arg;
8048 :
8049 22 : if (bserrno != 0) {
8050 0 : ctx->bserrno = bserrno;
8051 0 : SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
8052 : }
8053 :
8054 22 : if (ctx->bserrno != 0) {
8055 22 : assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
8056 22 : RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
8057 22 : spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
8058 : }
8059 :
8060 22 : ctx->snapshot->locked_operation_in_progress = false;
8061 22 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8062 :
8063 22 : spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
8064 22 : }
8065 :
8066 : static void
8067 12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
8068 : {
8069 12 : struct delete_snapshot_ctx *ctx = cb_arg;
8070 :
8071 12 : ctx->clone->locked_operation_in_progress = false;
8072 12 : ctx->clone->md_ro = ctx->clone_md_ro;
8073 :
8074 12 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8075 12 : }
8076 :
8077 : static void
8078 48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
8079 : {
8080 48 : struct delete_snapshot_ctx *ctx = cb_arg;
8081 :
8082 48 : if (bserrno) {
8083 0 : ctx->bserrno = bserrno;
8084 0 : delete_snapshot_cleanup_clone(ctx, 0);
8085 0 : return;
8086 : }
8087 :
8088 48 : ctx->clone->locked_operation_in_progress = false;
8089 48 : spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
8090 : }
8091 :
8092 : static void
8093 52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
8094 : {
8095 52 : struct delete_snapshot_ctx *ctx = cb_arg;
8096 52 : struct spdk_blob_list *parent_snapshot_entry = NULL;
8097 52 : struct spdk_blob_list *snapshot_entry = NULL;
8098 52 : struct spdk_blob_list *clone_entry = NULL;
8099 52 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8100 :
8101 52 : if (bserrno) {
8102 4 : SPDK_ERRLOG("Failed to sync MD on blob\n");
8103 4 : ctx->bserrno = bserrno;
8104 4 : delete_snapshot_cleanup_clone(ctx, 0);
8105 4 : return;
8106 : }
8107 :
8108 : /* Get snapshot entry for the snapshot we want to remove */
8109 48 : snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
8110 :
8111 48 : assert(snapshot_entry != NULL);
8112 :
8113 : /* Remove clone entry in this snapshot (at this point there can be only one clone) */
8114 48 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8115 48 : assert(clone_entry != NULL);
8116 48 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
8117 48 : snapshot_entry->clone_count--;
8118 48 : assert(TAILQ_EMPTY(&snapshot_entry->clones));
8119 :
8120 48 : switch (ctx->snapshot->parent_id) {
8121 40 : case SPDK_BLOBID_INVALID:
8122 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
8123 : /* No parent snapshot - just remove clone entry */
8124 40 : free(clone_entry);
8125 40 : break;
8126 8 : default:
8127 :                 /* This snapshot is also a clone of another snapshot - we need to update
8128 :                  * the parent snapshot (remove the current clone entry, add a new one
8129 :                  * inherited from the snapshot that is being removed) */
8130 :
8131 : /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
8132 : * snapshot that we are removing */
8133 8 : blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
8134 : &snapshot_clone_entry);
8135 :
8136 : /* Switch clone entry in parent snapshot */
8137 8 : TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
8138 8 : TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
8139 8 : free(snapshot_clone_entry);
8140 : }
8141 :
8142 : /* Restore md_ro flags */
8143 48 : ctx->clone->md_ro = ctx->clone_md_ro;
8144 48 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8145 :
8146 48 : blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
8147 : }
8148 :
8149 : static void
8150 56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
8151 : {
8152 56 : struct delete_snapshot_ctx *ctx = cb_arg;
8153 : uint64_t i;
8154 :
8155 56 : ctx->snapshot->md_ro = false;
8156 :
8157 56 : if (bserrno) {
8158 4 : SPDK_ERRLOG("Failed to sync MD on clone\n");
8159 4 : ctx->bserrno = bserrno;
8160 :
8161 : /* Restore snapshot to previous state */
8162 4 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8163 4 : if (bserrno != 0) {
8164 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8165 0 : return;
8166 : }
8167 :
8168 4 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8169 4 : return;
8170 : }
8171 :
8172 : /* Clear cluster map entries for snapshot */
8173 552 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8174 500 : if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
8175 492 : if (ctx->snapshot->active.clusters[i] != 0) {
8176 328 : ctx->snapshot->active.num_allocated_clusters--;
8177 : }
8178 492 : ctx->snapshot->active.clusters[i] = 0;
8179 : }
8180 : }
8181 78 : for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
8182 52 : i < ctx->clone->active.num_extent_pages; i++) {
8183 26 : if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
8184 24 : ctx->snapshot->active.extent_pages[i] = 0;
8185 : }
8186 : }
8187 :
8188 52 : blob_set_thin_provision(ctx->snapshot);
8189 52 : ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
8190 :
8191 52 : if (ctx->parent_snapshot_entry != NULL) {
8192 8 : ctx->snapshot->back_bs_dev = NULL;
8193 : }
8194 :
8195 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
8196 : }
8197 :
8198 : static void
8199 56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
8200 : {
8201 : int bserrno;
8202 :
8203 : /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
8204 56 : blob_back_bs_destroy(ctx->clone);
8205 :
8206 : /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
8207 56 : if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
8208 8 : bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
8209 : BLOB_EXTERNAL_SNAPSHOT_ID);
8210 8 : if (bserrno != 0) {
8211 0 : ctx->bserrno = bserrno;
8212 :
8213 : /* Restore snapshot to previous state */
8214 0 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8215 0 : if (bserrno != 0) {
8216 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8217 0 : return;
8218 : }
8219 :
8220 0 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8221 0 : return;
8222 : }
8223 8 : ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
8224 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8225 : /* Do not delete the external snapshot along with this snapshot */
8226 8 : ctx->snapshot->back_bs_dev = NULL;
8227 8 : ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
8228 48 : } else if (ctx->parent_snapshot_entry != NULL) {
8229 : /* ...to parent snapshot */
8230 8 : ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
8231 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8232 8 : blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
8233 : sizeof(spdk_blob_id),
8234 : true);
8235 : } else {
8236 : /* ...to blobid invalid and zeroes dev */
8237 40 : ctx->clone->parent_id = SPDK_BLOBID_INVALID;
8238 40 : ctx->clone->back_bs_dev = bs_create_zeroes_dev();
8239 40 : blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
8240 : }
8241 :
8242 56 : spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
8243 : }
8244 :
8245 : static void
8246 58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
8247 : {
8248 58 : struct delete_snapshot_ctx *ctx = cb_arg;
8249 : uint32_t *extent_page;
8250 : uint64_t i;
8251 :
8252 84 : for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
8253 54 : i < ctx->clone->active.num_extent_pages; i++) {
8254 28 : if (ctx->snapshot->active.extent_pages[i] == 0) {
8255 : /* No extent page to use from snapshot */
8256 8 : continue;
8257 : }
8258 :
8259 20 : extent_page = &ctx->clone->active.extent_pages[i];
8260 20 : if (*extent_page == 0) {
8261 : /* Copy extent page from snapshot when clone did not have a matching one */
8262 18 : *extent_page = ctx->snapshot->active.extent_pages[i];
8263 18 : continue;
8264 : }
8265 :
8266 : /* Clone and snapshot both contain partially filled matching extent pages.
8267 :                  * Update the clone extent page in place with a cluster map containing a mix of both. */
8268 2 : ctx->next_extent_page = i + 1;
8269 2 : memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
8270 :
8271 2 : blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
8272 : delete_snapshot_update_extent_pages, ctx);
8273 2 : return;
8274 : }
8275 56 : delete_snapshot_update_extent_pages_cpl(ctx);
8276 : }
8277 :
8278 : static void
8279 60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
8280 : {
8281 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8282 : uint64_t i;
8283 :
8284 : /* Temporarily override md_ro flag for clone for MD modification */
8285 60 : ctx->clone_md_ro = ctx->clone->md_ro;
8286 60 : ctx->clone->md_ro = false;
8287 :
8288 60 : if (bserrno) {
8289 4 : SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
8290 4 : ctx->bserrno = bserrno;
8291 4 : delete_snapshot_cleanup_clone(ctx, 0);
8292 4 : return;
8293 : }
8294 :
8295 : /* Copy snapshot map to clone map (only unallocated clusters in clone) */
8296 596 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8297 540 : if (ctx->clone->active.clusters[i] == 0) {
8298 532 : ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
8299 532 : if (ctx->clone->active.clusters[i] != 0) {
8300 368 : ctx->clone->active.num_allocated_clusters++;
8301 : }
8302 : }
8303 : }
8304 56 : ctx->next_extent_page = 0;
8305 56 : delete_snapshot_update_extent_pages(ctx, 0);
8306 : }
8307 :
8308 : static void
8309 8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
8310 : {
8311 8 : struct delete_snapshot_ctx *ctx = cb_arg;
8312 :
8313 8 : if (bserrno != 0) {
8314 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
8315 : blob->id, bserrno);
8316 : /* That error should not stop us from syncing metadata. */
8317 : }
8318 :
8319 8 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8320 8 : }
8321 :
8322 : static void
8323 60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
8324 : {
8325 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8326 :
8327 60 : if (bserrno) {
8328 0 : SPDK_ERRLOG("Failed to freeze I/O on clone\n");
8329 0 : ctx->bserrno = bserrno;
8330 0 : delete_snapshot_cleanup_clone(ctx, 0);
8331 0 : return;
8332 : }
8333 :
8334 : /* Temporarily override md_ro flag for snapshot for MD modification */
8335 60 : ctx->snapshot_md_ro = ctx->snapshot->md_ro;
8336 60 : ctx->snapshot->md_ro = false;
8337 :
8338 : /* Mark blob as pending removal for power failure safety; use the clone id for recovery */
8339 60 : ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
8340 : sizeof(spdk_blob_id), true);
8341 60 : if (ctx->bserrno != 0) {
8342 0 : delete_snapshot_cleanup_clone(ctx, 0);
8343 0 : return;
8344 : }
8345 :
8346 60 : if (blob_is_esnap_clone(ctx->snapshot)) {
8347 8 : blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
8348 : delete_snapshot_esnap_channels_destroyed_cb,
8349 : ctx);
8350 8 : return;
8351 : }
8352 :
8353 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8354 : }
8355 :
8356 : static void
8357 70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
8358 : {
8359 70 : struct delete_snapshot_ctx *ctx = cb_arg;
8360 :
8361 70 : if (bserrno) {
8362 10 : SPDK_ERRLOG("Failed to open clone\n");
8363 10 : ctx->bserrno = bserrno;
8364 10 : delete_snapshot_cleanup_snapshot(ctx, 0);
8365 10 : return;
8366 : }
8367 :
8368 60 : ctx->clone = clone;
8369 :
8370 60 : if (clone->locked_operation_in_progress) {
8371 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
8372 0 : ctx->bserrno = -EBUSY;
8373 0 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8374 0 : return;
8375 : }
8376 :
8377 60 : clone->locked_operation_in_progress = true;
8378 :
8379 60 : blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
8380 : }
8381 :
8382 : static void
8383 70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
8384 : {
8385 70 : struct spdk_blob_list *snapshot_entry = NULL;
8386 70 : struct spdk_blob_list *clone_entry = NULL;
8387 70 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8388 :
8389 : /* Get snapshot entry for the snapshot we want to remove */
8390 70 : snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
8391 :
8392 70 : assert(snapshot_entry != NULL);
8393 :
8394 : /* Get clone of the snapshot (at this point there can be only one clone) */
8395 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8396 70 : assert(snapshot_entry->clone_count == 1);
8397 70 : assert(clone_entry != NULL);
8398 :
8399 : /* Get the snapshot entry for the parent snapshot, and the clone entry within
8400 : * that parent that refers to the snapshot we are removing */
8401 70 : blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
8402 : &snapshot_clone_entry);
8403 :
8404 70 : spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
8405 70 : }
8406 :
8407 : static void
8408 1554 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
8409 : {
8410 1554 : spdk_bs_sequence_t *seq = cb_arg;
8411 1554 : struct spdk_blob_list *snapshot_entry = NULL;
8412 : uint32_t page_num;
8413 :
8414 1554 : if (bserrno) {
8415 62 : SPDK_ERRLOG("Failed to remove blob\n");
8416 62 : bs_sequence_finish(seq, bserrno);
8417 62 : return;
8418 : }
8419 :
8420 : /* Remove snapshot from the list */
8421 1492 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8422 1492 : if (snapshot_entry != NULL) {
8423 144 : TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
8424 144 : free(snapshot_entry);
8425 : }
8426 :
8427 1492 : page_num = bs_blobid_to_page(blob->id);
8428 1492 : spdk_bit_array_clear(blob->bs->used_blobids, page_num);
8429 1492 : blob->state = SPDK_BLOB_STATE_DIRTY;
8430 1492 : blob->active.num_pages = 0;
8431 1492 : blob_resize(blob, 0);
8432 :
8433 1492 : blob_persist(seq, blob, bs_delete_persist_cpl, blob);
8434 : }
8435 :
8436 : static int
8437 1554 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
8438 : {
8439 1554 : struct spdk_blob_list *snapshot_entry = NULL;
8440 1554 : struct spdk_blob_list *clone_entry = NULL;
8441 1554 : struct spdk_blob *clone = NULL;
8442 1554 : bool has_one_clone = false;
8443 :
8444 : /* Check if this is a snapshot with clones */
8445 1554 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8446 1554 : if (snapshot_entry != NULL) {
8447 194 : if (snapshot_entry->clone_count > 1) {
8448 24 : SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
8449 24 : return -EBUSY;
8450 170 : } else if (snapshot_entry->clone_count == 1) {
8451 70 : has_one_clone = true;
8452 : }
8453 : }
8454 :
8455 : /* Check if someone has this blob open (besides this delete context):
8456 : * - open_ref = 1 - only this context opened blob, so it is ok to remove it
8457 : * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
8458 : * and that is ok, because we will update it accordingly */
8459 1530 : if (blob->open_ref <= 2 && has_one_clone) {
8460 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8461 70 : assert(clone_entry != NULL);
8462 70 : clone = blob_lookup(blob->bs, clone_entry->id);
8463 :
8464 70 : if (blob->open_ref == 2 && clone == NULL) {
8465 : /* Clone is closed and someone else opened this blob */
8466 0 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8467 0 : return -EBUSY;
8468 : }
8469 :
8470 70 : *update_clone = true;
8471 70 : return 0;
8472 : }
8473 :
8474 1460 : if (blob->open_ref > 1) {
8475 16 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8476 16 : return -EBUSY;
8477 : }
8478 :
8479 1444 : assert(has_one_clone == false);
8480 1444 : *update_clone = false;
8481 1444 : return 0;
8482 : }
8483 :
8484 : static void
8485 0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
8486 : {
8487 0 : spdk_bs_sequence_t *seq = cb_arg;
8488 :
8489 0 : bs_sequence_finish(seq, -ENOMEM);
8490 0 : }
8491 :
8492 : static void
8493 1564 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
8494 : {
8495 1564 : spdk_bs_sequence_t *seq = cb_arg;
8496 : struct delete_snapshot_ctx *ctx;
8497 1564 : bool update_clone = false;
8498 :
8499 1564 : if (bserrno != 0) {
8500 10 : bs_sequence_finish(seq, bserrno);
8501 10 : return;
8502 : }
8503 :
8504 1554 : blob_verify_md_op(blob);
8505 :
8506 1554 : ctx = calloc(1, sizeof(*ctx));
8507 1554 : if (ctx == NULL) {
8508 0 : spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
8509 0 : return;
8510 : }
8511 :
8512 1554 : ctx->snapshot = blob;
8513 1554 : ctx->cb_fn = bs_delete_blob_finish;
8514 1554 : ctx->cb_arg = seq;
8515 :
8516 : /* Check if blob can be removed and if it is a snapshot with clone on top of it */
8517 1554 : ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
8518 1554 : if (ctx->bserrno) {
8519 40 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8520 40 : return;
8521 : }
8522 :
8523 1514 : if (blob->locked_operation_in_progress) {
8524 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
8525 0 : ctx->bserrno = -EBUSY;
8526 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8527 0 : return;
8528 : }
8529 :
8530 1514 : blob->locked_operation_in_progress = true;
8531 :
8532 : /*
8533 : * Remove the blob from the blob_store list now, to ensure it does not
8534 : * get returned after this point by blob_lookup().
8535 : */
8536 1514 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
8537 1514 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
8538 :
8539 1514 : if (update_clone) {
8540 70 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
8541 70 : if (!ctx->page) {
8542 0 : ctx->bserrno = -ENOMEM;
8543 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8544 0 : return;
8545 : }
8546 : /* This blob is a snapshot with active clone - update clone first */
8547 70 : update_clone_on_snapshot_deletion(blob, ctx);
8548 : } else {
8549 : /* This blob does not have any clones - just remove it */
8550 1444 : bs_blob_list_remove(blob);
8551 1444 : bs_delete_blob_finish(seq, blob, 0);
8552 1444 : free(ctx);
8553 : }
8554 : }
8555 :
8556 : void
8557 1564 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8558 : spdk_blob_op_complete cb_fn, void *cb_arg)
8559 : {
8560 1564 : struct spdk_bs_cpl cpl;
8561 : spdk_bs_sequence_t *seq;
8562 :
8563 1564 : SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
8564 :
8565 1564 : assert(spdk_get_thread() == bs->md_thread);
8566 :
8567 1564 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8568 1564 : cpl.u.blob_basic.cb_fn = cb_fn;
8569 1564 : cpl.u.blob_basic.cb_arg = cb_arg;
8570 :
8571 1564 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8572 1564 : if (!seq) {
8573 0 : cb_fn(cb_arg, -ENOMEM);
8574 0 : return;
8575 : }
8576 :
8577 1564 : spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
8578 : }
8579 :
8580 : /* END spdk_bs_delete_blob */
8581 :
8582 : /* START spdk_bs_open_blob */
8583 :
8584 : static void
8585 3474 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8586 : {
8587 3474 : struct spdk_blob *blob = cb_arg;
8588 : struct spdk_blob *existing;
8589 :
8590 3474 : if (bserrno != 0) {
8591 64 : blob_free(blob);
8592 64 : seq->cpl.u.blob_handle.blob = NULL;
8593 64 : bs_sequence_finish(seq, bserrno);
8594 64 : return;
8595 : }
8596 :
8597 3410 : existing = blob_lookup(blob->bs, blob->id);
8598 3410 : if (existing) {
8599 4 : blob_free(blob);
8600 4 : existing->open_ref++;
8601 4 : seq->cpl.u.blob_handle.blob = existing;
8602 4 : bs_sequence_finish(seq, 0);
8603 4 : return;
8604 : }
8605 :
8606 3406 : blob->open_ref++;
8607 :
8608 3406 : spdk_bit_array_set(blob->bs->open_blobids, blob->id);
8609 3406 : RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
8610 :
8611 3406 : bs_sequence_finish(seq, bserrno);
8612 : }
8613 :
8614 : static inline void
8615 4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
8616 : {
8617 : #define FIELD_OK(field) \
8618 : offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
8619 :
8620 : #define SET_FIELD(field) \
8621 : if (FIELD_OK(field)) { \
8622 : dst->field = src->field; \
8623 : } \
8624 :
8625 4 : SET_FIELD(clear_method);
8626 4 : SET_FIELD(esnap_ctx);
8627 :
8628 4 : dst->opts_size = src->opts_size;
8629 :
8630 : /* Do not remove this statement. If you add a new field, update the assert
8631 : * below and add a corresponding SET_FIELD statement above. */
8632 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
8633 :
8634 : #undef FIELD_OK
8635 : #undef SET_FIELD
8636 4 : }
8637 :
8638 : static void
8639 4279 : bs_open_blob(struct spdk_blob_store *bs,
8640 : spdk_blob_id blobid,
8641 : struct spdk_blob_open_opts *opts,
8642 : spdk_blob_op_with_handle_complete cb_fn,
8643 : void *cb_arg)
8644 : {
8645 : struct spdk_blob *blob;
8646 4279 : struct spdk_bs_cpl cpl;
8647 4279 : struct spdk_blob_open_opts opts_local;
8648 : spdk_bs_sequence_t *seq;
8649 : uint32_t page_num;
8650 :
8651 4279 : SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
8652 4279 : assert(spdk_get_thread() == bs->md_thread);
8653 :
8654 4279 : page_num = bs_blobid_to_page(blobid);
8655 4279 : if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
8656 : /* Invalid blobid */
8657 48 : cb_fn(cb_arg, NULL, -ENOENT);
8658 48 : return;
8659 : }
8660 :
8661 4231 : blob = blob_lookup(bs, blobid);
8662 4231 : if (blob) {
8663 757 : blob->open_ref++;
8664 757 : cb_fn(cb_arg, blob, 0);
8665 757 : return;
8666 : }
8667 :
8668 3474 : blob = blob_alloc(bs, blobid);
8669 3474 : if (!blob) {
8670 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8671 0 : return;
8672 : }
8673 :
8674 3474 : spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
8675 3474 : if (opts) {
8676 4 : blob_open_opts_copy(opts, &opts_local);
8677 : }
8678 :
8679 3474 : blob->clear_method = opts_local.clear_method;
8680 :
8681 3474 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
8682 3474 : cpl.u.blob_handle.cb_fn = cb_fn;
8683 3474 : cpl.u.blob_handle.cb_arg = cb_arg;
8684 3474 : cpl.u.blob_handle.blob = blob;
8685 3474 : cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
8686 :
8687 3474 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8688 3474 : if (!seq) {
8689 0 : blob_free(blob);
8690 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8691 0 : return;
8692 : }
8693 :
8694 3474 : blob_load(seq, blob, bs_open_blob_cpl, blob);
8695 : }
8696 :
8697 : void
8698 4275 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8699 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8700 : {
8701 4275 : bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
8702 4275 : }
8703 :
8704 : void
8705 4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
8706 : struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8707 : {
8708 4 : bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
8709 4 : }
8710 :
8711 : /* END spdk_bs_open_blob */
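 : /*
 :  * Minimal usage sketch for opening a blob with options. `bs`, `blobid`,
 :  * `my_esnap_ctx` and `open_done` are hypothetical caller-side names:
 :  *
 :  *   static void
 :  *   open_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
 :  *   {
 :  *           // on success (bserrno == 0), blob stays open until spdk_blob_close()
 :  *   }
 :  *
 :  *   struct spdk_blob_open_opts opts;
 :  *
 :  *   spdk_blob_open_opts_init(&opts, sizeof(opts));
 :  *   opts.esnap_ctx = my_esnap_ctx;
 :  *   spdk_bs_open_blob_ext(bs, blobid, &opts, open_done, NULL);
 :  */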
8712 :
8713 : /* START spdk_blob_set_read_only */
8714 : int
8715 236 : spdk_blob_set_read_only(struct spdk_blob *blob)
8716 : {
8717 236 : blob_verify_md_op(blob);
8718 :
8719 236 : blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
8720 :
8721 236 : blob->state = SPDK_BLOB_STATE_DIRTY;
8722 236 : return 0;
8723 : }
8724 : /* END spdk_blob_set_read_only */
8725 :
8726 : /* START spdk_blob_sync_md */
8727 :
8728 : static void
8729 1607 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8730 : {
8731 1607 : struct spdk_blob *blob = cb_arg;
8732 :
8733 1607 : if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
8734 404 : blob->data_ro = true;
8735 404 : blob->md_ro = true;
8736 : }
8737 :
8738 1607 : bs_sequence_finish(seq, bserrno);
8739 1607 : }
8740 :
8741 : static void
8742 1607 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8743 : {
8744 1607 : struct spdk_bs_cpl cpl;
8745 : spdk_bs_sequence_t *seq;
8746 :
8747 1607 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8748 1607 : cpl.u.blob_basic.cb_fn = cb_fn;
8749 1607 : cpl.u.blob_basic.cb_arg = cb_arg;
8750 :
8751 1607 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8752 1607 : if (!seq) {
8753 0 : cb_fn(cb_arg, -ENOMEM);
8754 0 : return;
8755 : }
8756 :
8757 1607 : blob_persist(seq, blob, blob_sync_md_cpl, blob);
8758 : }
8759 :
8760 : void
8761 1097 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8762 : {
8763 1097 : blob_verify_md_op(blob);
8764 :
8765 1097 : SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
8766 :
8767 1097 : if (blob->md_ro) {
8768 4 : assert(blob->state == SPDK_BLOB_STATE_CLEAN);
8769 4 : cb_fn(cb_arg, 0);
8770 4 : return;
8771 : }
8772 :
8773 1093 : blob_sync_md(blob, cb_fn, cb_arg);
8774 : }
8775 :
8776 : /* END spdk_blob_sync_md */
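 : /*
 :  * Changes to blob metadata (xattrs, resizes, flags) only reach the disk after
 :  * a sync or close. A minimal sketch; `blob`, `val`, `val_len` and `sync_done`
 :  * are hypothetical caller-side names:
 :  *
 :  *   static void
 :  *   sync_done(void *cb_arg, int bserrno)
 :  *   {
 :  *           // bserrno == 0 once the metadata has been persisted
 :  *   }
 :  *
 :  *   spdk_blob_set_xattr(blob, "name", val, val_len);
 :  *   spdk_blob_sync_md(blob, sync_done, NULL);
 :  */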
8777 :
8778 : struct spdk_blob_cluster_op_ctx {
8779 : struct spdk_thread *thread;
8780 : struct spdk_blob *blob;
8781 : uint32_t cluster_num; /* cluster index in blob */
8782 : uint32_t cluster; /* cluster on disk */
8783 : uint32_t extent_page; /* extent page on disk */
8784 : struct spdk_blob_md_page *page; /* preallocated extent page */
8785 : int rc;
8786 : spdk_blob_op_complete cb_fn;
8787 : void *cb_arg;
8788 : };
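 : /*
 :  * Cluster map changes are serialized on the blobstore's md thread: the
 :  * originating thread fills in one of these contexts and sends it over with
 :  * spdk_thread_send_msg(); the result is messaged back to ctx->thread via
 :  * blob_op_cluster_msg_cpl() so the completion callback runs on the thread
 :  * that started the operation.
 :  */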
8789 :
8790 : static void
8791 876 : blob_op_cluster_msg_cpl(void *arg)
8792 : {
8793 876 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8794 :
8795 876 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
8796 876 : free(ctx);
8797 876 : }
8798 :
8799 : static void
8800 846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
8801 : {
8802 846 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8803 :
8804 846 : ctx->rc = bserrno;
8805 846 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8806 846 : }
8807 :
8808 : static void
8809 82 : blob_insert_new_ep_cb(void *arg, int bserrno)
8810 : {
8811 82 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8812 : uint32_t *extent_page;
8813 :
8814 82 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8815 82 : *extent_page = ctx->extent_page;
8816 82 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8817 82 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8818 82 : }
8819 :
8820 : struct spdk_blob_write_extent_page_ctx {
8821 : struct spdk_blob_store *bs;
8822 :
8823 : uint32_t extent;
8824 : struct spdk_blob_md_page *page;
8825 : };
8826 :
8827 : static void
8828 26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
8829 : {
8830 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8831 :
8832 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8833 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
8834 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8835 :
8836 26 : ctx->rc = bserrno;
8837 26 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8838 26 : }
8839 :
8840 : static void
8841 26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
8842 : {
8843 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8844 :
8845 26 : if (bserrno != 0 || ctx->blob->bs->clean == 0) {
8846 26 : blob_free_cluster_msg_cb(ctx, bserrno);
8847 26 : return;
8848 : }
8849 :
8850 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8851 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8852 : }
8853 :
8854 : static void
8855 0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
8856 : {
8857 0 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8858 :
8859 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8860 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8861 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8862 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8863 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8864 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8865 0 : }
8866 :
8867 : static void
8868 434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8869 : {
8870 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8871 :
8872 434 : free(ctx);
8873 434 : bs_sequence_finish(seq, bserrno);
8874 434 : }
8875 :
8876 : static void
8877 434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8878 : {
8879 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8880 :
8881 434 : if (bserrno != 0) {
8882 0 : blob_persist_extent_page_cpl(seq, ctx, bserrno);
8883 0 : return;
8884 : }
8885 434 : bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
8886 434 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
8887 : blob_persist_extent_page_cpl, ctx);
8888 : }
8889 :
8890 : static void
8891 434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
8892 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
8893 : {
8894 : struct spdk_blob_write_extent_page_ctx *ctx;
8895 : spdk_bs_sequence_t *seq;
8896 434 : struct spdk_bs_cpl cpl;
8897 :
8898 434 : ctx = calloc(1, sizeof(*ctx));
8899 434 : if (!ctx) {
8900 0 : cb_fn(cb_arg, -ENOMEM);
8901 0 : return;
8902 : }
8903 434 : ctx->bs = blob->bs;
8904 434 : ctx->extent = extent;
8905 434 : ctx->page = page;
8906 :
8907 434 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8908 434 : cpl.u.blob_basic.cb_fn = cb_fn;
8909 434 : cpl.u.blob_basic.cb_arg = cb_arg;
8910 :
8911 434 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8912 434 : if (!seq) {
8913 0 : free(ctx);
8914 0 : cb_fn(cb_arg, -ENOMEM);
8915 0 : return;
8916 : }
8917 :
8918 434 : assert(page);
8919 434 : page->next = SPDK_INVALID_MD_PAGE;
8920 434 : page->id = blob->id;
8921 434 : page->sequence_num = 0;
8922 :
8923 434 : blob_serialize_extent_page(blob, cluster_num, page);
8924 :
8925 434 : page->crc = blob_md_page_calc_crc(page);
8926 :
8927 434 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
8928 :
8929 434 : bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
8930 : }
8931 :
8932 : static void
8933 816 : blob_insert_cluster_msg(void *arg)
8934 : {
8935 816 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8936 : uint32_t *extent_page;
8937 :
8938 816 : ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
8939 816 : if (ctx->rc != 0) {
8940 4 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8941 4 : return;
8942 : }
8943 :
8944 812 : if (ctx->blob->use_extent_table == false) {
8945 : /* Extent table is not used; proceed with a sync of md that will only use extents_rle. */
8946 406 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8947 406 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8948 406 : return;
8949 : }
8950 :
8951 406 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8952 406 : if (*extent_page == 0) {
8953 : /* Extent page requires allocation.
8954 : * It was already claimed in the used_md_pages map and placed in ctx. */
8955 82 : assert(ctx->extent_page != 0);
8956 82 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8957 82 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
8958 : blob_insert_new_ep_cb, ctx);
8959 : } else {
8960 : /* It is possible for the original thread to have allocated an extent page for
8961 : * a different cluster covered by the same extent page. In such a case, proceed
8962 : * with updating the existing extent page, but release the additional one. */
8963 324 : if (ctx->extent_page != 0) {
8964 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8965 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8966 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8967 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8968 0 : ctx->extent_page = 0;
8969 : }
8970 : /* Extent page already allocated.
8971 : * Every cluster allocation requires just an update of a single extent page. */
8972 324 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
8973 : blob_op_cluster_msg_cb, ctx);
8974 : }
8975 : }
8976 :
8977 : static void
8978 816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
8979 : uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
8980 : spdk_blob_op_complete cb_fn, void *cb_arg)
8981 : {
8982 : struct spdk_blob_cluster_op_ctx *ctx;
8983 :
8984 816 : ctx = calloc(1, sizeof(*ctx));
8985 816 : if (ctx == NULL) {
8986 0 : cb_fn(cb_arg, -ENOMEM);
8987 0 : return;
8988 : }
8989 :
8990 816 : ctx->thread = spdk_get_thread();
8991 816 : ctx->blob = blob;
8992 816 : ctx->cluster_num = cluster_num;
8993 816 : ctx->cluster = cluster;
8994 816 : ctx->extent_page = extent_page;
8995 816 : ctx->page = page;
8996 816 : ctx->cb_fn = cb_fn;
8997 816 : ctx->cb_arg = cb_arg;
8998 :
8999 816 : spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
9000 : }
9001 :
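 : /*
 :  * Runs on the md thread. Clears the cluster from the blob's map and, when the
 :  * extent table is in use, either rewrites the owning extent page or releases
 :  * it entirely if no cluster in its SPDK_EXTENTS_PER_EP range is still
 :  * allocated.
 :  */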
9002 : static void
9003 60 : blob_free_cluster_msg(void *arg)
9004 : {
9005 60 : struct spdk_blob_cluster_op_ctx *ctx = arg;
9006 : uint32_t *extent_page;
9007 : uint32_t start_cluster_idx;
9008 60 : bool free_extent_page = true;
9009 : size_t i;
9010 :
9011 60 : ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
9012 :
9013 : /* There were concurrent unmaps of the same cluster; only release the cluster on the first one */
9014 60 : if (ctx->cluster == 0) {
9015 8 : blob_op_cluster_msg_cb(ctx, 0);
9016 8 : return;
9017 : }
9018 :
9019 52 : ctx->blob->active.clusters[ctx->cluster_num] = 0;
9020 52 : if (ctx->cluster != 0) {
9021 52 : ctx->blob->active.num_allocated_clusters--;
9022 : }
9023 :
9024 52 : if (ctx->blob->use_extent_table == false) {
9025 : /* Extent table is not used; proceed with a sync of md that will only use extents_rle. */
9026 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
9027 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
9028 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
9029 26 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
9030 26 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
9031 26 : return;
9032 : }
9033 :
9034 26 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
9035 :
9036 : /* There shouldn't be parallel release operations on same cluster */
9037 26 : assert(*extent_page == ctx->extent_page);
9038 :
9039 26 : start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
9040 48 : for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
9041 48 : if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
9042 26 : free_extent_page = false;
9043 26 : break;
9044 : }
9045 : }
9046 :
9047 26 : if (free_extent_page) {
9048 0 : assert(ctx->extent_page != 0);
9049 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
9050 0 : ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
9051 0 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
9052 : blob_free_cluster_free_ep_cb, ctx);
9053 : } else {
9054 26 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
9055 : blob_free_cluster_update_ep_cb, ctx);
9056 : }
9057 : }
9058 :
9059 :
9060 : static void
9061 60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
9062 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
9063 : {
9064 : struct spdk_blob_cluster_op_ctx *ctx;
9065 :
9066 60 : ctx = calloc(1, sizeof(*ctx));
9067 60 : if (ctx == NULL) {
9068 0 : cb_fn(cb_arg, -ENOMEM);
9069 0 : return;
9070 : }
9071 :
9072 60 : ctx->thread = spdk_get_thread();
9073 60 : ctx->blob = blob;
9074 60 : ctx->cluster_num = cluster_num;
9075 60 : ctx->extent_page = extent_page;
9076 60 : ctx->page = page;
9077 60 : ctx->cb_fn = cb_fn;
9078 60 : ctx->cb_arg = cb_arg;
9079 :
9080 60 : spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
9081 : }
9082 :
9083 : /* START spdk_blob_close */
9084 :
9085 : static void
9086 4167 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9087 : {
9088 4167 : struct spdk_blob *blob = cb_arg;
9089 :
9090 4167 : if (bserrno == 0) {
9091 4167 : blob->open_ref--;
9092 4167 : if (blob->open_ref == 0) {
9093 : /*
9094 : * Blobs with active.num_pages == 0 are deleted blobs.
9095 : * These blobs are removed from the blob_store list
9096 : * when the deletion process starts, so don't try to
9097 : * remove them again.
9098 : */
9099 3406 : if (blob->active.num_pages > 0) {
9100 1914 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
9101 1914 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
9102 : }
9103 3406 : blob_free(blob);
9104 : }
9105 : }
9106 :
9107 4167 : bs_sequence_finish(seq, bserrno);
9108 4167 : }
9109 :
9110 : static void
9111 120 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
9112 : {
9113 120 : spdk_bs_sequence_t *seq = cb_arg;
9114 :
9115 120 : if (bserrno != 0) {
9116 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
9117 : blob->id, bserrno);
9118 0 : bs_sequence_finish(seq, bserrno);
9119 0 : return;
9120 : }
9121 :
9122 120 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
9123 : blob->id, spdk_thread_get_name(spdk_get_thread()));
9124 :
9125 : /* Sync metadata */
9126 120 : blob_persist(seq, blob, blob_close_cpl, blob);
9127 : }
9128 :
9129 : void
9130 4167 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
9131 : {
9132 4167 : struct spdk_bs_cpl cpl;
9133 : spdk_bs_sequence_t *seq;
9134 :
9135 4167 : blob_verify_md_op(blob);
9136 :
9137 4167 : SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
9138 :
9139 4167 : if (blob->open_ref == 0) {
9140 0 : cb_fn(cb_arg, -EBADF);
9141 0 : return;
9142 : }
9143 :
9144 4167 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
9145 4167 : cpl.u.blob_basic.cb_fn = cb_fn;
9146 4167 : cpl.u.blob_basic.cb_arg = cb_arg;
9147 :
9148 4167 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
9149 4167 : if (!seq) {
9150 0 : cb_fn(cb_arg, -ENOMEM);
9151 0 : return;
9152 : }
9153 :
9154 4167 : if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
9155 120 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
9156 120 : return;
9157 : }
9158 :
9159 : /* Sync metadata */
9160 4047 : blob_persist(seq, blob, blob_close_cpl, blob);
9161 : }
9162 :
9163 : /* END spdk_blob_close */
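 : /*
 :  * Each successful open takes a reference on the blob; spdk_blob_close() drops
 :  * it and persists any dirty metadata. A minimal sketch; `blob` and
 :  * `close_done` are hypothetical caller-side names:
 :  *
 :  *   static void
 :  *   close_done(void *cb_arg, int bserrno)
 :  *   {
 :  *           // blob must no longer be used after a successful close
 :  *   }
 :  *
 :  *   spdk_blob_close(blob, close_done, NULL);
 :  */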
9164 :
9165 : struct spdk_io_channel *
9165 233 : spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
9166 : {
9167 233 : return spdk_get_io_channel(bs);
9168 : }
9169 :
9170 : void
9171 233 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
9172 : {
9173 233 : blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
9174 233 : spdk_put_io_channel(channel);
9175 233 : }
9176 :
9177 : void
9178 108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
9179 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9180 : {
9181 108 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9182 : SPDK_BLOB_UNMAP);
9183 108 : }
9184 :
9185 : void
9186 48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
9187 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9188 : {
9189 48 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9190 : SPDK_BLOB_WRITE_ZEROES);
9191 48 : }
9192 :
9193 : void
9194 20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
9195 : void *payload, uint64_t offset, uint64_t length,
9196 : spdk_blob_op_complete cb_fn, void *cb_arg)
9197 : {
9198 20868 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9199 : SPDK_BLOB_WRITE);
9200 20868 : }
9201 :
9202 : void
9203 17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
9204 : void *payload, uint64_t offset, uint64_t length,
9205 : spdk_blob_op_complete cb_fn, void *cb_arg)
9206 : {
9207 17500 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9208 : SPDK_BLOB_READ);
9209 17500 : }
9210 :
9211 : void
9212 140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
9213 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9214 : spdk_blob_op_complete cb_fn, void *cb_arg)
9215 : {
9216 140 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
9217 140 : }
9218 :
9219 : void
9220 940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
9221 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9222 : spdk_blob_op_complete cb_fn, void *cb_arg)
9223 : {
9224 940 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
9225 940 : }
9226 :
9227 : void
9228 208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9229 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9230 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9231 : {
9232 208 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
9233 : io_opts);
9234 208 : }
9235 :
9236 : void
9237 1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9238 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9239 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9240 : {
9241 1300 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
9242 : io_opts);
9243 1300 : }
9244 :
9245 : struct spdk_bs_iter_ctx {
9246 : int64_t page_num;
9247 : struct spdk_blob_store *bs;
9248 :
9249 : spdk_blob_op_with_handle_complete cb_fn;
9250 : void *cb_arg;
9251 : };
9252 :
9253 : static void
9254 1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
9255 : {
9256 1164 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9257 1164 : struct spdk_blob_store *bs = ctx->bs;
9258 : spdk_blob_id id;
9259 :
9260 1164 : if (bserrno == 0) {
9261 444 : ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
9262 444 : free(ctx);
9263 444 : return;
9264 : }
9265 :
9266 720 : ctx->page_num++;
9267 720 : ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
9268 720 : if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
9269 268 : ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
9270 268 : free(ctx);
9271 268 : return;
9272 : }
9273 :
9274 452 : id = bs_page_to_blobid(ctx->page_num);
9275 :
9276 452 : spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
9277 : }
9278 :
9279 : void
9280 292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
9281 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9282 : {
9283 : struct spdk_bs_iter_ctx *ctx;
9284 :
9285 292 : ctx = calloc(1, sizeof(*ctx));
9286 292 : if (!ctx) {
9287 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9288 0 : return;
9289 : }
9290 :
9291 292 : ctx->page_num = -1;
9292 292 : ctx->bs = bs;
9293 292 : ctx->cb_fn = cb_fn;
9294 292 : ctx->cb_arg = cb_arg;
9295 :
9296 292 : bs_iter_cpl(ctx, NULL, -1);
9297 : }
9298 :
9299 : static void
9300 420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
9301 : {
9302 420 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9303 :
9304 420 : bs_iter_cpl(ctx, NULL, -1);
9305 420 : }
9306 :
9307 : void
9308 420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
9309 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9310 : {
9311 : struct spdk_bs_iter_ctx *ctx;
9312 :
9313 420 : assert(blob != NULL);
9314 :
9315 420 : ctx = calloc(1, sizeof(*ctx));
9316 420 : if (!ctx) {
9317 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9318 0 : return;
9319 : }
9320 :
9321 420 : ctx->page_num = bs_blobid_to_page(blob->id);
9322 420 : ctx->bs = bs;
9323 420 : ctx->cb_fn = cb_fn;
9324 420 : ctx->cb_arg = cb_arg;
9325 :
9326 : /* Close the existing blob */
9327 420 : spdk_blob_close(blob, bs_iter_close_cpl, ctx);
9328 : }
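 : /*
 :  * Minimal iteration sketch; spdk_bs_iter_next() closes the blob handed to it
 :  * before opening the next one, and the callback receives -ENOENT when the
 :  * iteration is exhausted. `g_bs` and `iter_cb` are hypothetical caller-side
 :  * names:
 :  *
 :  *   static void
 :  *   iter_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
 :  *   {
 :  *           if (bserrno == -ENOENT) {
 :  *                   return; // no more blobs
 :  *           }
 :  *           // ... inspect blob ...
 :  *           spdk_bs_iter_next(g_bs, blob, iter_cb, cb_arg);
 :  *   }
 :  *
 :  *   spdk_bs_iter_first(g_bs, iter_cb, NULL);
 :  */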
9329 :
9330 : static int
9331 959 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9332 : uint16_t value_len, bool internal)
9333 : {
9334 : struct spdk_xattr_tailq *xattrs;
9335 : struct spdk_xattr *xattr;
9336 : size_t desc_size;
9337 : void *tmp;
9338 :
9339 959 : blob_verify_md_op(blob);
9340 :
9341 959 : if (blob->md_ro) {
9342 4 : return -EPERM;
9343 : }
9344 :
9345 955 : desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
9346 955 : if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
9347 4 : SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
9348 : desc_size, SPDK_BS_MAX_DESC_SIZE);
9349 4 : return -ENOMEM;
9350 : }
9351 :
9352 951 : if (internal) {
9353 740 : xattrs = &blob->xattrs_internal;
9354 740 : blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
9355 : } else {
9356 211 : xattrs = &blob->xattrs;
9357 : }
9358 :
9359 1182 : TAILQ_FOREACH(xattr, xattrs, link) {
9360 340 : if (!strcmp(name, xattr->name)) {
9361 109 : tmp = malloc(value_len);
9362 109 : if (!tmp) {
9363 0 : return -ENOMEM;
9364 : }
9365 :
9366 109 : free(xattr->value);
9367 109 : xattr->value_len = value_len;
9368 109 : xattr->value = tmp;
9369 109 : memcpy(xattr->value, value, value_len);
9370 :
9371 109 : blob->state = SPDK_BLOB_STATE_DIRTY;
9372 :
9373 109 : return 0;
9374 : }
9375 : }
9376 :
9377 842 : xattr = calloc(1, sizeof(*xattr));
9378 842 : if (!xattr) {
9379 0 : return -ENOMEM;
9380 : }
9381 :
9382 842 : xattr->name = strdup(name);
9383 842 : if (!xattr->name) {
9384 0 : free(xattr);
9385 0 : return -ENOMEM;
9386 : }
9387 :
9388 842 : xattr->value_len = value_len;
9389 842 : xattr->value = malloc(value_len);
9390 842 : if (!xattr->value) {
9391 0 : free(xattr->name);
9392 0 : free(xattr);
9393 0 : return -ENOMEM;
9394 : }
9395 842 : memcpy(xattr->value, value, value_len);
9396 842 : TAILQ_INSERT_TAIL(xattrs, xattr, link);
9397 :
9398 842 : blob->state = SPDK_BLOB_STATE_DIRTY;
9399 :
9400 842 : return 0;
9401 : }
9402 :
9403 : int
9404 183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9405 : uint16_t value_len)
9406 : {
9407 183 : return blob_set_xattr(blob, name, value, value_len, false);
9408 : }
9409 :
9410 : static int
9411 416 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
9412 : {
9413 : struct spdk_xattr_tailq *xattrs;
9414 : struct spdk_xattr *xattr;
9415 :
9416 416 : blob_verify_md_op(blob);
9417 :
9418 416 : if (blob->md_ro) {
9419 4 : return -EPERM;
9420 : }
9421 412 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9422 :
9423 424 : TAILQ_FOREACH(xattr, xattrs, link) {
9424 372 : if (!strcmp(name, xattr->name)) {
9425 360 : TAILQ_REMOVE(xattrs, xattr, link);
9426 360 : free(xattr->value);
9427 360 : free(xattr->name);
9428 360 : free(xattr);
9429 :
9430 360 : if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
9431 244 : blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
9432 : }
9433 360 : blob->state = SPDK_BLOB_STATE_DIRTY;
9434 :
9435 360 : return 0;
9436 : }
9437 : }
9438 :
9439 52 : return -ENOENT;
9440 : }
9441 :
9442 : int
9443 36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
9444 : {
9445 36 : return blob_remove_xattr(blob, name, false);
9446 : }
9447 :
9448 : static int
9449 2292 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9450 : const void **value, size_t *value_len, bool internal)
9451 : {
9452 : struct spdk_xattr *xattr;
9453 : struct spdk_xattr_tailq *xattrs;
9454 :
9455 2292 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9456 :
9457 2922 : TAILQ_FOREACH(xattr, xattrs, link) {
9458 1396 : if (!strcmp(name, xattr->name)) {
9459 766 : *value = xattr->value;
9460 766 : *value_len = xattr->value_len;
9461 766 : return 0;
9462 : }
9463 : }
9464 1526 : return -ENOENT;
9465 : }
9466 :
9467 : int
9468 154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9469 : const void **value, size_t *value_len)
9470 : {
9471 154 : blob_verify_md_op(blob);
9472 :
9473 154 : return blob_get_xattr_value(blob, name, value, value_len, false);
9474 : }
9475 :
9476 : struct spdk_xattr_names {
9477 : uint32_t count;
9478 : const char *names[0];
9479 : };
9480 :
9481 : static int
9482 4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
9483 : {
9484 : struct spdk_xattr *xattr;
9485 4 : int count = 0;
9486 :
9487 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9488 8 : count++;
9489 : }
9490 :
9491 4 : *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
9492 4 : if (*names == NULL) {
9493 0 : return -ENOMEM;
9494 : }
9495 :
9496 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9497 8 : (*names)->names[(*names)->count++] = xattr->name;
9498 : }
9499 :
9500 4 : return 0;
9501 : }
9502 :
9503 : int
9504 4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
9505 : {
9506 4 : blob_verify_md_op(blob);
9507 :
9508 4 : return blob_get_xattr_names(&blob->xattrs, names);
9509 : }
9510 :
9511 : uint32_t
9512 4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
9513 : {
9514 4 : assert(names != NULL);
9515 :
9516 4 : return names->count;
9517 : }
9518 :
9519 : const char *
9520 8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
9521 : {
9522 8 : if (index >= names->count) {
9523 0 : return NULL;
9524 : }
9525 :
9526 8 : return names->names[index];
9527 : }
9528 :
9529 : void
9530 4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
9531 : {
9532 4 : free(names);
9533 4 : }
9534 :
9535 : struct spdk_bs_type
9536 2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
9537 : {
9538 2 : return bs->bstype;
9539 : }
9540 :
9541 : void
9542 0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
9543 : {
9544 0 : memcpy(&bs->bstype, &bstype, sizeof(bstype));
9545 0 : }
9546 :
9547 : bool
9548 48 : spdk_blob_is_read_only(struct spdk_blob *blob)
9549 : {
9550 48 : assert(blob != NULL);
9551 48 : return (blob->data_ro || blob->md_ro);
9552 : }
9553 :
9554 : bool
9555 52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
9556 : {
9557 : struct spdk_blob_list *snapshot_entry;
9558 :
9559 52 : assert(blob != NULL);
9560 :
9561 52 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
9562 52 : if (snapshot_entry == NULL) {
9563 28 : return false;
9564 : }
9565 :
9566 24 : return true;
9567 : }
9568 :
9569 : bool
9570 68 : spdk_blob_is_clone(struct spdk_blob *blob)
9571 : {
9572 68 : assert(blob != NULL);
9573 :
9574 68 : if (blob->parent_id != SPDK_BLOBID_INVALID &&
9575 52 : blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
9576 40 : assert(spdk_blob_is_thin_provisioned(blob));
9577 40 : return true;
9578 : }
9579 :
9580 28 : return false;
9581 : }
9582 :
9583 : bool
9584 36536 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
9585 : {
9586 36536 : assert(blob != NULL);
9587 36536 : return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
9588 : }
9589 :
9590 : bool
9591 40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
9592 : {
9593 40888 : return blob_is_esnap_clone(blob);
9594 : }
9595 :
9596 : static void
9597 3434 : blob_update_clear_method(struct spdk_blob *blob)
9598 : {
9599 : enum blob_clear_method stored_cm;
9600 :
9601 3434 : assert(blob != NULL);
9602 :
9603 : /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
9604 : * in metadata previously. If something other than the default was
9605 : * specified, ignore the stored value and use what was passed in.
9606 : */
9607 3434 : stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
9608 :
9609 3434 : if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
9610 3434 : blob->clear_method = stored_cm;
9611 0 : } else if (blob->clear_method != stored_cm) {
9612 0 : SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
9613 : blob->clear_method, stored_cm);
9614 : }
9615 3434 : }
9616 :
9617 : spdk_blob_id
9618 258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
9619 : {
9620 258 : struct spdk_blob_list *snapshot_entry = NULL;
9621 258 : struct spdk_blob_list *clone_entry = NULL;
9622 :
9623 494 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
9624 732 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9625 496 : if (clone_entry->id == blob_id) {
9626 168 : return snapshot_entry->id;
9627 : }
9628 : }
9629 : }
9630 :
9631 90 : return SPDK_BLOBID_INVALID;
9632 : }
9633 :
9634 : int
9635 196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
9636 : size_t *count)
9637 : {
9638 : struct spdk_blob_list *snapshot_entry, *clone_entry;
9639 : size_t n;
9640 :
9641 196 : snapshot_entry = bs_get_snapshot_entry(bs, blobid);
9642 196 : if (snapshot_entry == NULL) {
9643 28 : *count = 0;
9644 28 : return 0;
9645 : }
9646 :
9647 168 : if (ids == NULL || *count < snapshot_entry->clone_count) {
9648 8 : *count = snapshot_entry->clone_count;
9649 8 : return -ENOMEM;
9650 : }
9651 160 : *count = snapshot_entry->clone_count;
9652 :
9653 160 : n = 0;
9654 340 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9655 180 : ids[n++] = clone_entry->id;
9656 : }
9657 :
9658 160 : return 0;
9659 : }
9660 :
9661 : static void
9662 4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
9663 : {
9664 : int rc;
9665 :
9666 4 : if (ctx->super->size == 0) {
9667 0 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9668 : }
9669 :
9670 4 : if (ctx->super->io_unit_size == 0) {
9671 0 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
9672 : }
9673 :
9674 : /* Parse the super block */
9675 4 : ctx->bs->clean = 1;
9676 4 : ctx->bs->cluster_sz = ctx->super->cluster_size;
9677 4 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
9678 4 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
9679 4 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
9680 4 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
9681 : }
9682 4 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
9683 4 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
9684 4 : if (rc < 0) {
9685 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9686 0 : return;
9687 : }
9688 4 : ctx->bs->md_start = ctx->super->md_start;
9689 4 : ctx->bs->md_len = ctx->super->md_len;
9690 4 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
9691 4 : if (rc < 0) {
9692 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9693 0 : return;
9694 : }
9695 :
9696 8 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
9697 4 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
9698 4 : ctx->bs->super_blob = ctx->super->super_blob;
9699 4 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
9700 :
9701 4 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
9702 0 : SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
9703 0 : bs_load_ctx_fail(ctx, -EIO);
9704 0 : return;
9705 : } else {
9706 4 : bs_load_read_used_pages(ctx);
9707 : }
9708 : }
9709 :
9710 : static void
9711 4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9712 : {
9713 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9714 :
9715 4 : if (bserrno != 0) {
9716 0 : bs_load_ctx_fail(ctx, bserrno);
9717 0 : return;
9718 : }
9719 4 : bs_load_grow_continue(ctx);
9720 : }
9721 :
9722 : static void
9723 4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9724 : {
9725 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9726 :
9727 4 : if (bserrno != 0) {
9728 0 : bs_load_ctx_fail(ctx, bserrno);
9729 0 : return;
9730 : }
9731 :
9732 4 : spdk_free(ctx->mask);
9733 :
9734 4 : bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
9735 4 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
9736 : bs_load_grow_super_write_cpl, ctx);
9737 : }
9738 :
9739 : static void
9740 4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9741 : {
9742 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9743 : uint64_t lba, lba_count;
9744 : uint64_t dev_size;
9745 : uint64_t total_clusters;
9746 :
9747 4 : if (bserrno != 0) {
9748 0 : bs_load_ctx_fail(ctx, bserrno);
9749 0 : return;
9750 : }
9751 :
9752 : /* The type must be correct */
9753 4 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
9754 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
9755 4 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
9756 : struct spdk_blob_md_page) * 8));
9757 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9758 4 : total_clusters = dev_size / ctx->super->cluster_size;
9759 4 : ctx->mask->length = total_clusters;
9760 :
9761 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9762 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9763 4 : bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
9764 : bs_load_grow_used_clusters_write_cpl, ctx);
9765 : }
9766 :
9767 : static void
9768 4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
9769 : {
9770 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9771 : uint64_t lba, lba_count, mask_size;
9772 :
9773 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9774 4 : total_clusters = dev_size / ctx->super->cluster_size;
9775 4 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9776 4 : spdk_divide_round_up(total_clusters, 8),
9777 : SPDK_BS_PAGE_SIZE);
9778 4 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9779 : /* Not necessary to grow, or no space to grow */
9780 4 : if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
9781 0 : SPDK_DEBUGLOG(blob, "No grow\n");
9782 0 : bs_load_grow_continue(ctx);
9783 0 : return;
9784 : }
9785 :
9786 4 : SPDK_DEBUGLOG(blob, "Resize blobstore\n");
9787 :
9788 4 : ctx->super->size = dev_size;
9789 4 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9790 4 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
9791 :
9792 4 : mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
9793 4 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
9794 : SPDK_MALLOC_DMA);
9795 4 : if (!ctx->mask) {
9796 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9797 0 : return;
9798 : }
9799 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9800 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9801 4 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
9802 : bs_load_grow_used_clusters_read_cpl, ctx);
9803 : }
9804 :
9805 : static void
9806 4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9807 : {
9808 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9809 : int rc;
9810 :
9811 4 : rc = bs_super_validate(ctx->super, ctx->bs);
9812 4 : if (rc != 0) {
9813 0 : bs_load_ctx_fail(ctx, rc);
9814 0 : return;
9815 : }
9816 :
9817 4 : bs_load_try_to_grow(ctx);
9818 : }
9819 :
9820 : struct spdk_bs_grow_ctx {
9821 : struct spdk_blob_store *bs;
9822 : struct spdk_bs_super_block *super;
9823 :
9824 : struct spdk_bit_pool *new_used_clusters;
9825 : struct spdk_bs_md_mask *new_used_clusters_mask;
9826 :
9827 : spdk_bs_sequence_t *seq;
9828 : };
9829 :
9830 : static void
9831 32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
9832 : {
9833 32 : if (bserrno != 0) {
9834 8 : spdk_bit_pool_free(&ctx->new_used_clusters);
9835 : }
9836 :
9837 32 : bs_sequence_finish(ctx->seq, bserrno);
9838 32 : free(ctx->new_used_clusters_mask);
9839 32 : spdk_free(ctx->super);
9840 32 : free(ctx);
9841 32 : }
9842 :
9843 : static void
9844 8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9845 : {
9846 8 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9847 8 : struct spdk_blob_store *bs = ctx->bs;
9848 : uint64_t total_clusters;
9849 :
9850 8 : if (bserrno != 0) {
9851 0 : bs_grow_live_done(ctx, bserrno);
9852 0 : return;
9853 : }
9854 :
9855 : /*
9856 : * The blobstore is not clean until unload; for now only the super block is up to date.
9857 : * This is similar to the state right after blobstore init, before bs_write_used_md()
9858 : * has executed.
9859 : * When cleanly unloaded, the used md pages will be written out.
9860 : * In case of an unclean shutdown, loading the blobstore will go through the recovery
9861 : * path, correctly filling out used_clusters with the new size and writing it out.
9862 : */
9863 8 : bs->clean = 0;
9864 :
9865 : /* Reverting the super->size past this point is complex, avoid any error paths
9866 : * that require to do so. */
9867 8 : spdk_spin_lock(&bs->used_lock);
9868 :
9869 8 : total_clusters = ctx->super->size / ctx->super->cluster_size;
9870 :
9871 8 : assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
9872 8 : spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
9873 :
9874 8 : assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
9875 8 : spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
9876 :
9877 8 : spdk_bit_pool_free(&bs->used_clusters);
9878 8 : bs->used_clusters = ctx->new_used_clusters;
9879 :
9880 8 : bs->total_clusters = total_clusters;
9881 16 : bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
9882 8 : bs->md_start + bs->md_len, bs->pages_per_cluster);
9883 :
9884 8 : bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
9885 8 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
9886 8 : spdk_spin_unlock(&bs->used_lock);
9887 :
9888 8 : bs_grow_live_done(ctx, 0);
9889 : }
9890 :
9891 : static void
9892 32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9893 : {
9894 32 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9895 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9896 : int rc;
9897 :
9898 32 : if (bserrno != 0) {
9899 0 : bs_grow_live_done(ctx, bserrno);
9900 0 : return;
9901 : }
9902 :
9903 32 : rc = bs_super_validate(ctx->super, ctx->bs);
9904 32 : if (rc != 0) {
9905 4 : bs_grow_live_done(ctx, rc);
9906 4 : return;
9907 : }
9908 :
9909 28 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9910 28 : total_clusters = dev_size / ctx->super->cluster_size;
9911 28 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9912 28 : spdk_divide_round_up(total_clusters, 8),
9913 : SPDK_BS_PAGE_SIZE);
9914 28 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9915 : /* Only check dev_size, since it can change while total_clusters remains the same. */
9916 28 : if (dev_size == ctx->super->size) {
9917 16 : SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
9918 16 : bs_grow_live_done(ctx, 0);
9919 16 : return;
9920 : }
9921 : /*
9922 : * Blobstore cannot be shrunk, so fail the grow if any of the following holds:
9923 : * - the new size of the device is smaller than the size in the super block
9924 : * - the new total number of clusters is smaller than the used_clusters bit_pool capacity
9925 : * - there is not enough space in metadata for the used_cluster_mask to be written out
9926 : */
9927 12 : if (dev_size < ctx->super->size ||
9928 12 : total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
9929 : used_cluster_mask_len > max_used_cluster_mask) {
9930 4 : SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
9931 4 : bs_grow_live_done(ctx, -ENOSPC);
9932 4 : return;
9933 : }
9934 :
9935 8 : SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
9936 :
9937 8 : ctx->new_used_clusters_mask = calloc(1, total_clusters);
9938 8 : if (!ctx->new_used_clusters_mask) {
9939 0 : bs_grow_live_done(ctx, -ENOMEM);
9940 0 : return;
9941 : }
9942 8 : ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
9943 8 : if (!ctx->new_used_clusters) {
9944 0 : bs_grow_live_done(ctx, -ENOMEM);
9945 0 : return;
9946 : }
9947 :
9948 8 : ctx->super->clean = 0;
9949 8 : ctx->super->size = dev_size;
9950 8 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9951 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
9952 : }
9953 :
9954 : void
9955 32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
9956 : spdk_bs_op_complete cb_fn, void *cb_arg)
9957 : {
9958 32 : struct spdk_bs_cpl cpl;
9959 : struct spdk_bs_grow_ctx *ctx;
9960 :
9961 32 : assert(spdk_get_thread() == bs->md_thread);
9962 :
9963 32 : SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
9964 :
9965 32 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
9966 32 : cpl.u.bs_basic.cb_fn = cb_fn;
9967 32 : cpl.u.bs_basic.cb_arg = cb_arg;
9968 :
9969 32 : ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
9970 32 : if (!ctx) {
9971 0 : cb_fn(cb_arg, -ENOMEM);
9972 0 : return;
9973 : }
9974 32 : ctx->bs = bs;
9975 :
9976 32 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
9977 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
9978 32 : if (!ctx->super) {
9979 0 : free(ctx);
9980 0 : cb_fn(cb_arg, -ENOMEM);
9981 0 : return;
9982 : }
9983 :
9984 32 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
9985 32 : if (!ctx->seq) {
9986 0 : spdk_free(ctx->super);
9987 0 : free(ctx);
9988 0 : cb_fn(cb_arg, -ENOMEM);
9989 0 : return;
9990 : }
9991 :
9992 : /* Read the super block */
9993 32 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
9994 32 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
9995 : bs_grow_live_load_super_cpl, ctx);
9996 : }
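/*
 * Usage sketch (editor's illustration; grow_live_done is a hypothetical
 * callback): after the underlying device has been resized, grow the live
 * blobstore from its md thread and observe the new capacity. Compiled out.
 */
#if 0
static void
grow_live_done(void *cb_arg, int bserrno)
{
	struct spdk_blob_store *bs = cb_arg;

	if (bserrno != 0) {
		SPDK_ERRLOG("blobstore grow failed: %d\n", bserrno);
		return;
	}
	SPDK_NOTICELOG("free clusters after grow: %" PRIu64 "\n",
		       spdk_bs_free_cluster_count(bs));
}

/* On the md thread, once dev->blockcnt reflects the new size: */
spdk_bs_grow_live(bs, grow_live_done, bs);
#endif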
9997 :
9998 : void
9999 4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
10000 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
10001 : {
10002 4 : struct spdk_blob_store *bs;
10003 4 : struct spdk_bs_cpl cpl;
10004 4 : struct spdk_bs_load_ctx *ctx;
10005 4 : struct spdk_bs_opts opts = {};
10006 : int err;
10007 :
10008 4 : 	SPDK_DEBUGLOG(blob, "Loading and growing blobstore from dev %p\n", dev);
10009 :
10010 4 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
10011 0 : SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
10012 0 : dev->destroy(dev);
10013 0 : cb_fn(cb_arg, NULL, -EINVAL);
10014 0 : return;
10015 : }
10016 :
10017 4 : spdk_bs_opts_init(&opts, sizeof(opts));
10018 4 : if (o) {
10019 4 : if (bs_opts_copy(o, &opts)) {
10020 0 : 			/* Match the other early-failure paths: release the dev and notify the caller. */
      0 : 			dev->destroy(dev);
      0 : 			cb_fn(cb_arg, NULL, -EINVAL);
      0 : 			return;
10021 : }
10022 : }
10023 :
10024 4 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
10025 0 : dev->destroy(dev);
10026 0 : cb_fn(cb_arg, NULL, -EINVAL);
10027 0 : return;
10028 : }
10029 :
10030 4 : err = bs_alloc(dev, &opts, &bs, &ctx);
10031 4 : if (err) {
10032 0 : dev->destroy(dev);
10033 0 : cb_fn(cb_arg, NULL, err);
10034 0 : return;
10035 : }
10036 :
10037 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
10038 4 : cpl.u.bs_handle.cb_fn = cb_fn;
10039 4 : cpl.u.bs_handle.cb_arg = cb_arg;
10040 4 : cpl.u.bs_handle.bs = bs;
10041 :
10042 4 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
10043 4 : if (!ctx->seq) {
10044 0 : spdk_free(ctx->super);
10045 0 : free(ctx);
10046 0 : bs_free(bs);
10047 0 : cb_fn(cb_arg, NULL, -ENOMEM);
10048 0 : return;
10049 : }
10050 :
10051 : /* Read the super block */
10052 4 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
10053 4 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
10054 : bs_grow_load_super_cpl, ctx);
10055 : }
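/*
 * Usage sketch (editor's illustration; grow_load_done and bs_dev are
 * hypothetical): spdk_bs_grow() behaves like spdk_bs_load() but also
 * extends the blobstore to fill the device before handing it back.
 */
#if 0
static void
grow_load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
{
	if (bserrno != 0) {
		SPDK_ERRLOG("load+grow failed: %d\n", bserrno);
		return;
	}
	/* bs is loaded and already sized to the current device capacity. */
}

struct spdk_bs_opts opts;

spdk_bs_opts_init(&opts, sizeof(opts));
spdk_bs_grow(bs_dev, &opts, grow_load_done, NULL);
#endif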
10056 :
10057 : int
10058 24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
10059 : {
10060 24 : if (!blob_is_esnap_clone(blob)) {
10061 12 : return -EINVAL;
10062 : }
10063 :
10064 12 : return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
10065 : }
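/*
 * Usage sketch (editor's illustration): the returned pointer aliases the
 * blob's internal xattr value, so the caller must not free it. In lvol the
 * id is typically the UUID of the external snapshot bdev.
 */
#if 0
const void *id;
size_t id_len;

if (spdk_blob_get_esnap_id(blob, &id, &id_len) == 0) {
	/* id/id_len identify the external snapshot device. */
}
#endif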
10066 :
10067 : struct spdk_io_channel *
10068 8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
10069 : {
10070 8840 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
10071 8840 : struct spdk_bs_dev *bs_dev = blob->back_bs_dev;
10072 8840 : struct blob_esnap_channel find = {};
10073 : struct blob_esnap_channel *esnap_channel, *existing;
10074 :
10075 8840 : find.blob_id = blob->id;
10076 8840 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10077 8840 : if (spdk_likely(esnap_channel != NULL)) {
10078 8796 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
10079 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10080 8796 : return esnap_channel->channel;
10081 : }
10082 :
10083 44 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
10084 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10085 :
10086 44 : esnap_channel = calloc(1, sizeof(*esnap_channel));
10087 44 : if (esnap_channel == NULL) {
10088 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
10089 : find.blob_id);
10090 0 : return NULL;
10091 : }
10092 44 : esnap_channel->channel = bs_dev->create_channel(bs_dev);
10093 44 : if (esnap_channel->channel == NULL) {
10094 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
10095 0 : free(esnap_channel);
10096 0 : return NULL;
10097 : }
10098 44 : esnap_channel->blob_id = find.blob_id;
10099 44 : existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10100 44 : if (spdk_unlikely(existing != NULL)) {
10101 : /*
10102 : * This should be unreachable: all modifications to this tree happen on this thread.
10103 : */
10104 0 : 		SPDK_ERRLOG("blob 0x%" PRIx64 ": lost race to allocate a channel\n", find.blob_id);
10105 0 : assert(false);
10106 :
10107 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10108 : free(esnap_channel);
10109 :
10110 : return existing->channel;
10111 : }
10112 :
10113 44 : return esnap_channel->channel;
10114 : }
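/*
 * Usage sketch (editor's illustration; payload, lba, lba_count and cb_args
 * are assumed to be in scope): esnap clone reads route back-device IO
 * through the per-thread channel returned above.
 */
#if 0
struct spdk_io_channel *esnap_ch = blob_esnap_get_io_channel(ch, blob);

if (esnap_ch == NULL) {
	/* Allocation failed; fail or queue the IO. */
} else {
	blob->back_bs_dev->read(blob->back_bs_dev, esnap_ch, payload,
				lba, lba_count, &cb_args);
}
#endif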
10115 :
10116 : static int
10117 8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
10118 : {
10119 8816 : return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
10120 : }
10121 :
10122 : struct blob_esnap_destroy_ctx {
10123 : spdk_blob_op_with_handle_complete cb_fn;
10124 : void *cb_arg;
10125 : struct spdk_blob *blob;
10126 : struct spdk_bs_dev *back_bs_dev;
10127 : bool abort_io;
10128 : };
10129 :
10130 : static void
10131 152 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
10132 : {
10133 152 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10134 152 : struct spdk_blob *blob = ctx->blob;
10135 152 : struct spdk_blob_store *bs = blob->bs;
10136 :
10137 152 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
10138 : blob->id);
10139 :
10140 152 : if (ctx->cb_fn != NULL) {
10141 136 : ctx->cb_fn(ctx->cb_arg, blob, status);
10142 : }
10143 152 : free(ctx);
10144 :
10145 152 : bs->esnap_channels_unloading--;
10146 152 : if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
10147 4 : spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
10148 : }
10149 152 : }
10150 :
10151 : static void
10152 160 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
10153 : {
10154 160 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10155 160 : struct spdk_blob *blob = ctx->blob;
10156 160 : struct spdk_bs_dev *bs_dev = ctx->back_bs_dev;
10157 160 : struct spdk_io_channel *channel = spdk_io_channel_iter_get_channel(i);
10158 160 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(channel);
10159 : struct blob_esnap_channel *esnap_channel;
10160 160 : struct blob_esnap_channel find = {};
10161 :
10162 160 : assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
10163 :
10164 160 : find.blob_id = blob->id;
10165 160 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10166 160 : if (esnap_channel != NULL) {
10167 12 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
10168 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10169 12 : RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10170 :
10171 12 : if (ctx->abort_io) {
10172 : spdk_bs_user_op_t *op, *tmp;
10173 :
10174 8 : TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
10175 0 : if (op->back_channel == esnap_channel->channel) {
10176 0 : TAILQ_REMOVE(&bs_channel->queued_io, op, link);
10177 0 : bs_user_op_abort(op, -EIO);
10178 : }
10179 : }
10180 : }
10181 :
10182 12 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10183 12 : free(esnap_channel);
10184 : }
10185 :
10186 160 : spdk_for_each_channel_continue(i, 0);
10187 160 : }
10188 :
10189 : /*
10190 : * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
10191 : * used when closing an esnap clone blob and after decoupling from the parent.
10192 : */
10193 : static void
10194 500 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
10195 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
10196 : {
10197 : struct blob_esnap_destroy_ctx *ctx;
10198 :
10199 500 : if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
10200 348 : if (cb_fn != NULL) {
10201 348 : cb_fn(cb_arg, blob, 0);
10202 : }
10203 348 : return;
10204 : }
10205 :
10206 152 : ctx = calloc(1, sizeof(*ctx));
10207 152 : if (ctx == NULL) {
10208 0 : if (cb_fn != NULL) {
10209 0 : cb_fn(cb_arg, blob, -ENOMEM);
10210 : }
10211 0 : return;
10212 : }
10213 152 : ctx->cb_fn = cb_fn;
10214 152 : ctx->cb_arg = cb_arg;
10215 152 : ctx->blob = blob;
10216 152 : ctx->back_bs_dev = blob->back_bs_dev;
10217 152 : ctx->abort_io = abort_io;
10218 :
10219 152 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
10220 : blob->id);
10221 :
10222 152 : blob->bs->esnap_channels_unloading++;
10223 152 : spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
10224 : blob_esnap_destroy_channels_done);
10225 : }
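/*
 * Usage sketch (editor's illustration; close_esnap_channels_done is a
 * hypothetical completion): the close path tears down the per-thread esnap
 * channels without aborting in-flight IO, while replacing back_bs_dev
 * passes abort_io = true (see blob_set_back_bs_dev_frozen below) so queued
 * IO is failed with -EIO.
 */
#if 0
blob_esnap_destroy_bs_dev_channels(blob, false, close_esnap_channels_done, seq);
#endif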
10226 :
10227 : /*
10228 : * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
10229 : * bs_channel is destroyed.
10230 : */
10231 : static void
10232 1029 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
10233 : {
10234 : struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
10235 :
10236 1029 : assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
10237 :
10238 1029 : SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
10239 : spdk_thread_get_name(spdk_get_thread()));
10240 1061 : RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
10241 : esnap_channel_tmp) {
10242 32 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
10243 : ": destroying one channel in thread %s\n",
10244 : esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
10245 32 : RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
10246 32 : spdk_put_io_channel(esnap_channel->channel);
10247 32 : free(esnap_channel);
10248 : }
10249 1029 : SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
10250 : spdk_thread_get_name(spdk_get_thread()));
10251 1029 : }
10252 :
10253 : static void
10254 28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
10255 : {
10256 28 : struct set_bs_dev_ctx *ctx = _ctx;
10257 :
10258 28 : if (bserrno != 0) {
10259 : 		/* Even though the unfreeze failed, the update may have succeeded. */
10260 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
10261 : bserrno);
10262 : }
10263 28 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
10264 28 : free(ctx);
10265 28 : }
10266 :
10267 : static void
10268 28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
10269 : {
10270 28 : struct set_bs_dev_ctx *ctx = _ctx;
10271 : int rc;
10272 :
10273 28 : if (bserrno != 0) {
10274 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
10275 : blob->id, bserrno);
10276 0 : ctx->bserrno = bserrno;
10277 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10278 0 : return;
10279 : }
10280 :
10281 28 : if (blob->back_bs_dev != NULL) {
10282 28 : blob_back_bs_dev_unref(blob);
10283 : }
10284 :
10285 28 : if (ctx->parent_refs_cb_fn) {
10286 20 : rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
10287 20 : if (rc != 0) {
10288 0 : ctx->bserrno = rc;
10289 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10290 0 : return;
10291 : }
10292 : }
10293 :
10294 28 : SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
10295 28 : blob->back_bs_dev = ctx->back_bs_dev;
10296 28 : ctx->bserrno = 0;
10297 :
10298 28 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10299 : }
10300 :
10301 : static void
10302 28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
10303 : {
10304 28 : struct set_bs_dev_ctx *ctx = _ctx;
10305 28 : struct spdk_blob *blob = ctx->blob;
10306 :
10307 28 : if (bserrno != 0) {
10308 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
10309 : bserrno);
10310 0 : ctx->cb_fn(ctx->cb_arg, bserrno);
10311 0 : free(ctx);
10312 0 : return;
10313 : }
10314 :
10315 : /*
10316 : * This does not prevent future reads from the esnap device because any future IO will
10317 : * lazily create a new esnap IO channel.
10318 : */
10319 28 : blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
10320 : }
10321 :
10322 : void
10323 8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
10324 : spdk_blob_op_complete cb_fn, void *cb_arg)
10325 : {
10326 8 : if (!blob_is_esnap_clone(blob)) {
10327 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10328 0 : cb_fn(cb_arg, -EINVAL);
10329 0 : return;
10330 : }
10331 :
10332 8 : blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
10333 : }
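/*
 * Usage sketch (editor's illustration; new_bs_dev and hotplug_done are
 * hypothetical): re-attach an external snapshot device, e.g. after a
 * missing esnap bdev reappears.
 */
#if 0
spdk_blob_set_esnap_bs_dev(blob, new_bs_dev, hotplug_done, NULL);
#endif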
10334 :
10335 : struct spdk_bs_dev *
10336 4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
10337 : {
10338 4 : if (!blob_is_esnap_clone(blob)) {
10339 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10340 0 : return NULL;
10341 : }
10342 :
10343 4 : return blob->back_bs_dev;
10344 : }
10345 :
10346 : bool
10347 28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
10348 : {
10349 28 : if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
10350 4 : return true;
10351 : }
10352 24 : if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
10353 12 : return false;
10354 : }
10355 :
10356 12 : return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
10357 : }
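/*
 * Usage sketch (editor's illustration): a blob is degraded when either the
 * blobstore's base device or the blob's back device reports itself degraded.
 */
#if 0
if (spdk_blob_is_degraded(blob)) {
	/* Surface the condition, e.g. mark the volume degraded in status output. */
}
#endif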
10358 :
10359 3 : SPDK_LOG_REGISTER_COMPONENT(blob)
10360 3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)