Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright (C) 2022 Intel Corporation.
3 : : * Copyright (c) Peng Yu yupeng0921@gmail.com.
4 : : * All rights reserved.
5 : : */
6 : :
7 : : #include "bdev_raid.h"
8 : :
9 : : #include "spdk/env.h"
10 : : #include "spdk/thread.h"
11 : : #include "spdk/string.h"
12 : : #include "spdk/util.h"
13 : :
14 : : #include "spdk/log.h"
15 : :
16 : : struct concat_block_range {
17 : : uint64_t start;
18 : : uint64_t length;
19 : : };
20 : :
21 : : /*
22 : : * brief:
23 : : * concat_bdev_io_completion function is called by lower layers to notify the
24 : : * raid module that a particular bdev_io has completed.
25 : : * params:
26 : : * bdev_io - pointer to bdev io submitted to lower layers, like child io
27 : : * success - bdev_io status
28 : : * cb_arg - function callback context (parent raid_bdev_io)
29 : : * returns:
30 : : * none
31 : : */
32 : : static void
33 : 1753598 : concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
34 : : {
35 : 1753598 : struct raid_bdev_io *raid_io = cb_arg;
36 : :
37 : 1753598 : spdk_bdev_free_io(bdev_io);
38 : :
39 [ + + ]: 1753598 : if (success) {
40 : 1753592 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
41 : : } else {
42 : 6 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
43 : : }
44 : 1753598 : }
45 : :
46 : : static void concat_submit_rw_request(struct raid_bdev_io *raid_io);
47 : :
48 : : static void
49 : 1320 : _concat_submit_rw_request(void *_raid_io)
50 : : {
51 : 1320 : struct raid_bdev_io *raid_io = _raid_io;
52 : :
53 : 1320 : concat_submit_rw_request(raid_io);
54 : 1320 : }
55 : :
56 : : /*
57 : : * brief:
58 : : * concat_submit_rw_request function is used to submit I/O to the correct
59 : : * member disk for concat bdevs.
60 : : * params:
61 : : * raid_io - pointer to the parent raid_bdev_io
62 : : * returns:
63 : : * none
64 : : */
65 : : static void
66 : 1754918 : concat_submit_rw_request(struct raid_bdev_io *raid_io)
67 : : {
68 : 1754918 : struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
69 : 1754918 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
70 : 1754918 : struct concat_block_range *block_range = raid_bdev->module_private;
71 : : uint64_t pd_lba;
72 : : uint64_t pd_blocks;
73 : : int pd_idx;
74 : 1754918 : int ret = 0;
75 : : struct raid_base_bdev_info *base_info;
76 : : struct spdk_io_channel *base_ch;
77 : 1754918 : struct spdk_bdev_ext_io_opts io_opts = {};
78 : : int i;
79 : :
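: : /*
: : * block_range[] stores the cumulative start offset and length of each
: : * member disk; pick the last member whose range begins at or before
: : * offset_blocks, i.e. the member that contains this request.
: : */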
80 : 1754918 : pd_idx = -1;
81 [ + + ]: 4533213 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
82 [ + + ]: 3781976 : if (block_range[i].start > raid_io->offset_blocks) {
83 : 1003681 : break;
84 : : }
85 : 2778295 : pd_idx = i;
86 : : }
87 [ - + ]: 1754918 : assert(pd_idx >= 0);
88 [ - + ]: 1754918 : assert(raid_io->offset_blocks >= block_range[pd_idx].start);
89 : 1754918 : pd_lba = raid_io->offset_blocks - block_range[pd_idx].start;
90 : 1754918 : pd_blocks = raid_io->num_blocks;
91 : 1754918 : base_info = &raid_bdev->base_bdev_info[pd_idx];
92 [ - + ]: 1754918 : if (base_info->desc == NULL) {
93 : 0 : SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
94 : 0 : assert(0);
95 : : }
96 : :
97 : : /*
98 : : * Submit child io to the bdev layer using base bdev descriptors, base
99 : : * bdev lba, base bdev child io length in blocks, buffer, completion
100 : : * function and function callback context
101 : : */
102 [ - + ]: 1754918 : assert(raid_ch != NULL);
103 : 1754918 : base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);
104 : :
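: : /* Pass the parent's memory domain and metadata buffer through to the child I/O */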
105 : 1754918 : io_opts.size = sizeof(io_opts);
106 : 1754918 : io_opts.memory_domain = raid_io->memory_domain;
107 : 1754918 : io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
108 : 1754918 : io_opts.metadata = raid_io->md_buf;
109 : :
110 [ + + ]: 1754918 : if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
111 : 626551 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
112 : : raid_io->iovs, raid_io->iovcnt,
113 : : pd_lba, pd_blocks, concat_bdev_io_completion,
114 : : raid_io, &io_opts);
115 [ + - ]: 1128367 : } else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
116 : 1128367 : ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
117 : : raid_io->iovs, raid_io->iovcnt,
118 : : pd_lba, pd_blocks, concat_bdev_io_completion,
119 : : raid_io, &io_opts);
120 : : } else {
121 : 0 : SPDK_ERRLOG("Recvd not supported io type %u\n", raid_io->type);
122 : 0 : assert(0);
123 : : }
124 : :
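: : /*
: : * On -ENOMEM the base bdev is temporarily out of bdev_io resources;
: : * queue the raid_io on that bdev's io_wait queue and resubmit the whole
: : * request via _concat_submit_rw_request once resources are available.
: : */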
125 [ + + ]: 1754918 : if (ret == -ENOMEM) {
126 : 1320 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
127 : : base_ch, _concat_submit_rw_request);
128 [ - + ]: 1753598 : } else if (ret != 0) {
129 : 0 : SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
130 : 0 : assert(false);
131 : : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
132 : : }
133 : 1754918 : }
134 : :
135 : : static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);
136 : :
137 : : static void
138 : 330 : _concat_submit_null_payload_request(void *_raid_io)
139 : : {
140 : 330 : struct raid_bdev_io *raid_io = _raid_io;
141 : :
142 : 330 : concat_submit_null_payload_request(raid_io);
143 : 330 : }
144 : :
145 : : static void
146 : 498435 : concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
147 : : {
148 : 498435 : struct raid_bdev_io *raid_io = cb_arg;
149 : :
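: : /*
: : * Count one completed child I/O against the parent; the parent raid_io
: : * completes once all of its child I/Os have completed, and fails if any
: : * child reported failure.
: : */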
150 [ + - ]: 498435 : raid_bdev_io_complete_part(raid_io, 1, success ?
151 : : SPDK_BDEV_IO_STATUS_SUCCESS :
152 : : SPDK_BDEV_IO_STATUS_FAILED);
153 : :
154 : 498435 : spdk_bdev_free_io(bdev_io);
155 : 498435 : }
156 : :
157 : : /*
158 : : * brief:
159 : : * concat_submit_null_payload_request function submits the next batch of
160 : : * I/O requests that carry a block range but no payload, such as FLUSH and UNMAP,
161 : : * to the member disks; it submits as many as possible unless a base io request
162 : : * fails with -ENOMEM, in which case it queues itself for later resubmission.
163 : : * params:
164 : : * raid_io - pointer to the parent raid_bdev_io on the raid bdev device
165 : : * returns:
166 : : * none
167 : : */
168 : : static void
169 : 498445 : concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
170 : : {
171 : : struct raid_bdev *raid_bdev;
172 : : int ret;
173 : : struct raid_base_bdev_info *base_info;
174 : : struct spdk_io_channel *base_ch;
175 : : uint64_t pd_lba;
176 : : uint64_t pd_blocks;
177 : : uint64_t offset_blocks;
178 : : uint64_t num_blocks;
179 : : struct concat_block_range *block_range;
180 : : int i, start_idx, stop_idx;
181 : :
182 : 498445 : raid_bdev = raid_io->raid_bdev;
183 : 498445 : block_range = raid_bdev->module_private;
184 : :
185 : 498445 : offset_blocks = raid_io->offset_blocks;
186 : 498445 : num_blocks = raid_io->num_blocks;
187 : 498445 : start_idx = -1;
188 : 498445 : stop_idx = -1;
189 : : /*
190 : : * Go through all base bdevs and find the first and the last bdev touched by this request
191 : : */
192 [ + - ]: 738289 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
193 : : /* skip the bdevs that end at or before offset_blocks */
194 [ + + ]: 738289 : if (offset_blocks >= block_range[i].start + block_range[i].length) {
195 : 239204 : continue;
196 : : }
197 [ + + ]: 499085 : if (start_idx == -1) {
198 : 498445 : start_idx = i;
199 : : } else {
200 : : /*
201 : : * The offset_blocks might be at the middle of the first bdev.
202 : : * The offset_blocks might fall in the middle of the first bdev.
203 : : * For every bdev after the first, offset_blocks must be exactly
204 : : * at the start of that bdev.
205 [ - + ]: 640 : assert(offset_blocks == block_range[i].start);
206 : : }
207 : 499085 : pd_lba = offset_blocks - block_range[i].start;
208 : 499085 : pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
209 : 499085 : offset_blocks += pd_blocks;
210 : 499085 : num_blocks -= pd_blocks;
211 [ + + ]: 499085 : if (num_blocks == 0) {
212 : 498445 : stop_idx = i;
213 : 498445 : break;
214 : : }
215 : : }
216 [ - + ]: 498445 : assert(start_idx >= 0);
217 [ - + ]: 498445 : assert(stop_idx >= 0);
218 : :
219 [ + + ]: 498445 : if (raid_io->base_bdev_io_remaining == 0) {
220 : 498115 : raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
221 : : }
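: : /*
: : * Second pass: walk the affected members again and submit one child I/O
: : * per member, resuming after any children that were already submitted
: : * before a previous -ENOMEM.
: : */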
222 : 498445 : offset_blocks = raid_io->offset_blocks;
223 : 498445 : num_blocks = raid_io->num_blocks;
224 [ + + ]: 996880 : for (i = start_idx; i <= stop_idx; i++) {
225 [ - + ]: 498765 : assert(offset_blocks >= block_range[i].start);
226 [ - + ]: 498765 : assert(offset_blocks < block_range[i].start + block_range[i].length);
227 : 498765 : pd_lba = offset_blocks - block_range[i].start;
228 : 498765 : pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
229 : 498765 : offset_blocks += pd_blocks;
230 : 498765 : num_blocks -= pd_blocks;
231 : : /*
232 : : * Skip the IOs that have already been submitted
233 : : */
234 [ - + ]: 498765 : if (i < start_idx + raid_io->base_bdev_io_submitted) {
235 : 0 : continue;
236 : : }
237 : 498765 : base_info = &raid_bdev->base_bdev_info[i];
238 : 498765 : base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
239 [ + + - ]: 498765 : switch (raid_io->type) {
240 : 498275 : case SPDK_BDEV_IO_TYPE_UNMAP:
241 : 498275 : ret = raid_bdev_unmap_blocks(base_info, base_ch,
242 : : pd_lba, pd_blocks,
243 : : concat_base_io_complete, raid_io);
244 : 498275 : break;
245 : 490 : case SPDK_BDEV_IO_TYPE_FLUSH:
246 : 490 : ret = raid_bdev_flush_blocks(base_info, base_ch,
247 : : pd_lba, pd_blocks,
248 : : concat_base_io_complete, raid_io);
249 : 490 : break;
250 : 0 : default:
251 : 0 : SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
252 : 0 : assert(false);
253 : : ret = -EIO;
254 : : }
255 [ + + ]: 498765 : if (ret == 0) {
256 : 498435 : raid_io->base_bdev_io_submitted++;
257 [ + - ]: 330 : } else if (ret == -ENOMEM) {
258 : 330 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
259 : : base_ch, _concat_submit_null_payload_request);
260 : 330 : return;
261 : : } else {
262 : 0 : SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
263 : 0 : assert(false);
264 : : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
265 : : return;
266 : : }
267 : : }
268 : : }
269 : :
270 : : static int
271 : 1881 : concat_start(struct raid_bdev *raid_bdev)
272 : : {
273 : 1881 : uint64_t total_blockcnt = 0;
274 : : struct raid_base_bdev_info *base_info;
275 : : struct concat_block_range *block_range;
276 : :
277 : 1881 : block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
278 [ - + ]: 1881 : if (!block_range) {
279 : 0 : SPDK_ERRLOG("Can not allocate block_range, num_base_bdevs: %u",
280 : : raid_bdev->num_base_bdevs);
281 : 0 : return -ENOMEM;
282 : : }
283 : :
284 : 1881 : int idx = 0;
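: : /*
: : * Round each member's usable size down to a whole number of strips and
: : * lay the members out back to back; block_range[] records where each
: : * member's blocks begin within the concatenated bdev.
: : */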
285 [ + + ]: 9521 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
286 [ - + ]: 7640 : uint64_t strip_cnt = base_info->data_size >> raid_bdev->strip_size_shift;
287 [ - + ]: 7640 : uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;
288 : :
289 : 7640 : base_info->data_size = pd_block_cnt;
290 : :
291 : 7640 : block_range[idx].start = total_blockcnt;
292 : 7640 : block_range[idx].length = pd_block_cnt;
293 : 7640 : total_blockcnt += pd_block_cnt;
294 : 7640 : idx++;
295 : : }
296 : :
297 : 1881 : raid_bdev->module_private = block_range;
298 : :
299 [ - + - + ]: 1881 : SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
300 : : total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
301 : 1881 : raid_bdev->bdev.blockcnt = total_blockcnt;
302 : :
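: : /*
: : * Split I/O on strip boundaries so that a single read/write never spans
: : * two members; each member's length is a multiple of the strip size, so
: : * every strip-aligned chunk maps to exactly one member.
: : */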
303 : 1881 : raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
304 : 1881 : raid_bdev->bdev.split_on_optimal_io_boundary = true;
305 : :
306 : 1881 : return 0;
307 : : }
308 : :
309 : : static bool
310 : 1881 : concat_stop(struct raid_bdev *raid_bdev)
311 : : {
312 : 1881 : struct concat_block_range *block_range = raid_bdev->module_private;
313 : :
314 : 1881 : free(block_range);
315 : :
316 : 1881 : return true;
317 : : }
318 : :
319 : : static struct raid_bdev_module g_concat_module = {
320 : : .level = CONCAT,
321 : : .base_bdevs_min = 1,
322 : : .memory_domains_supported = true,
323 : : .start = concat_start,
324 : : .stop = concat_stop,
325 : : .submit_rw_request = concat_submit_rw_request,
326 : : .submit_null_payload_request = concat_submit_null_payload_request,
327 : : };
328 : 2010 : RAID_MODULE_REGISTER(&g_concat_module)
329 : :
330 : 2010 : SPDK_LOG_REGISTER_COMPONENT(bdev_concat)