Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2022 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "bdev_raid.h"
7 :
8 : #include "spdk/likely.h"
9 : #include "spdk/log.h"
10 :
11 : struct raid1_info {
12 : /* The parent raid bdev */
13 : struct raid_bdev *raid_bdev;
14 : };
15 :
16 : struct raid1_io_channel {
17 : /* Array of per-base_bdev counters of outstanding read blocks on this channel */
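     : /* Sized to num_base_bdevs when the io_device is registered in raid1_start() */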
18 : uint64_t read_blocks_outstanding[0];
19 : };
20 :
21 : static void
22 1338 : raid1_channel_inc_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
23 : uint64_t num_blocks)
24 : {
25 1338 : struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
26 :
27 1338 : assert(raid1_ch->read_blocks_outstanding[idx] <= UINT64_MAX - num_blocks);
28 1338 : raid1_ch->read_blocks_outstanding[idx] += num_blocks;
29 1338 : }
30 :
31 : static void
32 72 : raid1_channel_dec_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
33 : uint64_t num_blocks)
34 : {
35 72 : struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
36 :
37 72 : assert(raid1_ch->read_blocks_outstanding[idx] >= num_blocks);
38 72 : raid1_ch->read_blocks_outstanding[idx] -= num_blocks;
39 72 : }
40 :
41 : static void
42 1506 : raid1_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
43 : {
44 1506 : memset(opts, 0, sizeof(*opts));
45 1506 : opts->size = sizeof(*opts);
46 1506 : opts->memory_domain = raid_io->memory_domain;
47 1506 : opts->memory_domain_ctx = raid_io->memory_domain_ctx;
48 1506 : opts->metadata = raid_io->md_buf;
49 1506 : }
50 :
51 : static void
52 90 : raid1_write_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
53 : {
54 90 : struct raid_bdev_io *raid_io = cb_arg;
55 :
56 90 : if (!success) {
57 : struct raid_base_bdev_info *base_info;
58 :
59 60 : base_info = raid_bdev_channel_get_base_info(raid_io->raid_ch, bdev_io->bdev);
60 60 : if (base_info) {
61 60 : raid_bdev_fail_base_bdev(base_info);
62 : }
63 : }
64 :
65 90 : spdk_bdev_free_io(bdev_io);
66 :
67 90 : raid_bdev_io_complete_part(raid_io, 1, success ?
68 : SPDK_BDEV_IO_STATUS_SUCCESS :
69 : SPDK_BDEV_IO_STATUS_FAILED);
70 90 : }
71 :
72 : static struct raid_base_bdev_info *
73 30 : raid1_get_read_io_base_bdev(struct raid_bdev_io *raid_io)
74 : {
75 30 : assert(raid_io->type == SPDK_BDEV_IO_TYPE_READ);
76 30 : return &raid_io->raid_bdev->base_bdev_info[raid_io->base_bdev_io_submitted];
77 : }
78 :
79 : static void
80 54 : raid1_correct_read_error_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
81 : {
82 54 : struct raid_bdev_io *raid_io = cb_arg;
83 :
84 54 : spdk_bdev_free_io(bdev_io);
85 :
86 54 : if (!success) {
87 12 : struct raid_base_bdev_info *base_info = raid1_get_read_io_base_bdev(raid_io);
88 :
89 : /* Writing to the bdev that had the read error failed, so fail the base bdev
90 : * but complete the raid_io successfully. */
91 12 : raid_bdev_fail_base_bdev(base_info);
92 : }
93 :
94 54 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
95 54 : }
96 :
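     : /* After a successful read from another base bdev, write that data back to the base bdev
     :  * that returned the read error; if this corrective write also fails, fail that base bdev
     :  * but still complete the raid_io successfully. */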
97 : static void
98 54 : raid1_correct_read_error(void *_raid_io)
99 : {
100 54 : struct raid_bdev_io *raid_io = _raid_io;
101 54 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
102 54 : struct spdk_bdev_ext_io_opts io_opts;
103 : struct raid_base_bdev_info *base_info;
104 : struct spdk_io_channel *base_ch;
105 : uint8_t i;
106 : int ret;
107 :
108 54 : i = raid_io->base_bdev_io_submitted;
109 54 : base_info = &raid_bdev->base_bdev_info[i];
110 54 : base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
111 54 : assert(base_ch != NULL);
112 :
113 54 : raid1_init_ext_io_opts(&io_opts, raid_io);
114 54 : ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
115 : raid_io->offset_blocks, raid_io->num_blocks,
116 : raid1_correct_read_error_completion, raid_io, &io_opts);
117 54 : if (spdk_unlikely(ret != 0)) {
118 0 : if (ret == -ENOMEM) {
119 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
120 : base_ch, raid1_correct_read_error);
121 : } else {
122 0 : raid_bdev_fail_base_bdev(base_info);
123 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
124 : }
125 : }
126 54 : }
127 :
128 : static void raid1_read_other_base_bdev(void *_raid_io);
129 :
130 : static void
131 78 : raid1_read_other_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
132 : {
133 78 : struct raid_bdev_io *raid_io = cb_arg;
134 :
135 78 : spdk_bdev_free_io(bdev_io);
136 :
137 78 : if (!success) {
138 24 : assert(raid_io->base_bdev_io_remaining > 0);
139 24 : raid_io->base_bdev_io_remaining--;
140 24 : raid1_read_other_base_bdev(raid_io);
141 24 : return;
142 : }
143 :
144 : /* Try to correct the read error by writing the data read from another base bdev */
145 54 : raid1_correct_read_error(raid_io);
146 : }
147 :
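     : /* Retry a failed read on the remaining base bdevs. base_bdev_io_remaining counts the
     :  * base bdevs not yet tried, so the loop resumes at num_base_bdevs - remaining; if no
     :  * base bdev can service the read, fail the original base bdev and the raid_io. */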
148 : static void
149 96 : raid1_read_other_base_bdev(void *_raid_io)
150 : {
151 96 : struct raid_bdev_io *raid_io = _raid_io;
152 96 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
153 96 : struct spdk_bdev_ext_io_opts io_opts;
154 : struct raid_base_bdev_info *base_info;
155 : struct spdk_io_channel *base_ch;
156 : uint8_t i;
157 : int ret;
158 :
159 168 : for (i = raid_bdev->num_base_bdevs - raid_io->base_bdev_io_remaining; i < raid_bdev->num_base_bdevs;
160 72 : i++) {
161 150 : base_info = &raid_bdev->base_bdev_info[i];
162 150 : base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
163 :
164 150 : if (base_ch == NULL || i == raid_io->base_bdev_io_submitted) {
165 72 : raid_io->base_bdev_io_remaining--;
166 72 : continue;
167 : }
168 :
169 78 : raid1_init_ext_io_opts(&io_opts, raid_io);
170 78 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
171 : raid_io->offset_blocks, raid_io->num_blocks,
172 : raid1_read_other_completion, raid_io, &io_opts);
173 78 : if (spdk_unlikely(ret != 0)) {
174 0 : if (ret == -ENOMEM) {
175 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
176 : base_ch, raid1_read_other_base_bdev);
177 : } else {
178 0 : break;
179 : }
180 : }
181 78 : return;
182 : }
183 :
184 18 : base_info = raid1_get_read_io_base_bdev(raid_io);
185 18 : raid_bdev_fail_base_bdev(base_info);
186 :
187 18 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
188 : }
189 :
190 : static void
191 72 : raid1_read_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
192 : {
193 72 : struct raid_bdev_io *raid_io = cb_arg;
194 :
195 72 : spdk_bdev_free_io(bdev_io);
196 :
197 72 : raid1_channel_dec_read_counters(raid_io->raid_ch, raid_io->base_bdev_io_submitted,
198 : raid_io->num_blocks);
199 :
200 72 : if (!success) {
201 72 : raid_io->base_bdev_io_remaining = raid_io->raid_bdev->num_base_bdevs;
202 72 : raid1_read_other_base_bdev(raid_io);
203 72 : return;
204 : }
205 :
206 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
207 : }
208 :
209 : static void raid1_submit_rw_request(struct raid_bdev_io *raid_io);
210 :
211 : static void
212 0 : _raid1_submit_rw_request(void *_raid_io)
213 : {
214 0 : struct raid_bdev_io *raid_io = _raid_io;
215 :
216 0 : raid1_submit_rw_request(raid_io);
217 0 : }
218 :
219 : static uint8_t
220 1338 : raid1_channel_next_read_base_bdev(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch)
221 : {
222 1338 : struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
223 1338 : uint64_t read_blocks_min = UINT64_MAX;
224 1338 : uint8_t idx = UINT8_MAX;
225 : uint8_t i;
226 :
227 4884 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
228 3546 : if (raid_bdev_channel_get_base_channel(raid_ch, i) != NULL &&
229 3534 : raid1_ch->read_blocks_outstanding[i] < read_blocks_min) {
230 2934 : read_blocks_min = raid1_ch->read_blocks_outstanding[i];
231 2934 : idx = i;
232 : }
233 : }
234 :
235 1338 : return idx;
236 : }
237 :
238 : static int
239 1338 : raid1_submit_read_request(struct raid_bdev_io *raid_io)
240 : {
241 1338 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
242 1338 : struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
243 1338 : struct spdk_bdev_ext_io_opts io_opts;
244 : struct raid_base_bdev_info *base_info;
245 : struct spdk_io_channel *base_ch;
246 : uint8_t idx;
247 : int ret;
248 :
249 1338 : idx = raid1_channel_next_read_base_bdev(raid_bdev, raid_ch);
250 1338 : if (spdk_unlikely(idx == UINT8_MAX)) {
251 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
252 0 : return 0;
253 : }
254 :
255 1338 : base_info = &raid_bdev->base_bdev_info[idx];
256 1338 : base_ch = raid_bdev_channel_get_base_channel(raid_ch, idx);
257 :
258 1338 : raid1_init_ext_io_opts(&io_opts, raid_io);
259 1338 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
260 : raid_io->offset_blocks, raid_io->num_blocks,
261 : raid1_read_bdev_io_completion, raid_io, &io_opts);
262 :
263 1338 : if (spdk_likely(ret == 0)) {
264 1338 : raid1_channel_inc_read_counters(raid_ch, idx, raid_io->num_blocks);
265 1338 : raid_io->base_bdev_io_submitted = idx;
266 0 : } else if (spdk_unlikely(ret == -ENOMEM)) {
267 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
268 : base_ch, _raid1_submit_rw_request);
269 0 : return 0;
270 : }
271 :
272 1338 : return ret;
273 : }
274 :
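     : /* Mirror the write to every base bdev. base_bdev_io_submitted tracks progress so the
     :  * function can resume where it left off after an -ENOMEM retry; slots without an
     :  * active channel are completed as failed parts. */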
275 : static int
276 36 : raid1_submit_write_request(struct raid_bdev_io *raid_io)
277 : {
278 36 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
279 36 : struct spdk_bdev_ext_io_opts io_opts;
280 : struct raid_base_bdev_info *base_info;
281 : struct spdk_io_channel *base_ch;
282 : uint8_t idx;
283 : uint64_t base_bdev_io_not_submitted;
284 36 : int ret = 0;
285 :
286 36 : if (raid_io->base_bdev_io_submitted == 0) {
287 36 : raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
288 36 : raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
289 : }
290 :
291 36 : raid1_init_ext_io_opts(&io_opts, raid_io);
292 126 : for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) {
293 90 : base_info = &raid_bdev->base_bdev_info[idx];
294 90 : base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, idx);
295 :
296 90 : if (base_ch == NULL) {
297 : /* skip a missing base bdev's slot */
298 0 : raid_io->base_bdev_io_submitted++;
299 0 : raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_FAILED);
300 0 : continue;
301 : }
302 :
303 90 : ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
304 : raid_io->offset_blocks, raid_io->num_blocks,
305 : raid1_write_bdev_io_completion, raid_io, &io_opts);
306 90 : if (spdk_unlikely(ret != 0)) {
307 0 : if (spdk_unlikely(ret == -ENOMEM)) {
308 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
309 : base_ch, _raid1_submit_rw_request);
310 0 : return 0;
311 : }
312 :
313 0 : base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
314 0 : raid_io->base_bdev_io_submitted;
315 0 : raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
316 : SPDK_BDEV_IO_STATUS_FAILED);
317 0 : return 0;
318 : }
319 :
320 90 : raid_io->base_bdev_io_submitted++;
321 : }
322 :
323 36 : if (raid_io->base_bdev_io_submitted == 0) {
324 0 : ret = -ENODEV;
325 : }
326 :
327 36 : return ret;
328 : }
329 :
330 : static void
331 0 : raid1_submit_rw_request(struct raid_bdev_io *raid_io)
332 : {
333 : int ret;
334 :
335 0 : switch (raid_io->type) {
336 0 : case SPDK_BDEV_IO_TYPE_READ:
337 0 : ret = raid1_submit_read_request(raid_io);
338 0 : break;
339 0 : case SPDK_BDEV_IO_TYPE_WRITE:
340 0 : ret = raid1_submit_write_request(raid_io);
341 0 : break;
342 0 : default:
343 0 : ret = -EINVAL;
344 0 : break;
345 : }
346 :
347 0 : if (spdk_unlikely(ret != 0)) {
348 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
349 : }
350 0 : }
351 :
352 : static void
353 36 : raid1_ioch_destroy(void *io_device, void *ctx_buf)
354 : {
355 36 : }
356 :
357 : static int
358 36 : raid1_ioch_create(void *io_device, void *ctx_buf)
359 : {
360 36 : return 0;
361 : }
362 :
363 : static void
364 48 : raid1_io_device_unregister_done(void *io_device)
365 : {
366 48 : struct raid1_info *r1info = io_device;
367 :
368 48 : raid_bdev_module_stop_done(r1info->raid_bdev);
369 :
370 48 : free(r1info);
371 48 : }
372 :
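     : /* Size the RAID1 bdev to the smallest base bdev data size and register an io_device
     :  * whose per-channel context holds one outstanding-read counter per base bdev. */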
373 : static int
374 48 : raid1_start(struct raid_bdev *raid_bdev)
375 : {
376 48 : uint64_t min_blockcnt = UINT64_MAX;
377 : struct raid_base_bdev_info *base_info;
378 : struct raid1_info *r1info;
379 48 : char name[256];
380 :
381 48 : r1info = calloc(1, sizeof(*r1info));
382 48 : if (!r1info) {
383 0 : SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n");
384 0 : return -ENOMEM;
385 : }
386 48 : r1info->raid_bdev = raid_bdev;
387 :
388 168 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
389 120 : min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
390 : }
391 :
392 168 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
393 120 : base_info->data_size = min_blockcnt;
394 : }
395 :
396 48 : raid_bdev->bdev.blockcnt = min_blockcnt;
397 48 : raid_bdev->module_private = r1info;
398 :
399 48 : snprintf(name, sizeof(name), "raid1_%s", raid_bdev->bdev.name);
400 48 : spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy,
401 48 : sizeof(struct raid1_io_channel) + raid_bdev->num_base_bdevs * sizeof(uint64_t),
402 : name);
403 :
404 48 : return 0;
405 : }
406 :
407 : static bool
408 48 : raid1_stop(struct raid_bdev *raid_bdev)
409 : {
410 48 : struct raid1_info *r1info = raid_bdev->module_private;
411 :
412 48 : spdk_io_device_unregister(r1info, raid1_io_device_unregister_done);
413 :
414 48 : return false;
415 : }
416 :
417 : static struct spdk_io_channel *
418 36 : raid1_get_io_channel(struct raid_bdev *raid_bdev)
419 : {
420 36 : struct raid1_info *r1info = raid_bdev->module_private;
421 :
422 36 : return spdk_get_io_channel(r1info);
423 : }
424 :
425 : static void
426 0 : raid1_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
427 : {
428 0 : struct raid_bdev_process_request *process_req = cb_arg;
429 :
430 0 : spdk_bdev_free_io(bdev_io);
431 :
432 0 : raid_bdev_process_request_complete(process_req, success ? 0 : -EIO);
433 0 : }
434 :
435 : static void raid1_process_submit_write(struct raid_bdev_process_request *process_req);
436 :
437 : static void
438 0 : _raid1_process_submit_write(void *ctx)
439 : {
440 0 : struct raid_bdev_process_request *process_req = ctx;
441 :
442 0 : raid1_process_submit_write(process_req);
443 0 : }
444 :
445 : static void
446 0 : raid1_process_submit_write(struct raid_bdev_process_request *process_req)
447 : {
448 0 : struct raid_bdev_io *raid_io = &process_req->raid_io;
449 0 : struct spdk_bdev_ext_io_opts io_opts;
450 : int ret;
451 :
452 0 : raid1_init_ext_io_opts(&io_opts, raid_io);
453 0 : ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
454 : raid_io->iovs, raid_io->iovcnt,
455 : raid_io->offset_blocks, raid_io->num_blocks,
456 : raid1_process_write_completed, process_req, &io_opts);
457 0 : if (spdk_unlikely(ret != 0)) {
458 0 : if (ret == -ENOMEM) {
459 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
460 : process_req->target_ch, _raid1_process_submit_write);
461 : } else {
462 0 : raid_bdev_process_request_complete(process_req, ret);
463 : }
464 : }
465 0 : }
466 :
467 : static void
468 0 : raid1_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
469 : {
470 0 : struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
471 : struct raid_bdev_process_request, raid_io);
472 :
473 0 : if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
474 0 : raid_bdev_process_request_complete(process_req, -EIO);
475 0 : return;
476 : }
477 :
478 0 : raid1_process_submit_write(process_req);
479 : }
480 :
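     : /* Process (e.g. rebuild) request: read the region through the normal read path, then
     :  * write it to the process target in raid1_process_read_completed(). */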
481 : static int
482 0 : raid1_submit_process_request(struct raid_bdev_process_request *process_req,
483 : struct raid_bdev_io_channel *raid_ch)
484 : {
485 0 : struct raid_bdev_io *raid_io = &process_req->raid_io;
486 : int ret;
487 :
488 0 : raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
489 0 : process_req->offset_blocks, process_req->num_blocks,
490 : &process_req->iov, 1, process_req->md_buf, NULL, NULL);
491 0 : raid_io->completion_cb = raid1_process_read_completed;
492 :
493 0 : ret = raid1_submit_read_request(raid_io);
494 0 : if (spdk_likely(ret == 0)) {
495 0 : return process_req->num_blocks;
496 0 : } else if (ret < 0) {
497 0 : return ret;
498 : } else {
499 0 : return -EINVAL;
500 : }
501 : }
502 :
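     : /* Recompute the smallest usable size across the open base bdevs, notify the block
     :  * count change, and propagate the new data_size; returns true only if the size changed. */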
503 : static bool
504 0 : raid1_resize(struct raid_bdev *raid_bdev)
505 : {
506 : int rc;
507 0 : uint64_t min_blockcnt = UINT64_MAX;
508 : struct raid_base_bdev_info *base_info;
509 :
510 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
511 : struct spdk_bdev *base_bdev;
512 :
513 0 : if (base_info->desc == NULL) {
514 0 : continue;
515 : }
516 0 : base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
517 0 : min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset);
518 : }
519 :
520 0 : if (min_blockcnt == raid_bdev->bdev.blockcnt) {
521 0 : return false;
522 : }
523 :
524 0 : rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, min_blockcnt);
525 0 : if (rc != 0) {
526 0 : SPDK_ERRLOG("Failed to notify blockcount change\n");
527 0 : return false;
528 : }
529 :
530 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
531 0 : base_info->data_size = min_blockcnt;
532 : }
533 0 : return true;
534 : }
535 :
536 : static struct raid_bdev_module g_raid1_module = {
537 : .level = RAID1,
538 : .base_bdevs_min = 2,
539 : .base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1},
540 : .memory_domains_supported = true,
541 : .start = raid1_start,
542 : .stop = raid1_stop,
543 : .submit_rw_request = raid1_submit_rw_request,
544 : .get_io_channel = raid1_get_io_channel,
545 : .submit_process_request = raid1_submit_process_request,
546 : .resize = raid1_resize,
547 : };
548 1 : RAID_MODULE_REGISTER(&g_raid1_module)
549 :
550 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid1)