LCOV - code coverage report
Current view: top level - lib/vhost - vhost_blk.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 74 845 8.8 %
Date: 2024-11-05 10:06:02 Functions: 12 69 17.4 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/virtio_blk.h>
       7             : 
       8             : #include "spdk/env.h"
       9             : #include "spdk/bdev.h"
      10             : #include "spdk/bdev_module.h"
      11             : #include "spdk/thread.h"
      12             : #include "spdk/likely.h"
      13             : #include "spdk/string.h"
      14             : #include "spdk/util.h"
      15             : #include "spdk/vhost.h"
      16             : #include "spdk/json.h"
      17             : 
      18             : #include "vhost_internal.h"
      19             : #include <rte_version.h>
      20             : 
      /*
       * Baseline feature mask for every SPDK vhost-blk device: the generic SPDK
       * vhost features plus the virtio-blk feature bits listed below.
       */
      #define SPDK_VHOST_BLK_FEATURES_BASE \
              (SPDK_VHOST_FEATURES | \
               (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
               (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
               (1ULL << VIRTIO_BLK_F_GEOMETRY) | \
               (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
               (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
               (1ULL << VIRTIO_BLK_F_BARRIER) | \
               (1ULL << VIRTIO_BLK_F_SCSI) | \
               (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
               (1ULL << VIRTIO_BLK_F_MQ))

      /*
       * Features that are not supported: the generic disabled set plus four
       * virtio-blk bits. These four bits are also present in the base mask above;
       * how the two masks are combined is decided by code outside this excerpt.
       */
      #define SPDK_VHOST_BLK_DISABLED_FEATURES \
              (SPDK_VHOST_DISABLED_FEATURES | \
               (1ULL << VIRTIO_BLK_F_GEOMETRY) | \
               (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
               (1ULL << VIRTIO_BLK_F_BARRIER) | \
               (1ULL << VIRTIO_BLK_F_SCSI))

      /* vhost-user protocol features supported by vhost-blk. */
      #define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
              ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
               (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))

      /* Name of the default virtio-blk transport. */
      #define VIRTIO_BLK_DEFAULT_TRANSPORT "vhost_user_blk"
      39             : 
      40             : struct spdk_vhost_user_blk_task {
      41             :         struct spdk_vhost_blk_task blk_task;
      42             :         struct spdk_vhost_blk_session *bvsession;
      43             :         struct spdk_vhost_virtqueue *vq;
      44             : 
      45             :         uint16_t req_idx;
      46             :         uint16_t num_descs;
      47             :         uint16_t buffer_id;
      48             :         uint16_t inflight_head;
      49             : 
      50             :         /* If set, the task is currently used for I/O processing. */
      51             :         bool used;
      52             : };
      53             : 
      54             : struct spdk_vhost_blk_dev {
      55             :         struct spdk_vhost_dev vdev;
      56             :         struct spdk_bdev *bdev;
      57             :         struct spdk_bdev_desc *bdev_desc;
      58             :         const struct spdk_virtio_blk_transport_ops *ops;
      59             : 
      60             :         bool readonly;
      61             : };
      62             : 
      63             : struct spdk_vhost_blk_session {
      64             :         /* The parent session must be the very first field in this struct */
      65             :         struct spdk_vhost_session vsession;
      66             :         struct spdk_vhost_blk_dev *bvdev;
      67             :         struct spdk_poller *requestq_poller;
      68             :         struct spdk_io_channel *io_channel;
      69             :         struct spdk_poller *stop_poller;
      70             : };
      71             : 
      72             : /* forward declaration */
      73             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend;
      74             : 
      75             : static void vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task,
      76             :                 void *cb_arg);
      77             : 
      78             : static int
      79           0 : vhost_user_process_blk_request(struct spdk_vhost_user_blk_task *user_task)
      80             : {
      81           0 :         struct spdk_vhost_blk_session *bvsession = user_task->bvsession;
      82           0 :         struct spdk_vhost_dev *vdev = &bvsession->bvdev->vdev;
      83             : 
      84           0 :         return virtio_blk_process_request(vdev, bvsession->io_channel, &user_task->blk_task,
      85             :                                           vhost_user_blk_request_finish, NULL);
      86             : }
      87             : 
      88             : static struct spdk_vhost_blk_dev *
      89           4 : to_blk_dev(struct spdk_vhost_dev *vdev)
      90             : {
      91           4 :         if (vdev == NULL) {
      92           0 :                 return NULL;
      93             :         }
      94             : 
      95           4 :         if (vdev->backend->type != VHOST_BACKEND_BLK) {
      96           0 :                 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name);
      97           0 :                 return NULL;
      98             :         }
      99             : 
     100           4 :         return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev);
     101             : }
     102             : 
     103             : struct spdk_bdev *
     104           0 : vhost_blk_get_bdev(struct spdk_vhost_dev *vdev)
     105             : {
     106           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     107             : 
     108           0 :         assert(bvdev != NULL);
     109             : 
     110           0 :         return bvdev->bdev;
     111             : }
     112             : 
     113             : static struct spdk_vhost_blk_session *
     114           0 : to_blk_session(struct spdk_vhost_session *vsession)
     115             : {
     116           0 :         assert(vsession->vdev->backend->type == VHOST_BACKEND_BLK);
     117           0 :         return (struct spdk_vhost_blk_session *)vsession;
     118             : }
     119             : 
     120             : static inline void
     121           0 : blk_task_inc_task_cnt(struct spdk_vhost_user_blk_task *task)
     122             : {
     123           0 :         task->bvsession->vsession.task_cnt++;
     124           0 : }
     125             : 
     126             : static inline void
     127           0 : blk_task_dec_task_cnt(struct spdk_vhost_user_blk_task *task)
     128             : {
     129           0 :         assert(task->bvsession->vsession.task_cnt > 0);
     130           0 :         task->bvsession->vsession.task_cnt--;
     131           0 : }
     132             : 
     133             : static void
     134           0 : blk_task_finish(struct spdk_vhost_user_blk_task *task)
     135             : {
     136           0 :         blk_task_dec_task_cnt(task);
     137           0 :         task->used = false;
     138           0 : }
     139             : 
     140             : static void
     141           0 : blk_task_init(struct spdk_vhost_user_blk_task *task)
     142             : {
     143           0 :         struct spdk_vhost_blk_task *blk_task = &task->blk_task;
     144             : 
     145           0 :         task->used = true;
     146           0 :         blk_task->iovcnt = SPDK_COUNTOF(blk_task->iovs);
     147           0 :         blk_task->status = NULL;
     148           0 :         blk_task->used_len = 0;
     149           0 :         blk_task->payload_size = 0;
     150           0 : }
     151             : 
     152             : static void
     153           0 : blk_task_enqueue(struct spdk_vhost_user_blk_task *task)
     154             : {
     155           0 :         if (task->vq->packed.packed_ring) {
     156           0 :                 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq,
     157           0 :                                              task->num_descs,
     158           0 :                                              task->buffer_id, task->blk_task.used_len,
     159           0 :                                              task->inflight_head);
     160             :         } else {
     161           0 :                 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq,
     162           0 :                                            task->req_idx, task->blk_task.used_len);
     163             :         }
     164           0 : }
     165             : 
     166             : static void
     167           0 : vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task, void *cb_arg)
     168             : {
     169             :         struct spdk_vhost_user_blk_task *user_task;
     170             : 
     171           0 :         user_task = SPDK_CONTAINEROF(task, struct spdk_vhost_user_blk_task, blk_task);
     172             : 
     173           0 :         blk_task_enqueue(user_task);
     174             : 
     175           0 :         SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d\n status: %" PRIu8"\n",
     176             :                       user_task, user_task->req_idx, status);
     177           0 :         blk_task_finish(user_task);
     178           0 : }
     179             : 
     180             : static void
     181           0 : blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task)
     182             : {
     183             : 
     184           0 :         if (task->status) {
     185           0 :                 *task->status = status;
     186             :         }
     187             : 
     188           0 :         task->cb(status, task, task->cb_arg);
     189           0 : }
     190             : 
     191             : /*
     192             :  * Process task's descriptor chain and setup data related fields.
     193             :  * Return
     194             :  *   total size of supplied buffers
     195             :  *
     196             :  *   FIXME: Make this function return to rd_cnt and wr_cnt
     197             :  */
     198             : static int
     199           0 : blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
     200             :                            struct spdk_vhost_virtqueue *vq,
     201             :                            uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     202             : {
     203           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     204           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     205           0 :         struct vring_desc *desc, *desc_table;
     206           0 :         uint16_t out_cnt = 0, cnt = 0;
     207           0 :         uint32_t desc_table_size, len = 0;
     208             :         uint32_t desc_handled_cnt;
     209             :         int rc;
     210             : 
     211           0 :         rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
     212           0 :         if (rc != 0) {
     213           0 :                 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     214           0 :                 return -1;
     215             :         }
     216             : 
     217           0 :         desc_handled_cnt = 0;
     218             :         while (1) {
     219             :                 /*
     220             :                  * Maximum cnt reached?
     221             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     222             :                  */
     223           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     224           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     225             :                                       vsession->name, req_idx);
     226           0 :                         return -1;
     227             :                 }
     228             : 
     229           0 :                 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) {
     230           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     231             :                                       vsession->name, req_idx, cnt);
     232           0 :                         return -1;
     233             :                 }
     234             : 
     235           0 :                 len += desc->len;
     236             : 
     237           0 :                 out_cnt += vhost_vring_desc_is_wr(desc);
     238             : 
     239           0 :                 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
     240           0 :                 if (rc != 0) {
     241           0 :                         SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n",
     242             :                                     vsession->name, req_idx);
     243           0 :                         return -1;
     244           0 :                 } else if (desc == NULL) {
     245           0 :                         break;
     246             :                 }
     247             : 
     248           0 :                 desc_handled_cnt++;
     249           0 :                 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
     250             :                         /* Break a cycle and report an error, if any. */
     251           0 :                         SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n",
     252             :                                     vsession->name, desc_table_size, desc_handled_cnt);
     253           0 :                         return -1;
     254             :                 }
     255             :         }
     256             : 
     257             :         /*
     258             :          * There must be least two descriptors.
     259             :          * First contain request so it must be readable.
     260             :          * Last descriptor contain buffer for response so it must be writable.
     261             :          */
     262           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     263           0 :                 return -1;
     264             :         }
     265             : 
     266           0 :         *length = len;
     267           0 :         *iovs_cnt = cnt;
     268           0 :         return 0;
     269             : }
     270             : 
     271             : static int
     272           0 : blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
     273             :                            struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     274             :                            struct vring_packed_desc *desc_table, uint16_t desc_table_size,
     275             :                            struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     276             : {
     277           0 :         struct vring_packed_desc *desc;
     278           0 :         uint16_t cnt = 0, out_cnt = 0;
     279           0 :         uint32_t len = 0;
     280             : 
     281           0 :         if (desc_table == NULL) {
     282           0 :                 desc = &vq->vring.desc_packed[req_idx];
     283             :         } else {
     284           0 :                 req_idx = 0;
     285           0 :                 desc = desc_table;
     286             :         }
     287             : 
     288             :         while (1) {
     289             :                 /*
     290             :                  * Maximum cnt reached?
     291             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     292             :                  */
     293           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     294           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     295             :                                     vsession->name, req_idx);
     296           0 :                         return -EINVAL;
     297             :                 }
     298             : 
     299           0 :                 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
     300           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     301             :                                     vsession->name, req_idx, cnt);
     302           0 :                         return -EINVAL;
     303             :                 }
     304             : 
     305           0 :                 len += desc->len;
     306           0 :                 out_cnt += vhost_vring_packed_desc_is_wr(desc);
     307             : 
     308             :                 /* desc is NULL means we reach the last desc of this request */
     309           0 :                 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
     310           0 :                 if (desc == NULL) {
     311           0 :                         break;
     312             :                 }
     313             :         }
     314             : 
     315             :         /*
     316             :          * There must be least two descriptors.
     317             :          * First contain request so it must be readable.
     318             :          * Last descriptor contain buffer for response so it must be writable.
     319             :          */
     320           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     321           0 :                 return -EINVAL;
     322             :         }
     323             : 
     324           0 :         *length = len;
     325           0 :         *iovs_cnt = cnt;
     326             : 
     327           0 :         return 0;
     328             : }
     329             : 
     330             : static int
     331           0 : blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
     332             :                             struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     333             :                             struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     334             : {
     335           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     336           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     337           0 :         struct vring_packed_desc *desc = NULL, *desc_table;
     338           0 :         uint32_t desc_table_size;
     339             :         int rc;
     340             : 
     341           0 :         rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
     342             :                                       &desc_table, &desc_table_size);
     343           0 :         if (spdk_unlikely(rc != 0)) {
     344           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     345           0 :                 return rc;
     346             :         }
     347             : 
     348           0 :         return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     349             :                                           iovs, iovs_cnt, length);
     350             : }
     351             : 
     352             : static int
     353           0 : blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
     354             :                               struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     355             :                               struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     356             : {
     357           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     358           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     359           0 :         spdk_vhost_inflight_desc *inflight_desc;
     360           0 :         struct vring_packed_desc *desc_table;
     361           0 :         uint16_t out_cnt = 0, cnt = 0;
     362           0 :         uint32_t desc_table_size, len = 0;
     363           0 :         int rc = 0;
     364             : 
     365           0 :         rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
     366             :                                            req_idx, &inflight_desc, &desc_table, &desc_table_size);
     367           0 :         if (spdk_unlikely(rc != 0)) {
     368           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     369           0 :                 return rc;
     370             :         }
     371             : 
     372           0 :         if (desc_table != NULL) {
     373           0 :                 return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     374             :                                                   iovs, iovs_cnt, length);
     375             :         }
     376             : 
     377             :         while (1) {
     378             :                 /*
     379             :                  * Maximum cnt reached?
     380             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     381             :                  */
     382           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     383           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     384             :                                     vsession->name, req_idx);
     385           0 :                         return -EINVAL;
     386             :                 }
     387             : 
     388           0 :                 if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
     389           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     390             :                                     vsession->name, req_idx, cnt);
     391           0 :                         return -EINVAL;
     392             :                 }
     393             : 
     394           0 :                 len += inflight_desc->len;
     395           0 :                 out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);
     396             : 
     397             :                 /* Without F_NEXT means it's the last desc */
     398           0 :                 if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
     399           0 :                         break;
     400             :                 }
     401             : 
     402           0 :                 inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
     403             :         }
     404             : 
     405             :         /*
     406             :          * There must be least two descriptors.
     407             :          * First contain request so it must be readable.
     408             :          * Last descriptor contain buffer for response so it must be writable.
     409             :          */
     410           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     411           0 :                 return -EINVAL;
     412             :         }
     413             : 
     414           0 :         *length = len;
     415           0 :         *iovs_cnt = cnt;
     416             : 
     417           0 :         return 0;
     418             : }
     419             : 
     420             : static void
     421           0 : blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     422             : {
     423           0 :         struct spdk_vhost_blk_task *task = cb_arg;
     424             : 
     425           0 :         spdk_bdev_free_io(bdev_io);
     426           0 :         blk_request_finish(success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR, task);
     427           0 : }
     428             : 
     429             : static void
     430           0 : blk_request_resubmit(void *arg)
     431             : {
     432           0 :         struct spdk_vhost_blk_task *task = arg;
     433           0 :         int rc = 0;
     434             : 
     435           0 :         rc = virtio_blk_process_request(task->bdev_io_wait_vdev, task->bdev_io_wait_ch, task,
     436             :                                         task->cb, task->cb_arg);
     437           0 :         if (rc == 0) {
     438           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task);
     439             :         } else {
     440           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task);
     441             :         }
     442           0 : }
     443             : 
     444             : static inline void
     445           0 : blk_request_queue_io(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     446             :                      struct spdk_vhost_blk_task *task)
     447             : {
     448             :         int rc;
     449           0 :         struct spdk_bdev *bdev = vhost_blk_get_bdev(vdev);
     450             : 
     451           0 :         task->bdev_io_wait.bdev = bdev;
     452           0 :         task->bdev_io_wait.cb_fn = blk_request_resubmit;
     453           0 :         task->bdev_io_wait.cb_arg = task;
     454           0 :         task->bdev_io_wait_ch = ch;
     455           0 :         task->bdev_io_wait_vdev = vdev;
     456             : 
     457           0 :         rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait);
     458           0 :         if (rc != 0) {
     459           0 :                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     460             :         }
     461           0 : }
     462             : 
     463             : int
     464           0 : virtio_blk_process_request(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     465             :                            struct spdk_vhost_blk_task *task, virtio_blk_request_cb cb, void *cb_arg)
     466             : {
     467           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     468           0 :         struct virtio_blk_outhdr req;
     469             :         struct virtio_blk_discard_write_zeroes *desc;
     470             :         struct iovec *iov;
     471             :         uint32_t type;
     472             :         uint64_t flush_bytes;
     473             :         uint32_t payload_len;
     474             :         uint16_t iovcnt;
     475             :         int rc;
     476             : 
     477           0 :         assert(bvdev != NULL);
     478             : 
     479           0 :         task->cb = cb;
     480           0 :         task->cb_arg = cb_arg;
     481             : 
     482           0 :         iov = &task->iovs[0];
     483           0 :         if (spdk_unlikely(iov->iov_len != sizeof(req))) {
     484           0 :                 SPDK_DEBUGLOG(vhost_blk,
     485             :                               "First descriptor size is %zu but expected %zu (task = %p).\n",
     486             :                               iov->iov_len, sizeof(req), task);
     487           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     488           0 :                 return -1;
     489             :         }
     490             : 
     491             :         /* Some SeaBIOS versions don't align the virtio_blk_outhdr on an 8-byte boundary, which
     492             :          * triggers ubsan errors.  So copy this small 16-byte structure to the stack to workaround
     493             :          * this problem.
     494             :          */
     495           0 :         memcpy(&req, iov->iov_base, sizeof(req));
     496             : 
     497           0 :         iov = &task->iovs[task->iovcnt - 1];
     498           0 :         if (spdk_unlikely(iov->iov_len != 1)) {
     499           0 :                 SPDK_DEBUGLOG(vhost_blk,
     500             :                               "Last descriptor size is %zu but expected %d (task = %p).\n",
     501             :                               iov->iov_len, 1, task);
     502           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     503           0 :                 return -1;
     504             :         }
     505             : 
     506           0 :         payload_len = task->payload_size;
     507           0 :         task->status = iov->iov_base;
     508           0 :         payload_len -= sizeof(req) + sizeof(*task->status);
     509           0 :         iovcnt = task->iovcnt - 2;
     510             : 
     511           0 :         type = req.type;
     512             : #ifdef VIRTIO_BLK_T_BARRIER
     513             :         /* Don't care about barrier for now (as QEMU's virtio-blk do). */
     514           0 :         type &= ~VIRTIO_BLK_T_BARRIER;
     515             : #endif
     516             : 
     517           0 :         switch (type) {
     518           0 :         case VIRTIO_BLK_T_IN:
     519             :         case VIRTIO_BLK_T_OUT:
     520           0 :                 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) {
     521           0 :                         SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (task = %p).\n",
     522             :                                     type ? "WRITE" : "READ", task);
     523           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     524           0 :                         return -1;
     525             :                 }
     526             : 
     527           0 :                 if (type == VIRTIO_BLK_T_IN) {
     528           0 :                         task->used_len = payload_len + sizeof(*task->status);
     529           0 :                         rc = spdk_bdev_readv(bvdev->bdev_desc, ch,
     530           0 :                                              &task->iovs[1], iovcnt, req.sector * 512,
     531             :                                              payload_len, blk_request_complete_cb, task);
     532           0 :                 } else if (!bvdev->readonly) {
     533           0 :                         task->used_len = sizeof(*task->status);
     534           0 :                         rc = spdk_bdev_writev(bvdev->bdev_desc, ch,
     535           0 :                                               &task->iovs[1], iovcnt, req.sector * 512,
     536             :                                               payload_len, blk_request_complete_cb, task);
     537             :                 } else {
     538           0 :                         SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n");
     539           0 :                         rc = -1;
     540             :                 }
     541             : 
     542           0 :                 if (rc) {
     543           0 :                         if (rc == -ENOMEM) {
     544           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     545           0 :                                 blk_request_queue_io(vdev, ch, task);
     546             :                         } else {
     547           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     548           0 :                                 return -1;
     549             :                         }
     550             :                 }
     551           0 :                 break;
     552           0 :         case VIRTIO_BLK_T_DISCARD:
     553           0 :                 desc = task->iovs[1].iov_base;
     554           0 :                 if (payload_len != sizeof(*desc)) {
     555           0 :                         SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
     556           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     557           0 :                         return -1;
     558             :                 }
     559             : 
     560           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     561           0 :                         SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n");
     562           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     563           0 :                         return -1;
     564             :                 }
     565             : 
     566           0 :                 rc = spdk_bdev_unmap(bvdev->bdev_desc, ch,
     567           0 :                                      desc->sector * 512, desc->num_sectors * 512,
     568             :                                      blk_request_complete_cb, task);
     569           0 :                 if (rc) {
     570           0 :                         if (rc == -ENOMEM) {
     571           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     572           0 :                                 blk_request_queue_io(vdev, ch, task);
     573             :                         } else {
     574           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     575           0 :                                 return -1;
     576             :                         }
     577             :                 }
     578           0 :                 break;
     579           0 :         case VIRTIO_BLK_T_WRITE_ZEROES:
     580           0 :                 desc = task->iovs[1].iov_base;
     581           0 :                 if (payload_len != sizeof(*desc)) {
     582           0 :                         SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
     583           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     584           0 :                         return -1;
     585             :                 }
     586             : 
     587             :                 /* Unmap this range, SPDK doesn't support it, kernel will enable this flag by default
     588             :                  * without checking unmap feature is negotiated or not, the flag isn't mandatory, so
     589             :                  * just print a warning.
     590             :                  */
     591           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     592           0 :                         SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n",
     593             :                                      (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512);
     594             :                 }
     595             : 
     596           0 :                 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, ch,
     597           0 :                                             desc->sector * 512, desc->num_sectors * 512,
     598             :                                             blk_request_complete_cb, task);
     599           0 :                 if (rc) {
     600           0 :                         if (rc == -ENOMEM) {
     601           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     602           0 :                                 blk_request_queue_io(vdev, ch, task);
     603             :                         } else {
     604           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     605           0 :                                 return -1;
     606             :                         }
     607             :                 }
     608           0 :                 break;
     609           0 :         case VIRTIO_BLK_T_FLUSH:
     610           0 :                 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev);
     611           0 :                 if (req.sector != 0) {
     612           0 :                         SPDK_NOTICELOG("sector must be zero for flush command\n");
     613           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     614           0 :                         return -1;
     615             :                 }
     616           0 :                 rc = spdk_bdev_flush(bvdev->bdev_desc, ch,
     617             :                                      0, flush_bytes,
     618             :                                      blk_request_complete_cb, task);
     619           0 :                 if (rc) {
     620           0 :                         if (rc == -ENOMEM) {
     621           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     622           0 :                                 blk_request_queue_io(vdev, ch, task);
     623             :                         } else {
     624           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     625           0 :                                 return -1;
     626             :                         }
     627             :                 }
     628           0 :                 break;
     629           0 :         case VIRTIO_BLK_T_GET_ID:
     630           0 :                 if (!iovcnt || !payload_len) {
     631           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     632           0 :                         return -1;
     633             :                 }
     634           0 :                 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len);
     635           0 :                 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_name(bvdev->bdev),
     636           0 :                                 task->used_len, ' ');
     637           0 :                 blk_request_finish(VIRTIO_BLK_S_OK, task);
     638           0 :                 break;
     639           0 :         default:
     640           0 :                 SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type);
     641           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     642           0 :                 return -1;
     643             :         }
     644             : 
     645           0 :         return 0;
     646             : }
     647             : /*
                     :  * Process one request taken from a split virtqueue.
                     :  *
                     :  * req_idx selects the task slot inside vq->tasks. A slot that is already marked
                     :  * as used is logged as a duplicate and passed to blk_task_enqueue() with
                     :  * used_len = 0. Otherwise the descriptor chain is converted into iovecs and the
                     :  * request is handed to vhost_user_process_blk_request().
                     :  *
                     :  * NOTE(review): task->req_idx and task->bvsession are read but never assigned
                     :  * here; presumably they are set when the task array is allocated - confirm.
                     :  */
     648             : static void
     649           0 : process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     650             : {
     651             :         struct spdk_vhost_user_blk_task *task;
     652             :         struct spdk_vhost_blk_task *blk_task;
     653             :         int rc;
     654             :         /* This handler is for split rings only. */
     655           0 :         assert(vq->packed.packed_ring == false);
     656             : 
     657           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[req_idx];
     658           0 :         blk_task = &task->blk_task;
     659           0 :         if (spdk_unlikely(task->used)) {
     660           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     661             :                             task->bvsession->vsession.name, req_idx);
     662           0 :                 blk_task->used_len = 0;
     663           0 :                 blk_task_enqueue(task);
     664           0 :                 return;
     665             :         }
     666             : 
     667           0 :         blk_task_inc_task_cnt(task);
     668             : 
     669           0 :         blk_task_init(task);
     670             :         /* Fill blk_task->iovs, iovcnt and payload_size from the descriptor chain. */
     671           0 :         rc = blk_iovs_split_queue_setup(task->bvsession, vq, task->req_idx,
     672           0 :                                         blk_task->iovs, &blk_task->iovcnt, &blk_task->payload_size);
     673             : 
     674           0 :         if (rc) {
     675           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     676             :                 /* Descriptor chain setup failed; complete the request as unsupported. */
     677           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     678           0 :                 return;
     679             :         }
     680             : 
     681           0 :         if (vhost_user_process_blk_request(task) == 0) {
     682           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     683             :                               req_idx);
     684             :         } else {
     685           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, req_idx);
     686             :         }
     687             : }
     688             : /*
                     :  * Process one newly available request from a packed virtqueue.
                     :  *
                     :  * req_idx is passed as the ring position of the request. The task slot is
                     :  * selected by the buffer id returned by vhost_vring_packed_desc_get_buffer_id(),
                     :  * not by req_idx (the comment in the body explains why). The descriptor range is
                     :  * registered as inflight before the request is converted into iovecs and handed
                     :  * to vhost_user_process_blk_request().
                     :  */
     689             : static void
     690           0 : process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     691             : {
     692             :         struct spdk_vhost_user_blk_task *task;
     693             :         struct spdk_vhost_blk_task *blk_task;
     694           0 :         uint16_t task_idx = req_idx, num_descs;
     695             :         int rc;
     696             : 
     697           0 :         assert(vq->packed.packed_ring);
     698             : 
     699             :         /* The packed ring uses the buffer_id as the task_idx to look up the task struct.
     700             :          * The kernel driver sets the buffer_id from vq->free_head, so the value
     701             :          * must be in the range of 0 ~ vring.size. The free_head value must be unique
     702             :          * among the outstanding requests.
     703             :          * We can't use the req_idx as the task_idx because a desc can be reused in
     704             :          * the next phase even if it was not completed in the previous phase. For example,
     705             :          * at phase 0, last_used_idx was 2 and desc0 was not completed. Then, after moving to
     706             :          * phase 1, last_avail_idx is updated to 1. In this case, req_idx cannot be used
     707             :          * as task_idx because task[0]->used would still be true at phase 1.
     708             :          * The split queue is quite different: a desc is inserted into the free list when
     709             :          * the device completes the request, and the driver takes descs from that free list,
     710             :          * which ensures the req_idx is unique among the outstanding requests.
     711             :          */
     712           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
     713             : 
     714           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     715           0 :         blk_task = &task->blk_task;
     716           0 :         if (spdk_unlikely(task->used)) {
     717           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     718             :                             task->bvsession->vsession.name, task_idx);
     719           0 :                 blk_task->used_len = 0;
     720           0 :                 blk_task_enqueue(task);
     721           0 :                 return;
     722             :         }
     723             : 
     724           0 :         task->req_idx = req_idx;
     725           0 :         task->num_descs = num_descs;
     726           0 :         task->buffer_id = task_idx;
     727             :         /* Register descriptors req_idx .. req_idx + num_descs - 1 (wrapped at the ring size) as inflight. */
     728           0 :         rte_vhost_set_inflight_desc_packed(task->bvsession->vsession.vid, vq->vring_idx,
     729           0 :                                            req_idx, (req_idx + num_descs - 1) % vq->vring.size,
     730             :                                            &task->inflight_head);
     731             : 
     732           0 :         blk_task_inc_task_cnt(task);
     733             : 
     734           0 :         blk_task_init(task);
     735             : 
     736           0 :         rc = blk_iovs_packed_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     737             :                                          &blk_task->iovcnt,
     738             :                                          &blk_task->payload_size);
     739           0 :         if (rc) {
     740           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     741             :                 /* Descriptor chain setup failed; complete the request as unsupported. */
     742           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     743           0 :                 return;
     744             :         }
     745             :         /* Note: the messages below print task_idx (the buffer id) under the "req_idx" label. */
     746           0 :         if (vhost_user_process_blk_request(task) == 0) {
     747           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     748             :                               task_idx);
     749             :         } else {
     750           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     751             :         }
     752             : }
     753             : /*
                     :  * Resubmit one packed-ring request that is recorded in the inflight region.
                     :  *
                     :  * req_idx indexes vq->vring_inflight.inflight_packed->desc. The buffer id (used
                     :  * as the task slot index) is read from the entry at desc->last and the number of
                     :  * descriptors from desc->num. vq->last_avail_idx is advanced by that number,
                     :  * wrapping at vring.size and flipping packed.avail_phase on wrap.
                     :  *
                     :  * NOTE(review): desc->last and the id read through it are used as array indices
                     :  * without a range check in this function - confirm they are validated elsewhere.
                     :  */
     754             : static void
     755           0 : process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
     756             :                                  uint16_t req_idx)
     757             : {
     758           0 :         spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
     759           0 :         spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
     760             :         struct spdk_vhost_user_blk_task *task;
     761             :         struct spdk_vhost_blk_task *blk_task;
     762             :         uint16_t task_idx, num_descs;
     763             :         int rc;
     764             : 
     765           0 :         task_idx = desc_array[desc->last].id;
     766           0 :         num_descs = desc->num;
     767             :         /* In packed ring reconnection, we use the last_used_idx as the
     768             :          * initial value. So when we process the inflight descs we still
     769             :          * need to update the available ring index.
     770             :          */
     771           0 :         vq->last_avail_idx += num_descs;
     772           0 :         if (vq->last_avail_idx >= vq->vring.size) {
     773           0 :                 vq->last_avail_idx -= vq->vring.size;
     774           0 :                 vq->packed.avail_phase = !vq->packed.avail_phase;
     775             :         }
     776             : 
     777           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     778           0 :         blk_task = &task->blk_task;
     779           0 :         if (spdk_unlikely(task->used)) {
     780           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     781             :                             task->bvsession->vsession.name, task_idx);
     782           0 :                 blk_task->used_len = 0;
     783           0 :                 blk_task_enqueue(task);
     784           0 :                 return;
     785             :         }
     786             : 
     787           0 :         task->req_idx = req_idx;
     788           0 :         task->num_descs = num_descs;
     789           0 :         task->buffer_id = task_idx;
     790             :         /* It's for cleaning inflight entries */
     791           0 :         task->inflight_head = req_idx;
     792             : 
     793           0 :         blk_task_inc_task_cnt(task);
     794             : 
     795           0 :         blk_task_init(task);
     796             :         /* The iovecs are built by the inflight-specific setup helper, not the packed-ring one. */
     797           0 :         rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     798             :                                            &blk_task->iovcnt,
     799             :                                            &blk_task->payload_size);
     800           0 :         if (rc) {
     801           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     802             :                 /* Descriptor chain setup failed; complete the request as unsupported. */
     803           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     804           0 :                 return;
     805             :         }
     806             : 
     807           0 :         if (vhost_user_process_blk_request(task) == 0) {
     808           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     809             :                               task_idx);
     810             :         } else {
     811           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     812             :         }
     813             : }
     814             : /*
                     :  * Resubmit the requests listed in vq->vring_inflight.resubmit_inflight.
                     :  *
                     :  * Returns 0 when there is no resubmit info, no list, or an empty list. Otherwise
                     :  * every list entry is processed, resubmit_num is reset to 0 and its previous
                     :  * value is returned. Entries whose index is not below vring.size are logged and
                     :  * returned through the used ring with length 0; they are still included in the
                     :  * returned count.
                     :  */
     815             : static int
     816           0 : submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
     817             :                      struct spdk_vhost_virtqueue *vq)
     818             : {
     819             :         struct spdk_vhost_session *vsession;
     820             :         spdk_vhost_resubmit_info *resubmit;
     821             :         spdk_vhost_resubmit_desc *resubmit_list;
     822             :         uint16_t req_idx;
     823             :         int i, resubmit_cnt;
     824             : 
     825           0 :         resubmit = vq->vring_inflight.resubmit_inflight;
     826           0 :         if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL ||
     827             :                         resubmit->resubmit_num == 0)) {
     828           0 :                 return 0;
     829             :         }
     830             : 
     831           0 :         resubmit_list = resubmit->resubmit_list;
     832           0 :         vsession = &bvsession->vsession;
     833             :         /* The list is walked from its last entry down to its first. */
     834           0 :         for (i = resubmit->resubmit_num - 1; i >= 0; --i) {
     835           0 :                 req_idx = resubmit_list[i].index;
     836           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Start processing resubmit request idx %"PRIu16"======\n",
     837             :                               req_idx);
     838             : 
     839           0 :                 if (spdk_unlikely(req_idx >= vq->vring.size)) {
     840           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     841             :                                     vsession->name, req_idx, vq->vring.size);
     842           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
     843           0 :                         continue;
     844             :                 }
     845             : 
     846           0 :                 if (vq->packed.packed_ring) {
     847           0 :                         process_packed_inflight_blk_task(vq, req_idx);
     848             :                 } else {
     849           0 :                         process_blk_task(vq, req_idx);
     850             :                 }
     851             :         }
     852           0 :         resubmit_cnt = resubmit->resubmit_num;
     853           0 :         resubmit->resubmit_num = 0;
     854           0 :         return resubmit_cnt;
     855             : }
     856             : /*
                     :  * Poll one split virtqueue.
                     :  *
                     :  * Inflight requests are resubmitted first, then up to
                     :  * SPDK_VHOST_VQ_MAX_SUBMISSIONS new request indices are fetched from the avail
                     :  * ring and processed. Returns the number of newly fetched requests, or the
                     :  * resubmit count when no new request was available.
                     :  */
     857             : static int
     858           0 : process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     859             : {
     860           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     861           0 :         uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS];
     862             :         uint16_t reqs_cnt, i;
     863           0 :         int resubmit_cnt = 0;
     864             : 
     865           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     866             : 
     867           0 :         reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
     868           0 :         if (!reqs_cnt) {
     869           0 :                 return resubmit_cnt;
     870             :         }
     871             : 
     872           0 :         for (i = 0; i < reqs_cnt; i++) {
     873           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     874             :                               reqs[i]);
     875             :                 /* An out-of-range index is returned through the used ring with length 0. */
     876           0 :                 if (spdk_unlikely(reqs[i] >= vq->vring.size)) {
     877           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     878             :                                     vsession->name, reqs[i], vq->vring.size);
     879           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
     880           0 :                         continue;
     881             :                 }
     882             :                 /* Register the descriptor as inflight before the request is processed. */
     883           0 :                 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]);
     884             : 
     885           0 :                 process_blk_task(vq, reqs[i]);
     886             :         }
     887             : 
     888           0 :         return reqs_cnt;
     889             : }
     890             : /*
                     :  * Poll one packed virtqueue.
                     :  *
                     :  * Inflight requests are resubmitted first. Then, while the ring reports an
                     :  * available descriptor and fewer than SPDK_VHOST_VQ_MAX_SUBMISSIONS iterations
                     :  * have run, the request at vq->last_avail_idx is processed. Returns the number of
                     :  * new requests processed, or the resubmit count when that number is 0.
                     :  *
                     :  * NOTE(review): the loop itself never changes vq->last_avail_idx; presumably it
                     :  * is advanced inside the call chain of process_packed_blk_task() - confirm.
                     :  */
     891             : static int
     892           0 : process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     893             : {
     894           0 :         uint16_t i = 0;
     895           0 :         uint16_t count = 0;
     896           0 :         int resubmit_cnt = 0;
     897             : 
     898           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     899             : 
     900           0 :         while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
     901           0 :                vhost_vq_packed_ring_is_avail(vq)) {
     902           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     903             :                               vq->last_avail_idx);
     904           0 :                 count++;
     905           0 :                 process_packed_blk_task(vq, vq->last_avail_idx);
     906             :         }
     907             : 
     908           0 :         return count > 0 ? count : resubmit_cnt;
     909             : }
     910             : /*
                     :  * Poll a single virtqueue: dispatch to the packed or split handler depending on
                     :  * vq->packed.packed_ring, then call vhost_session_vq_used_signal() for the queue.
                     :  * Returns the value reported by the handler.
                     :  */
     911             : static int
     912           0 : _vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
     913             : {
     914           0 :         struct spdk_vhost_session *vsession = vq->vsession;
     915           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
     916             :         bool packed_ring;
     917           0 :         int rc = 0;
     918             : 
     919           0 :         packed_ring = vq->packed.packed_ring;
     920           0 :         if (packed_ring) {
     921           0 :                 rc = process_packed_vq(bvsession, vq);
     922             :         } else {
     923           0 :                 rc = process_vq(bvsession, vq);
     924             :         }
     925             :         /* The signal call is made on every poll, whether or not any request was processed. */
     926           0 :         vhost_session_vq_used_signal(vq);
     927             : 
     928           0 :         return rc;
     929             : 
     930             : }
     931             : /* Adapter taking a void pointer: arg is the virtqueue to poll; forwards to _vdev_vq_worker(). */
     932             : static int
     933           0 : vdev_vq_worker(void *arg)
     934             : {
     935           0 :         struct spdk_vhost_virtqueue *vq = arg;
     936             : 
     937           0 :         return _vdev_vq_worker(vq);
     938             : }
     939             : /*
                     :  * Poll every virtqueue of a session (indices 0 .. max_queues - 1). arg is the
                     :  * struct spdk_vhost_blk_session. Returns SPDK_POLLER_BUSY when the per-queue
                     :  * results sum to more than 0, SPDK_POLLER_IDLE otherwise.
                     :  */
     940             : static int
     941           0 : vdev_worker(void *arg)
     942             : {
     943           0 :         struct spdk_vhost_blk_session *bvsession = arg;
     944           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     945             :         uint16_t q_idx;
     946           0 :         int rc = 0;
     947             : 
     948           0 :         for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
     949           0 :                 rc += _vdev_vq_worker(&vsession->virtqueue[q_idx]);
     950             :         }
     951             : 
     952           0 :         return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
     953             : }
     954             : /*
                     :  * Fail one request from a split virtqueue (used by the no_bdev workers).
                     :  *
                     :  * At most one request index is fetched per call. When its descriptor chain can
                     :  * be converted into iovecs, VIRTIO_BLK_S_IOERR is written to the first byte of
                     :  * the last iovec (presumably the status byte of the request). The request is
                     :  * then returned through the used ring with length 0, whether or not the status
                     :  * could be written.
                     :  */
     955             : static void
     956           0 : no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     957             : {
     958           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     959           0 :         struct iovec iovs[SPDK_VHOST_IOVS_MAX];
     960           0 :         uint32_t length;
     961           0 :         uint16_t iovcnt, req_idx;
     962             : 
     963           0 :         if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) {
     964           0 :                 return;
     965             :         }
     966             :         /* iovcnt is passed by pointer, preloaded with the capacity of iovs. */
     967           0 :         iovcnt = SPDK_COUNTOF(iovs);
     968           0 :         if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) {
     969           0 :                 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR;
     970           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
     971             :         }
     972             : 
     973           0 :         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
     974             : }
     975             : /*
                     :  * Fail one request from a packed virtqueue (used by the no_bdev workers).
                     :  *
                     :  * Nothing is done when the ring has no available descriptor. Otherwise the task
                     :  * slot is looked up by buffer id; a slot that is already marked as used is
                     :  * logged and put back on the ring with its stored buffer_id, used_len and
                     :  * inflight_head. For a free slot the descriptor chain is converted into iovecs,
                     :  * VIRTIO_BLK_S_IOERR is written to the first byte of the last iovec when that
                     :  * conversion succeeds, and the request is returned through the packed ring.
                     :  *
                     :  * NOTE(review): task->inflight_head is not assigned in this function, so the
                     :  * value passed to vhost_vq_packed_ring_enqueue() is whatever the slot held
                     :  * before - confirm this is intended.
                     :  */
     976             : static void
     977           0 : no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     978             : {
     979           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     980             :         struct spdk_vhost_user_blk_task *task;
     981             :         struct spdk_vhost_blk_task *blk_task;
     982           0 :         uint32_t length;
     983           0 :         uint16_t req_idx = vq->last_avail_idx;
     984           0 :         uint16_t task_idx, num_descs;
     985             : 
     986           0 :         if (!vhost_vq_packed_ring_is_avail(vq)) {
     987           0 :                 return;
     988             :         }
     989             : 
     990           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
     991           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     992           0 :         blk_task = &task->blk_task;
     993           0 :         if (spdk_unlikely(task->used)) {
     994           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     995             :                             vsession->name, task_idx);
     996           0 :                 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
     997           0 :                                              task->buffer_id, blk_task->used_len,
     998           0 :                                              task->inflight_head);
     999           0 :                 return;
    1000             :         }
    1001             : 
    1002           0 :         task->req_idx = req_idx;
    1003           0 :         task->num_descs = num_descs;
    1004           0 :         task->buffer_id = task_idx;
    1005           0 :         blk_task_init(task);
    1006             :         /* As in no_bdev_process_vq(), the status byte is written only when setup returned 0. */
    1007           0 :         if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, blk_task->iovs, &blk_task->iovcnt,
    1008             :                                         &length) == 0) {
    1009           0 :                 *(volatile uint8_t *)(blk_task->iovs[blk_task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR;
    1010           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
    1011             :         }
    1012             :         /* The slot is marked free again before the request is returned to the ring. */
    1013           0 :         task->used = false;
    1014           0 :         vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
    1015           0 :                                      task->buffer_id, blk_task->used_len,
    1016           0 :                                      task->inflight_head);
    1017             : }
    1018             : /*
                     :  * Poll a single virtqueue in no_bdev mode: call the packed or split no_bdev
                     :  * handler, then vhost_session_vq_used_signal(). Once vsession->task_cnt is 0 and
                     :  * an I/O channel is still held, the channel is released and the pointer cleared.
                     :  * Always returns SPDK_POLLER_BUSY.
                     :  */
    1019             : static int
    1020           0 : _no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
    1021             : {
    1022           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1023           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1024             :         bool packed_ring;
    1025             : 
    1026           0 :         packed_ring = vq->packed.packed_ring;
    1027           0 :         if (packed_ring) {
    1028           0 :                 no_bdev_process_packed_vq(bvsession, vq);
    1029             :         } else {
    1030           0 :                 no_bdev_process_vq(bvsession, vq);
    1031             :         }
    1032             : 
    1033           0 :         vhost_session_vq_used_signal(vq);
    1034             :         /* Release the I/O channel only when the session's task count has dropped to zero. */
    1035           0 :         if (vsession->task_cnt == 0 && bvsession->io_channel) {
    1036           0 :                 vhost_blk_put_io_channel(bvsession->io_channel);
    1037           0 :                 bvsession->io_channel = NULL;
    1038             :         }
    1039             : 
    1040           0 :         return SPDK_POLLER_BUSY;
    1041             : }
    1042             : /* Adapter taking a void pointer: arg is the virtqueue to poll; forwards to _no_bdev_vdev_vq_worker(). */
    1043             : static int
    1044           0 : no_bdev_vdev_vq_worker(void *arg)
    1045             : {
    1046           0 :         struct spdk_vhost_virtqueue *vq = arg;
    1047             : 
    1048           0 :         return _no_bdev_vdev_vq_worker(vq);
    1049             : }
    1050             : 
                       /* Session-wide worker for the "no bdev" path; arg is the
                        * struct spdk_vhost_blk_session. Runs _no_bdev_vdev_vq_worker() on each
                        * of the session's max_queues virtqueues and always returns
                        * SPDK_POLLER_BUSY (per-queue return values are ignored).
                        */
    1051             : static int
    1052           0 : no_bdev_vdev_worker(void *arg)
    1053             : {
    1054           0 :         struct spdk_vhost_blk_session *bvsession = arg;
    1055           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1056             :         uint16_t q_idx;
    1057             : 
    1058           0 :         for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
    1059           0 :                 _no_bdev_vdev_vq_worker(&vsession->virtqueue[q_idx]);
    1060             :         }
    1061             : 
    1062           0 :         return SPDK_POLLER_BUSY;
    1063             : }
    1064             : 
                       /* Unregister the kickfd interrupt handler of the session's virtqueues,
                        * walking them in index order and stopping at the first queue whose
                        * intr pointer is NULL.
                        *
                        * NOTE(review): because of the break, a queue with a registered handler
                        * that follows a queue without one is left registered. This matches the
                        * all-or-prefix registration done by
                        * vhost_blk_session_register_no_bdev_interrupts(), but handlers are also
                        * registered per queue via vhost_blk_vq_enable() - confirm that path can
                        * never leave a gap.
                        */
    1065             : static void
    1066           0 : vhost_blk_session_unregister_interrupts(struct spdk_vhost_blk_session *bvsession)
    1067             : {
    1068           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1069             :         struct spdk_vhost_virtqueue *vq;
    1070             :         int i;
    1071             : 
    1072           0 :         SPDK_DEBUGLOG(vhost_blk, "unregister virtqueues interrupt\n");
    1073           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1074           0 :                 vq = &vsession->virtqueue[i];
    1075           0 :                 if (vq->intr == NULL) {
    1076           0 :                         break;
    1077             :                 }
    1078             : 
    1079           0 :                 SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n",
    1080             :                               i, vq->vring.kickfd);
    1081           0 :                 spdk_interrupt_unregister(&vq->intr);
    1082             :         }
    1083           0 : }
    1084             : 
                       /* Message handler that registers an interrupt on a virtqueue's kickfd;
                        * arg is the struct spdk_vhost_virtqueue.
                        *
                        * The handler is vdev_vq_worker when the device currently has a bdev and
                        * no_bdev_vdev_vq_worker otherwise. The result is stored in vq->intr.
                        *
                        * NOTE(review): a registration failure is logged and then asserted on;
                        * in builds where assert() is compiled out the function just returns
                        * with vq->intr == NULL. Any previous value of vq->intr is overwritten
                        * without being unregistered - confirm callers never invoke this twice
                        * for the same queue.
                        */
    1085             : static void
    1086           0 : _vhost_blk_vq_register_interrupt(void *arg)
    1087             : {
    1088           0 :         struct spdk_vhost_virtqueue *vq = arg;
    1089           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1090           0 :         struct spdk_vhost_blk_dev *bvdev =  to_blk_dev(vsession->vdev);
    1091             : 
    1092           0 :         assert(bvdev != NULL);
    1093             : 
    1094           0 :         if (bvdev->bdev) {
    1095           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, vdev_vq_worker, vq, "vdev_vq_worker");
    1096             :         } else {
    1097           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1098             :                                                    "no_bdev_vdev_vq_worker");
    1099             :         }
    1100             : 
    1101           0 :         if (vq->intr == NULL) {
    1102           0 :                 SPDK_ERRLOG("Fail to register req notifier handler.\n");
    1103           0 :                 assert(false);
    1104             :         }
    1105           0 : }
    1106             : 
                       /* enable_vq callback of the vhost-user blk backend.
                        *
                        * When SPDK interrupt mode is enabled, sends a message to the device's
                        * thread asking it to run _vhost_blk_vq_register_interrupt() for vq.
                        * Nothing is done otherwise. Always returns 0; the registration itself
                        * happens later in the message handler, so its outcome is not reported
                        * here.
                        */
    1107             : static int
    1108           0 : vhost_blk_vq_enable(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq)
    1109             : {
    1110           0 :         if (spdk_interrupt_mode_is_enabled()) {
    1111           0 :                 spdk_thread_send_msg(vsession->vdev->thread, _vhost_blk_vq_register_interrupt, vq);
    1112             :         }
    1113             : 
    1114           0 :         return 0;
    1115             : }
    1116             : 
                       /* Register no_bdev_vdev_vq_worker as the kickfd interrupt handler of
                        * every virtqueue of the session, in index order.
                        *
                        * Returns 0 when all queues were registered. On the first failure the
                        * queues registered so far are unregistered again through
                        * vhost_blk_session_unregister_interrupts() and -1 is returned.
                        */
    1117             : static int
    1118           0 : vhost_blk_session_register_no_bdev_interrupts(struct spdk_vhost_blk_session *bvsession)
    1119             : {
    1120           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1121           0 :         struct spdk_vhost_virtqueue *vq = NULL;
    1122             :         int i;
    1123             : 
    1124           0 :         SPDK_DEBUGLOG(vhost_blk, "Register virtqueues interrupt\n");
    1125           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1126           0 :                 vq = &vsession->virtqueue[i];
    1127           0 :                 SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n",
    1128             :                               i, vq->vring.kickfd);
    1129           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1130             :                                                    "no_bdev_vdev_vq_worker");
    1131           0 :                 if (vq->intr == NULL) {
    1132           0 :                         goto err;
    1133             :                 }
    1134             : 
    1135             :         }
    1136             : 
    1137           0 :         return 0;
    1138             : 
    1139           0 : err:
    1140           0 :         vhost_blk_session_unregister_interrupts(bvsession);
    1141           0 :         return -1;
    1142             : }
    1143             : 
                       /* Callback given to spdk_poller_register_interrupt() for the session's
                        * request poller. cb_arg is the struct spdk_vhost_blk_session; the
                        * requested mode is forwarded to vhost_user_session_set_interrupt_mode().
                        * The poller argument is not used.
                        */
    1144             : static void
    1145           0 : vhost_blk_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1146             : {
    1147           0 :         struct spdk_vhost_blk_session *bvsession = cb_arg;
    1148             : 
    1149           0 :         vhost_user_session_set_interrupt_mode(&bvsession->vsession, interrupt_mode);
    1150           0 : }
    1151             : 
                       /* Completion callback handed to the transport's bdev_event op by
                        * bdev_event_cb(). ctx carries the enum spdk_bdev_event_type value
                        * encoded in the pointer itself.
                        *
                        * For SPDK_BDEV_EVENT_REMOVE the bdev descriptor is closed and both
                        * bdev_desc and bdev are cleared on the blk device. Other event types
                        * need no completion work here.
                        */
    1152             : static void
    1153           0 : bdev_event_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx)
    1154             : {
    1155           0 :         enum spdk_bdev_event_type type = (enum spdk_bdev_event_type)(uintptr_t)ctx;
    1156             :         struct spdk_vhost_blk_dev *bvdev;
    1157             : 
    1158           0 :         if (type == SPDK_BDEV_EVENT_REMOVE) {
    1159             :                 /* All sessions have been notified, time to close the bdev */
    1160           0 :                 bvdev = to_blk_dev(vdev);
    1161           0 :                 assert(bvdev != NULL);
    1162           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1163           0 :                 bvdev->bdev_desc = NULL;
    1164           0 :                 bvdev->bdev = NULL;
    1165             :         }
    1166           0 : }
    1167             : 
                       /* Per-session callback for a bdev resize: logs a notice and issues a
                        * config-change notification for the session's vid. The DPDK function
                        * used depends on RTE_VERSION (rte_vhost_backend_config_change from
                        * 23.03 on, rte_vhost_slave_config_change before). The notification's
                        * return value is ignored and 0 is always returned; vdev and ctx are
                        * not used.
                        */
    1168             : static int
    1169           0 : vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev,
    1170             :                              struct spdk_vhost_session *vsession,
    1171             :                              void *ctx)
    1172             : {
    1173           0 :         SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid);
    1174             : #if RTE_VERSION >= RTE_VERSION_NUM(23, 03, 0, 0)
    1175           0 :         rte_vhost_backend_config_change(vsession->vid, false);
    1176             : #else
    1177             :         rte_vhost_slave_config_change(vsession->vid, false);
    1178             : #endif
    1179             : 
    1180           0 :         return 0;
    1181             : }
    1182             : 
                       /* vhost-user handling of SPDK_BDEV_EVENT_RESIZE: passes
                        * vhost_session_bdev_resize_cb, together with the caller's cb and
                        * cb_arg, to vhost_user_dev_foreach_session() for this device.
                        */
    1183             : static void
    1184           0 : vhost_user_blk_resize_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1185             : {
    1186           0 :         vhost_user_dev_foreach_session(vdev, vhost_session_bdev_resize_cb,
    1187             :                                        cb, cb_arg);
    1188           0 : }
    1189             : 
                       /* Per-session callback for a bdev hot-remove: switches a running session
                        * (one that has a request poller) over to the "no bdev" workers.
                        *
                        * The current request poller is unregistered. In interrupt mode the
                        * per-queue interrupt handlers are unregistered and re-registered with
                        * the no-bdev handler. Finally no_bdev_vdev_worker is registered as the
                        * new request poller with period 0. Sessions without a request poller
                        * are left untouched.
                        *
                        * Returns 0 on success, or the error from
                        * vhost_blk_session_register_no_bdev_interrupts().
                        *
                        * NOTE(review): on that error path the old poller has already been
                        * unregistered and no replacement is installed, so the session is left
                        * with no request poller. The result of SPDK_POLLER_REGISTER is also
                        * not checked before it is passed to spdk_poller_register_interrupt().
                        */
    1190             : static int
    1191           0 : vhost_user_session_bdev_remove_cb(struct spdk_vhost_dev *vdev,
    1192             :                                   struct spdk_vhost_session *vsession,
    1193             :                                   void *ctx)
    1194             : {
    1195             :         struct spdk_vhost_blk_session *bvsession;
    1196             :         int rc;
    1197             : 
    1198           0 :         bvsession = to_blk_session(vsession);
    1199           0 :         if (bvsession->requestq_poller) {
    1200           0 :                 spdk_poller_unregister(&bvsession->requestq_poller);
    1201           0 :                 if (spdk_interrupt_mode_is_enabled()) {
    1202           0 :                         vhost_blk_session_unregister_interrupts(bvsession);
    1203           0 :                         rc = vhost_blk_session_register_no_bdev_interrupts(bvsession);
    1204           0 :                         if (rc) {
    1205           0 :                                 SPDK_ERRLOG("%s: Interrupt register failed\n", vsession->name);
    1206           0 :                                 return rc;
    1207             :                         }
    1208             :                 }
    1209             : 
    1210           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
    1211           0 :                 spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
    1212             :                                                bvsession);
    1213             :         }
    1214             : 
    1215           0 :         return 0;
    1216             : }
    1217             : 
                       /* vhost-user handling of SPDK_BDEV_EVENT_REMOVE: logs a warning naming
                        * the device, then passes vhost_user_session_bdev_remove_cb, together
                        * with the caller's cb and cb_arg, to vhost_user_dev_foreach_session().
                        */
    1218             : static void
    1219           0 : vhost_user_bdev_remove_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1220             : {
    1221           0 :         SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n",
    1222             :                      vdev->name);
    1223             : 
    1224           0 :         vhost_user_dev_foreach_session(vdev, vhost_user_session_bdev_remove_cb,
    1225             :                                        cb, cb_arg);
    1226           0 : }
    1227             : 
                       /* Dispatch a bdev event for a vhost-user blk device: REMOVE goes to
                        * vhost_user_bdev_remove_cb(), RESIZE to vhost_user_blk_resize_cb().
                        * Any other type trips an assert; with asserts compiled out the function
                        * returns without invoking cb.
                        */
    1228             : static void
    1229           0 : vhost_user_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_vhost_dev *vdev,
    1230             :                          bdev_event_cb_complete cb, void *cb_arg)
    1231             : {
    1232           0 :         switch (type) {
    1233           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1234           0 :                 vhost_user_bdev_remove_cb(vdev, cb, cb_arg);
    1235           0 :                 break;
    1236           0 :         case SPDK_BDEV_EVENT_RESIZE:
    1237           0 :                 vhost_user_blk_resize_cb(vdev, cb, cb_arg);
    1238           0 :                 break;
    1239           0 :         default:
    1240           0 :                 assert(false);
    1241             :                 return;
    1242             :         }
    1243             : }
    1244             : 
                       /* bdev event callback; event_ctx is the struct spdk_vhost_dev that owns
                        * the bdev.
                        *
                        * REMOVE and RESIZE events are forwarded to the transport's bdev_event
                        * op with bdev_event_cpl_cb as completion and the event type smuggled
                        * through the context pointer. Other event types are only logged.
                        *
                        * NOTE(review): the type is encoded with a direct (void *) cast while
                        * bdev_event_cpl_cb() decodes it via uintptr_t; casting through
                        * uintptr_t on both sides would avoid an int-to-pointer size warning.
                        */
    1245             : static void
    1246           0 : bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
    1247             :               void *event_ctx)
    1248             : {
    1249           0 :         struct spdk_vhost_dev *vdev = (struct spdk_vhost_dev *)event_ctx;
    1250           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1251             : 
    1252           0 :         assert(bvdev != NULL);
    1253             : 
    1254           0 :         SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n",
    1255             :                       type,
    1256             :                       bdev->name);
    1257             : 
    1258           0 :         switch (type) {
    1259           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1260             :         case SPDK_BDEV_EVENT_RESIZE:
    1261           0 :                 bvdev->ops->bdev_event(type, vdev, bdev_event_cpl_cb, (void *)type);
    1262           0 :                 break;
    1263           0 :         default:
    1264           0 :                 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
    1265           0 :                 break;
    1266             :         }
    1267           0 : }
    1268             : 
                       /* Free the task array of every virtqueue of the session with spdk_free()
                        * and reset the pointer to NULL. Queues that have no task array are
                        * skipped, so calling this repeatedly is harmless.
                        */
    1269             : static void
    1270           0 : free_task_pool(struct spdk_vhost_blk_session *bvsession)
    1271             : {
    1272           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1273             :         struct spdk_vhost_virtqueue *vq;
    1274             :         uint16_t i;
    1275             : 
    1276           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1277           0 :                 vq = &vsession->virtqueue[i];
    1278           0 :                 if (vq->tasks == NULL) {
    1279           0 :                         continue;
    1280             :                 }
    1281             : 
    1282           0 :                 spdk_free(vq->tasks);
    1283           0 :                 vq->tasks = NULL;
    1284             :         }
    1285           0 : }
    1286             : 
                       /* alloc_vq_tasks callback: allocate and initialise the task array of
                        * virtqueue qid.
                        *
                        * One zeroed struct spdk_vhost_user_blk_task is allocated per ring entry
                        * (vq->vring.size), cache-line aligned, and each task is bound to its
                        * session, its index in the array (req_idx) and its virtqueue.
                        *
                        * Returns:
                        *   0        on success, or when the queue has no descriptor ring yet
                        *            (nothing is allocated in that case);
                        *   -EINVAL  when qid >= SPDK_VHOST_MAX_VQUEUES;
                        *   -1       when the ring is larger than SPDK_VHOST_MAX_VQ_SIZE or the
                        *            allocation fails.
                        *
                        * NOTE(review): vq->tasks is overwritten unconditionally; confirm callers
                        * never invoke this for a queue that still owns a task array. The error
                        * returns mix -EINVAL with a bare -1.
                        */
    1287             : static int
    1288           0 : alloc_vq_task_pool(struct spdk_vhost_session *vsession, uint16_t qid)
    1289             : {
    1290           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1291             :         struct spdk_vhost_virtqueue *vq;
    1292             :         struct spdk_vhost_user_blk_task *task;
    1293             :         uint32_t task_cnt;
    1294             :         uint32_t j;
    1295             : 
    1296           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
    1297           0 :                 return -EINVAL;
    1298             :         }
    1299             : 
    1300           0 :         vq = &vsession->virtqueue[qid];
    1301           0 :         if (vq->vring.desc == NULL) {
    1302           0 :                 return 0;
    1303             :         }
    1304             : 
    1305           0 :         task_cnt = vq->vring.size;
    1306           0 :         if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) {
    1307             :                 /* sanity check */
    1308           0 :                 SPDK_ERRLOG("%s: virtqueue %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n",
    1309             :                             vsession->name, qid, task_cnt, SPDK_VHOST_MAX_VQ_SIZE);
    1310           0 :                 return -1;
    1311             :         }
    1312           0 :         vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_user_blk_task) * task_cnt,
    1313             :                                  SPDK_CACHE_LINE_SIZE, NULL,
    1314             :                                  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    1315           0 :         if (vq->tasks == NULL) {
    1316           0 :                 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n",
    1317             :                             vsession->name, task_cnt, qid);
    1318           0 :                 return -1;
    1319             :         }
    1320             : 
    1321           0 :         for (j = 0; j < task_cnt; j++) {
    1322           0 :                 task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[j];
    1323           0 :                 task->bvsession = bvsession;
    1324           0 :                 task->req_idx = j;
    1325           0 :                 task->vq = vq;
    1326             :         }
    1327             : 
    1328           0 :         return 0;
    1329             : }
    1330             : 
                       /* start_session callback of the vhost-user blk backend.
                        *
                        * Steps:
                        *   1. refuse with -EINPROGRESS if the session already has a request
                        *      poller;
                        *   2. require every one of the max_queues virtqueues to have a
                        *      descriptor ring, returning -1 otherwise;
                        *   3. bind the session to its blk device and, when a bdev is attached,
                        *      obtain an I/O channel (on failure the task pools are freed and -1
                        *      is returned);
                        *   4. register the request poller (vdev_worker with a bdev,
                        *      no_bdev_vdev_worker without) with period 0 and attach the
                        *      interrupt-mode callback to it.
                        *
                        * Returns 0 on success. The third parameter is unused.
                        *
                        * NOTE(review): i is an int but is printed with PRIu32 in the "queue is
                        * empty" message; the result of SPDK_POLLER_REGISTER is not checked.
                        */
    1331             : static int
    1332           0 : vhost_blk_start(struct spdk_vhost_dev *vdev,
    1333             :                 struct spdk_vhost_session *vsession, void *unused)
    1334             : {
    1335           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1336             :         struct spdk_vhost_blk_dev *bvdev;
    1337             :         int i;
    1338             : 
    1339             :         /* return if start is already in progress */
    1340           0 :         if (bvsession->requestq_poller) {
    1341           0 :                 SPDK_INFOLOG(vhost, "%s: start in progress\n", vsession->name);
    1342           0 :                 return -EINPROGRESS;
    1343             :         }
    1344             : 
    1345             :         /* validate all I/O queues are in a contiguous index range */
    1346           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1347             :                 /* vring.desc and vring.desc_packed are in a union struct
    1348             :                  * so q->vring.desc can replace q->vring.desc_packed.
    1349             :                  */
    1350           0 :                 if (vsession->virtqueue[i].vring.desc == NULL) {
    1351           0 :                         SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i);
    1352           0 :                         return -1;
    1353             :                 }
    1354             :         }
    1355             : 
    1356           0 :         bvdev = to_blk_dev(vdev);
    1357           0 :         assert(bvdev != NULL);
    1358           0 :         bvsession->bvdev = bvdev;
    1359             : 
    1360           0 :         if (bvdev->bdev) {
    1361           0 :                 bvsession->io_channel = vhost_blk_get_io_channel(vdev);
    1362           0 :                 if (!bvsession->io_channel) {
    1363           0 :                         free_task_pool(bvsession);
    1364           0 :                         SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name);
    1365           0 :                         return -1;
    1366             :                 }
    1367             :         }
    1368             : 
    1369           0 :         if (bvdev->bdev) {
    1370           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(vdev_worker, bvsession, 0);
    1371             :         } else {
    1372           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
    1373             :         }
    1374           0 :         SPDK_INFOLOG(vhost, "%s: started poller on lcore %d\n",
    1375             :                      vsession->name, spdk_env_get_current_core());
    1376             : 
    1377           0 :         spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
    1378             :                                        bvsession);
    1379             : 
    1380           0 :         return 0;
    1381             : }
    1382             : 
                       /* Stop poller registered by vhost_blk_stop(); arg is the
                        * struct spdk_vhost_blk_session being torn down.
                        *
                        * Each invocation checks whether the session still has outstanding tasks
                        * and, if not, tries to take the user device lock without blocking. While
                        * either condition fails the retry counter is decremented; when it hits
                        * zero the poller unregisters itself and the stop is completed with
                        * -ETIMEDOUT.
                        *
                        * Once the session is idle and the lock is held, every queue's
                        * next_event_time is reset and vhost_vq_used_signal() is called for it,
                        * the I/O channel is released, the task pools are freed, the poller
                        * unregisters itself and the stop is completed with 0 before the lock is
                        * dropped.
                        *
                        * Always returns SPDK_POLLER_BUSY.
                        */
    1383             : static int
    1384           0 : destroy_session_poller_cb(void *arg)
    1385             : {
    1386           0 :         struct spdk_vhost_blk_session *bvsession = arg;
    1387           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1388           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
    1389             :         int i;
    1390             : 
                               /* The || short-circuits, so the lock is only attempted when
                                * task_cnt is 0 and is never held inside this branch.
                                */
    1391           0 :         if (vsession->task_cnt > 0 || (pthread_mutex_trylock(&user_dev->lock) != 0)) {
    1392           0 :                 assert(vsession->stop_retry_count > 0);
    1393           0 :                 vsession->stop_retry_count--;
    1394           0 :                 if (vsession->stop_retry_count == 0) {
    1395           0 :                         SPDK_ERRLOG("%s: Timedout when destroy session (task_cnt %d)\n", vsession->name,
    1396             :                                     vsession->task_cnt);
    1397           0 :                         spdk_poller_unregister(&bvsession->stop_poller);
    1398           0 :                         vhost_user_session_stop_done(vsession, -ETIMEDOUT);
    1399             :                 }
    1400             : 
    1401           0 :                 return SPDK_POLLER_BUSY;
    1402             :         }
    1403             : 
    1404           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1405           0 :                 vsession->virtqueue[i].next_event_time = 0;
    1406           0 :                 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]);
    1407             :         }
    1408             : 
    1409           0 :         SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n",
    1410             :                      vsession->name, spdk_env_get_current_core());
    1411             : 
    1412           0 :         if (bvsession->io_channel) {
    1413           0 :                 vhost_blk_put_io_channel(bvsession->io_channel);
    1414           0 :                 bvsession->io_channel = NULL;
    1415             :         }
    1416             : 
    1417           0 :         free_task_pool(bvsession);
    1418           0 :         spdk_poller_unregister(&bvsession->stop_poller);
    1419           0 :         vhost_user_session_stop_done(vsession, 0);
    1420             : 
    1421           0 :         pthread_mutex_unlock(&user_dev->lock);
    1422           0 :         return SPDK_POLLER_BUSY;
    1423             : }
    1424             : 
                       /* stop_session callback of the vhost-user blk backend.
                        *
                        * Returns -EINPROGRESS if a stop poller already exists. Otherwise the
                        * request poller and the per-queue interrupt handlers are unregistered,
                        * the retry budget is computed as the stop timeout (seconds, converted
                        * to microseconds) divided by the retry period, and
                        * destroy_session_poller_cb is registered to run at that period. The
                        * actual teardown happens in that poller. Returns 0. vdev and the third
                        * parameter are unused.
                        */
    1425             : static int
    1426           0 : vhost_blk_stop(struct spdk_vhost_dev *vdev,
    1427             :                struct spdk_vhost_session *vsession, void *unused)
    1428             : {
    1429           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1430             : 
    1431             :         /* return if stop is already in progress */
    1432           0 :         if (bvsession->stop_poller) {
    1433           0 :                 return -EINPROGRESS;
    1434             :         }
    1435             : 
    1436           0 :         spdk_poller_unregister(&bvsession->requestq_poller);
    1437           0 :         vhost_blk_session_unregister_interrupts(bvsession);
    1438             : 
    1439           0 :         bvsession->vsession.stop_retry_count = (SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC * 1000 *
    1440             :                                                 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
    1441           0 :         bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb,
    1442             :                                  bvsession, SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
    1443           0 :         return 0;
    1444             : }
    1445             : 
                       /* dump_info_json callback: writes a named object "block" containing
                        * "readonly" (bool), "bdev" (the bdev name, or JSON null when no bdev is
                        * attached) and "transport" (the transport ops name).
                        */
    1446             : static void
    1447           0 : vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1448             : {
    1449             :         struct spdk_vhost_blk_dev *bvdev;
    1450             : 
    1451           0 :         bvdev = to_blk_dev(vdev);
    1452           0 :         assert(bvdev != NULL);
    1453             : 
    1454           0 :         spdk_json_write_named_object_begin(w, "block");
    1455             : 
    1456           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1457             : 
    1458           0 :         spdk_json_write_name(w, "bdev");
    1459           0 :         if (bvdev->bdev) {
    1460           0 :                 spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev));
    1461             :         } else {
    1462           0 :                 spdk_json_write_null(w);
    1463             :         }
    1464           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1465             : 
    1466           0 :         spdk_json_write_object_end(w);
    1467           0 : }
    1468             : 
                       /* write_config_json callback: emits the "vhost_create_blk_controller"
                        * method object whose params are ctrlr, dev_name, cpumask, readonly and
                        * transport, taken from the device's current state. Nothing is written
                        * for a device that has no bdev attached.
                        */
    1469             : static void
    1470           0 : vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1471             : {
    1472             :         struct spdk_vhost_blk_dev *bvdev;
    1473             : 
    1474           0 :         bvdev = to_blk_dev(vdev);
    1475           0 :         assert(bvdev != NULL);
    1476             : 
    1477           0 :         if (!bvdev->bdev) {
    1478           0 :                 return;
    1479             :         }
    1480             : 
    1481           0 :         spdk_json_write_object_begin(w);
    1482           0 :         spdk_json_write_named_string(w, "method", "vhost_create_blk_controller");
    1483             : 
    1484           0 :         spdk_json_write_named_object_begin(w, "params");
    1485           0 :         spdk_json_write_named_string(w, "ctrlr", vdev->name);
    1486           0 :         spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev));
    1487           0 :         spdk_json_write_named_string(w, "cpumask",
    1488             :                                      spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread)));
    1489           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1490           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1491           0 :         spdk_json_write_object_end(w);
    1492             : 
    1493           0 :         spdk_json_write_object_end(w);
    1494             : }
    1495             : 
    1496             : static int vhost_blk_destroy(struct spdk_vhost_dev *dev);
    1497             : 
                       /* vhost_get_config callback: build a struct virtio_blk_config for the
                        * device and copy at most len bytes of it into config.
                        *
                        * Without a bdev the block size and capacity are reported as 0 (see the
                        * comment in the body for why this is not an error). With a bdev,
                        * size_max and seg_max depend on whether the bdev reports a buffer
                        * alignment greater than 1. Discard and write-zeroes limits are filled
                        * in only when the bdev supports the corresponding I/O type; they stay 0
                        * otherwise because the structure is zeroed first.
                        *
                        * Always returns 0.
                        */
    1498             : static int
    1499           0 : vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
    1500             :                      uint32_t len)
    1501             : {
    1502           0 :         struct virtio_blk_config blkcfg;
    1503             :         struct spdk_bdev *bdev;
    1504             :         uint32_t blk_size;
    1505             :         uint64_t blkcnt;
    1506             : 
    1507           0 :         memset(&blkcfg, 0, sizeof(blkcfg));
    1508           0 :         bdev = vhost_blk_get_bdev(vdev);
    1509           0 :         if (bdev == NULL) {
    1510             :                 /* We can't just return -1 here as this GET_CONFIG message might
    1511             :                  * be caused by a QEMU VM reboot. Returning -1 will indicate an
    1512             :                  * error to QEMU, who might then decide to terminate itself.
    1513             :                  * We don't want that. A simple reboot shouldn't break the system.
    1514             :                  *
    1515             :                  * Presenting a block device with block size 0 and block count 0
    1516             :                  * doesn't cause any problems on QEMU side and the virtio-pci
    1517             :                  * device is even still available inside the VM, but there will
    1518             :                  * be no block device created for it - the kernel drivers will
    1519             :                  * silently reject it.
    1520             :                  */
    1521           0 :                 blk_size = 0;
    1522           0 :                 blkcnt = 0;
    1523             :         } else {
    1524           0 :                 blk_size = spdk_bdev_get_block_size(bdev);
    1525           0 :                 blkcnt = spdk_bdev_get_num_blocks(bdev);
    1526           0 :                 if (spdk_bdev_get_buf_align(bdev) > 1) {
    1527           0 :                         blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE;
    1528           0 :                         blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 2 - 1);
    1529             :                 } else {
    1530           0 :                         blkcfg.size_max = 131072;
    1531             :                         /*  -2 for REQ and RESP and -1 for region boundary splitting */
    1532           0 :                         blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
    1533             :                 }
    1534             :         }
    1535             : 
    1536           0 :         blkcfg.blk_size = blk_size;
    1537             :         /* minimum I/O size in blocks */
    1538           0 :         blkcfg.min_io_size = 1;
    1539             :         /* expressed in 512 Bytes sectors */
    1540           0 :         blkcfg.capacity = (blkcnt * blk_size) / 512;
    1541             :         /* QEMU can overwrite this value when started */
    1542           0 :         blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;
    1543             : 
    1544           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1545             :                 /* 16MiB, expressed in 512 Bytes */
    1546           0 :                 blkcfg.max_discard_sectors = 32768;
    1547           0 :                 blkcfg.max_discard_seg = 1;
    1548           0 :                 blkcfg.discard_sector_alignment = blk_size / 512;
    1549             :         }
    1550           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1551           0 :                 blkcfg.max_write_zeroes_sectors = 32768;
    1552           0 :                 blkcfg.max_write_zeroes_seg = 1;
    1553             :         }
    1554             : 
                               /* Never copy more than the caller's buffer or the structure holds. */
    1555           0 :         memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));
    1556             : 
    1557           0 :         return 0;
    1558             : }
    1559             : 
                       /* set_coalescing callback: delegates to the transport's set_coalescing
                        * op with the same arguments and returns its result.
                        */
    1560             : static int
    1561           0 : vhost_blk_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
    1562             :                          uint32_t iops_threshold)
    1563             : {
    1564           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1565             : 
    1566           0 :         assert(bvdev != NULL);
    1567             : 
    1568           0 :         return bvdev->ops->set_coalescing(vdev, delay_base_us, iops_threshold);
    1569             : }
    1570             : 
                       /* get_coalescing callback: delegates to the transport's get_coalescing
                        * op, which receives the delay_base_us and iops_threshold output
                        * pointers unchanged.
                        */
    1571             : static void
    1572           0 : vhost_blk_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
    1573             :                          uint32_t *iops_threshold)
    1574             : {
    1575           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1576             : 
    1577           0 :         assert(bvdev != NULL);
    1578             : 
    1579           0 :         bvdev->ops->get_coalescing(vdev, delay_base_us, iops_threshold);
    1580           0 : }
    1581             : 
                       /* Session-level callbacks of the vhost-user blk backend.
                        * session_ctx_size is the size of the blk session minus the size of the
                        * generic session, i.e. the blk-specific portion of
                        * struct spdk_vhost_blk_session.
                        */
    1582             : static const struct spdk_vhost_user_dev_backend vhost_blk_user_device_backend = {
    1583             :         .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session),
    1584             :         .start_session =  vhost_blk_start,
    1585             :         .stop_session = vhost_blk_stop,
    1586             :         .alloc_vq_tasks = alloc_vq_task_pool,
    1587             :         .enable_vq = vhost_blk_vq_enable,
    1588             : };
    1589             : 
                       /* Device-level callbacks of the blk backend (type VHOST_BACKEND_BLK):
                        * config space query, JSON dump/config output, device removal and
                        * interrupt coalescing accessors.
                        */
    1590             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
    1591             :         .type = VHOST_BACKEND_BLK,
    1592             :         .vhost_get_config = vhost_blk_get_config,
    1593             :         .dump_info_json = vhost_blk_dump_info_json,
    1594             :         .write_config_json = vhost_blk_write_config_json,
    1595             :         .remove_device = vhost_blk_destroy,
    1596             :         .set_coalescing = vhost_blk_set_coalescing,
    1597             :         .get_coalescing = vhost_blk_get_coalescing,
    1598             : };
    1599             : 
    1600             : int
    1601           1 : virtio_blk_construct_ctrlr(struct spdk_vhost_dev *vdev, const char *address,
    1602             :                            struct spdk_cpuset *cpumask, const struct spdk_json_val *params,
    1603             :                            const struct spdk_vhost_user_dev_backend *user_backend)
    1604             : {
    1605           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1606             : 
    1607           1 :         assert(bvdev != NULL);
    1608             : 
    1609           1 :         return bvdev->ops->create_ctrlr(vdev, cpumask, address, params, (void *)user_backend);
    1610             : }
    1611             : 
    1612             : int
    1613           1 : spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
    1614             :                          const char *transport, const struct spdk_json_val *params)
    1615             : {
    1616           1 :         struct spdk_vhost_blk_dev *bvdev = NULL;
    1617             :         struct spdk_vhost_dev *vdev;
    1618             :         struct spdk_bdev *bdev;
    1619           1 :         const char *transport_name = VIRTIO_BLK_DEFAULT_TRANSPORT;
    1620           1 :         int ret = 0;
    1621             : 
    1622           1 :         bvdev = calloc(1, sizeof(*bvdev));
    1623           1 :         if (bvdev == NULL) {
    1624           0 :                 ret = -ENOMEM;
    1625           0 :                 goto out;
    1626             :         }
    1627             : 
    1628           1 :         if (transport != NULL) {
    1629           0 :                 transport_name = transport;
    1630             :         }
    1631             : 
    1632           1 :         bvdev->ops = virtio_blk_get_transport_ops(transport_name);
    1633           1 :         if (!bvdev->ops) {
    1634           0 :                 ret = -EINVAL;
    1635           0 :                 SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
    1636           0 :                 goto out;
    1637             :         }
    1638             : 
    1639           1 :         ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc);
    1640           1 :         if (ret != 0) {
    1641           0 :                 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n",
    1642             :                             name, dev_name, ret);
    1643           0 :                 goto out;
    1644             :         }
    1645           1 :         bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc);
    1646             : 
    1647           1 :         vdev = &bvdev->vdev;
    1648           1 :         vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE;
    1649           1 :         vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES;
    1650           1 :         vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES;
    1651             : 
    1652           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1653           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD);
    1654             :         }
    1655           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1656           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
    1657             :         }
    1658             : 
    1659           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
    1660           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH);
    1661             :         }
    1662             : 
    1663           1 :         bvdev->bdev = bdev;
    1664           1 :         bvdev->readonly = false;
    1665           1 :         ret = vhost_dev_register(vdev, name, cpumask, params, &vhost_blk_device_backend,
    1666             :                                  &vhost_blk_user_device_backend, false);
    1667           1 :         if (ret != 0) {
    1668           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1669           0 :                 goto out;
    1670             :         }
    1671             : 
    1672           1 :         SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name);
    1673           1 : out:
    1674           1 :         if (ret != 0 && bvdev) {
    1675           0 :                 free(bvdev);
    1676             :         }
    1677           1 :         return ret;
    1678             : }
    1679             : 
    1680             : int
    1681           1 : virtio_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    1682             : {
    1683           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1684             : 
    1685           1 :         assert(bvdev != NULL);
    1686             : 
    1687           1 :         return bvdev->ops->destroy_ctrlr(vdev);
    1688             : }
    1689             : 
    1690             : static int
    1691           1 : vhost_blk_destroy(struct spdk_vhost_dev *vdev)
    1692             : {
    1693           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1694             :         int rc;
    1695             : 
    1696           1 :         assert(bvdev != NULL);
    1697             : 
    1698           1 :         rc = vhost_dev_unregister(&bvdev->vdev);
    1699           1 :         if (rc != 0) {
    1700           0 :                 return rc;
    1701             :         }
    1702             : 
    1703           1 :         if (bvdev->bdev_desc) {
    1704           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1705           0 :                 bvdev->bdev_desc = NULL;
    1706             :         }
    1707           1 :         bvdev->bdev = NULL;
    1708             : 
    1709           1 :         free(bvdev);
    1710           1 :         return 0;
    1711             : }
    1712             : 
    1713             : struct spdk_io_channel *
    1714           0 : vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev)
    1715             : {
    1716           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1717             : 
    1718           0 :         assert(bvdev != NULL);
    1719             : 
    1720           0 :         return spdk_bdev_get_io_channel(bvdev->bdev_desc);
    1721             : }
    1722             : 
    1723             : void
    1724           0 : vhost_blk_put_io_channel(struct spdk_io_channel *ch)
    1725             : {
    1726           0 :         spdk_put_io_channel(ch);
    1727           0 : }
    1728             : 
    1729             : static struct spdk_virtio_blk_transport *
    1730           1 : vhost_user_blk_create(const struct spdk_json_val *params)
    1731             : {
    1732             :         int ret;
    1733             :         struct spdk_virtio_blk_transport *vhost_user_blk;
    1734             : 
    1735           1 :         vhost_user_blk = calloc(1, sizeof(*vhost_user_blk));
    1736           1 :         if (!vhost_user_blk) {
    1737           0 :                 return NULL;
    1738             :         }
    1739             : 
    1740           1 :         ret = vhost_user_init();
    1741           1 :         if (ret != 0) {
    1742           0 :                 free(vhost_user_blk);
    1743           0 :                 return NULL;
    1744             :         }
    1745             : 
    1746           1 :         return vhost_user_blk;
    1747             : }
    1748             : 
    1749             : static int
    1750           1 : vhost_user_blk_destroy(struct spdk_virtio_blk_transport *transport,
    1751             :                        spdk_vhost_fini_cb cb_fn)
    1752             : {
    1753           1 :         vhost_user_fini(cb_fn);
    1754           1 :         free(transport);
    1755           1 :         return 0;
    1756             : }
    1757             : 
    1758             : struct rpc_vhost_blk {
    1759             :         bool readonly;
    1760             :         bool packed_ring;
    1761             : };
    1762             : 
    1763             : static const struct spdk_json_object_decoder rpc_construct_vhost_blk[] = {
    1764             :         {"readonly", offsetof(struct rpc_vhost_blk, readonly), spdk_json_decode_bool, true},
    1765             :         {"packed_ring", offsetof(struct rpc_vhost_blk, packed_ring), spdk_json_decode_bool, true},
    1766             : };
    1767             : 
    1768             : static int
    1769           1 : vhost_user_blk_create_ctrlr(struct spdk_vhost_dev *vdev, struct spdk_cpuset *cpumask,
    1770             :                             const char *address, const struct spdk_json_val *params, void *custom_opts)
    1771             : {
    1772           1 :         struct rpc_vhost_blk req = {0};
    1773           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1774             : 
    1775           1 :         assert(bvdev != NULL);
    1776             : 
    1777           1 :         if (spdk_json_decode_object_relaxed(params, rpc_construct_vhost_blk,
    1778             :                                             SPDK_COUNTOF(rpc_construct_vhost_blk),
    1779             :                                             &req)) {
    1780           0 :                 SPDK_DEBUGLOG(vhost_blk, "spdk_json_decode_object failed\n");
    1781           0 :                 return -EINVAL;
    1782             :         }
    1783             : 
    1784           1 :         if (req.packed_ring) {
    1785           0 :                 vdev->virtio_features |= (uint64_t)req.packed_ring << VIRTIO_F_RING_PACKED;
    1786             :         }
    1787           1 :         if (req.readonly) {
    1788           0 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO);
    1789           0 :                 bvdev->readonly = req.readonly;
    1790             :         }
    1791             : 
    1792           1 :         return vhost_user_dev_create(vdev, address, cpumask, custom_opts, false);
    1793             : }
    1794             : 
    1795             : static int
    1796           1 : vhost_user_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    1797             : {
    1798           1 :         return vhost_user_dev_unregister(vdev);
    1799             : }
    1800             : 
    1801             : static void
    1802           0 : vhost_user_blk_dump_opts(struct spdk_virtio_blk_transport *transport, struct spdk_json_write_ctx *w)
    1803             : {
    1804           0 :         assert(w != NULL);
    1805             : 
    1806           0 :         spdk_json_write_named_string(w, "name", transport->ops->name);
    1807           0 : }
    1808             : 
    1809             : static const struct spdk_virtio_blk_transport_ops vhost_user_blk = {
    1810             :         .name = "vhost_user_blk",
    1811             : 
    1812             :         .dump_opts = vhost_user_blk_dump_opts,
    1813             : 
    1814             :         .create = vhost_user_blk_create,
    1815             :         .destroy = vhost_user_blk_destroy,
    1816             : 
    1817             :         .create_ctrlr = vhost_user_blk_create_ctrlr,
    1818             :         .destroy_ctrlr = vhost_user_blk_destroy_ctrlr,
    1819             : 
    1820             :         .bdev_event = vhost_user_bdev_event_cb,
    1821             :         .set_coalescing = vhost_user_set_coalescing,
    1822             :         .get_coalescing = vhost_user_get_coalescing,
    1823             : };
    1824             : 
    1825           1 : SPDK_VIRTIO_BLK_TRANSPORT_REGISTER(vhost_user_blk, &vhost_user_blk);
    1826             : 
    1827           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk)
    1828           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data)

Generated by: LCOV version 1.15