Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2010-2016 Intel Corporation. All rights reserved.
3 : * All rights reserved.
4 : */
5 :
6 : #include "spdk/stdinc.h"
7 :
8 : #include <sys/eventfd.h>
9 :
10 : #include "spdk/string.h"
11 : #include "spdk/config.h"
12 : #include "spdk/util.h"
13 :
14 : #include "spdk_internal/virtio.h"
15 : #include "spdk_internal/vhost_user.h"
16 :
17 : /* The version of the protocol we support */
18 : #define VHOST_USER_VERSION 0x1
19 :
20 : #define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \
21 : ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
22 : (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
23 :
/* Per-device context for a virtio device backed by a vhost-user
 * unix domain socket.
 */
struct virtio_user_dev {
	int vhostfd;		/* connected AF_UNIX socket to the backend, or -1 */

	/* Per-queue eventfds: callfds are signalled by the backend on used-ring
	 * updates, kickfds are written by us to kick the backend. -1 = unused.
	 */
	int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
	int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
	uint32_t queue_size;	/* ring size, shared by all queues */

	uint8_t status;		/* last VIRTIO_CONFIG_S_* status value set */
	bool is_stopping;	/* set during teardown so the final mem-map notify is allowed */
	char path[PATH_MAX];	/* backend socket path (copied at init) */
	uint64_t protocol_features;	/* negotiated VHOST_USER_PROTOCOL_F_* bits */
	struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES];	/* ring layout per queue */
	struct spdk_mem_map *mem_map;	/* allocated while the device is started */
};
38 :
39 : static int
40 0 : vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
41 0 : {
42 : int r;
43 0 : struct msghdr msgh;
44 0 : struct iovec iov;
45 0 : size_t fd_size = fd_num * sizeof(int);
46 0 : char control[CMSG_SPACE(fd_size)];
47 : struct cmsghdr *cmsg;
48 :
49 0 : memset(&msgh, 0, sizeof(msgh));
50 0 : memset(control, 0, sizeof(control));
51 :
52 0 : iov.iov_base = (uint8_t *)buf;
53 0 : iov.iov_len = len;
54 :
55 0 : msgh.msg_iov = &iov;
56 0 : msgh.msg_iovlen = 1;
57 :
58 0 : if (fds && fd_num > 0) {
59 0 : msgh.msg_control = control;
60 0 : msgh.msg_controllen = sizeof(control);
61 0 : cmsg = CMSG_FIRSTHDR(&msgh);
62 0 : if (!cmsg) {
63 0 : SPDK_WARNLOG("First HDR is NULL\n");
64 0 : return -EIO;
65 : }
66 0 : cmsg->cmsg_len = CMSG_LEN(fd_size);
67 0 : cmsg->cmsg_level = SOL_SOCKET;
68 0 : cmsg->cmsg_type = SCM_RIGHTS;
69 0 : memcpy(CMSG_DATA(cmsg), fds, fd_size);
70 : } else {
71 0 : msgh.msg_control = NULL;
72 0 : msgh.msg_controllen = 0;
73 : }
74 :
75 : do {
76 0 : r = sendmsg(fd, &msgh, 0);
77 0 : } while (r < 0 && errno == EINTR);
78 :
79 0 : if (r == -1) {
80 0 : return -errno;
81 : }
82 :
83 0 : return 0;
84 : }
85 :
86 : static int
87 0 : vhost_user_read(int fd, struct vhost_user_msg *msg)
88 : {
89 0 : uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
90 : ssize_t ret;
91 0 : size_t sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
92 :
93 0 : ret = recv(fd, (void *)msg, sz_hdr, 0);
94 0 : if ((size_t)ret != sz_hdr) {
95 0 : SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n",
96 : ret, sz_hdr);
97 0 : if (ret == -1) {
98 0 : return -errno;
99 : } else {
100 0 : return -EBUSY;
101 : }
102 : }
103 :
104 : /* validate msg flags */
105 0 : if (msg->flags != (valid_flags)) {
106 0 : SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n",
107 : msg->flags, valid_flags);
108 0 : return -EIO;
109 : }
110 :
111 0 : sz_payload = msg->size;
112 :
113 0 : if (sz_payload > VHOST_USER_PAYLOAD_SIZE) {
114 0 : SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n",
115 : sz_payload, VHOST_USER_PAYLOAD_SIZE);
116 0 : return -EIO;
117 : }
118 :
119 0 : if (sz_payload) {
120 0 : ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
121 0 : if ((size_t)ret != sz_payload) {
122 0 : SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n",
123 : ret, msg->size);
124 0 : if (ret == -1) {
125 0 : return -errno;
126 : } else {
127 0 : return -EBUSY;
128 : }
129 : }
130 : }
131 :
132 0 : return 0;
133 : }
134 :
/* One contiguous mapping of a hugepage backing file, parsed out of
 * /proc/self/maps; passed to the backend as a vhost memory region.
 */
struct hugepage_file_info {
	uint64_t addr; /**< virtual addr */
	size_t size; /**< the file size */
	char path[PATH_MAX]; /**< path to backing file */
};
140 :
141 : /* Two possible options:
142 : * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
143 : * array. This is simple but cannot be used in secondary process because
144 : * secondary process will close and munmap that file.
145 : * 2. Match HUGEFILE_FMT to find hugepage files directly.
146 : *
147 : * We choose option 2.
148 : */
149 : static int
150 0 : get_hugepage_file_info(struct hugepage_file_info hugepages[], int max)
151 : {
152 : int idx, rc;
153 : FILE *f;
154 0 : char buf[BUFSIZ], *tmp, *tail;
155 : char *str_underline, *str_start;
156 0 : int huge_index;
157 0 : uint64_t v_start, v_end;
158 :
159 0 : f = fopen("/proc/self/maps", "r");
160 0 : if (!f) {
161 0 : SPDK_ERRLOG("cannot open /proc/self/maps\n");
162 0 : rc = -errno;
163 0 : assert(rc < 0); /* scan-build hack */
164 0 : return rc;
165 : }
166 :
167 0 : idx = 0;
168 0 : while (fgets(buf, sizeof(buf), f) != NULL) {
169 0 : if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
170 0 : SPDK_ERRLOG("Failed to parse address\n");
171 0 : rc = -EIO;
172 0 : goto out;
173 : }
174 :
175 0 : tmp = strchr(buf, ' ') + 1; /** skip address */
176 0 : tmp = strchr(tmp, ' ') + 1; /** skip perm */
177 0 : tmp = strchr(tmp, ' ') + 1; /** skip offset */
178 0 : tmp = strchr(tmp, ' ') + 1; /** skip dev */
179 0 : tmp = strchr(tmp, ' ') + 1; /** skip inode */
180 0 : while (*tmp == ' ') { /** skip spaces */
181 0 : tmp++;
182 : }
183 0 : tail = strrchr(tmp, '\n'); /** remove newline if exists */
184 0 : if (tail) {
185 0 : *tail = '\0';
186 : }
187 :
188 : /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
189 : * which is defined in eal_filesystem.h
190 : */
191 0 : str_underline = strrchr(tmp, '_');
192 0 : if (!str_underline) {
193 0 : continue;
194 : }
195 :
196 0 : str_start = str_underline - strlen("map");
197 0 : if (str_start < tmp) {
198 0 : continue;
199 : }
200 :
201 0 : if (sscanf(str_start, "map_%d", &huge_index) != 1) {
202 0 : continue;
203 : }
204 :
205 0 : if (idx >= max) {
206 0 : SPDK_ERRLOG("Exceed maximum of %d\n", max);
207 0 : rc = -ENOSPC;
208 0 : goto out;
209 : }
210 :
211 0 : if (idx > 0 &&
212 0 : strncmp(tmp, hugepages[idx - 1].path, PATH_MAX) == 0 &&
213 0 : v_start == hugepages[idx - 1].addr + hugepages[idx - 1].size) {
214 0 : hugepages[idx - 1].size += (v_end - v_start);
215 0 : continue;
216 : }
217 :
218 0 : hugepages[idx].addr = v_start;
219 0 : hugepages[idx].size = v_end - v_start;
220 0 : snprintf(hugepages[idx].path, PATH_MAX, "%s", tmp);
221 0 : idx++;
222 : }
223 :
224 0 : rc = idx;
225 0 : out:
226 0 : fclose(f);
227 0 : return rc;
228 : }
229 :
230 : static int
231 0 : prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
232 : {
233 : int i, num;
234 0 : struct hugepage_file_info hugepages[VHOST_USER_MEMORY_MAX_NREGIONS];
235 :
236 0 : num = get_hugepage_file_info(hugepages, VHOST_USER_MEMORY_MAX_NREGIONS);
237 0 : if (num < 0) {
238 0 : SPDK_ERRLOG("Failed to prepare memory for vhost-user\n");
239 0 : return num;
240 : }
241 :
242 0 : for (i = 0; i < num; ++i) {
243 : /* the memory regions are unaligned */
244 0 : msg->payload.memory.regions[i].guest_phys_addr = hugepages[i].addr; /* use vaddr! */
245 0 : msg->payload.memory.regions[i].userspace_addr = hugepages[i].addr;
246 0 : msg->payload.memory.regions[i].memory_size = hugepages[i].size;
247 0 : msg->payload.memory.regions[i].flags_padding = 0;
248 0 : fds[i] = open(hugepages[i].path, O_RDWR);
249 : }
250 :
251 0 : msg->payload.memory.nregions = num;
252 0 : msg->payload.memory.padding = 0;
253 :
254 0 : return 0;
255 : }
256 :
/* Human-readable names for the vhost-user requests this driver sends;
 * indexed by enum vhost_user_request, used only for logging.
 */
static const char *const vhost_msg_strings[VHOST_USER_MAX] = {
	[VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER",
	[VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES",
	[VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL",
	[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE",
	[VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE",
	[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
	[VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG",
	[VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG",
};
276 :
277 : static int
278 0 : vhost_user_sock(struct virtio_user_dev *dev,
279 : enum vhost_user_request req,
280 : void *arg)
281 : {
282 0 : struct vhost_user_msg msg;
283 0 : struct vhost_vring_file *file = 0;
284 0 : int need_reply = 0;
285 0 : int fds[VHOST_USER_MEMORY_MAX_NREGIONS];
286 0 : int fd_num = 0;
287 : int i, len, rc;
288 0 : int vhostfd = dev->vhostfd;
289 :
290 0 : SPDK_DEBUGLOG(virtio_user, "sent message %d = %s\n", req, vhost_msg_strings[req]);
291 :
292 0 : msg.request = req;
293 0 : msg.flags = VHOST_USER_VERSION;
294 0 : msg.size = 0;
295 :
296 0 : switch (req) {
297 0 : case VHOST_USER_GET_FEATURES:
298 : case VHOST_USER_GET_PROTOCOL_FEATURES:
299 : case VHOST_USER_GET_QUEUE_NUM:
300 0 : need_reply = 1;
301 0 : break;
302 :
303 0 : case VHOST_USER_SET_FEATURES:
304 : case VHOST_USER_SET_LOG_BASE:
305 : case VHOST_USER_SET_PROTOCOL_FEATURES:
306 0 : msg.payload.u64 = *((__u64 *)arg);
307 0 : msg.size = sizeof(msg.payload.u64);
308 0 : break;
309 :
310 0 : case VHOST_USER_SET_OWNER:
311 : case VHOST_USER_RESET_OWNER:
312 0 : break;
313 :
314 0 : case VHOST_USER_SET_MEM_TABLE:
315 0 : rc = prepare_vhost_memory_user(&msg, fds);
316 0 : if (rc < 0) {
317 0 : return rc;
318 : }
319 0 : fd_num = msg.payload.memory.nregions;
320 0 : msg.size = sizeof(msg.payload.memory.nregions);
321 0 : msg.size += sizeof(msg.payload.memory.padding);
322 0 : msg.size += fd_num * sizeof(struct vhost_memory_region);
323 0 : break;
324 :
325 0 : case VHOST_USER_SET_LOG_FD:
326 0 : fds[fd_num++] = *((int *)arg);
327 0 : break;
328 :
329 0 : case VHOST_USER_SET_VRING_NUM:
330 : case VHOST_USER_SET_VRING_BASE:
331 : case VHOST_USER_SET_VRING_ENABLE:
332 0 : memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
333 0 : msg.size = sizeof(msg.payload.state);
334 0 : break;
335 :
336 0 : case VHOST_USER_GET_VRING_BASE:
337 0 : memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
338 0 : msg.size = sizeof(msg.payload.state);
339 0 : need_reply = 1;
340 0 : break;
341 :
342 0 : case VHOST_USER_SET_VRING_ADDR:
343 0 : memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
344 0 : msg.size = sizeof(msg.payload.addr);
345 0 : break;
346 :
347 0 : case VHOST_USER_SET_VRING_KICK:
348 : case VHOST_USER_SET_VRING_CALL:
349 : case VHOST_USER_SET_VRING_ERR:
350 0 : file = arg;
351 0 : msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
352 0 : msg.size = sizeof(msg.payload.u64);
353 0 : if (file->fd > 0) {
354 0 : fds[fd_num++] = file->fd;
355 : } else {
356 0 : msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
357 : }
358 0 : break;
359 :
360 0 : case VHOST_USER_GET_CONFIG:
361 0 : memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
362 0 : msg.size = sizeof(msg.payload.cfg);
363 0 : need_reply = 1;
364 0 : break;
365 :
366 0 : case VHOST_USER_SET_CONFIG:
367 0 : memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
368 0 : msg.size = sizeof(msg.payload.cfg);
369 0 : break;
370 :
371 0 : default:
372 0 : SPDK_ERRLOG("trying to send unknown msg\n");
373 0 : return -EINVAL;
374 : }
375 :
376 0 : len = VHOST_USER_HDR_SIZE + msg.size;
377 0 : rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num);
378 0 : if (rc < 0) {
379 0 : SPDK_ERRLOG("%s failed: %s\n",
380 : vhost_msg_strings[req], spdk_strerror(-rc));
381 0 : return rc;
382 : }
383 :
384 0 : if (req == VHOST_USER_SET_MEM_TABLE)
385 0 : for (i = 0; i < fd_num; ++i) {
386 0 : close(fds[i]);
387 : }
388 :
389 0 : if (need_reply) {
390 0 : rc = vhost_user_read(vhostfd, &msg);
391 0 : if (rc < 0) {
392 0 : SPDK_WARNLOG("Received msg failed: %s\n", spdk_strerror(-rc));
393 0 : return rc;
394 : }
395 :
396 0 : if (req != msg.request) {
397 0 : SPDK_WARNLOG("Received unexpected msg type\n");
398 0 : return -EIO;
399 : }
400 :
401 0 : switch (req) {
402 0 : case VHOST_USER_GET_FEATURES:
403 : case VHOST_USER_GET_PROTOCOL_FEATURES:
404 : case VHOST_USER_GET_QUEUE_NUM:
405 0 : if (msg.size != sizeof(msg.payload.u64)) {
406 0 : SPDK_WARNLOG("Received bad msg size\n");
407 0 : return -EIO;
408 : }
409 0 : *((__u64 *)arg) = msg.payload.u64;
410 0 : break;
411 0 : case VHOST_USER_GET_VRING_BASE:
412 0 : if (msg.size != sizeof(msg.payload.state)) {
413 0 : SPDK_WARNLOG("Received bad msg size\n");
414 0 : return -EIO;
415 : }
416 0 : memcpy(arg, &msg.payload.state,
417 : sizeof(struct vhost_vring_state));
418 0 : break;
419 0 : case VHOST_USER_GET_CONFIG:
420 0 : if (msg.size != sizeof(msg.payload.cfg)) {
421 0 : SPDK_WARNLOG("Received bad msg size\n");
422 0 : return -EIO;
423 : }
424 0 : memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg));
425 0 : break;
426 0 : default:
427 0 : SPDK_WARNLOG("Received unexpected msg type\n");
428 0 : return -EBADMSG;
429 : }
430 : }
431 :
432 0 : return 0;
433 : }
434 :
435 : /**
436 : * Set up environment to talk with a vhost user backend.
437 : *
438 : * @return
439 : * - (-1) if fail;
440 : * - (0) if succeed.
441 : */
442 : static int
443 0 : vhost_user_setup(struct virtio_user_dev *dev)
444 : {
445 : int fd;
446 : int flag;
447 0 : struct sockaddr_un un;
448 : ssize_t rc;
449 :
450 0 : fd = socket(AF_UNIX, SOCK_STREAM, 0);
451 0 : if (fd < 0) {
452 0 : SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno));
453 0 : return -errno;
454 : }
455 :
456 0 : flag = fcntl(fd, F_GETFD);
457 0 : if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) {
458 0 : SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno));
459 : }
460 :
461 0 : memset(&un, 0, sizeof(un));
462 0 : un.sun_family = AF_UNIX;
463 0 : rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
464 0 : if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) {
465 0 : SPDK_ERRLOG("socket path too long\n");
466 0 : close(fd);
467 0 : if (rc < 0) {
468 0 : return -errno;
469 : } else {
470 0 : return -EINVAL;
471 : }
472 : }
473 0 : if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
474 0 : SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno));
475 0 : close(fd);
476 0 : return -errno;
477 : }
478 :
479 0 : dev->vhostfd = fd;
480 0 : return 0;
481 : }
482 :
483 : static int
484 0 : virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel)
485 : {
486 0 : struct virtio_user_dev *dev = vdev->ctx;
487 :
488 : /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come
489 : * firstly because vhost depends on this msg to allocate virtqueue
490 : * pair.
491 : */
492 0 : struct vhost_vring_file file;
493 :
494 0 : file.index = queue_sel;
495 0 : file.fd = dev->callfds[queue_sel];
496 0 : return vhost_user_sock(dev, VHOST_USER_SET_VRING_CALL, &file);
497 : }
498 :
499 : static int
500 0 : virtio_user_set_vring_addr(struct virtio_dev *vdev, uint32_t queue_sel)
501 : {
502 0 : struct virtio_user_dev *dev = vdev->ctx;
503 0 : struct vring *vring = &dev->vrings[queue_sel];
504 0 : struct vhost_vring_addr addr = {
505 : .index = queue_sel,
506 0 : .desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
507 0 : .avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
508 0 : .used_user_addr = (uint64_t)(uintptr_t)vring->used,
509 : .log_guest_addr = 0,
510 : .flags = 0, /* disable log */
511 : };
512 :
513 0 : return vhost_user_sock(dev, VHOST_USER_SET_VRING_ADDR, &addr);
514 : }
515 :
516 : static int
517 0 : virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel)
518 : {
519 0 : struct virtio_user_dev *dev = vdev->ctx;
520 0 : struct vhost_vring_file file;
521 0 : struct vhost_vring_state state;
522 0 : struct vring *vring = &dev->vrings[queue_sel];
523 : int rc;
524 :
525 0 : state.index = queue_sel;
526 0 : state.num = vring->num;
527 0 : rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_NUM, &state);
528 0 : if (rc < 0) {
529 0 : return rc;
530 : }
531 :
532 0 : state.index = queue_sel;
533 0 : state.num = 0; /* no reservation */
534 0 : rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_BASE, &state);
535 0 : if (rc < 0) {
536 0 : return rc;
537 : }
538 :
539 0 : virtio_user_set_vring_addr(vdev, queue_sel);
540 :
541 : /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes
542 : * lastly because vhost depends on this msg to judge if
543 : * virtio is ready.
544 : */
545 0 : file.index = queue_sel;
546 0 : file.fd = dev->kickfds[queue_sel];
547 0 : return vhost_user_sock(dev, VHOST_USER_SET_VRING_KICK, &file);
548 : }
549 :
550 : static int
551 0 : virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel)
552 : {
553 0 : struct virtio_user_dev *dev = vdev->ctx;
554 0 : struct vhost_vring_state state;
555 :
556 0 : state.index = queue_sel;
557 0 : state.num = 0;
558 :
559 0 : return vhost_user_sock(dev, VHOST_USER_GET_VRING_BASE, &state);
560 : }
561 :
562 : static int
563 0 : virtio_user_queue_setup(struct virtio_dev *vdev,
564 : int (*fn)(struct virtio_dev *, uint32_t))
565 : {
566 : uint32_t i;
567 : int rc;
568 :
569 0 : for (i = 0; i < vdev->max_queues; ++i) {
570 0 : rc = fn(vdev, i);
571 0 : if (rc < 0) {
572 0 : SPDK_ERRLOG("setup tx vq fails: %"PRIu32".\n", i);
573 0 : return rc;
574 : }
575 : }
576 :
577 0 : return 0;
578 : }
579 :
580 : static int
581 0 : virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map,
582 : enum spdk_mem_map_notify_action action,
583 : void *vaddr, size_t size)
584 : {
585 0 : struct virtio_dev *vdev = cb_ctx;
586 0 : struct virtio_user_dev *dev = vdev->ctx;
587 0 : uint64_t features;
588 : int ret;
589 :
590 : /* We do not support dynamic memory allocation with virtio-user. If this is the
591 : * initial notification when the device is started, dev->mem_map will be NULL. If
592 : * this is the final notification when the device is stopped, dev->is_stopping will
593 : * be true. All other cases are unsupported.
594 : */
595 0 : if (dev->mem_map != NULL && !dev->is_stopping) {
596 0 : assert(false);
597 : SPDK_ERRLOG("Memory map change with active virtio_user_devs not allowed.\n");
598 : SPDK_ERRLOG("Pre-allocate memory for application using -s (mem_size) option.\n");
599 : return -1;
600 : }
601 :
602 : /* We have to resend all mappings anyway, so don't bother with any
603 : * page tracking.
604 : */
605 0 : ret = vhost_user_sock(dev, VHOST_USER_SET_MEM_TABLE, NULL);
606 0 : if (ret < 0) {
607 0 : return ret;
608 : }
609 :
610 : /* Since we might want to use that mapping straight away, we have to
611 : * make sure the guest has already processed our SET_MEM_TABLE message.
612 : * F_REPLY_ACK is just a feature and the host is not obliged to
613 : * support it, so we send a simple message that always has a response
614 : * and we wait for that response. Messages are always processed in order.
615 : */
616 0 : return vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features);
617 : }
618 :
619 : static int
620 0 : virtio_user_register_mem(struct virtio_dev *vdev)
621 : {
622 0 : struct virtio_user_dev *dev = vdev->ctx;
623 0 : const struct spdk_mem_map_ops virtio_user_map_ops = {
624 : .notify_cb = virtio_user_map_notify,
625 : .are_contiguous = NULL
626 : };
627 :
628 0 : dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev);
629 0 : if (dev->mem_map == NULL) {
630 0 : SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
631 0 : return -1;
632 : }
633 :
634 0 : return 0;
635 : }
636 :
637 : static void
638 0 : virtio_user_unregister_mem(struct virtio_dev *vdev)
639 : {
640 0 : struct virtio_user_dev *dev = vdev->ctx;
641 :
642 0 : dev->is_stopping = true;
643 0 : spdk_mem_map_free(&dev->mem_map);
644 0 : }
645 :
646 : static int
647 0 : virtio_user_start_device(struct virtio_dev *vdev)
648 : {
649 0 : struct virtio_user_dev *dev = vdev->ctx;
650 0 : uint64_t host_max_queues;
651 : int ret;
652 :
653 0 : if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 &&
654 0 : vdev->max_queues > 1 + vdev->fixed_queues_num) {
655 0 : SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the "
656 : "host doesn't support VHOST_USER_PROTOCOL_F_MQ. "
657 : "Only one request queue will be used.\n",
658 : vdev->name, vdev->max_queues - vdev->fixed_queues_num);
659 0 : vdev->max_queues = 1 + vdev->fixed_queues_num;
660 : }
661 :
662 : /* negotiate the number of I/O queues. */
663 0 : ret = vhost_user_sock(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues);
664 0 : if (ret < 0) {
665 0 : return ret;
666 : }
667 :
668 0 : if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) {
669 0 : SPDK_WARNLOG("%s: requested %"PRIu16" request queues"
670 : "but only %"PRIu64" available\n",
671 : vdev->name, vdev->max_queues - vdev->fixed_queues_num,
672 : host_max_queues);
673 0 : vdev->max_queues = host_max_queues;
674 : }
675 :
676 : /* tell vhost to create queues */
677 0 : ret = virtio_user_queue_setup(vdev, virtio_user_create_queue);
678 0 : if (ret < 0) {
679 0 : return ret;
680 : }
681 :
682 0 : ret = virtio_user_register_mem(vdev);
683 0 : if (ret < 0) {
684 0 : return ret;
685 : }
686 :
687 0 : return virtio_user_queue_setup(vdev, virtio_user_kick_queue);
688 : }
689 :
690 : static int
691 0 : virtio_user_stop_device(struct virtio_dev *vdev)
692 : {
693 : int ret;
694 :
695 0 : ret = virtio_user_queue_setup(vdev, virtio_user_stop_queue);
696 : /* a queue might fail to stop for various reasons, e.g. socket
697 : * connection going down, but this mustn't prevent us from freeing
698 : * the mem map.
699 : */
700 0 : virtio_user_unregister_mem(vdev);
701 0 : return ret;
702 : }
703 :
704 : static int
705 0 : virtio_user_dev_setup(struct virtio_dev *vdev)
706 : {
707 0 : struct virtio_user_dev *dev = vdev->ctx;
708 : uint16_t i;
709 :
710 0 : dev->vhostfd = -1;
711 :
712 0 : for (i = 0; i < SPDK_VIRTIO_MAX_VIRTQUEUES; ++i) {
713 0 : dev->callfds[i] = -1;
714 0 : dev->kickfds[i] = -1;
715 : }
716 :
717 0 : return vhost_user_setup(dev);
718 : }
719 :
720 : static int
721 0 : virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset,
722 : void *dst, int length)
723 : {
724 0 : struct virtio_user_dev *dev = vdev->ctx;
725 0 : struct vhost_user_config cfg = {0};
726 : int rc;
727 :
728 0 : if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
729 0 : return -ENOTSUP;
730 : }
731 :
732 0 : cfg.offset = 0;
733 0 : cfg.size = VHOST_USER_MAX_CONFIG_SIZE;
734 :
735 0 : rc = vhost_user_sock(dev, VHOST_USER_GET_CONFIG, &cfg);
736 0 : if (rc < 0) {
737 0 : SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc));
738 0 : return rc;
739 : }
740 :
741 0 : memcpy(dst, cfg.region + offset, length);
742 0 : return 0;
743 : }
744 :
745 : static int
746 0 : virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset,
747 : const void *src, int length)
748 : {
749 0 : struct virtio_user_dev *dev = vdev->ctx;
750 0 : struct vhost_user_config cfg = {0};
751 : int rc;
752 :
753 0 : if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
754 0 : return -ENOTSUP;
755 : }
756 :
757 0 : cfg.offset = offset;
758 0 : cfg.size = length;
759 0 : memcpy(cfg.region, src, length);
760 :
761 0 : rc = vhost_user_sock(dev, VHOST_USER_SET_CONFIG, &cfg);
762 0 : if (rc < 0) {
763 0 : SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc));
764 0 : return rc;
765 : }
766 :
767 0 : return 0;
768 : }
769 :
770 : static void
771 0 : virtio_user_set_status(struct virtio_dev *vdev, uint8_t status)
772 : {
773 0 : struct virtio_user_dev *dev = vdev->ctx;
774 0 : int rc = 0;
775 :
776 0 : if ((dev->status & VIRTIO_CONFIG_S_NEEDS_RESET) &&
777 : status != VIRTIO_CONFIG_S_RESET) {
778 0 : rc = -1;
779 0 : } else if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
780 0 : rc = virtio_user_start_device(vdev);
781 0 : } else if (status == VIRTIO_CONFIG_S_RESET &&
782 0 : (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
783 0 : rc = virtio_user_stop_device(vdev);
784 : }
785 :
786 0 : if (rc != 0) {
787 0 : dev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
788 : } else {
789 0 : dev->status = status;
790 : }
791 0 : }
792 :
793 : static uint8_t
794 0 : virtio_user_get_status(struct virtio_dev *vdev)
795 : {
796 0 : struct virtio_user_dev *dev = vdev->ctx;
797 :
798 0 : return dev->status;
799 : }
800 :
801 : static uint64_t
802 0 : virtio_user_get_features(struct virtio_dev *vdev)
803 : {
804 0 : struct virtio_user_dev *dev = vdev->ctx;
805 0 : uint64_t features;
806 : int rc;
807 :
808 0 : rc = vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features);
809 0 : if (rc < 0) {
810 0 : SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc));
811 0 : return 0;
812 : }
813 :
814 0 : return features;
815 : }
816 :
817 : static int
818 0 : virtio_user_set_features(struct virtio_dev *vdev, uint64_t features)
819 : {
820 0 : struct virtio_user_dev *dev = vdev->ctx;
821 0 : uint64_t protocol_features;
822 : int ret;
823 :
824 0 : ret = vhost_user_sock(dev, VHOST_USER_SET_FEATURES, &features);
825 0 : if (ret < 0) {
826 0 : return ret;
827 : }
828 :
829 0 : vdev->negotiated_features = features;
830 0 : vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1);
831 :
832 0 : if (!virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
833 : /* nothing else to do */
834 0 : return 0;
835 : }
836 :
837 0 : ret = vhost_user_sock(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features);
838 0 : if (ret < 0) {
839 0 : return ret;
840 : }
841 :
842 0 : protocol_features &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES;
843 0 : ret = vhost_user_sock(dev, VHOST_USER_SET_PROTOCOL_FEATURES, &protocol_features);
844 0 : if (ret < 0) {
845 0 : return ret;
846 : }
847 :
848 0 : dev->protocol_features = protocol_features;
849 0 : return 0;
850 : }
851 :
852 : static uint16_t
853 0 : virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id)
854 : {
855 0 : struct virtio_user_dev *dev = vdev->ctx;
856 :
857 : /* Currently each queue has same queue size */
858 0 : return dev->queue_size;
859 : }
860 :
861 : static int
862 0 : virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq)
863 : {
864 0 : struct virtio_user_dev *dev = vdev->ctx;
865 0 : struct vhost_vring_state state;
866 0 : uint16_t queue_idx = vq->vq_queue_index;
867 : void *queue_mem;
868 : uint64_t desc_addr, avail_addr, used_addr;
869 : int callfd, kickfd, rc;
870 :
871 0 : if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) {
872 0 : SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx);
873 0 : return -EEXIST;
874 : }
875 :
876 : /* May use invalid flag, but some backend uses kickfd and
877 : * callfd as criteria to judge if dev is alive. so finally we
878 : * use real event_fd.
879 : */
880 0 : callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
881 0 : if (callfd < 0) {
882 0 : SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno));
883 0 : return -errno;
884 : }
885 :
886 0 : kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
887 0 : if (kickfd < 0) {
888 0 : SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno));
889 0 : close(callfd);
890 0 : return -errno;
891 : }
892 :
893 0 : queue_mem = spdk_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL,
894 : SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
895 0 : if (queue_mem == NULL) {
896 0 : close(kickfd);
897 0 : close(callfd);
898 0 : return -ENOMEM;
899 : }
900 :
901 0 : vq->vq_ring_mem = SPDK_VTOPHYS_ERROR;
902 0 : vq->vq_ring_virt_mem = queue_mem;
903 :
904 0 : state.index = vq->vq_queue_index;
905 0 : state.num = 1;
906 :
907 0 : if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
908 0 : rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state);
909 0 : if (rc < 0) {
910 0 : SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n",
911 : spdk_strerror(-rc));
912 0 : close(kickfd);
913 0 : close(callfd);
914 0 : spdk_free(queue_mem);
915 0 : return -rc;
916 : }
917 : }
918 :
919 0 : dev->callfds[queue_idx] = callfd;
920 0 : dev->kickfds[queue_idx] = kickfd;
921 :
922 0 : desc_addr = (uintptr_t)vq->vq_ring_virt_mem;
923 0 : avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
924 0 : used_addr = SPDK_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
925 : ring[vq->vq_nentries]),
926 : VIRTIO_PCI_VRING_ALIGN);
927 :
928 0 : dev->vrings[queue_idx].num = vq->vq_nentries;
929 0 : dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr;
930 0 : dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr;
931 0 : dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr;
932 :
933 0 : return 0;
934 : }
935 :
936 : static void
937 0 : virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq)
938 : {
939 : /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU
940 : * correspondingly stops the ioeventfds, and reset the status of
941 : * the device.
942 : * For modern devices, set queue desc, avail, used in PCI bar to 0,
943 : * not see any more behavior in QEMU.
944 : *
945 : * Here we just care about what information to deliver to vhost-user.
946 : * So we just close ioeventfd for now.
947 : */
948 0 : struct virtio_user_dev *dev = vdev->ctx;
949 :
950 0 : close(dev->callfds[vq->vq_queue_index]);
951 0 : close(dev->kickfds[vq->vq_queue_index]);
952 0 : dev->callfds[vq->vq_queue_index] = -1;
953 0 : dev->kickfds[vq->vq_queue_index] = -1;
954 :
955 0 : spdk_free(vq->vq_ring_virt_mem);
956 0 : }
957 :
958 : static void
959 0 : virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq)
960 : {
961 0 : uint64_t buf = 1;
962 0 : struct virtio_user_dev *dev = vdev->ctx;
963 :
964 0 : if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) {
965 0 : SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno));
966 : }
967 0 : }
968 :
969 : static void
970 0 : virtio_user_destroy(struct virtio_dev *vdev)
971 : {
972 0 : struct virtio_user_dev *dev = vdev->ctx;
973 :
974 0 : if (dev) {
975 0 : close(dev->vhostfd);
976 0 : free(dev);
977 : }
978 0 : }
979 :
980 : static void
981 0 : virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
982 : {
983 0 : struct virtio_user_dev *dev = vdev->ctx;
984 :
985 0 : spdk_json_write_named_string(w, "type", "user");
986 0 : spdk_json_write_named_string(w, "socket", dev->path);
987 0 : }
988 :
989 : static void
990 0 : virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
991 : {
992 0 : struct virtio_user_dev *dev = vdev->ctx;
993 :
994 0 : spdk_json_write_named_string(w, "trtype", "user");
995 0 : spdk_json_write_named_string(w, "traddr", dev->path);
996 0 : spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num);
997 0 : spdk_json_write_named_uint32(w, "vq_size", virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0));
998 0 : }
999 :
/* Backend ops vtable registered with the generic virtio layer; dispatches
 * every device operation to the vhost-user implementations above.
 */
static const struct virtio_dev_ops virtio_user_ops = {
	.read_dev_cfg	= virtio_user_read_dev_config,
	.write_dev_cfg	= virtio_user_write_dev_config,
	.get_status	= virtio_user_get_status,
	.set_status	= virtio_user_set_status,
	.get_features	= virtio_user_get_features,
	.set_features	= virtio_user_set_features,
	.destruct_dev	= virtio_user_destroy,
	.get_queue_size	= virtio_user_get_queue_size,
	.setup_queue	= virtio_user_setup_queue,
	.del_queue	= virtio_user_del_queue,
	.notify_queue	= virtio_user_notify_queue,
	.dump_json_info = virtio_user_dump_json_info,
	.write_json_config = virtio_user_write_json_config,
};
1015 :
1016 : int
1017 0 : virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
1018 : uint32_t queue_size)
1019 : {
1020 : struct virtio_user_dev *dev;
1021 : int rc;
1022 :
1023 0 : if (name == NULL) {
1024 0 : SPDK_ERRLOG("No name given for controller: %s\n", path);
1025 0 : return -EINVAL;
1026 : }
1027 :
1028 0 : dev = calloc(1, sizeof(*dev));
1029 0 : if (dev == NULL) {
1030 0 : return -ENOMEM;
1031 : }
1032 :
1033 0 : rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev);
1034 0 : if (rc != 0) {
1035 0 : SPDK_ERRLOG("Failed to init device: %s\n", path);
1036 0 : free(dev);
1037 0 : return rc;
1038 : }
1039 :
1040 0 : vdev->is_hw = 0;
1041 :
1042 0 : snprintf(dev->path, PATH_MAX, "%s", path);
1043 0 : dev->queue_size = queue_size;
1044 :
1045 0 : rc = virtio_user_dev_setup(vdev);
1046 0 : if (rc < 0) {
1047 0 : SPDK_ERRLOG("backend set up fails\n");
1048 0 : goto err;
1049 : }
1050 :
1051 0 : rc = vhost_user_sock(dev, VHOST_USER_SET_OWNER, NULL);
1052 0 : if (rc < 0) {
1053 0 : SPDK_ERRLOG("set_owner fails: %s\n", spdk_strerror(-rc));
1054 0 : goto err;
1055 : }
1056 :
1057 0 : return 0;
1058 :
1059 0 : err:
1060 0 : virtio_dev_destruct(vdev);
1061 0 : return rc;
1062 : }
1063 0 : SPDK_LOG_REGISTER_COMPONENT(virtio_user)
|