Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2019 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/env.h"
10 : #include "spdk/likely.h"
11 : #include "spdk/string.h"
12 : #include "spdk/util.h"
13 : #include "spdk/memory.h"
14 : #include "spdk/barrier.h"
15 : #include "spdk/vhost.h"
16 : #include "vhost_internal.h"
17 : #include <rte_version.h>
18 :
19 : #include "spdk_internal/vhost_user.h"
20 :
21 : /* Path to folder where character device will be created. Can be set by user. */
22 : static char g_vhost_user_dev_dirname[PATH_MAX] = "";
23 :
24 : static struct spdk_thread *g_vhost_user_init_thread;
25 :
26 : struct vhost_session_fn_ctx {
27 : /** Device pointer obtained before enqueueing the event */
28 : struct spdk_vhost_dev *vdev;
29 :
30 : /** ID of the session to send event to. */
31 : uint32_t vsession_id;
32 :
33 : /** User provided function to be executed on session's thread. */
34 : spdk_vhost_session_fn cb_fn;
35 :
36 : /**
37 : * User provided function to be called on the init thread
38 : * after iterating through all sessions.
39 : */
40 : spdk_vhost_dev_fn cpl_fn;
41 :
42 : /** Custom user context */
43 : void *user_ctx;
44 : };
45 :
46 : static int vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
47 : unsigned timeout_sec, const char *errmsg);
48 :
49 : void *
50 0 : vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
51 : {
52 : void *vva;
53 0 : uint64_t newlen;
54 :
55 0 : newlen = len;
56 0 : vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
57 0 : if (newlen != len) {
58 0 : return NULL;
59 : }
60 :
61 0 : return vva;
62 :
63 : }
64 :
65 : static void
66 0 : vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
67 : uint16_t req_id)
68 : {
69 0 : struct vring_desc *desc, *desc_table;
70 0 : uint32_t desc_table_size;
71 : int rc;
72 :
73 0 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
74 0 : return;
75 : }
76 :
77 0 : rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
78 0 : if (spdk_unlikely(rc != 0)) {
79 0 : SPDK_ERRLOG("Can't log used ring descriptors!\n");
80 0 : return;
81 : }
82 :
83 : do {
84 0 : if (vhost_vring_desc_is_wr(desc)) {
85 : /* Strictly speaking, only the pages actually touched should be logged, but
86 : * doing so would require tracking those changes in each backend.
87 : * Also, the backend will most likely touch all/most of those pages, so
88 : * for now let's assume we touched all pages passed to us as writeable buffers. */
89 0 : rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
90 : }
91 0 : vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
92 0 : } while (desc);
93 : }
94 :
95 : static void
96 7 : vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
97 : struct spdk_vhost_virtqueue *virtqueue,
98 : uint16_t idx)
99 : {
100 : uint64_t offset, len;
101 :
102 7 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
103 7 : return;
104 : }
105 :
106 0 : if (spdk_unlikely(virtqueue->packed.packed_ring)) {
107 0 : offset = idx * sizeof(struct vring_packed_desc);
108 0 : len = sizeof(struct vring_packed_desc);
109 : } else {
110 0 : offset = offsetof(struct vring_used, ring[idx]);
111 0 : len = sizeof(virtqueue->vring.used->ring[idx]);
112 : }
113 :
114 0 : rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
115 : }
116 :
117 : static void
118 0 : vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
119 : struct spdk_vhost_virtqueue *virtqueue)
120 : {
121 : uint64_t offset, len;
122 : uint16_t vq_idx;
123 :
124 0 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
125 0 : return;
126 : }
127 :
128 0 : offset = offsetof(struct vring_used, idx);
129 0 : len = sizeof(virtqueue->vring.used->idx);
130 0 : vq_idx = virtqueue - vsession->virtqueue;
131 :
132 0 : rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
133 : }
134 :
135 : /*
136 : * Get available requests from avail ring.
137 : */
138 : uint16_t
139 4 : vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
140 : uint16_t reqs_len)
141 : {
142 4 : struct rte_vhost_vring *vring = &virtqueue->vring;
143 4 : struct vring_avail *avail = vring->avail;
144 4 : uint16_t size_mask = vring->size - 1;
145 4 : uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
146 : uint16_t count, i;
147 : int rc;
148 4 : uint64_t u64_value;
149 :
150 4 : spdk_smp_rmb();
151 :
152 4 : if (virtqueue->vsession && spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
153 : /* Read to clear vring's kickfd */
154 0 : rc = read(vring->kickfd, &u64_value, sizeof(u64_value));
155 0 : if (rc < 0) {
156 0 : SPDK_ERRLOG("failed to acknowledge kickfd: %s.\n", spdk_strerror(errno));
157 0 : return -errno;
158 : }
159 : }
160 :
161 4 : count = avail_idx - last_idx;
162 4 : if (spdk_likely(count == 0)) {
163 0 : return 0;
164 : }
165 :
166 4 : if (spdk_unlikely(count > vring->size)) {
167 : /* TODO: the queue is unrecoverably broken and should be marked so.
168 : * For now we will fail silently and report there are no new avail entries.
169 : */
170 1 : return 0;
171 : }
172 :
173 3 : count = spdk_min(count, reqs_len);
174 :
175 3 : virtqueue->last_avail_idx += count;
176 : /* Check whether there are unprocessed reqs in vq, then kick vq manually */
177 3 : if (virtqueue->vsession && spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
178 : /* If avail_idx is larger than virtqueue's last_avail_idx, then there are unprocessed reqs.
179 : * avail_idx should be re-read from memory here, in case of a race condition with the guest.
180 : */
181 0 : avail_idx = * (volatile uint16_t *) &avail->idx;
182 0 : if (avail_idx > virtqueue->last_avail_idx) {
183 : /* Write to notify vring's kickfd */
184 0 : rc = write(vring->kickfd, &u64_value, sizeof(u64_value));
185 0 : if (rc < 0) {
186 0 : SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
187 0 : return -errno;
188 : }
189 : }
190 : }
191 :
192 19 : for (i = 0; i < count; i++) {
193 16 : reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
194 : }
195 :
196 3 : SPDK_DEBUGLOG(vhost_ring,
197 : "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
198 : last_idx, avail_idx, count);
199 :
200 3 : return count;
201 : }
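
/*
 * Illustrative sketch (not part of the original source): avail->idx and
 * last_avail_idx above are free-running 16-bit counters, so plain uint16_t
 * subtraction handles wrap-around and only the ring lookup is masked.
 * Assuming a ring of size 256 (size_mask == 0xff):
 *
 *   last_idx = 65534 (0xfffe), avail_idx = 2  =>  count = (uint16_t)(2 - 65534) = 4
 *   ring slots read: (last_idx + i) & size_mask = 0xfe, 0xff, 0x00, 0x01 for i = 0..3
 */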
202 :
203 : static bool
204 0 : vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
205 : {
206 0 : return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
207 : }
208 :
209 : static bool
210 7 : vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
211 : {
212 7 : return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
213 : }
214 :
215 : static bool
216 0 : vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
217 : {
218 0 : return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
219 : }
220 :
221 : int
222 0 : vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
223 : uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
224 : uint32_t *desc_table_size)
225 : {
226 0 : if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
227 0 : return -1;
228 : }
229 :
230 0 : *desc = &virtqueue->vring.desc[req_idx];
231 :
232 0 : if (vhost_vring_desc_is_indirect(*desc)) {
233 0 : *desc_table_size = (*desc)->len / sizeof(**desc);
234 0 : *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
235 0 : sizeof(**desc) * *desc_table_size);
236 0 : *desc = *desc_table;
237 0 : if (*desc == NULL) {
238 0 : return -1;
239 : }
240 :
241 0 : return 0;
242 : }
243 :
244 0 : *desc_table = virtqueue->vring.desc;
245 0 : *desc_table_size = virtqueue->vring.size;
246 :
247 0 : return 0;
248 : }
249 :
250 : static bool
251 0 : vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
252 : uint64_t addr, uint32_t len,
253 : struct vring_packed_desc **desc_table,
254 : uint32_t *desc_table_size)
255 : {
256 0 : *desc_table_size = len / sizeof(struct vring_packed_desc);
257 :
258 0 : *desc_table = vhost_gpa_to_vva(vsession, addr, len);
259 0 : if (spdk_unlikely(*desc_table == NULL)) {
260 0 : return false;
261 : }
262 :
263 0 : return true;
264 : }
265 :
266 : int
267 0 : vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
268 : struct spdk_vhost_virtqueue *virtqueue,
269 : uint16_t req_idx, struct vring_packed_desc **desc,
270 : struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
271 : {
272 0 : *desc = &virtqueue->vring.desc_packed[req_idx];
273 :
274 : /* In a packed ring, when the desc is non-indirect we get the next desc
275 : * by checking (desc->flags & VRING_DESC_F_NEXT) != 0. When the desc
276 : * is indirect we get the next desc from the index and desc_table_size.
277 : * This differs from the split ring.
278 : */
279 0 : if (vhost_vring_packed_desc_is_indirect(*desc)) {
280 0 : if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
281 : desc_table, desc_table_size)) {
282 0 : return -1;
283 : }
284 :
285 0 : *desc = *desc_table;
286 : } else {
287 0 : *desc_table = NULL;
288 0 : *desc_table_size = 0;
289 : }
290 :
291 0 : return 0;
292 : }
293 :
294 : int
295 0 : vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
296 : spdk_vhost_inflight_desc *desc_array,
297 : uint16_t req_idx, spdk_vhost_inflight_desc **desc,
298 : struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
299 : {
300 0 : *desc = &desc_array[req_idx];
301 :
302 0 : if (vhost_inflight_packed_desc_is_indirect(*desc)) {
303 0 : if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
304 : desc_table, desc_table_size)) {
305 0 : return -1;
306 : }
307 :
308 : /* This desc is the inflight desc, not the packed desc.
309 : * When F_INDIRECT is set, the table entries are packed descs,
310 : * so set the inflight desc to NULL.
311 : */
312 0 : *desc = NULL;
313 : } else {
314 : /* When F_INDIRECT is not set, there is no packed desc table */
315 0 : *desc_table = NULL;
316 0 : *desc_table_size = 0;
317 : }
318 :
319 0 : return 0;
320 : }
321 :
322 : int
323 0 : vhost_vq_used_signal(struct spdk_vhost_session *vsession,
324 : struct spdk_vhost_virtqueue *virtqueue)
325 : {
326 0 : if (virtqueue->used_req_cnt == 0) {
327 0 : return 0;
328 : }
329 :
330 0 : SPDK_DEBUGLOG(vhost_ring,
331 : "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
332 : virtqueue - vsession->virtqueue, virtqueue->last_used_idx);
333 :
334 : #if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
335 : if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
336 : #else
337 0 : if (rte_vhost_vring_call_nonblock(vsession->vid, virtqueue->vring_idx) == 0) {
338 : #endif
339 : /* interrupt signalled */
340 0 : virtqueue->req_cnt += virtqueue->used_req_cnt;
341 0 : virtqueue->used_req_cnt = 0;
342 0 : return 1;
343 : } else {
344 : /* interrupt not signalled */
345 0 : return 0;
346 : }
347 : }
348 :
349 : static void
350 0 : session_vq_io_stats_update(struct spdk_vhost_session *vsession,
351 : struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
352 : {
353 0 : uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
354 0 : uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
355 : int32_t irq_delay;
356 : uint32_t req_cnt;
357 :
358 0 : req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
359 0 : if (req_cnt <= io_threshold) {
360 0 : return;
361 : }
362 :
363 0 : irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
364 0 : virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);
365 :
366 0 : virtqueue->req_cnt = 0;
367 0 : virtqueue->next_event_time = now;
368 : }
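
/*
 * Worked example of the adaptive coalescing formula above; the numbers are
 * assumed for illustration only. With irq_delay_base = 250000 ticks and
 * io_threshold = 600 requests per stats interval:
 *
 *   req_cnt =  900  =>  irq_delay = 250000 * (900 - 600) / 600  = 125000 ticks
 *   req_cnt = 1200  =>  irq_delay = 250000 * (1200 - 600) / 600 = 250000 ticks
 *
 * i.e. the interrupt delay grows linearly with how far the observed request
 * rate exceeds the threshold (one irq_delay_base per io_threshold of excess).
 */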
369 :
370 : static void
371 0 : check_session_vq_io_stats(struct spdk_vhost_session *vsession,
372 : struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
373 : {
374 0 : if (now < vsession->next_stats_check_time) {
375 0 : return;
376 : }
377 :
378 0 : vsession->next_stats_check_time = now + vsession->stats_check_interval;
379 0 : session_vq_io_stats_update(vsession, virtqueue, now);
380 : }
381 :
382 : static inline bool
383 0 : vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
384 : {
385 0 : spdk_smp_mb();
386 :
387 0 : if (spdk_unlikely(vq->packed.packed_ring)) {
388 0 : if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
389 0 : return true;
390 : }
391 : } else {
392 0 : if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
393 0 : return true;
394 : }
395 : }
396 :
397 0 : return false;
398 : }
399 :
400 : void
401 0 : vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue)
402 : {
403 0 : struct spdk_vhost_session *vsession = virtqueue->vsession;
404 : uint64_t now;
405 :
406 0 : if (vsession->coalescing_delay_time_base == 0) {
407 0 : if (virtqueue->vring.desc == NULL) {
408 0 : return;
409 : }
410 :
411 0 : if (vhost_vq_event_is_suppressed(virtqueue)) {
412 0 : return;
413 : }
414 :
415 0 : vhost_vq_used_signal(vsession, virtqueue);
416 : } else {
417 0 : now = spdk_get_ticks();
418 0 : check_session_vq_io_stats(vsession, virtqueue, now);
419 :
420 : /* No need for event right now */
421 0 : if (now < virtqueue->next_event_time) {
422 0 : return;
423 : }
424 :
425 0 : if (vhost_vq_event_is_suppressed(virtqueue)) {
426 0 : return;
427 : }
428 :
429 0 : if (!vhost_vq_used_signal(vsession, virtqueue)) {
430 0 : return;
431 : }
432 :
433 : /* The syscall takes a while, so refresh the time */
434 0 : now = spdk_get_ticks();
435 0 : virtqueue->next_event_time = now + virtqueue->irq_delay_time;
436 : }
437 : }
438 :
439 : /*
440 : * Enqueue id and len to used ring.
441 : */
442 : void
443 0 : vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
444 : struct spdk_vhost_virtqueue *virtqueue,
445 : uint16_t id, uint32_t len)
446 : {
447 0 : struct rte_vhost_vring *vring = &virtqueue->vring;
448 0 : struct vring_used *used = vring->used;
449 0 : uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
450 0 : uint16_t vq_idx = virtqueue->vring_idx;
451 :
452 0 : SPDK_DEBUGLOG(vhost_ring,
453 : "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
454 : virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);
455 :
456 0 : vhost_log_req_desc(vsession, virtqueue, id);
457 :
458 0 : virtqueue->last_used_idx++;
459 0 : used->ring[last_idx].id = id;
460 0 : used->ring[last_idx].len = len;
461 :
462 : /* Ensure the used ring is updated before we log it or increment used->idx. */
463 0 : spdk_smp_wmb();
464 :
465 0 : rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);
466 :
467 0 : vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
468 0 : * (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
469 0 : vhost_log_used_vring_idx(vsession, virtqueue);
470 :
471 0 : rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);
472 :
473 0 : virtqueue->used_req_cnt++;
474 :
475 0 : if (spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
476 0 : if (virtqueue->vring.desc == NULL || vhost_vq_event_is_suppressed(virtqueue)) {
477 0 : return;
478 : }
479 :
480 0 : vhost_vq_used_signal(vsession, virtqueue);
481 : }
482 : }
483 :
484 : void
485 7 : vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
486 : struct spdk_vhost_virtqueue *virtqueue,
487 : uint16_t num_descs, uint16_t buffer_id,
488 : uint32_t length, uint16_t inflight_head)
489 : {
490 7 : struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
491 : bool used, avail;
492 :
493 7 : SPDK_DEBUGLOG(vhost_ring,
494 : "Queue %td - RING: buffer_id=%"PRIu16"\n",
495 : virtqueue - vsession->virtqueue, buffer_id);
496 :
497 : /* When the descriptor has been used, its avail flag and used flag
498 : * are set equal to each other,
499 : * and the used flag value == used_wrap_counter.
500 : */
501 7 : used = !!(desc->flags & VRING_DESC_F_USED);
502 7 : avail = !!(desc->flags & VRING_DESC_F_AVAIL);
503 7 : if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
504 0 : SPDK_ERRLOG("descriptor has been used before\n");
505 0 : return;
506 : }
507 :
508 : /* In a used desc, addr is unused and len specifies the buffer length
509 : * that has been written to by the device.
510 : */
511 7 : desc->addr = 0;
512 7 : desc->len = length;
513 :
514 : /* This bit specifies whether any data has been written by the device */
515 7 : if (length != 0) {
516 7 : desc->flags |= VRING_DESC_F_WRITE;
517 : }
518 :
519 : /* Buffer ID is included in the last descriptor in the list.
520 : * The driver needs to keep track of the size of the list corresponding
521 : * to each buffer ID.
522 : */
523 7 : desc->id = buffer_id;
524 :
525 : /* A device MUST NOT make the descriptor used before buffer_id is
526 : * written to the descriptor.
527 : */
528 7 : spdk_smp_wmb();
529 :
530 7 : rte_vhost_set_last_inflight_io_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
531 : /* To mark a desc as used, the device sets the F_USED bit in flags to match
532 : * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
533 : * match the same value.
534 : */
535 7 : if (virtqueue->packed.used_phase) {
536 4 : desc->flags |= VRING_DESC_F_AVAIL_USED;
537 : } else {
538 3 : desc->flags &= ~VRING_DESC_F_AVAIL_USED;
539 : }
540 7 : rte_vhost_clr_inflight_desc_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
541 :
542 7 : vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
543 7 : virtqueue->last_used_idx += num_descs;
544 7 : if (virtqueue->last_used_idx >= virtqueue->vring.size) {
545 1 : virtqueue->last_used_idx -= virtqueue->vring.size;
546 1 : virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
547 : }
548 :
549 7 : virtqueue->used_req_cnt++;
550 : }
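
/*
 * Sketch of the wrap-counter handling above (assuming VRING_DESC_F_AVAIL_USED
 * is simply VRING_DESC_F_AVAIL | VRING_DESC_F_USED, as the code implies):
 *
 *   used_phase == 1  =>  desc->flags |=  (F_AVAIL | F_USED)   both bits set
 *   used_phase == 0  =>  desc->flags &= ~(F_AVAIL | F_USED)   both bits cleared
 *
 * The guest driver compares those two bits against its own wrap counter to
 * detect completions; the phase flips whenever last_used_idx wraps past
 * vring.size, as done at the end of the function.
 */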
551 :
552 : bool
553 12 : vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
554 : {
555 12 : uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;
556 :
557 : /* To mark a desc as available, the driver sets the F_AVAIL bit in flags
558 : * to match the internal avail wrap counter. It also sets the F_USED bit to
559 : * match the inverse value, though this is not mandatory.
560 : */
561 12 : return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
562 : }
563 :
564 : bool
565 0 : vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
566 : {
567 0 : return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
568 : }
569 :
570 : bool
571 0 : vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
572 : {
573 0 : return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
574 : }
575 :
576 : int
577 0 : vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
578 : struct spdk_vhost_virtqueue *vq,
579 : struct vring_packed_desc *desc_table,
580 : uint32_t desc_table_size)
581 : {
582 0 : if (desc_table != NULL) {
583 : /* A non-NULL desc_table means the chain is indirect and we get the next
584 : * desc from req_idx and desc_table_size. Returning a NULL desc means
585 : * we have reached the last desc of this request.
586 : */
587 0 : (*req_idx)++;
588 0 : if (*req_idx < desc_table_size) {
589 0 : *desc = &desc_table[*req_idx];
590 : } else {
591 0 : *desc = NULL;
592 : }
593 : } else {
594 : /* A NULL desc_table means the chain is non-indirect and we get the next
595 : * desc from req_idx and the F_NEXT flag. Returning a NULL desc means
596 : * we have reached the last desc of this request. When returning a new
597 : * desc we update req_idx as well.
598 : */
599 0 : if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
600 0 : *desc = NULL;
601 0 : return 0;
602 : }
603 :
604 0 : *req_idx = (*req_idx + 1) % vq->vring.size;
605 0 : *desc = &vq->vring.desc_packed[*req_idx];
606 : }
607 :
608 0 : return 0;
609 : }
610 :
611 : static int
612 6 : vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
613 : uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
614 : {
615 : uintptr_t vva;
616 6 : uint64_t len;
617 :
618 : do {
619 7 : if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
620 1 : SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
621 1 : return -1;
622 : }
623 6 : len = remaining;
624 6 : vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
625 6 : if (vva == 0 || len == 0) {
626 0 : SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
627 0 : return -1;
628 : }
629 6 : iov[*iov_index].iov_base = (void *)vva;
630 6 : iov[*iov_index].iov_len = len;
631 6 : remaining -= len;
632 6 : payload += len;
633 6 : (*iov_index)++;
634 6 : } while (remaining);
635 :
636 5 : return 0;
637 : }
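
/*
 * Illustrative example with hypothetical sizes: if a 12 KiB payload starts
 * 4 KiB before the end of a guest memory region, rte_vhost_va_from_guest_pa()
 * shrinks `len` to the 4 KiB that is contiguous in host VA space, so the loop
 * above emits two iov entries (4 KiB, then 8 KiB) before `remaining` reaches 0,
 * bailing out with -1 only if SPDK_VHOST_IOVS_MAX entries are exceeded.
 */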
638 :
639 : int
640 0 : vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
641 : uint16_t *iov_index, const struct vring_packed_desc *desc)
642 : {
643 0 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
644 0 : desc->addr, desc->len);
645 : }
646 :
647 : int
648 0 : vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
649 : uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
650 : {
651 0 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
652 0 : desc->addr, desc->len);
653 : }
654 :
655 : /* 1. Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
656 : * 2. Update vq->last_avail_idx to point to the next available desc chain.
657 : * 3. Toggle the avail_wrap_counter if last_avail_idx wraps around.
658 : */
659 : uint16_t
660 7 : vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
661 : uint16_t *num_descs)
662 : {
663 : struct vring_packed_desc *desc;
664 7 : uint16_t desc_head = req_idx;
665 :
666 7 : *num_descs = 1;
667 :
668 7 : desc = &vq->vring.desc_packed[req_idx];
669 7 : if (!vhost_vring_packed_desc_is_indirect(desc)) {
670 7 : while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
671 0 : req_idx = (req_idx + 1) % vq->vring.size;
672 0 : desc = &vq->vring.desc_packed[req_idx];
673 0 : (*num_descs)++;
674 : }
675 : }
676 :
677 : /* Queue size doesn't have to be a power of 2.
678 : * The device maintains last_avail_idx, so we can make sure
679 : * the value stays valid (0 ~ vring.size - 1).
680 : */
681 7 : vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
682 7 : if (vq->last_avail_idx < desc_head) {
683 1 : vq->packed.avail_phase = !vq->packed.avail_phase;
684 : }
685 :
686 7 : return desc->id;
687 : }
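
/*
 * Illustrative example (made-up indices, ring size 8): a non-indirect chain of
 * three descriptors starting at req_idx 6 occupies slots 6, 7 and 0. The
 * buffer_id returned comes from the last descriptor (slot 0), *num_descs is 3,
 * and last_avail_idx becomes (0 + 1) % 8 = 1. Since 1 < desc_head (6), the
 * avail phase is toggled to account for the wrap-around.
 */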
688 :
689 : int
690 0 : vhost_vring_desc_get_next(struct vring_desc **desc,
691 : struct vring_desc *desc_table, uint32_t desc_table_size)
692 : {
693 0 : struct vring_desc *old_desc = *desc;
694 : uint16_t next_idx;
695 :
696 0 : if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
697 0 : *desc = NULL;
698 0 : return 0;
699 : }
700 :
701 0 : next_idx = old_desc->next;
702 0 : if (spdk_unlikely(next_idx >= desc_table_size)) {
703 0 : *desc = NULL;
704 0 : return -1;
705 : }
706 :
707 0 : *desc = &desc_table[next_idx];
708 0 : return 0;
709 : }
710 :
711 : int
712 6 : vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
713 : uint16_t *iov_index, const struct vring_desc *desc)
714 : {
715 18 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
716 6 : desc->addr, desc->len);
717 : }
718 :
719 : static inline void
720 0 : vhost_session_mem_region_calc(uint64_t *previous_start, uint64_t *start, uint64_t *end,
721 : uint64_t *len, struct rte_vhost_mem_region *region)
722 : {
723 0 : *start = FLOOR_2MB(region->mmap_addr);
724 0 : *end = CEIL_2MB(region->mmap_addr + region->mmap_size);
725 0 : if (*start == *previous_start) {
726 0 : *start += (size_t) VALUE_2MB;
727 : }
728 0 : *previous_start = *start;
729 0 : *len = *end - *start;
730 0 : }
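
/*
 * Worked example with made-up addresses: a region with
 *   mmap_addr = 0x7f0000301000, mmap_size = 0x200000
 * yields start = FLOOR_2MB(...) = 0x7f0000200000,
 *        end   = CEIL_2MB(...)  = 0x7f0000600000, len = 0x400000,
 * i.e. the range is widened to whole 2 MB hugepages. If the previous region
 * produced the same start, start is bumped by 2 MB so the overlapping
 * hugepage is not registered twice.
 */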
731 :
732 : void
733 0 : vhost_session_mem_register(struct rte_vhost_memory *mem)
734 : {
735 0 : uint64_t start, end, len;
736 : uint32_t i;
737 0 : uint64_t previous_start = UINT64_MAX;
738 :
739 :
740 0 : for (i = 0; i < mem->nregions; i++) {
741 0 : vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
742 0 : SPDK_INFOLOG(vhost, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
743 : start, len);
744 :
745 0 : if (spdk_mem_register((void *)start, len) != 0) {
746 0 : SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
747 : i);
748 0 : continue;
749 : }
750 : }
751 0 : }
752 :
753 : void
754 0 : vhost_session_mem_unregister(struct rte_vhost_memory *mem)
755 : {
756 0 : uint64_t start, end, len;
757 : uint32_t i;
758 0 : uint64_t previous_start = UINT64_MAX;
759 :
760 0 : for (i = 0; i < mem->nregions; i++) {
761 0 : vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
762 0 : if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
763 0 : continue; /* region has not been registered */
764 : }
765 :
766 0 : if (spdk_mem_unregister((void *)start, len) != 0) {
767 0 : assert(false);
768 : }
769 : }
770 0 : }
771 :
772 : static bool
773 0 : vhost_memory_changed(struct rte_vhost_memory *new,
774 : struct rte_vhost_memory *old)
775 : {
776 : uint32_t i;
777 :
778 0 : if (new->nregions != old->nregions) {
779 0 : return true;
780 : }
781 :
782 0 : for (i = 0; i < new->nregions; ++i) {
783 0 : struct rte_vhost_mem_region *new_r = &new->regions[i];
784 0 : struct rte_vhost_mem_region *old_r = &old->regions[i];
785 :
786 0 : if (new_r->guest_phys_addr != old_r->guest_phys_addr) {
787 0 : return true;
788 : }
789 0 : if (new_r->size != old_r->size) {
790 0 : return true;
791 : }
792 0 : if (new_r->guest_user_addr != old_r->guest_user_addr) {
793 0 : return true;
794 : }
795 0 : if (new_r->mmap_addr != old_r->mmap_addr) {
796 0 : return true;
797 : }
798 0 : if (new_r->fd != old_r->fd) {
799 0 : return true;
800 : }
801 : }
802 :
803 0 : return false;
804 : }
805 :
806 : static int
807 0 : vhost_register_memtable_if_required(struct spdk_vhost_session *vsession, int vid)
808 : {
809 0 : struct rte_vhost_memory *new_mem;
810 :
811 0 : if (vhost_get_mem_table(vid, &new_mem) != 0) {
812 0 : SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
813 0 : return -1;
814 : }
815 :
816 0 : if (vsession->mem == NULL) {
817 0 : SPDK_INFOLOG(vhost, "Start to set memtable\n");
818 0 : vsession->mem = new_mem;
819 0 : vhost_session_mem_register(vsession->mem);
820 0 : return 0;
821 : }
822 :
823 0 : if (vhost_memory_changed(new_mem, vsession->mem)) {
824 0 : SPDK_INFOLOG(vhost, "Memtable is changed\n");
825 0 : vhost_session_mem_unregister(vsession->mem);
826 0 : free(vsession->mem);
827 :
828 0 : vsession->mem = new_mem;
829 0 : vhost_session_mem_register(vsession->mem);
830 0 : return 0;
831 :
832 : }
833 :
834 0 : SPDK_INFOLOG(vhost, "Memtable is unchanged\n");
835 0 : free(new_mem);
836 0 : return 0;
837 : }
838 :
839 : static int
840 0 : _stop_session(struct spdk_vhost_session *vsession)
841 : {
842 : struct spdk_vhost_virtqueue *q;
843 : int rc;
844 : uint16_t i;
845 :
846 0 : rc = vhost_user_wait_for_session_stop(vsession, SPDK_VHOST_SESSION_STOP_TIMEOUT_IN_SEC,
847 : "stop session");
848 0 : if (rc != 0) {
849 0 : SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
850 0 : return rc;
851 : }
852 :
853 0 : for (i = 0; i < vsession->max_queues; i++) {
854 0 : q = &vsession->virtqueue[i];
855 :
856 : /* vring.desc and vring.desc_packed are in a union struct
857 : * so q->vring.desc can replace q->vring.desc_packed.
858 : */
859 0 : if (q->vring.desc == NULL) {
860 0 : continue;
861 : }
862 :
863 : /* Packed virtqueues support up to 2^15 entries each
864 : * so the remaining bit can be used as the wrap counter.
865 : */
866 0 : if (q->packed.packed_ring) {
867 0 : q->last_avail_idx = q->last_avail_idx |
868 0 : ((uint16_t)q->packed.avail_phase << 15);
869 0 : q->last_used_idx = q->last_used_idx |
870 0 : ((uint16_t)q->packed.used_phase << 15);
871 : }
872 :
873 0 : rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
874 0 : q->vring.desc = NULL;
875 : }
876 0 : vsession->max_queues = 0;
877 :
878 0 : return 0;
879 : }
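
/*
 * Illustrative example of the bit-15 packing above: with last_avail_idx == 5
 * and avail_phase == 1, the value handed to rte_vhost_set_vring_base() is
 * 0x8005. On the next start, enable_device_vq() splits it back into
 * idx = 0x8005 & 0x7FFF = 5 and phase = 0x8005 >> 15 = 1.
 */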
880 :
881 : static int
882 0 : new_connection(int vid)
883 : {
884 : struct spdk_vhost_dev *vdev;
885 : struct spdk_vhost_user_dev *user_dev;
886 0 : struct spdk_vhost_session *vsession;
887 : size_t dev_dirname_len;
888 0 : char ifname[PATH_MAX];
889 : char *ctrlr_name;
890 :
891 0 : if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
892 0 : SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
893 0 : return -1;
894 : }
895 :
896 0 : ctrlr_name = &ifname[0];
897 0 : dev_dirname_len = strlen(g_vhost_user_dev_dirname);
898 0 : if (strncmp(ctrlr_name, g_vhost_user_dev_dirname, dev_dirname_len) == 0) {
899 0 : ctrlr_name += dev_dirname_len;
900 : }
901 :
902 0 : spdk_vhost_lock();
903 0 : vdev = spdk_vhost_dev_find(ctrlr_name);
904 0 : if (vdev == NULL) {
905 0 : SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
906 0 : spdk_vhost_unlock();
907 0 : return -1;
908 : }
909 0 : spdk_vhost_unlock();
910 :
911 0 : user_dev = to_user_dev(vdev);
912 0 : pthread_mutex_lock(&user_dev->lock);
913 0 : if (user_dev->registered == false) {
914 0 : SPDK_ERRLOG("Device %s is unregistered\n", ctrlr_name);
915 0 : pthread_mutex_unlock(&user_dev->lock);
916 0 : return -1;
917 : }
918 :
919 : /* We expect sessions inside user_dev->vsessions to be sorted in ascending
920 : * order with regard to vsession->id. For now we always set id = vsessions_num++
921 : * and append each session to the very end of the vsessions list.
922 : * This is required for vhost_user_dev_foreach_session() to work.
923 : */
924 0 : if (user_dev->vsessions_num == UINT_MAX) {
925 0 : pthread_mutex_unlock(&user_dev->lock);
926 0 : assert(false);
927 : return -EINVAL;
928 : }
929 :
930 0 : if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
931 0 : user_dev->user_backend->session_ctx_size)) {
932 0 : SPDK_ERRLOG("vsession alloc failed\n");
933 0 : pthread_mutex_unlock(&user_dev->lock);
934 0 : return -1;
935 : }
936 0 : memset(vsession, 0, sizeof(*vsession) + user_dev->user_backend->session_ctx_size);
937 :
938 0 : vsession->vdev = vdev;
939 0 : vsession->vid = vid;
940 0 : vsession->id = user_dev->vsessions_num++;
941 0 : vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
942 0 : if (vsession->name == NULL) {
943 0 : SPDK_ERRLOG("vsession alloc failed\n");
944 0 : free(vsession);
945 0 : pthread_mutex_unlock(&user_dev->lock);
946 0 : return -1;
947 : }
948 :
949 0 : if (sem_init(&vsession->dpdk_sem, 0, 0) != 0) {
950 0 : SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
951 0 : free(vsession->name);
952 0 : free(vsession);
953 0 : pthread_mutex_unlock(&user_dev->lock);
954 0 : return -1;
955 : }
956 :
957 0 : vsession->started = false;
958 0 : vsession->starting = false;
959 0 : vsession->next_stats_check_time = 0;
960 0 : vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
961 0 : spdk_get_ticks_hz() / 1000UL;
962 0 : TAILQ_INSERT_TAIL(&user_dev->vsessions, vsession, tailq);
963 0 : vhost_session_install_rte_compat_hooks(vsession);
964 0 : pthread_mutex_unlock(&user_dev->lock);
965 :
966 0 : return 0;
967 : }
968 :
969 : static void
970 0 : vhost_user_session_start(void *arg1)
971 : {
972 0 : struct spdk_vhost_session *vsession = arg1;
973 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
974 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
975 : const struct spdk_vhost_user_dev_backend *backend;
976 : int rc;
977 :
978 0 : SPDK_INFOLOG(vhost, "Starting new session for device %s with vid %d\n", vdev->name, vsession->vid);
979 0 : pthread_mutex_lock(&user_dev->lock);
980 0 : vsession->starting = false;
981 0 : backend = user_dev->user_backend;
982 0 : rc = backend->start_session(vdev, vsession, NULL);
983 0 : if (rc == 0) {
984 0 : vsession->started = true;
985 : }
986 0 : pthread_mutex_unlock(&user_dev->lock);
987 0 : }
988 :
989 : static int
990 0 : set_device_vq_callfd(struct spdk_vhost_session *vsession, uint16_t qid)
991 : {
992 : struct spdk_vhost_virtqueue *q;
993 :
994 0 : if (qid >= SPDK_VHOST_MAX_VQUEUES) {
995 0 : return -EINVAL;
996 : }
997 :
998 0 : q = &vsession->virtqueue[qid];
999 : /* vq isn't enabled yet */
1000 0 : if (q->vring_idx != qid) {
1001 0 : return 0;
1002 : }
1003 :
1004 : /* vring.desc and vring.desc_packed are in a union struct
1005 : * so q->vring.desc can replace q->vring.desc_packed.
1006 : */
1007 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1008 0 : return 0;
1009 : }
1010 :
1011 : /*
1012 : * Not sure right now, but this looks like some kind of QEMU bug: guest IO
1013 : * might be frozen without kicking all queues after live-migration. It looks like
1014 : * the previous vhost instance failed to effectively deliver all interrupts before
1015 : * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts
1016 : * should be ignored by guest virtio driver.
1017 : *
1018 : * Tested on QEMU 2.10.91 and 2.11.50.
1019 : *
1020 : * Make sure a successful call of
1021 : * `rte_vhost_vring_call` will happen
1022 : * after starting the device.
1023 : */
1024 0 : q->used_req_cnt += 1;
1025 :
1026 0 : return 0;
1027 : }
1028 :
1029 : static int
1030 0 : enable_device_vq(struct spdk_vhost_session *vsession, uint16_t qid)
1031 : {
1032 : struct spdk_vhost_virtqueue *q;
1033 : bool packed_ring;
1034 : const struct spdk_vhost_user_dev_backend *backend;
1035 : int rc;
1036 :
1037 0 : if (qid >= SPDK_VHOST_MAX_VQUEUES) {
1038 0 : return -EINVAL;
1039 : }
1040 :
1041 0 : q = &vsession->virtqueue[qid];
1042 0 : memset(q, 0, sizeof(*q));
1043 0 : packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);
1044 :
1045 0 : q->vsession = vsession;
1046 0 : q->vring_idx = -1;
1047 0 : if (rte_vhost_get_vhost_vring(vsession->vid, qid, &q->vring)) {
1048 0 : return 0;
1049 : }
1050 0 : q->vring_idx = qid;
1051 0 : rte_vhost_get_vhost_ring_inflight(vsession->vid, qid, &q->vring_inflight);
1052 :
1053 : /* vring.desc and vring.desc_packed are in a union struct
1054 : * so q->vring.desc can replace q->vring.desc_packed.
1055 : */
1056 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1057 0 : return 0;
1058 : }
1059 :
1060 0 : if (rte_vhost_get_vring_base(vsession->vid, qid, &q->last_avail_idx, &q->last_used_idx)) {
1061 0 : q->vring.desc = NULL;
1062 0 : return 0;
1063 : }
1064 :
1065 0 : backend = to_user_dev(vsession->vdev)->user_backend;
1066 0 : rc = backend->alloc_vq_tasks(vsession, qid);
1067 0 : if (rc) {
1068 0 : return rc;
1069 : }
1070 :
1071 : /*
1072 : * This shouldn't harm guest since spurious interrupts should be ignored by
1073 : * guest virtio driver.
1074 : *
1075 : * Make sure a successful call of `rte_vhost_vring_call` will happen after
1076 : * restarting the device.
1077 : */
1078 0 : if (vsession->needs_restart) {
1079 0 : q->used_req_cnt += 1;
1080 : }
1081 :
1082 0 : if (packed_ring) {
1083 : /* Since the packed ring flag is already negotiated between SPDK and the VM, the VM
1084 : * doesn't restore `last_avail_idx` and `last_used_idx` for packed ring, so use the
1085 : * inflight mem to restore the `last_avail_idx` and `last_used_idx`.
1086 : */
1087 0 : rte_vhost_get_vring_base_from_inflight(vsession->vid, qid, &q->last_avail_idx,
1088 : &q->last_used_idx);
1089 :
1090 : /* Packed virtqueues support up to 2^15 entries each
1091 : * so the remaining bit can be used as the wrap counter.
1092 : */
1093 0 : q->packed.avail_phase = q->last_avail_idx >> 15;
1094 0 : q->last_avail_idx = q->last_avail_idx & 0x7FFF;
1095 0 : q->packed.used_phase = q->last_used_idx >> 15;
1096 0 : q->last_used_idx = q->last_used_idx & 0x7FFF;
1097 :
1098 0 : if (!spdk_interrupt_mode_is_enabled()) {
1099 : /* Disable I/O submission notifications, we'll be polling. */
1100 0 : q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
1101 : } else {
1102 : /* Enable I/O submission notifications, we'll be interrupting. */
1103 0 : q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
1104 : }
1105 : } else {
1106 0 : if (!spdk_interrupt_mode_is_enabled()) {
1107 : /* Disable I/O submission notifications, we'll be polling. */
1108 0 : q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
1109 : } else {
1110 : /* Enable I/O submission notifications, we'll be interrupting. */
1111 0 : q->vring.used->flags = 0;
1112 : }
1113 : }
1114 :
1115 0 : if (backend->enable_vq) {
1116 0 : rc = backend->enable_vq(vsession, q);
1117 0 : if (rc) {
1118 0 : return rc;
1119 : }
1120 : }
1121 :
1122 0 : q->packed.packed_ring = packed_ring;
1123 0 : vsession->max_queues = spdk_max(vsession->max_queues, qid + 1);
1124 :
1125 0 : return 0;
1126 : }
1127 :
1128 : static int
1129 0 : start_device(int vid)
1130 : {
1131 : struct spdk_vhost_dev *vdev;
1132 : struct spdk_vhost_session *vsession;
1133 : struct spdk_vhost_user_dev *user_dev;
1134 0 : int rc = 0;
1135 :
1136 0 : vsession = vhost_session_find_by_vid(vid);
1137 0 : if (vsession == NULL) {
1138 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1139 0 : return -1;
1140 : }
1141 0 : vdev = vsession->vdev;
1142 0 : user_dev = to_user_dev(vdev);
1143 :
1144 0 : pthread_mutex_lock(&user_dev->lock);
1145 0 : if (vsession->started) {
1146 : /* already started, nothing to do */
1147 0 : goto out;
1148 : }
1149 :
1150 0 : if (!vsession->mem) {
1151 0 : rc = -1;
1152 0 : SPDK_ERRLOG("Session %s doesn't set memory table yet\n", vsession->name);
1153 0 : goto out;
1154 : }
1155 :
1156 0 : vsession->starting = true;
1157 0 : SPDK_INFOLOG(vhost, "Session %s is scheduled to start\n", vsession->name);
1158 0 : vhost_user_session_set_coalescing(vdev, vsession, NULL);
1159 0 : spdk_thread_send_msg(vdev->thread, vhost_user_session_start, vsession);
1160 :
1161 0 : out:
1162 0 : pthread_mutex_unlock(&user_dev->lock);
1163 0 : return rc;
1164 : }
1165 :
1166 : static void
1167 0 : stop_device(int vid)
1168 : {
1169 : struct spdk_vhost_session *vsession;
1170 : struct spdk_vhost_user_dev *user_dev;
1171 :
1172 0 : vsession = vhost_session_find_by_vid(vid);
1173 0 : if (vsession == NULL) {
1174 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1175 0 : return;
1176 : }
1177 0 : user_dev = to_user_dev(vsession->vdev);
1178 :
1179 0 : pthread_mutex_lock(&user_dev->lock);
1180 0 : if (!vsession->started && !vsession->starting) {
1181 0 : pthread_mutex_unlock(&user_dev->lock);
1182 : /* already stopped, nothing to do */
1183 0 : return;
1184 : }
1185 :
1186 0 : _stop_session(vsession);
1187 0 : pthread_mutex_unlock(&user_dev->lock);
1188 : }
1189 :
1190 : static void
1191 0 : destroy_connection(int vid)
1192 : {
1193 : struct spdk_vhost_session *vsession;
1194 : struct spdk_vhost_user_dev *user_dev;
1195 :
1196 0 : vsession = vhost_session_find_by_vid(vid);
1197 0 : if (vsession == NULL) {
1198 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1199 0 : return;
1200 : }
1201 0 : user_dev = to_user_dev(vsession->vdev);
1202 :
1203 0 : pthread_mutex_lock(&user_dev->lock);
1204 0 : if (vsession->started || vsession->starting) {
1205 0 : if (_stop_session(vsession) != 0) {
1206 0 : pthread_mutex_unlock(&user_dev->lock);
1207 0 : return;
1208 : }
1209 : }
1210 :
1211 0 : if (vsession->mem) {
1212 0 : vhost_session_mem_unregister(vsession->mem);
1213 0 : free(vsession->mem);
1214 : }
1215 :
1216 0 : TAILQ_REMOVE(&to_user_dev(vsession->vdev)->vsessions, vsession, tailq);
1217 0 : sem_destroy(&vsession->dpdk_sem);
1218 0 : free(vsession->name);
1219 0 : free(vsession);
1220 0 : pthread_mutex_unlock(&user_dev->lock);
1221 : }
1222 :
1223 : static const struct rte_vhost_device_ops g_spdk_vhost_ops = {
1224 : .new_device = start_device,
1225 : .destroy_device = stop_device,
1226 : .new_connection = new_connection,
1227 : .destroy_connection = destroy_connection,
1228 : };
1229 :
1230 : static struct spdk_vhost_session *
1231 0 : vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
1232 : {
1233 : struct spdk_vhost_session *vsession;
1234 :
1235 0 : TAILQ_FOREACH(vsession, &to_user_dev(vdev)->vsessions, tailq) {
1236 0 : if (vsession->id == id) {
1237 0 : return vsession;
1238 : }
1239 : }
1240 :
1241 0 : return NULL;
1242 : }
1243 :
1244 : struct spdk_vhost_session *
1245 2 : vhost_session_find_by_vid(int vid)
1246 : {
1247 : struct spdk_vhost_dev *vdev;
1248 : struct spdk_vhost_session *vsession;
1249 : struct spdk_vhost_user_dev *user_dev;
1250 :
1251 2 : spdk_vhost_lock();
1252 3 : for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
1253 1 : vdev = spdk_vhost_dev_next(vdev)) {
1254 2 : user_dev = to_user_dev(vdev);
1255 :
1256 2 : pthread_mutex_lock(&user_dev->lock);
1257 3 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1258 2 : if (vsession->vid == vid) {
1259 1 : pthread_mutex_unlock(&user_dev->lock);
1260 1 : spdk_vhost_unlock();
1261 1 : return vsession;
1262 : }
1263 : }
1264 1 : pthread_mutex_unlock(&user_dev->lock);
1265 : }
1266 1 : spdk_vhost_unlock();
1267 :
1268 1 : return NULL;
1269 : }
1270 :
1271 : static void
1272 0 : vhost_session_wait_for_semaphore(struct spdk_vhost_session *vsession, int timeout_sec,
1273 : const char *errmsg)
1274 : {
1275 0 : struct timespec timeout;
1276 : int rc;
1277 :
1278 0 : clock_gettime(CLOCK_REALTIME, &timeout);
1279 0 : timeout.tv_sec += timeout_sec;
1280 0 : rc = sem_timedwait(&vsession->dpdk_sem, &timeout);
1281 0 : if (rc != 0) {
1282 0 : SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
1283 0 : sem_wait(&vsession->dpdk_sem);
1284 : }
1285 0 : }
1286 :
1287 : void
1288 0 : vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response)
1289 : {
1290 0 : if (response == 0) {
1291 0 : vsession->started = false;
1292 : }
1293 :
1294 0 : vsession->dpdk_response = response;
1295 0 : sem_post(&vsession->dpdk_sem);
1296 0 : }
1297 :
1298 : static void
1299 0 : vhost_user_session_stop_event(void *arg1)
1300 : {
1301 0 : struct vhost_session_fn_ctx *ctx = arg1;
1302 0 : struct spdk_vhost_dev *vdev = ctx->vdev;
1303 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1304 : struct spdk_vhost_session *vsession;
1305 :
1306 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1307 0 : spdk_thread_send_msg(spdk_get_thread(), vhost_user_session_stop_event, arg1);
1308 0 : return;
1309 : }
1310 :
1311 0 : vsession = vhost_session_find_by_id(vdev, ctx->vsession_id);
1312 0 : user_dev->user_backend->stop_session(vdev, vsession, NULL);
1313 0 : pthread_mutex_unlock(&user_dev->lock);
1314 : }
1315 :
1316 : static int
1317 0 : vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
1318 : unsigned timeout_sec, const char *errmsg)
1319 : {
1320 0 : struct vhost_session_fn_ctx ev_ctx = {0};
1321 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
1322 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1323 :
1324 0 : ev_ctx.vdev = vdev;
1325 0 : ev_ctx.vsession_id = vsession->id;
1326 :
1327 0 : spdk_thread_send_msg(vdev->thread, vhost_user_session_stop_event, &ev_ctx);
1328 :
1329 0 : pthread_mutex_unlock(&user_dev->lock);
1330 0 : vhost_session_wait_for_semaphore(vsession, timeout_sec, errmsg);
1331 0 : pthread_mutex_lock(&user_dev->lock);
1332 :
1333 0 : return vsession->dpdk_response;
1334 : }
1335 :
1336 : static void
1337 0 : foreach_session_finish_cb(void *arg1)
1338 : {
1339 0 : struct vhost_session_fn_ctx *ev_ctx = arg1;
1340 0 : struct spdk_vhost_dev *vdev = ev_ctx->vdev;
1341 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1342 :
1343 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1344 0 : spdk_thread_send_msg(spdk_get_thread(),
1345 : foreach_session_finish_cb, arg1);
1346 0 : return;
1347 : }
1348 :
1349 0 : assert(user_dev->pending_async_op_num > 0);
1350 0 : user_dev->pending_async_op_num--;
1351 0 : if (ev_ctx->cpl_fn != NULL) {
1352 0 : ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
1353 : }
1354 :
1355 0 : pthread_mutex_unlock(&user_dev->lock);
1356 0 : free(ev_ctx);
1357 : }
1358 :
1359 : static void
1360 0 : foreach_session(void *arg1)
1361 : {
1362 0 : struct vhost_session_fn_ctx *ev_ctx = arg1;
1363 0 : struct spdk_vhost_dev *vdev = ev_ctx->vdev;
1364 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1365 : struct spdk_vhost_session *vsession;
1366 : int rc;
1367 :
1368 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1369 0 : spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
1370 0 : return;
1371 : }
1372 :
1373 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1374 0 : rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
1375 0 : if (rc < 0) {
1376 0 : goto out;
1377 : }
1378 : }
1379 :
1380 0 : out:
1381 0 : pthread_mutex_unlock(&user_dev->lock);
1382 0 : spdk_thread_send_msg(g_vhost_user_init_thread, foreach_session_finish_cb, arg1);
1383 : }
1384 :
1385 : void
1386 0 : vhost_user_dev_foreach_session(struct spdk_vhost_dev *vdev,
1387 : spdk_vhost_session_fn fn,
1388 : spdk_vhost_dev_fn cpl_fn,
1389 : void *arg)
1390 : {
1391 : struct vhost_session_fn_ctx *ev_ctx;
1392 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1393 :
1394 0 : ev_ctx = calloc(1, sizeof(*ev_ctx));
1395 0 : if (ev_ctx == NULL) {
1396 0 : SPDK_ERRLOG("Failed to alloc vhost event.\n");
1397 0 : assert(false);
1398 : return;
1399 : }
1400 :
1401 0 : ev_ctx->vdev = vdev;
1402 0 : ev_ctx->cb_fn = fn;
1403 0 : ev_ctx->cpl_fn = cpl_fn;
1404 0 : ev_ctx->user_ctx = arg;
1405 :
1406 0 : pthread_mutex_lock(&user_dev->lock);
1407 0 : assert(user_dev->pending_async_op_num < UINT32_MAX);
1408 0 : user_dev->pending_async_op_num++;
1409 0 : pthread_mutex_unlock(&user_dev->lock);
1410 :
1411 0 : spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
1412 : }
1413 :
1414 : void
1415 0 : vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession, bool interrupt_mode)
1416 : {
1417 : uint16_t i;
1418 0 : int rc = 0;
1419 :
1420 0 : for (i = 0; i < vsession->max_queues; i++) {
1421 0 : struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];
1422 0 : uint64_t num_events = 1;
1423 :
1424 : /* vring.desc and vring.desc_packed are in a union struct
1425 : * so q->vring.desc can replace q->vring.desc_packed.
1426 : */
1427 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1428 0 : continue;
1429 : }
1430 :
1431 0 : if (interrupt_mode) {
1432 :
1433 : /* In case of a race condition, always kick the vring when switching to interrupt mode */
1434 0 : rc = write(q->vring.kickfd, &num_events, sizeof(num_events));
1435 0 : if (rc < 0) {
1436 0 : SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
1437 : }
1438 : }
1439 : }
1440 0 : }
1441 :
1442 : static int
1443 0 : extern_vhost_pre_msg_handler(int vid, void *_msg)
1444 : {
1445 0 : struct vhost_user_msg *msg = _msg;
1446 : struct spdk_vhost_session *vsession;
1447 : struct spdk_vhost_user_dev *user_dev;
1448 :
1449 0 : vsession = vhost_session_find_by_vid(vid);
1450 0 : if (vsession == NULL) {
1451 0 : SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
1452 0 : assert(false);
1453 : return RTE_VHOST_MSG_RESULT_ERR;
1454 : }
1455 0 : user_dev = to_user_dev(vsession->vdev);
1456 :
1457 0 : switch (msg->request) {
1458 0 : case VHOST_USER_GET_VRING_BASE:
1459 0 : pthread_mutex_lock(&user_dev->lock);
1460 0 : if (vsession->started || vsession->starting) {
1461 0 : pthread_mutex_unlock(&user_dev->lock);
1462 0 : g_spdk_vhost_ops.destroy_device(vid);
1463 0 : break;
1464 : }
1465 0 : pthread_mutex_unlock(&user_dev->lock);
1466 0 : break;
1467 0 : case VHOST_USER_SET_MEM_TABLE:
1468 0 : pthread_mutex_lock(&user_dev->lock);
1469 0 : if (vsession->started || vsession->starting) {
1470 0 : vsession->original_max_queues = vsession->max_queues;
1471 0 : pthread_mutex_unlock(&user_dev->lock);
1472 0 : g_spdk_vhost_ops.destroy_device(vid);
1473 0 : vsession->needs_restart = true;
1474 0 : break;
1475 : }
1476 0 : pthread_mutex_unlock(&user_dev->lock);
1477 0 : break;
1478 0 : case VHOST_USER_GET_CONFIG: {
1479 0 : int rc = 0;
1480 :
1481 0 : pthread_mutex_lock(&user_dev->lock);
1482 0 : if (vsession->vdev->backend->vhost_get_config) {
1483 0 : rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
1484 0 : msg->payload.cfg.region, msg->payload.cfg.size);
1485 0 : if (rc != 0) {
1486 0 : msg->size = 0;
1487 : }
1488 : }
1489 0 : pthread_mutex_unlock(&user_dev->lock);
1490 :
1491 0 : return RTE_VHOST_MSG_RESULT_REPLY;
1492 : }
1493 0 : case VHOST_USER_SET_CONFIG: {
1494 0 : int rc = 0;
1495 :
1496 0 : pthread_mutex_lock(&user_dev->lock);
1497 0 : if (vsession->vdev->backend->vhost_set_config) {
1498 0 : rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
1499 0 : msg->payload.cfg.region, msg->payload.cfg.offset,
1500 : msg->payload.cfg.size, msg->payload.cfg.flags);
1501 : }
1502 0 : pthread_mutex_unlock(&user_dev->lock);
1503 :
1504 0 : return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
1505 : }
1506 0 : default:
1507 0 : break;
1508 : }
1509 :
1510 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1511 : }
1512 :
1513 : static int
1514 0 : extern_vhost_post_msg_handler(int vid, void *_msg)
1515 : {
1516 0 : struct vhost_user_msg *msg = _msg;
1517 : struct spdk_vhost_session *vsession;
1518 : struct spdk_vhost_user_dev *user_dev;
1519 : uint16_t qid;
1520 : int rc;
1521 :
1522 0 : vsession = vhost_session_find_by_vid(vid);
1523 0 : if (vsession == NULL) {
1524 0 : SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
1525 0 : assert(false);
1526 : return RTE_VHOST_MSG_RESULT_ERR;
1527 : }
1528 0 : user_dev = to_user_dev(vsession->vdev);
1529 :
1530 0 : switch (msg->request) {
1531 0 : case VHOST_USER_SET_FEATURES:
1532 0 : rc = vhost_get_negotiated_features(vid, &vsession->negotiated_features);
1533 0 : if (rc) {
1534 0 : SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
1535 0 : return RTE_VHOST_MSG_RESULT_ERR;
1536 : }
1537 0 : break;
1538 0 : case VHOST_USER_SET_VRING_CALL:
1539 0 : qid = ((uint16_t)msg->payload.u64) & VHOST_USER_VRING_IDX_MASK;
1540 0 : rc = set_device_vq_callfd(vsession, qid);
1541 0 : if (rc) {
1542 0 : return RTE_VHOST_MSG_RESULT_ERR;
1543 : }
1544 0 : break;
1545 0 : case VHOST_USER_SET_VRING_KICK:
1546 0 : qid = ((uint16_t)msg->payload.u64) & VHOST_USER_VRING_IDX_MASK;
1547 0 : rc = enable_device_vq(vsession, qid);
1548 0 : if (rc) {
1549 0 : return RTE_VHOST_MSG_RESULT_ERR;
1550 : }
1551 :
1552 : /* vhost-user spec tells us to start polling a queue after receiving
1553 : * its SET_VRING_KICK message. Let's do it!
1554 : */
1555 0 : pthread_mutex_lock(&user_dev->lock);
1556 0 : if (!vsession->started && !vsession->starting) {
1557 0 : pthread_mutex_unlock(&user_dev->lock);
1558 0 : g_spdk_vhost_ops.new_device(vid);
1559 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1560 : }
1561 0 : pthread_mutex_unlock(&user_dev->lock);
1562 0 : break;
1563 0 : case VHOST_USER_SET_MEM_TABLE:
1564 0 : vhost_register_memtable_if_required(vsession, vid);
1565 0 : pthread_mutex_lock(&user_dev->lock);
1566 0 : if (vsession->needs_restart) {
1567 0 : pthread_mutex_unlock(&user_dev->lock);
1568 0 : for (qid = 0; qid < vsession->original_max_queues; qid++) {
1569 0 : enable_device_vq(vsession, qid);
1570 : }
1571 0 : vsession->original_max_queues = 0;
1572 0 : vsession->needs_restart = false;
1573 0 : g_spdk_vhost_ops.new_device(vid);
1574 0 : break;
1575 : }
1576 0 : pthread_mutex_unlock(&user_dev->lock);
1577 0 : break;
1578 0 : default:
1579 0 : break;
1580 : }
1581 :
1582 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1583 : }
1584 :
1585 : struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
1586 : .pre_msg_handle = extern_vhost_pre_msg_handler,
1587 : .post_msg_handle = extern_vhost_post_msg_handler,
1588 : };
1589 :
1590 : void
1591 0 : vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
1592 : {
1593 : int rc;
1594 :
1595 0 : rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
1596 0 : if (rc != 0) {
1597 0 : SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
1598 : vsession->vid);
1599 0 : return;
1600 : }
1601 : }
1602 :
1603 : int
1604 9 : vhost_register_unix_socket(const char *path, const char *ctrl_name,
1605 : uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
1606 : {
1607 9 : struct stat file_stat;
1608 9 : uint64_t features = 0;
1609 9 : uint64_t flags = 0;
1610 :
1611 : /* Register vhost driver to handle vhost messages. */
1612 9 : if (stat(path, &file_stat) != -1) {
1613 0 : if (!S_ISSOCK(file_stat.st_mode)) {
1614 0 : SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
1615 : "The file already exists and is not a socket.\n",
1616 : path);
1617 0 : return -EIO;
1618 0 : } else if (unlink(path) != 0) {
1619 0 : SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
1620 : "The socket already exists and failed to unlink.\n",
1621 : "The socket already exists and could not be unlinked.\n",
1622 0 : return -EIO;
1623 : }
1624 : }
1625 :
1626 9 : flags = spdk_iommu_is_enabled() ? 0 : RTE_VHOST_USER_ASYNC_COPY;
1627 9 : if (rte_vhost_driver_register(path, flags) != 0) {
1628 0 : SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
1629 0 : SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
1630 0 : return -EIO;
1631 : }
1632 18 : if (rte_vhost_driver_set_features(path, virtio_features) ||
1633 9 : rte_vhost_driver_disable_features(path, disabled_features)) {
1634 0 : SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);
1635 :
1636 0 : rte_vhost_driver_unregister(path);
1637 0 : return -EIO;
1638 : }
1639 :
1640 9 : if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
1641 0 : rte_vhost_driver_unregister(path);
1642 0 : SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
1643 0 : return -EIO;
1644 : }
1645 :
1646 9 : rte_vhost_driver_get_protocol_features(path, &features);
1647 9 : features |= protocol_features;
1648 9 : rte_vhost_driver_set_protocol_features(path, features);
1649 :
1650 9 : if (rte_vhost_driver_start(path) != 0) {
1651 0 : SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
1652 : ctrl_name, errno, spdk_strerror(errno));
1653 0 : rte_vhost_driver_unregister(path);
1654 0 : return -EIO;
1655 : }
1656 :
1657 9 : return 0;
1658 : }
1659 :
1660 : int
1661 0 : vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
1662 : {
1663 0 : return rte_vhost_get_mem_table(vid, mem);
1664 : }
1665 :
1666 : int
1667 9 : vhost_driver_unregister(const char *path)
1668 : {
1669 9 : return rte_vhost_driver_unregister(path);
1670 : }
1671 :
1672 : int
1673 0 : vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
1674 : {
1675 0 : return rte_vhost_get_negotiated_features(vid, negotiated_features);
1676 : }
1677 :
1678 : int
1679 9 : vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us,
1680 : uint32_t iops_threshold)
1681 : {
1682 9 : uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
1683 9 : uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
1684 :
1685 9 : if (delay_time_base >= UINT32_MAX) {
1686 0 : SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
1687 0 : return -EINVAL;
1688 9 : } else if (io_rate == 0) {
1689 0 : SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
1690 : 1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
1691 0 : return -EINVAL;
1692 : }
1693 :
1694 9 : user_dev->coalescing_delay_us = delay_base_us;
1695 9 : user_dev->coalescing_iops_threshold = iops_threshold;
1696 9 : return 0;
1697 : }
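
/*
 * Worked example with assumed values (2.5 GHz tick rate, 10 ms stats interval):
 *   delay_base_us  = 100    =>  delay_time_base = 100 * 2500000000 / 1000000 = 250000 ticks
 *   iops_threshold = 60000  =>  io_rate = 60000 * 10 / 1000 = 600 requests per interval
 * A non-zero iops_threshold below 1000 / SPDK_VHOST_STATS_CHECK_INTERVAL_MS
 * would truncate io_rate to 0 and is rejected with -EINVAL.
 */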
1698 :
1699 : int
1700 0 : vhost_user_session_set_coalescing(struct spdk_vhost_dev *vdev,
1701 : struct spdk_vhost_session *vsession, void *ctx)
1702 : {
1703 0 : vsession->coalescing_delay_time_base =
1704 0 : to_user_dev(vdev)->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
1705 0 : vsession->coalescing_io_rate_threshold =
1706 0 : to_user_dev(vdev)->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
1707 0 : return 0;
1708 : }
1709 :
1710 : int
1711 0 : vhost_user_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
1712 : uint32_t iops_threshold)
1713 : {
1714 : int rc;
1715 :
1716 0 : rc = vhost_user_dev_set_coalescing(to_user_dev(vdev), delay_base_us, iops_threshold);
1717 0 : if (rc != 0) {
1718 0 : return rc;
1719 : }
1720 :
1721 0 : vhost_user_dev_foreach_session(vdev, vhost_user_session_set_coalescing, NULL, NULL);
1722 :
1723 0 : return 0;
1724 : }
1725 :
1726 : void
1727 0 : vhost_user_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
1728 : uint32_t *iops_threshold)
1729 : {
1730 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1731 :
1732 0 : if (delay_base_us) {
1733 0 : *delay_base_us = user_dev->coalescing_delay_us;
1734 : }
1735 :
1736 0 : if (iops_threshold) {
1737 0 : *iops_threshold = user_dev->coalescing_iops_threshold;
1738 : }
1739 0 : }
1740 :
1741 : int
1742 0 : spdk_vhost_set_socket_path(const char *basename)
1743 : {
1744 : int ret;
1745 :
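 : /* Two bytes are held back from the buffer size so that a trailing '/' and the
 :  * NUL terminator can still be appended below if needed. */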
1746 0 : if (basename && strlen(basename) > 0) {
1747 0 : ret = snprintf(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 2, "%s", basename);
1748 0 : if (ret <= 0) {
1749 0 : return -EINVAL;
1750 : }
1751 0 : if ((size_t)ret >= sizeof(g_vhost_user_dev_dirname) - 2) {
1753 1752 0 : SPDK_ERRLOG("Char dev dir path of length %d is too long\n", ret);
1753 0 : return -EINVAL;
1754 : }
1755 :
1756 0 : if (g_vhost_user_dev_dirname[ret - 1] != '/') {
1757 0 : g_vhost_user_dev_dirname[ret] = '/';
1758 0 : g_vhost_user_dev_dirname[ret + 1] = '\0';
1759 : }
1760 : }
1761 :
1762 0 : return 0;
1763 : }
1764 :
1765 : static void
1766 0 : vhost_dev_thread_exit(void *arg1)
1767 : {
1768 0 : spdk_thread_exit(spdk_get_thread());
1769 0 : }
1770 :
1771 : static bool g_vhost_user_started = false;
1772 :
1773 : int
1774 10 : vhost_user_dev_init(struct spdk_vhost_dev *vdev, const char *name,
1775 : struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend)
1776 : {
1777 10 : char path[PATH_MAX];
1778 : struct spdk_vhost_user_dev *user_dev;
1779 :
1780 10 : if (snprintf(path, sizeof(path), "%s%s", g_vhost_user_dev_dirname, name) >= (int)sizeof(path)) {
1781 1 : SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n",
1782 : name, g_vhost_user_dev_dirname, name);
1783 1 : return -EINVAL;
1784 : }
1785 :
1786 9 : vdev->path = strdup(path);
1787 9 : if (vdev->path == NULL) {
1788 0 : return -EIO;
1789 : }
1790 :
1791 9 : user_dev = calloc(1, sizeof(*user_dev));
1792 9 : if (user_dev == NULL) {
1793 0 : free(vdev->path);
1794 0 : return -ENOMEM;
1795 : }
1796 9 : vdev->ctxt = user_dev;
1797 :
1798 9 : vdev->thread = spdk_thread_create(vdev->name, cpumask);
1799 9 : if (vdev->thread == NULL) {
1800 0 : free(user_dev);
1801 0 : free(vdev->path);
1802 0 : SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
1803 0 : return -EIO;
1804 : }
1805 :
1806 9 : user_dev->user_backend = user_backend;
1807 9 : user_dev->vdev = vdev;
1808 9 : user_dev->registered = true;
1809 9 : TAILQ_INIT(&user_dev->vsessions);
1810 9 : pthread_mutex_init(&user_dev->lock, NULL);
1811 :
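 : /* Apply the default coalescing settings; they may be changed later, e.g. via
 :  * vhost_user_set_coalescing(). */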
1812 9 : vhost_user_dev_set_coalescing(user_dev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
1813 : SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
1814 :
1815 9 : return 0;
1816 : }
1817 :
1818 : int
1819 9 : vhost_user_dev_start(struct spdk_vhost_dev *vdev)
1820 : {
1821 9 : return vhost_register_unix_socket(vdev->path, vdev->name, vdev->virtio_features,
1822 : vdev->disabled_features,
1823 : vdev->protocol_features);
1824 : }
1825 :
1826 : int
1827 10 : vhost_user_dev_create(struct spdk_vhost_dev *vdev, const char *name, struct spdk_cpuset *cpumask,
1828 : const struct spdk_vhost_user_dev_backend *user_backend, bool delay)
1829 : {
1830 : int rc;
1831 : struct spdk_vhost_user_dev *user_dev;
1832 :
1833 10 : rc = vhost_user_dev_init(vdev, name, cpumask, user_backend);
1834 10 : if (rc != 0) {
1835 1 : return rc;
1836 : }
1837 :
1838 9 : if (delay == false) {
1839 9 : rc = vhost_user_dev_start(vdev);
1840 9 : if (rc != 0) {
1841 0 : user_dev = to_user_dev(vdev);
1842 0 : spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
1843 0 : pthread_mutex_destroy(&user_dev->lock);
1844 0 : free(user_dev);
1845 0 : free(vdev->path);
1846 : }
1847 : }
1848 :
1849 9 : return rc;
1850 : }
1851 :
1852 : int
1853 10 : vhost_user_dev_unregister(struct spdk_vhost_dev *vdev)
1854 : {
1855 10 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1856 : struct spdk_vhost_session *vsession, *tmp_vsession;
1857 :
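 : /* Take the lock without blocking; if it is already held, report -EBUSY instead
 :  * of waiting. */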
1858 10 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1859 0 : return -EBUSY;
1860 : }
1861 :
1862 10 : if (user_dev->pending_async_op_num) {
1863 0 : pthread_mutex_unlock(&user_dev->lock);
1864 0 : return -EBUSY;
1865 : }
1866 :
1868 1867 : /* This is the case where the RPC call `vhost_delete_controller` is used while a VM is connected */
1868 10 : if (!TAILQ_EMPTY(&user_dev->vsessions) && g_vhost_user_started) {
1870 1869 1 : SPDK_ERRLOG("Controller %s still has a valid connection.\n", vdev->name);
1870 1 : pthread_mutex_unlock(&user_dev->lock);
1871 1 : return -EBUSY;
1872 : }
1873 :
1875 1874 : /* This is the case where the subsystem is being shut down while a VM is still
1876 1875 :  * connected; the VM should be stopped by the shutdown thread.
1877 1876 :  */
1877 9 : if (!g_vhost_user_started) {
1878 0 : TAILQ_FOREACH_SAFE(vsession, &user_dev->vsessions, tailq, tmp_vsession) {
1879 0 : assert(vsession->started == false);
1880 0 : TAILQ_REMOVE(&user_dev->vsessions, vsession, tailq);
1881 0 : if (vsession->mem) {
1882 0 : vhost_session_mem_unregister(vsession->mem);
1883 0 : free(vsession->mem);
1884 : }
1885 0 : sem_destroy(&vsession->dpdk_sem);
1886 0 : free(vsession->name);
1887 0 : free(vsession);
1888 : }
1889 : }
1890 :
1891 9 : user_dev->registered = false;
1892 9 : pthread_mutex_unlock(&user_dev->lock);
1893 :
1895 1894 : /* There are no valid connections now, and it is not an error if the domain
1896 1895 :  * socket has already been removed by the shutdown thread.
1897 1896 :  */
1897 9 : vhost_driver_unregister(vdev->path);
1898 :
1899 9 : spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
1900 9 : pthread_mutex_destroy(&user_dev->lock);
1901 :
1902 9 : free(user_dev);
1903 9 : free(vdev->path);
1904 :
1905 9 : return 0;
1906 : }
1907 :
1908 : int
1909 2 : vhost_user_init(void)
1910 : {
1911 : size_t len;
1912 :
1913 2 : if (g_vhost_user_started) {
1914 1 : return 0;
1915 : }
1916 :
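 : /* If no socket directory has been configured, fall back to the current working
 :  * directory and make sure the path ends with a '/'. */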
1917 1 : if (g_vhost_user_dev_dirname[0] == '\0') {
1918 1 : if (getcwd(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 1) == NULL) {
1919 0 : SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
1920 0 : return -1;
1921 : }
1922 :
1923 1 : len = strlen(g_vhost_user_dev_dirname);
1924 1 : if (g_vhost_user_dev_dirname[len - 1] != '/') {
1925 1 : g_vhost_user_dev_dirname[len] = '/';
1926 1 : g_vhost_user_dev_dirname[len + 1] = '\0';
1927 : }
1928 : }
1929 :
1930 1 : g_vhost_user_started = true;
1931 :
1932 1 : g_vhost_user_init_thread = spdk_get_thread();
1933 1 : assert(g_vhost_user_init_thread != NULL);
1934 :
1935 1 : return 0;
1936 : }
1937 :
1938 : static void
1939 1 : vhost_user_session_shutdown_on_init(void *vhost_cb)
1940 : {
1941 1 : spdk_vhost_fini_cb fn = vhost_cb;
1942 :
1943 1 : fn();
1944 1 : }
1945 :
1946 : static void *
1947 1 : vhost_user_session_shutdown(void *vhost_cb)
1948 : {
1949 1 : struct spdk_vhost_dev *vdev = NULL;
1950 : struct spdk_vhost_session *vsession;
1951 : struct spdk_vhost_user_dev *user_dev;
1952 : int ret;
1953 :
1954 1 : for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
1955 0 : vdev = spdk_vhost_dev_next(vdev)) {
1956 0 : user_dev = to_user_dev(vdev);
1957 0 : ret = 0;
1958 0 : pthread_mutex_lock(&user_dev->lock);
1959 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1960 0 : if (vsession->started || vsession->starting) {
1961 0 : ret += _stop_session(vsession);
1962 : }
1963 : }
1964 0 : pthread_mutex_unlock(&user_dev->lock);
1965 0 : if (ret == 0) {
1966 0 : vhost_driver_unregister(vdev->path);
1967 : }
1968 : }
1969 :
1970 1 : SPDK_INFOLOG(vhost, "Exiting\n");
1971 1 : spdk_thread_send_msg(g_vhost_user_init_thread, vhost_user_session_shutdown_on_init, vhost_cb);
1972 1 : return NULL;
1973 : }
1974 :
1975 : void
1976 2 : vhost_user_fini(spdk_vhost_fini_cb vhost_cb)
1977 : {
1978 2 : pthread_t tid;
1979 : int rc;
1980 :
1981 2 : if (!g_vhost_user_started) {
1982 1 : vhost_cb();
1983 1 : return;
1984 : }
1985 :
1986 1 : g_vhost_user_started = false;
1987 :
1989 1988 : /* The rte_vhost API for removing sockets is not asynchronous. Since it may call
1990 1989 :  * SPDK ops for stopping a device or removing a connection, we need to call it
1991 1990 :  * from a separate thread to avoid a deadlock.
1992 1991 :  */
1992 1 : rc = pthread_create(&tid, NULL, &vhost_user_session_shutdown, vhost_cb);
1993 1 : if (rc != 0) {
1994 0 : SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
1995 0 : abort();
1996 : }
1997 1 : pthread_detach(tid);
1998 : }
1999 :
2000 : void
2001 0 : vhost_session_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
2002 : {
2003 : struct spdk_vhost_session *vsession;
2004 : struct spdk_vhost_user_dev *user_dev;
2005 :
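 : /* Each session is written as one JSON object; the values below are purely
 :  * illustrative:
 :  *   {"vid": 0, "id": 0, "name": "vhost.0_s0", "started": true,
 :  *    "max_queues": 4, "inflight_task_cnt": 0}
 :  */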
2006 0 : user_dev = to_user_dev(vdev);
2007 0 : pthread_mutex_lock(&user_dev->lock);
2008 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
2009 0 : spdk_json_write_object_begin(w);
2010 0 : spdk_json_write_named_uint32(w, "vid", vsession->vid);
2011 0 : spdk_json_write_named_uint32(w, "id", vsession->id);
2012 0 : spdk_json_write_named_string(w, "name", vsession->name);
2013 0 : spdk_json_write_named_bool(w, "started", vsession->started);
2014 0 : spdk_json_write_named_uint32(w, "max_queues", vsession->max_queues);
2015 0 : spdk_json_write_named_uint32(w, "inflight_task_cnt", vsession->task_cnt);
2016 0 : spdk_json_write_object_end(w);
2017 : }
2018 0 : pthread_mutex_unlock(&user_dev->lock);
2019 0 : }