Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "spdk/stdinc.h"
7 : #include "spdk/likely.h"
8 :
9 : #include "spdk_internal/event.h"
10 : #include "spdk_internal/usdt.h"
11 :
12 : #include "spdk/log.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/env.h"
15 : #include "spdk/util.h"
16 : #include "spdk/scheduler.h"
17 : #include "spdk/string.h"
18 : #include "spdk/fd_group.h"
19 :
20 : #ifdef __linux__
21 : #include <sys/prctl.h>
22 : #include <sys/eventfd.h>
23 : #endif
24 :
25 : #ifdef __FreeBSD__
26 : #include <pthread_np.h>
27 : #endif
28 :
29 : #define SPDK_EVENT_BATCH_SIZE 8
30 :
31 : static struct spdk_reactor *g_reactors;
32 : static uint32_t g_reactor_count;
33 : static struct spdk_cpuset g_reactor_core_mask;
34 : static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED;
35 :
36 : static bool g_framework_context_switch_monitor_enabled = true;
37 :
38 : static struct spdk_mempool *g_spdk_event_mempool = NULL;
39 :
40 : TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
41 : = TAILQ_HEAD_INITIALIZER(g_scheduler_list);
42 :
43 : static struct spdk_scheduler *g_scheduler = NULL;
44 : static struct spdk_reactor *g_scheduling_reactor;
45 : bool g_scheduling_in_progress = false;
46 : static uint64_t g_scheduler_period = 0;
47 : static uint32_t g_scheduler_core_number;
48 : static struct spdk_scheduler_core_info *g_core_infos = NULL;
49 :
50 : TAILQ_HEAD(, spdk_governor) g_governor_list
51 : = TAILQ_HEAD_INITIALIZER(g_governor_list);
52 :
53 : static struct spdk_governor *g_governor = NULL;
54 :
55 : static int reactor_interrupt_init(struct spdk_reactor *reactor);
56 : static void reactor_interrupt_fini(struct spdk_reactor *reactor);
57 :
58 : static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER;
59 : static bool g_stopping_reactors = false;
60 :
61 : static struct spdk_scheduler *
62 5 : _scheduler_find(const char *name)
63 : {
64 : struct spdk_scheduler *tmp;
65 :
66 9 : TAILQ_FOREACH(tmp, &g_scheduler_list, link) {
67 7 : if (strcmp(name, tmp->name) == 0) {
68 3 : return tmp;
69 : }
70 : }
71 :
72 2 : return NULL;
73 : }
74 :
75 : int
76 3 : spdk_scheduler_set(const char *name)
77 : {
78 : struct spdk_scheduler *scheduler;
79 3 : int rc = 0;
80 :
81 : /* NULL scheduler was specifically requested */
82 3 : if (name == NULL) {
83 0 : if (g_scheduler) {
84 0 : g_scheduler->deinit();
85 : }
86 0 : g_scheduler = NULL;
87 0 : return 0;
88 : }
89 :
90 3 : scheduler = _scheduler_find(name);
91 3 : if (scheduler == NULL) {
92 0 : SPDK_ERRLOG("Requested scheduler is missing\n");
93 0 : return -EINVAL;
94 : }
95 :
96 3 : if (g_scheduler == scheduler) {
97 2 : return 0;
98 : }
99 :
100 1 : if (g_scheduler) {
101 0 : g_scheduler->deinit();
102 : }
103 :
104 1 : rc = scheduler->init();
105 1 : if (rc == 0) {
106 1 : g_scheduler = scheduler;
107 : } else {
108 : /* Could not switch to the new scheduler, so keep the old
109 : * one. We need to check if it wasn't NULL, and ->init() it again.
110 : */
111 0 : if (g_scheduler) {
112 0 : SPDK_ERRLOG("Could not ->init() '%s' scheduler, reverting to '%s'\n",
113 : name, g_scheduler->name);
114 0 : g_scheduler->init();
115 : } else {
116 0 : SPDK_ERRLOG("Could not ->init() '%s' scheduler.\n", name);
117 : }
118 : }
119 :
120 1 : return rc;
121 : }
122 :
123 : struct spdk_scheduler *
124 6 : spdk_scheduler_get(void)
125 : {
126 6 : return g_scheduler;
127 : }
128 :
129 : uint64_t
130 0 : spdk_scheduler_get_period(void)
131 : {
132 : /* Convert from ticks to microseconds */
133 0 : return (g_scheduler_period * SPDK_SEC_TO_USEC / spdk_get_ticks_hz());
134 : }
135 :
136 : void
137 0 : spdk_scheduler_set_period(uint64_t period)
138 : {
139 : /* Convert microseconds to ticks */
140 0 : g_scheduler_period = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
141 0 : }
142 :
143 : void
144 2 : spdk_scheduler_register(struct spdk_scheduler *scheduler)
145 : {
146 2 : if (_scheduler_find(scheduler->name)) {
147 0 : SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name);
148 0 : assert(false);
149 : return;
150 : }
151 :
152 2 : TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link);
153 : }
154 :
155 : uint32_t
156 0 : spdk_scheduler_get_scheduling_lcore(void)
157 : {
158 0 : return g_scheduling_reactor->lcore;
159 : }
160 :
/*
 * Initialize one reactor structure for core `lcore`: thread list, event
 * ring, and interrupt facilities.  Allocation failures assert rather than
 * return an error because a reactor without an event ring cannot function.
 */
static void
reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
{
	reactor->lcore = lcore;
	reactor->flags.is_valid = true;

	TAILQ_INIT(&reactor->threads);
	reactor->thread_count = 0;
	spdk_cpuset_zero(&reactor->notify_cpuset);

	/* Multi-producer/single-consumer: any core may enqueue events, only
	 * this reactor dequeues them.
	 */
	reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_SOCKET_ID_ANY);
	if (reactor->events == NULL) {
		SPDK_ERRLOG("Failed to allocate events ring\n");
		assert(false);
	}

	/* Always initialize interrupt facilities for reactor */
	if (reactor_interrupt_init(reactor) != 0) {
		/* Reactor interrupt facilities are necessary if setting app to interrupt mode. */
		if (spdk_interrupt_mode_is_enabled()) {
			SPDK_ERRLOG("Failed to prepare intr facilities\n");
			assert(false);
		}
		return;
	}

	/* If application runs with full interrupt ability,
	 * all reactors are going to run in interrupt mode.
	 */
	if (spdk_interrupt_mode_is_enabled()) {
		uint32_t i;

		/* Every reactor must notify this one via eventfd, since it
		 * will be sleeping in epoll rather than polling its ring.
		 */
		SPDK_ENV_FOREACH_CORE(i) {
			spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true);
		}
		reactor->in_interrupt = true;
	}
}
199 :
200 : struct spdk_reactor *
201 405 : spdk_reactor_get(uint32_t lcore)
202 : {
203 : struct spdk_reactor *reactor;
204 :
205 405 : if (g_reactors == NULL) {
206 0 : SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n");
207 0 : return NULL;
208 : }
209 :
210 405 : if (lcore >= g_reactor_count) {
211 0 : return NULL;
212 : }
213 :
214 405 : reactor = &g_reactors[lcore];
215 :
216 405 : if (reactor->flags.is_valid == false) {
217 0 : return NULL;
218 : }
219 :
220 405 : return reactor;
221 : }
222 :
223 : static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op);
224 : static bool reactor_thread_op_supported(enum spdk_thread_op op);
225 :
226 : int
227 9 : spdk_reactors_init(size_t msg_mempool_size)
228 : {
229 : struct spdk_reactor *reactor;
230 : int rc;
231 : uint32_t i, current_core;
232 9 : char mempool_name[32];
233 :
234 9 : snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
235 9 : g_spdk_event_mempool = spdk_mempool_create(mempool_name,
236 : 262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
237 : sizeof(struct spdk_event),
238 : SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
239 : SPDK_ENV_SOCKET_ID_ANY);
240 :
241 9 : if (g_spdk_event_mempool == NULL) {
242 0 : SPDK_ERRLOG("spdk_event_mempool creation failed\n");
243 0 : return -1;
244 : }
245 :
246 : /* struct spdk_reactor must be aligned on 64 byte boundary */
247 9 : g_reactor_count = spdk_env_get_last_core() + 1;
248 9 : rc = posix_memalign((void **)&g_reactors, 64,
249 : g_reactor_count * sizeof(struct spdk_reactor));
250 9 : if (rc != 0) {
251 0 : SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
252 : g_reactor_count);
253 0 : spdk_mempool_free(g_spdk_event_mempool);
254 0 : return -1;
255 : }
256 :
257 9 : g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos));
258 9 : if (g_core_infos == NULL) {
259 0 : SPDK_ERRLOG("Could not allocate memory for g_core_infos\n");
260 0 : spdk_mempool_free(g_spdk_event_mempool);
261 0 : free(g_reactors);
262 0 : return -ENOMEM;
263 : }
264 :
265 9 : memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor));
266 :
267 9 : rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported,
268 : sizeof(struct spdk_lw_thread), msg_mempool_size);
269 9 : if (rc != 0) {
270 0 : SPDK_ERRLOG("Initialize spdk thread lib failed\n");
271 0 : spdk_mempool_free(g_spdk_event_mempool);
272 0 : free(g_reactors);
273 0 : free(g_core_infos);
274 0 : return rc;
275 : }
276 :
277 35 : SPDK_ENV_FOREACH_CORE(i) {
278 26 : reactor_construct(&g_reactors[i], i);
279 : }
280 :
281 9 : current_core = spdk_env_get_current_core();
282 9 : reactor = spdk_reactor_get(current_core);
283 9 : assert(reactor != NULL);
284 9 : g_scheduling_reactor = reactor;
285 :
286 9 : g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
287 :
288 9 : return 0;
289 : }
290 :
/*
 * Tear down all reactors and free global event-framework state.  Must only
 * run after every reactor has stopped and drained its threads (asserted via
 * thread_count == 0 below).
 */
void
spdk_reactors_fini(void)
{
	uint32_t i;
	struct spdk_reactor *reactor;

	/* Nothing to release if spdk_reactors_init() never completed. */
	if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) {
		return;
	}

	spdk_thread_lib_fini();

	SPDK_ENV_FOREACH_CORE(i) {
		reactor = spdk_reactor_get(i);
		assert(reactor != NULL);
		assert(reactor->thread_count == 0);
		if (reactor->events != NULL) {
			spdk_ring_free(reactor->events);
		}

		reactor_interrupt_fini(reactor);

		/* Release any per-core scheduler data left from the last round. */
		if (g_core_infos != NULL) {
			free(g_core_infos[i].thread_infos);
		}
	}

	spdk_mempool_free(g_spdk_event_mempool);

	free(g_reactors);
	g_reactors = NULL;
	free(g_core_infos);
	g_core_infos = NULL;
}
325 :
326 : static void _reactor_set_interrupt_mode(void *arg1, void *arg2);
327 :
328 : static void
329 4 : _reactor_set_notify_cpuset(void *arg1, void *arg2)
330 : {
331 4 : struct spdk_reactor *target = arg1;
332 4 : struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());
333 :
334 4 : assert(reactor != NULL);
335 4 : spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt);
336 4 : }
337 :
338 : static void
339 20 : _event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
340 : {
341 : struct spdk_event *ev;
342 :
343 20 : ev = spdk_event_allocate(lcore, fn, arg1, arg2);
344 20 : assert(ev);
345 20 : spdk_event_call(ev);
346 20 : }
347 :
/*
 * Completion callback after every reactor has updated its notify_cpuset for
 * `target`.  The ordering differs by direction to avoid missed notifications:
 * switching to poll mode clears the cpuset *after* the mode change, so the
 * transition finishes here; switching to interrupt mode sets the cpuset
 * first, so the actual mode change is dispatched to the target reactor now.
 */
static void
_reactor_set_notify_cpuset_cpl(void *arg1, void *arg2)
{
	struct spdk_reactor *target = arg1;

	if (target->new_in_interrupt == false) {
		target->set_interrupt_mode_in_progress = false;
		/* Report completion to the requester on the app thread. */
		spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
				     target->set_interrupt_mode_cb_arg);
	} else {
		_event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL);
	}
}
361 :
/* Message handler run on an spdk_thread: align the thread's interrupt mode
 * with its owning reactor's current mode.
 */
static void
_reactor_set_thread_interrupt_mode(void *ctx)
{
	struct spdk_reactor *reactor = ctx;

	spdk_thread_set_interrupt_mode(reactor->in_interrupt);
}
369 :
/*
 * Executes on the target reactor itself: flip it between poll and interrupt
 * mode.  In full-interrupt apps, also re-nest/un-nest each resident thread's
 * fd group and tell the thread to follow the mode change.  The statement
 * order relative to notify_cpuset updates (see the callers) is deliberate to
 * close notification races.
 */
static void
_reactor_set_interrupt_mode(void *arg1, void *arg2)
{
	struct spdk_reactor *target = arg1;
	struct spdk_thread *thread;
	struct spdk_fd_group *grp;
	struct spdk_lw_thread *lw_thread, *tmp;

	assert(target == spdk_reactor_get(spdk_env_get_current_core()));
	assert(target != NULL);
	assert(target->in_interrupt != target->new_in_interrupt);
	SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n",
		      target->lcore, target->in_interrupt ? "intr" : "poll", target->new_in_interrupt ? "intr" : "poll");

	target->in_interrupt = target->new_in_interrupt;

	if (spdk_interrupt_mode_is_enabled()) {
		/* Align spdk_thread with reactor to interrupt mode or poll mode */
		TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) {
			thread = spdk_thread_get_from_ctx(lw_thread);
			if (target->in_interrupt) {
				grp = spdk_thread_get_interrupt_fd_group(thread);
				spdk_fd_group_nest(target->fgrp, grp);
			} else {
				grp = spdk_thread_get_interrupt_fd_group(thread);
				spdk_fd_group_unnest(target->fgrp, grp);
			}

			spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target);
		}
	}

	if (target->new_in_interrupt == false) {
		/* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately
		 * track reactor stats. */
		target->tsc_last = spdk_get_ticks();
		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
	} else {
		uint64_t notify = 1;
		int rc = 0;

		/* Always trigger spdk_event and resched event in case of race condition */
		rc = write(target->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
		}
		rc = write(target->resched_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
		}

		target->set_interrupt_mode_in_progress = false;
		/* Switch to interrupt mode is complete; report back on the app thread. */
		spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
				     target->set_interrupt_mode_cb_arg);
	}
}
426 :
/*
 * Request that reactor `lcore` switch to interrupt (`new_in_interrupt` true)
 * or poll mode.  Must be called from the scheduling reactor's core.  The
 * switch is asynchronous; `cb_fn(cb_arg)` runs on the app thread when it
 * completes (or immediately if the reactor is already in the requested mode).
 *
 * Returns 0 on success; -EINVAL for an unknown lcore, -ENOTSUP if interrupt
 * facilities (eventfd/fd group) are unavailable, -EPERM when called off the
 * scheduling core, -EBUSY if a switch is already in flight.
 */
int
spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt,
				spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg)
{
	struct spdk_reactor *target;

	target = spdk_reactor_get(lcore);
	if (target == NULL) {
		return -EINVAL;
	}

	/* Eventfd has to be supported in order to use interrupt functionality. */
	if (target->fgrp == NULL) {
		return -ENOTSUP;
	}

	if (spdk_env_get_current_core() != g_scheduling_reactor->lcore) {
		SPDK_ERRLOG("It is only permitted within scheduling reactor.\n");
		return -EPERM;
	}

	/* Already in the requested mode - complete immediately. */
	if (target->in_interrupt == new_in_interrupt) {
		cb_fn(cb_arg);
		return 0;
	}

	if (target->set_interrupt_mode_in_progress) {
		SPDK_NOTICELOG("Reactor(%u) is already in progress to set interrupt mode\n", lcore);
		return -EBUSY;
	}
	target->set_interrupt_mode_in_progress = true;

	target->new_in_interrupt = new_in_interrupt;
	target->set_interrupt_mode_cb_fn = cb_fn;
	target->set_interrupt_mode_cb_arg = cb_arg;

	SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n",
		      spdk_env_get_current_core(), lcore);

	if (new_in_interrupt == false) {
		/* For potential race cases, when setting the reactor to poll mode,
		 * first change the mode of the reactor and then clear the corresponding
		 * bit of the notify_cpuset of each reactor.
		 */
		_event_call(lcore, _reactor_set_interrupt_mode, target, NULL);
	} else {
		/* For race cases, when setting the reactor to interrupt mode, first set the
		 * corresponding bit of the notify_cpuset of each reactor and then change the mode.
		 */
		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
	}

	return 0;
}
481 :
482 : struct spdk_event *
483 51 : spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
484 : {
485 51 : struct spdk_event *event = NULL;
486 51 : struct spdk_reactor *reactor = spdk_reactor_get(lcore);
487 :
488 51 : if (!reactor) {
489 0 : assert(false);
490 : return NULL;
491 : }
492 :
493 51 : event = spdk_mempool_get(g_spdk_event_mempool);
494 51 : if (event == NULL) {
495 0 : assert(false);
496 : return NULL;
497 : }
498 :
499 51 : event->lcore = lcore;
500 51 : event->fn = fn;
501 51 : event->arg1 = arg1;
502 51 : event->arg2 = arg2;
503 :
504 51 : return event;
505 : }
506 :
/*
 * Enqueue `event` on its destination reactor's event ring and, when needed,
 * wake the destination via its eventfd.  Ownership of the event passes to
 * the destination reactor, which frees it back to the mempool after
 * execution (see event_queue_run_batch).
 */
void
spdk_event_call(struct spdk_event *event)
{
	int rc;
	struct spdk_reactor *reactor;
	struct spdk_reactor *local_reactor = NULL;
	uint32_t current_core = spdk_env_get_current_core();

	reactor = spdk_reactor_get(event->lcore);

	assert(reactor != NULL);
	assert(reactor->events != NULL);

	rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL);
	if (rc != 1) {
		/* The ring is sized generously (64K); overflow is a fatal logic error. */
		assert(false);
	}

	if (current_core != SPDK_ENV_LCORE_ID_ANY) {
		local_reactor = spdk_reactor_get(current_core);
	}

	/* If spdk_event_call isn't called on a reactor, always send a notification.
	 * If it is called on a reactor, send a notification if the destination reactor
	 * is indicated in interrupt mode state.
	 */
	if (spdk_unlikely(local_reactor == NULL) ||
	    spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) {
		uint64_t notify = 1;

		rc = write(reactor->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
		}
	}
}
543 :
/*
 * Dequeue and execute up to SPDK_EVENT_BATCH_SIZE events from this reactor's
 * ring.  In interrupt mode the eventfd is acknowledged before dequeuing and
 * re-armed if events remain, so no notification is ever lost.  Returns the
 * number of events executed, or -errno on an eventfd read/write failure.
 */
static inline int
event_queue_run_batch(void *arg)
{
	struct spdk_reactor *reactor = arg;
	size_t count, i;
	void *events[SPDK_EVENT_BATCH_SIZE];

#ifdef DEBUG
	/*
	 * spdk_ring_dequeue() fills events and returns how many entries it wrote,
	 * so we will never actually read uninitialized data from events, but just to be sure
	 * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
	 */
	memset(events, 0, sizeof(events));
#endif

	/* Operate event notification if this reactor currently runs in interrupt state */
	if (spdk_unlikely(reactor->in_interrupt)) {
		uint64_t notify = 1;
		int rc;

		/* There may be race between event_acknowledge and another producer's event_notify,
		 * so event_acknowledge should be applied ahead. And then check for self's event_notify.
		 * This can avoid event notification missing.
		 */
		rc = read(reactor->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to acknowledge event queue: %s.\n", spdk_strerror(errno));
			return -errno;
		}

		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);

		if (spdk_ring_count(reactor->events) != 0) {
			/* Trigger new notification if there are still events in event-queue waiting for processing. */
			rc = write(reactor->events_fd, &notify, sizeof(notify));
			if (rc < 0) {
				SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
				return -errno;
			}
		}
	} else {
		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
	}

	if (count == 0) {
		return 0;
	}

	/* Execute the batch outside of any spdk_thread context. */
	for (i = 0; i < count; i++) {
		struct spdk_event *event = events[i];

		assert(event != NULL);
		assert(spdk_get_thread() == NULL);
		SPDK_DTRACE_PROBE3(event_exec, event->fn,
				   event->arg1, event->arg2);
		event->fn(event->arg1, event->arg2);
	}

	/* Return the executed events to the global pool in one call. */
	spdk_mempool_put_bulk(g_spdk_event_mempool, events, count);

	return (int)count;
}
607 :
608 : /* 1s */
609 : #define CONTEXT_SWITCH_MONITOR_PERIOD 1000000
610 :
611 : static int
612 8 : get_rusage(struct spdk_reactor *reactor)
613 : {
614 8 : struct rusage rusage;
615 :
616 8 : if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
617 0 : return -1;
618 : }
619 :
620 8 : if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
621 8 : SPDK_INFOLOG(reactor,
622 : "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
623 : reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
624 : rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
625 : }
626 8 : reactor->rusage = rusage;
627 :
628 8 : return -1;
629 : }
630 :
631 : void
632 0 : spdk_framework_enable_context_switch_monitor(bool enable)
633 : {
634 : /* This global is being read by multiple threads, so this isn't
635 : * strictly thread safe. However, we're toggling between true and
636 : * false here, and if a thread sees the value update later than it
637 : * should, it's no big deal. */
638 0 : g_framework_context_switch_monitor_enabled = enable;
639 0 : }
640 :
641 : bool
642 0 : spdk_framework_context_switch_monitor_enabled(void)
643 : {
644 0 : return g_framework_context_switch_monitor_enabled;
645 : }
646 :
/* Set the calling POSIX thread's name using the platform-specific API
 * (prctl on Linux, pthread_set_name_np on FreeBSD, pthread_setname_np
 * elsewhere).  Used to label each reactor thread "reactor_<lcore>".
 */
static void
_set_thread_name(const char *thread_name)
{
#if defined(__linux__)
	prctl(PR_SET_NAME, thread_name, 0, 0, 0);
#elif defined(__FreeBSD__)
	pthread_set_name_np(pthread_self(), thread_name);
#else
	pthread_setname_np(pthread_self(), thread_name);
#endif
}
658 :
659 : static void
660 15 : _init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
661 : {
662 15 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
663 : struct spdk_thread_stats prev_total_stats;
664 :
665 : /* Read total_stats before updating it to calculate stats during the last scheduling period. */
666 15 : prev_total_stats = lw_thread->total_stats;
667 :
668 15 : spdk_set_thread(thread);
669 15 : spdk_thread_get_stats(&lw_thread->total_stats);
670 15 : spdk_set_thread(NULL);
671 :
672 15 : lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc;
673 15 : lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc;
674 15 : }
675 :
676 : static void
677 8 : _threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info)
678 : {
679 : struct spdk_lw_thread *lw_thread;
680 : struct spdk_thread *thread;
681 :
682 8 : thread = spdk_thread_get_by_id(thread_info->thread_id);
683 8 : if (thread == NULL) {
684 : /* Thread no longer exists. */
685 0 : return;
686 : }
687 8 : lw_thread = spdk_thread_get_ctx(thread);
688 8 : assert(lw_thread != NULL);
689 :
690 8 : lw_thread->lcore = thread_info->lcore;
691 8 : lw_thread->resched = true;
692 : }
693 :
694 : static void
695 6 : _threads_reschedule(struct spdk_scheduler_core_info *cores_info)
696 : {
697 : struct spdk_scheduler_core_info *core;
698 : struct spdk_scheduler_thread_info *thread_info;
699 : uint32_t i, j;
700 :
701 21 : SPDK_ENV_FOREACH_CORE(i) {
702 15 : core = &cores_info[i];
703 30 : for (j = 0; j < core->threads_count; j++) {
704 15 : thread_info = &core->thread_infos[j];
705 15 : if (thread_info->lcore != i) {
706 8 : _threads_reschedule_thread(thread_info);
707 : }
708 : }
709 15 : core->threads_count = 0;
710 15 : free(core->thread_infos);
711 15 : core->thread_infos = NULL;
712 : }
713 6 : }
714 :
715 : static void
716 6 : _reactors_scheduler_fini(void)
717 : {
718 : /* Reschedule based on the balancing output */
719 6 : _threads_reschedule(g_core_infos);
720 :
721 6 : g_scheduling_in_progress = false;
722 6 : }
723 :
/*
 * Phase 3 of scheduling: walk the cores starting at g_scheduler_core_number
 * and switch any reactor whose interrupt mode disagrees with the scheduler's
 * decision.  Each successful spdk_reactor_set_interrupt_mode() is
 * asynchronous and re-enters this function as its completion callback, with
 * g_scheduler_core_number advanced past the switched core; once no more
 * switches are needed the round is finished via _reactors_scheduler_fini().
 */
static void
_reactors_scheduler_update_core_mode(void *ctx)
{
	struct spdk_reactor *reactor;
	uint32_t i;
	int rc = 0;

	for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) {
		reactor = spdk_reactor_get(i);
		assert(reactor != NULL);
		if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) {
			/* Switch next found reactor to new state */
			rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode,
							     _reactors_scheduler_update_core_mode, NULL);
			if (rc == 0) {
				/* Set core to start with after callback completes */
				g_scheduler_core_number = spdk_env_get_next_core(i);
				return;
			}
			/* On failure, fall through and try the remaining cores. */
		}
	}
	_reactors_scheduler_fini();
}
747 :
748 : static void
749 0 : _reactors_scheduler_cancel(void *arg1, void *arg2)
750 : {
751 : struct spdk_scheduler_core_info *core;
752 : uint32_t i;
753 :
754 0 : SPDK_ENV_FOREACH_CORE(i) {
755 0 : core = &g_core_infos[i];
756 0 : core->threads_count = 0;
757 0 : free(core->thread_infos);
758 0 : core->thread_infos = NULL;
759 : }
760 :
761 0 : g_scheduling_in_progress = false;
762 0 : }
763 :
/*
 * Phase 2 of scheduling, executed on the scheduling reactor: hand the
 * gathered per-core metrics to the active scheduler's balance() callback,
 * then start phase 3 (applying interrupt-mode changes) from the first core.
 * Cancels the round if the app is shutting down or the scheduler vanished.
 */
static void
_reactors_scheduler_balance(void *arg1, void *arg2)
{
	struct spdk_scheduler *scheduler = spdk_scheduler_get();

	if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) {
		_reactors_scheduler_cancel(NULL, NULL);
		return;
	}

	scheduler->balance(g_core_infos, g_reactor_count);

	g_scheduler_core_number = spdk_env_get_first_core();
	_reactors_scheduler_update_core_mode(NULL);
}
779 :
780 : /* Phase 1 of thread scheduling is to gather metrics on the existing threads */
/* Phase 1 of thread scheduling is to gather metrics on the existing threads.
 * This event hops from core to core: each invocation records the local
 * reactor's busy/idle deltas and per-thread stats into g_core_infos, then
 * forwards itself to the next core, wrapping around until it reaches the
 * scheduling reactor again, at which point phase 2 (balancing) begins.
 */
static void
_reactors_scheduler_gather_metrics(void *arg1, void *arg2)
{
	struct spdk_scheduler_core_info *core_info;
	struct spdk_lw_thread *lw_thread;
	struct spdk_thread *thread;
	struct spdk_reactor *reactor;
	uint32_t next_core;
	uint32_t i = 0;

	reactor = spdk_reactor_get(spdk_env_get_current_core());
	assert(reactor != NULL);
	core_info = &g_core_infos[reactor->lcore];
	core_info->lcore = reactor->lcore;
	/* current_* are deltas since the previous round; total_* are cumulative. */
	core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc;
	core_info->total_idle_tsc = reactor->idle_tsc;
	core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc;
	core_info->total_busy_tsc = reactor->busy_tsc;
	core_info->interrupt_mode = reactor->in_interrupt;
	core_info->threads_count = 0;

	SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);

	if (reactor->thread_count > 0) {
		core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos));
		if (core_info->thread_infos == NULL) {
			SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);

			/* Cancel this round of schedule work */
			_event_call(g_scheduling_reactor->lcore, _reactors_scheduler_cancel, NULL, NULL);
			return;
		}

		/* Snapshot every thread resident on this reactor. */
		TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
			_init_thread_stats(reactor, lw_thread);

			core_info->thread_infos[i].lcore = lw_thread->lcore;
			thread = spdk_thread_get_from_ctx(lw_thread);
			assert(thread != NULL);
			core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread);
			core_info->thread_infos[i].total_stats = lw_thread->total_stats;
			core_info->thread_infos[i].current_stats = lw_thread->current_stats;
			core_info->threads_count++;
			assert(core_info->threads_count <= reactor->thread_count);
			i++;
		}
	}

	next_core = spdk_env_get_next_core(reactor->lcore);
	if (next_core == UINT32_MAX) {
		next_core = spdk_env_get_first_core();
	}

	/* If we've looped back around to the scheduler thread, move to the next phase */
	if (next_core == g_scheduling_reactor->lcore) {
		/* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
		_event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
		return;
	}

	_event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL);
}
843 :
844 : static int _reactor_schedule_thread(struct spdk_thread *thread);
845 : static uint64_t g_rusage_period;
846 :
847 : static void
848 19 : _reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
849 : {
850 19 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
851 : struct spdk_fd_group *grp;
852 :
853 19 : TAILQ_REMOVE(&reactor->threads, lw_thread, link);
854 19 : assert(reactor->thread_count > 0);
855 19 : reactor->thread_count--;
856 :
857 : /* Operate thread intr if running with full interrupt ability */
858 19 : if (spdk_interrupt_mode_is_enabled()) {
859 0 : if (reactor->in_interrupt) {
860 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
861 0 : spdk_fd_group_unnest(reactor->fgrp, grp);
862 : }
863 : }
864 19 : }
865 :
/*
 * After polling a thread, handle its lifecycle transitions:
 *  - exited and idle  -> remove from the reactor and destroy it;
 *  - flagged for resched (and not core-bound) -> remove and hand off to
 *    _reactor_schedule_thread() for placement on its new core.
 * Returns true when the thread was removed from this reactor.
 */
static bool
reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
{
	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);

	if (spdk_unlikely(spdk_thread_is_exited(thread) &&
			  spdk_thread_is_idle(thread))) {
		_reactor_remove_lw_thread(reactor, lw_thread);
		spdk_thread_destroy(thread);
		return true;
	}

	if (spdk_unlikely(lw_thread->resched && !spdk_thread_is_bound(thread))) {
		lw_thread->resched = false;
		_reactor_remove_lw_thread(reactor, lw_thread);
		_reactor_schedule_thread(thread);
		return true;
	}

	return false;
}
887 :
888 : static void
889 0 : reactor_interrupt_run(struct spdk_reactor *reactor)
890 : {
891 0 : int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */
892 :
893 0 : spdk_fd_group_wait(reactor->fgrp, block_timeout);
894 0 : }
895 :
/*
 * One poll-mode iteration: drain a batch of events, then poll every resident
 * thread once, attributing elapsed ticks to the reactor's busy or idle
 * counters, and finally apply any pending thread removals/migrations.
 */
static void
_reactor_run(struct spdk_reactor *reactor)
{
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread, *tmp;
	uint64_t now;
	int rc;

	event_queue_run_batch(reactor);

	/* If no threads are present on the reactor,
	 * tsc_last gets outdated. Update it to track
	 * thread execution time correctly. */
	if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) {
		now = spdk_get_ticks();
		reactor->idle_tsc += now - reactor->tsc_last;
		reactor->tsc_last = now;
		return;
	}

	TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
		thread = spdk_thread_get_from_ctx(lw_thread);
		rc = spdk_thread_poll(thread, 0, reactor->tsc_last);

		/* rc == 0: nothing ran (idle); rc > 0: work was done (busy). */
		now = spdk_thread_get_last_tsc(thread);
		if (rc == 0) {
			reactor->idle_tsc += now - reactor->tsc_last;
		} else if (rc > 0) {
			reactor->busy_tsc += now - reactor->tsc_last;
		}
		reactor->tsc_last = now;

		reactor_post_process_lw_thread(reactor, lw_thread);
	}
}
931 :
/*
 * Main loop of a reactor, pinned to one core for the life of the app.
 * Iterates in poll or interrupt mode until g_reactor_state leaves RUNNING,
 * then drains: asks any still-running threads to exit and polls until every
 * thread is destroyed.  Returns 0 when the reactor has fully shut down.
 */
static int
reactor_run(void *arg)
{
	struct spdk_reactor *reactor = arg;
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread, *tmp;
	char thread_name[32];
	uint64_t last_sched = 0;

	SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);

	/* Rename the POSIX thread because the reactor is tied to the POSIX
	 * thread in the SPDK event library.
	 */
	snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore);
	_set_thread_name(thread_name);

	reactor->tsc_last = spdk_get_ticks();

	while (1) {
		/* Execute interrupt process fn if this reactor currently runs in interrupt state */
		if (spdk_unlikely(reactor->in_interrupt)) {
			reactor_interrupt_run(reactor);
		} else {
			_reactor_run(reactor);
		}

		/* Periodically sample context-switch counters (see g_rusage_period). */
		if (g_framework_context_switch_monitor_enabled) {
			if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) {
				get_rusage(reactor);
				reactor->last_rusage = reactor->tsc_last;
			}
		}

		/* Only the scheduling reactor kicks off a scheduling round, and
		 * only when the period elapsed and no round is in flight.
		 */
		if (spdk_unlikely(g_scheduler_period > 0 &&
				  (reactor->tsc_last - last_sched) > g_scheduler_period &&
				  reactor == g_scheduling_reactor &&
				  !g_scheduling_in_progress)) {
			last_sched = reactor->tsc_last;
			g_scheduling_in_progress = true;
			_reactors_scheduler_gather_metrics(NULL, NULL);
		}

		if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
			break;
		}
	}

	TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
		thread = spdk_thread_get_from_ctx(lw_thread);
		/* All threads should have already had spdk_thread_exit() called on them, except
		 * for the app thread.
		 */
		if (spdk_thread_is_running(thread)) {
			if (!spdk_thread_is_app_thread(thread)) {
				SPDK_ERRLOG("spdk_thread_exit() was not called on thread '%s'\n",
					    spdk_thread_get_name(thread));
				SPDK_ERRLOG("This will result in a non-zero exit code in a future release.\n");
			}
			spdk_set_thread(thread);
			spdk_thread_exit(thread);
		}
	}

	/* Keep polling until every thread finishes exiting and is destroyed. */
	while (!TAILQ_EMPTY(&reactor->threads)) {
		TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
			thread = spdk_thread_get_from_ctx(lw_thread);
			spdk_set_thread(thread);
			if (spdk_thread_is_exited(thread)) {
				_reactor_remove_lw_thread(reactor, lw_thread);
				spdk_thread_destroy(thread);
			} else {
				if (spdk_unlikely(reactor->in_interrupt)) {
					reactor_interrupt_run(reactor);
				} else {
					spdk_thread_poll(thread, 0, 0);
				}
			}
		}
	}

	return 0;
}
1015 :
/*
 * Parse a core mask string into cpumask and intersect it with the set of
 * cores actually owned by reactors.  Returns 0 on success or the negative
 * error from spdk_cpuset_parse() on a malformed mask.
 */
int
spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc = spdk_cpuset_parse(cpumask, mask);

	if (rc < 0) {
		return rc;
	}

	/* Restrict the parsed mask to valid reactor cores. */
	spdk_cpuset_and(cpumask, spdk_app_get_core_mask());

	return 0;
}
1032 :
/* Return the cpuset of cores on which reactors were started. */
const struct spdk_cpuset *
spdk_app_get_core_mask(void)
{
	return &g_reactor_core_mask;
}
1038 :
/*
 * Launch one reactor per core in the SPDK core mask.  Every core except
 * the current one gets a pinned environment thread running reactor_run();
 * the current core runs reactor_run() directly, so this call blocks until
 * the framework stops and all reactor threads have been joined.
 */
void
spdk_reactors_start(void)
{
	struct spdk_reactor *reactor;
	uint32_t i, current_core;
	int rc;

	g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC;
	g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
	/* Reinitialize to false, in case the app framework is restarting in the same process. */
	g_stopping_reactors = false;

	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			reactor = spdk_reactor_get(i);
			if (reactor == NULL) {
				continue;
			}

			rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor);
			if (rc < 0) {
				SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
				assert(false);
				return;
			}
		}
		/* Record this core in the global reactor core mask. */
		spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
	}

	/* Start the main reactor */
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);
	reactor_run(reactor);

	spdk_env_thread_wait_all();

	g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
}
1078 :
1079 : static void
1080 0 : _reactors_stop(void *arg1, void *arg2)
1081 : {
1082 : uint32_t i;
1083 : int rc;
1084 : struct spdk_reactor *reactor;
1085 : struct spdk_reactor *local_reactor;
1086 0 : uint64_t notify = 1;
1087 :
1088 0 : g_reactor_state = SPDK_REACTOR_STATE_EXITING;
1089 0 : local_reactor = spdk_reactor_get(spdk_env_get_current_core());
1090 :
1091 0 : SPDK_ENV_FOREACH_CORE(i) {
1092 : /* If spdk_event_call isn't called on a reactor, always send a notification.
1093 : * If it is called on a reactor, send a notification if the destination reactor
1094 : * is indicated in interrupt mode state.
1095 : */
1096 0 : if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) {
1097 0 : reactor = spdk_reactor_get(i);
1098 0 : assert(reactor != NULL);
1099 0 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
1100 0 : if (rc < 0) {
1101 0 : SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno));
1102 0 : continue;
1103 : }
1104 : }
1105 : }
1106 0 : }
1107 :
/* Intentionally empty event function; used as the per-reactor step of the
 * final for_each_reactor pass where only the completion callback matters. */
static void
nop(void *arg1, void *arg2)
{
}
1112 :
/*
 * Begin framework shutdown.  Sends a no-op pass across every reactor with
 * _reactors_stop as the completion, which flushes any in-flight
 * for_each_reactor iterations before the EXITING state is set.
 * arg1 is unused.
 */
void
spdk_reactors_stop(void *arg1)
{
	spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop);
}
1118 :
/* Protects the round-robin core selection below in _reactor_schedule_thread(). */
static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER;
/* Next candidate core for placing a new thread; UINT32_MAX forces a reset
 * to the first core on first use. */
static uint32_t g_next_core = UINT32_MAX;
1121 :
/*
 * Event handler that completes moving an spdk_thread onto a reactor.
 * Runs on the destination core.  arg1 is the spdk_lw_thread being placed;
 * arg2 is unused.  Appends the thread to this reactor's list and, when
 * interrupt mode is enabled, aligns the thread's interrupt state with the
 * reactor's.
 */
static void
_schedule_thread(void *arg1, void *arg2)
{
	struct spdk_lw_thread *lw_thread = arg1;
	struct spdk_thread *thread;
	struct spdk_reactor *reactor;
	uint32_t current_core;
	struct spdk_fd_group *grp;

	current_core = spdk_env_get_current_core();
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);

	/* Update total_stats to reflect state of thread
	 * at the end of the move. */
	thread = spdk_thread_get_from_ctx(lw_thread);
	spdk_set_thread(thread);
	spdk_thread_get_stats(&lw_thread->total_stats);
	spdk_set_thread(NULL);

	lw_thread->lcore = current_core;

	TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link);
	reactor->thread_count++;

	/* Operate thread intr if running with full interrupt ability */
	if (spdk_interrupt_mode_is_enabled()) {
		int rc;

		if (reactor->in_interrupt) {
			/* Nest the thread's fd group under the reactor's so its
			 * interrupts are serviced by the reactor's epoll loop. */
			grp = spdk_thread_get_interrupt_fd_group(thread);
			rc = spdk_fd_group_nest(reactor->fgrp, grp);
			if (rc < 0) {
				SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc));
			}
		}

		/* Align spdk_thread with reactor to interrupt mode or poll mode */
		spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor);
	}
}
1163 :
/*
 * Pick a core for the given spdk_thread and send a _schedule_thread event
 * to that core.  The choice honors the thread's cpumask, restricted to
 * polling-mode reactors when interrupt mode is disabled, with a
 * round-robin fallback across all cores.  Returns 0 on success, -1 if no
 * event could be allocated.
 */
static int
_reactor_schedule_thread(struct spdk_thread *thread)
{
	uint32_t core;
	struct spdk_lw_thread *lw_thread;
	struct spdk_event *evt = NULL;
	struct spdk_cpuset *cpumask;
	uint32_t i;
	struct spdk_reactor *local_reactor = NULL;
	uint32_t current_lcore = spdk_env_get_current_core();
	struct spdk_cpuset polling_cpumask;
	struct spdk_cpuset valid_cpumask;

	cpumask = spdk_thread_get_cpumask(thread);

	lw_thread = spdk_thread_get_ctx(thread);
	assert(lw_thread != NULL);
	/* Save the suggested core before wiping the context for reuse. */
	core = lw_thread->lcore;
	memset(lw_thread, 0, sizeof(*lw_thread));

	if (current_lcore != SPDK_ENV_LCORE_ID_ANY) {
		local_reactor = spdk_reactor_get(current_lcore);
		assert(local_reactor);
	}

	/* When interrupt ability of spdk_thread is not enabled and the current
	 * reactor runs on DPDK thread, skip reactors which are in interrupt mode.
	 */
	if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) {
		/* Get the cpumask of all reactors in polling */
		spdk_cpuset_zero(&polling_cpumask);
		SPDK_ENV_FOREACH_CORE(i) {
			spdk_cpuset_set_cpu(&polling_cpumask, i, true);
		}
		/* notify_cpuset marks interrupt-mode reactors; xor strips them out. */
		spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);

		if (core == SPDK_ENV_LCORE_ID_ANY) {
			/* Get the cpumask of all valid reactors which are suggested and also in polling */
			spdk_cpuset_copy(&valid_cpumask, &polling_cpumask);
			spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));

			/* If there are any valid reactors, spdk_thread should be scheduled
			 * into one of the valid reactors.
			 * If there is no valid reactors, spdk_thread should be scheduled
			 * into one of the polling reactors.
			 */
			if (spdk_cpuset_count(&valid_cpumask) != 0) {
				cpumask = &valid_cpumask;
			} else {
				cpumask = &polling_cpumask;
			}
		} else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) {
			/* If specified reactor is not in polling, spdk_thread should be scheduled
			 * into one of the polling reactors.
			 */
			core = SPDK_ENV_LCORE_ID_ANY;
			cpumask = &polling_cpumask;
		}
	}

	pthread_mutex_lock(&g_scheduler_mtx);
	if (core == SPDK_ENV_LCORE_ID_ANY) {
		/* Round-robin over the cores until one in cpumask is found;
		 * falls through with the last candidate if none matches. */
		for (i = 0; i < spdk_env_get_core_count(); i++) {
			if (g_next_core >= g_reactor_count) {
				g_next_core = spdk_env_get_first_core();
			}
			core = g_next_core;
			g_next_core = spdk_env_get_next_core(g_next_core);

			if (spdk_cpuset_get_cpu(cpumask, core)) {
				break;
			}
		}
	}

	evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL);

	pthread_mutex_unlock(&g_scheduler_mtx);

	assert(evt != NULL);
	if (evt == NULL) {
		SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n");
		return -1;
	}

	lw_thread->tsc_start = spdk_get_ticks();

	spdk_event_call(evt);

	return 0;
}
1255 :
1256 : static void
1257 2 : _reactor_request_thread_reschedule(struct spdk_thread *thread)
1258 : {
1259 : struct spdk_lw_thread *lw_thread;
1260 : struct spdk_reactor *reactor;
1261 : uint32_t current_core;
1262 :
1263 2 : assert(thread == spdk_get_thread());
1264 :
1265 2 : lw_thread = spdk_thread_get_ctx(thread);
1266 :
1267 2 : assert(lw_thread != NULL);
1268 2 : lw_thread->resched = true;
1269 2 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1270 :
1271 2 : current_core = spdk_env_get_current_core();
1272 2 : reactor = spdk_reactor_get(current_core);
1273 2 : assert(reactor != NULL);
1274 :
1275 : /* Send a notification if the destination reactor is indicated in intr mode state */
1276 2 : if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) {
1277 0 : uint64_t notify = 1;
1278 :
1279 0 : if (write(reactor->resched_fd, ¬ify, sizeof(notify)) < 0) {
1280 0 : SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
1281 : }
1282 : }
1283 2 : }
1284 :
1285 : static int
1286 15 : reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op)
1287 : {
1288 : struct spdk_lw_thread *lw_thread;
1289 :
1290 15 : switch (op) {
1291 13 : case SPDK_THREAD_OP_NEW:
1292 13 : lw_thread = spdk_thread_get_ctx(thread);
1293 13 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1294 13 : return _reactor_schedule_thread(thread);
1295 2 : case SPDK_THREAD_OP_RESCHED:
1296 2 : _reactor_request_thread_reschedule(thread);
1297 2 : return 0;
1298 0 : default:
1299 0 : return -ENOTSUP;
1300 : }
1301 : }
1302 :
1303 : static bool
1304 15 : reactor_thread_op_supported(enum spdk_thread_op op)
1305 : {
1306 15 : switch (op) {
1307 15 : case SPDK_THREAD_OP_NEW:
1308 : case SPDK_THREAD_OP_RESCHED:
1309 15 : return true;
1310 0 : default:
1311 0 : return false;
1312 : }
1313 : }
1314 :
/* Context for spdk_for_each_reactor(); carried from core to core by
 * on_reactor() events until every reactor has run fn. */
struct call_reactor {
	uint32_t cur_core;	/* core currently (or next) executing fn */
	spdk_event_fn fn;	/* function to run on each reactor */
	void *arg1;
	void *arg2;

	uint32_t orig_core;	/* core the iteration started on; cpl runs here */
	spdk_event_fn cpl;	/* completion callback */
};
1324 :
1325 : static void
1326 9 : on_reactor(void *arg1, void *arg2)
1327 : {
1328 9 : struct call_reactor *cr = arg1;
1329 : struct spdk_event *evt;
1330 :
1331 9 : cr->fn(cr->arg1, cr->arg2);
1332 :
1333 9 : cr->cur_core = spdk_env_get_next_core(cr->cur_core);
1334 :
1335 9 : if (cr->cur_core >= g_reactor_count) {
1336 3 : SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n");
1337 :
1338 3 : evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2);
1339 3 : free(cr);
1340 : } else {
1341 6 : SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n",
1342 : cr->cur_core);
1343 :
1344 6 : evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL);
1345 : }
1346 9 : assert(evt != NULL);
1347 9 : spdk_event_call(evt);
1348 9 : }
1349 :
/*
 * Run fn(arg1, arg2) on every reactor in core order, one core at a time,
 * then invoke cpl(arg1, arg2) on the originating core.  If the iteration
 * context cannot be allocated, cpl is invoked immediately on this core.
 * Calls made after shutdown has begun are silently dropped.
 */
void
spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl)
{
	struct call_reactor *cr;

	/* When the application framework is shutting down, we will send one
	 * final for_each_reactor operation with completion callback _reactors_stop,
	 * to flush any existing for_each_reactor operations to avoid any memory
	 * leaks. We use a mutex here to protect a boolean flag that will ensure
	 * we don't start any more operations once we've started shutting down.
	 */
	pthread_mutex_lock(&g_stopping_reactors_mtx);
	if (g_stopping_reactors) {
		pthread_mutex_unlock(&g_stopping_reactors_mtx);
		return;
	} else if (cpl == _reactors_stop) {
		g_stopping_reactors = true;
	}
	pthread_mutex_unlock(&g_stopping_reactors_mtx);

	cr = calloc(1, sizeof(*cr));
	if (!cr) {
		SPDK_ERRLOG("Unable to perform reactor iteration\n");
		cpl(arg1, arg2);
		return;
	}

	cr->fn = fn;
	cr->arg1 = arg1;
	cr->arg2 = arg2;
	cr->cpl = cpl;
	cr->orig_core = spdk_env_get_current_core();
	cr->cur_core = spdk_env_get_first_core();

	SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core);

	_event_call(cr->cur_core, on_reactor, cr, NULL);
}
1388 :
1389 : #ifdef __linux__
1390 : static int
1391 0 : reactor_schedule_thread_event(void *arg)
1392 : {
1393 0 : struct spdk_reactor *reactor = arg;
1394 : struct spdk_lw_thread *lw_thread, *tmp;
1395 0 : uint32_t count = 0;
1396 0 : uint64_t notify = 1;
1397 :
1398 0 : assert(reactor->in_interrupt);
1399 :
1400 0 : if (read(reactor->resched_fd, ¬ify, sizeof(notify)) < 0) {
1401 0 : SPDK_ERRLOG("failed to acknowledge reschedule: %s.\n", spdk_strerror(errno));
1402 0 : return -errno;
1403 : }
1404 :
1405 0 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
1406 0 : count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0;
1407 : }
1408 :
1409 0 : return count;
1410 : }
1411 :
/*
 * Create the interrupt-mode resources for a reactor: an fd group plus two
 * eventfds — resched_fd (wakes the reactor to re-run thread scheduling)
 * and events_fd (wakes the reactor to drain its event ring).  On any
 * failure, everything created so far is unwound and a negative errno-style
 * code is returned; reactor->fgrp is left NULL.
 */
static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	int rc;

	rc = spdk_fd_group_create(&reactor->fgrp);
	if (rc != 0) {
		return rc;
	}

	reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (reactor->resched_fd < 0) {
		rc = -EBADF;
		goto err;
	}

	rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->resched_fd, reactor_schedule_thread_event,
			       reactor);
	if (rc) {
		close(reactor->resched_fd);
		goto err;
	}

	reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (reactor->events_fd < 0) {
		/* Unwind the resched fd registration before bailing out. */
		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
		close(reactor->resched_fd);

		rc = -EBADF;
		goto err;
	}

	rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->events_fd,
			       event_queue_run_batch, reactor);
	if (rc) {
		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
		close(reactor->resched_fd);
		close(reactor->events_fd);
		goto err;
	}

	return 0;

err:
	spdk_fd_group_destroy(reactor->fgrp);
	reactor->fgrp = NULL;
	return rc;
}
1460 : #else
/* Interrupt mode requires Linux eventfd support; unsupported elsewhere. */
static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	return -ENOTSUP;
}
1466 : #endif
1467 :
1468 : static void
1469 27 : reactor_interrupt_fini(struct spdk_reactor *reactor)
1470 : {
1471 27 : struct spdk_fd_group *fgrp = reactor->fgrp;
1472 :
1473 27 : if (!fgrp) {
1474 0 : return;
1475 : }
1476 :
1477 27 : spdk_fd_group_remove(fgrp, reactor->events_fd);
1478 27 : spdk_fd_group_remove(fgrp, reactor->resched_fd);
1479 :
1480 27 : close(reactor->events_fd);
1481 27 : close(reactor->resched_fd);
1482 :
1483 27 : spdk_fd_group_destroy(fgrp);
1484 27 : reactor->fgrp = NULL;
1485 : }
1486 :
1487 : static struct spdk_governor *
1488 3 : _governor_find(const char *name)
1489 : {
1490 : struct spdk_governor *governor, *tmp;
1491 :
1492 3 : TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) {
1493 1 : if (strcmp(name, governor->name) == 0) {
1494 1 : return governor;
1495 : }
1496 : }
1497 :
1498 2 : return NULL;
1499 : }
1500 :
/*
 * Select the active governor by name.  Passing NULL deinitializes and
 * clears the current governor.  Returns 0 on success, -EINVAL if no
 * governor with that name is registered, or the error from the new
 * governor's init() callback.  The previous governor is deinitialized
 * only after the new one initializes successfully.
 */
int
spdk_governor_set(const char *name)
{
	struct spdk_governor *governor;
	int rc = 0;

	/* NULL governor was specifically requested */
	if (name == NULL) {
		if (g_governor) {
			g_governor->deinit();
		}
		g_governor = NULL;
		return 0;
	}

	governor = _governor_find(name);
	if (governor == NULL) {
		return -EINVAL;
	}

	/* Already active: nothing to do. */
	if (g_governor == governor) {
		return 0;
	}

	rc = governor->init();
	if (rc == 0) {
		if (g_governor) {
			g_governor->deinit();
		}
		g_governor = governor;
	}

	return rc;
}
1535 :
/* Return the currently active governor, or NULL if none is set. */
struct spdk_governor *
spdk_governor_get(void)
{
	return g_governor;
}
1541 :
1542 : void
1543 1 : spdk_governor_register(struct spdk_governor *governor)
1544 : {
1545 1 : if (_governor_find(governor->name)) {
1546 0 : SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name);
1547 0 : assert(false);
1548 : return;
1549 : }
1550 :
1551 1 : TAILQ_INSERT_TAIL(&g_governor_list, governor, link);
1552 : }
1553 :
1554 1 : SPDK_LOG_REGISTER_COMPONENT(reactor)
|