Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "spdk/stdinc.h"
7 : #include "spdk/likely.h"
8 :
9 : #include "spdk_internal/event.h"
10 : #include "spdk_internal/usdt.h"
11 :
12 : #include "spdk/log.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/env.h"
15 : #include "spdk/util.h"
16 : #include "spdk/scheduler.h"
17 : #include "spdk/string.h"
18 : #include "spdk/fd_group.h"
19 :
20 : #ifdef __linux__
21 : #include <sys/prctl.h>
22 : #include <sys/eventfd.h>
23 : #endif
24 :
25 : #ifdef __FreeBSD__
26 : #include <pthread_np.h>
27 : #endif
28 :
29 : #define SPDK_EVENT_BATCH_SIZE 8
30 :
31 : static struct spdk_reactor *g_reactors;
32 : static uint32_t g_reactor_count;
33 : static struct spdk_cpuset g_reactor_core_mask;
34 : static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED;
35 :
36 : static bool g_framework_context_switch_monitor_enabled = true;
37 :
38 : static struct spdk_mempool *g_spdk_event_mempool = NULL;
39 :
40 : TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
41 : = TAILQ_HEAD_INITIALIZER(g_scheduler_list);
42 :
43 : static struct spdk_scheduler *g_scheduler = NULL;
44 : static struct spdk_reactor *g_scheduling_reactor;
45 : bool g_scheduling_in_progress = false;
46 : static uint64_t g_scheduler_period = 0;
47 : static uint32_t g_scheduler_core_number;
48 : static struct spdk_scheduler_core_info *g_core_infos = NULL;
49 :
50 : TAILQ_HEAD(, spdk_governor) g_governor_list
51 : = TAILQ_HEAD_INITIALIZER(g_governor_list);
52 :
53 : static struct spdk_governor *g_governor = NULL;
54 :
55 : static int reactor_interrupt_init(struct spdk_reactor *reactor);
56 : static void reactor_interrupt_fini(struct spdk_reactor *reactor);
57 :
58 : static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER;
59 : static bool g_stopping_reactors = false;
60 :
61 : static struct spdk_scheduler *
62 5 : _scheduler_find(const char *name)
63 : {
64 : struct spdk_scheduler *tmp;
65 :
66 9 : TAILQ_FOREACH(tmp, &g_scheduler_list, link) {
67 7 : if (strcmp(name, tmp->name) == 0) {
68 3 : return tmp;
69 : }
70 : }
71 :
72 2 : return NULL;
73 : }
74 :
75 : int
76 3 : spdk_scheduler_set(const char *name)
77 : {
78 : struct spdk_scheduler *scheduler;
79 3 : int rc = 0;
80 :
81 : /* NULL scheduler was specifically requested */
82 3 : if (name == NULL) {
83 0 : if (g_scheduler) {
84 0 : g_scheduler->deinit();
85 : }
86 0 : g_scheduler = NULL;
87 0 : return 0;
88 : }
89 :
90 3 : scheduler = _scheduler_find(name);
91 3 : if (scheduler == NULL) {
92 0 : SPDK_ERRLOG("Requested scheduler is missing\n");
93 0 : return -EINVAL;
94 : }
95 :
96 3 : if (g_scheduler == scheduler) {
97 2 : return 0;
98 : }
99 :
100 1 : rc = scheduler->init();
101 1 : if (rc == 0) {
102 1 : if (g_scheduler) {
103 0 : g_scheduler->deinit();
104 : }
105 1 : g_scheduler = scheduler;
106 : }
107 :
108 1 : return rc;
109 : }
110 :
111 : struct spdk_scheduler *
112 6 : spdk_scheduler_get(void)
113 : {
114 6 : return g_scheduler;
115 : }
116 :
117 : uint64_t
118 0 : spdk_scheduler_get_period(void)
119 : {
120 : /* Convert from ticks to microseconds */
121 0 : return (g_scheduler_period * SPDK_SEC_TO_USEC / spdk_get_ticks_hz());
122 : }
123 :
124 : void
125 0 : spdk_scheduler_set_period(uint64_t period)
126 : {
127 : /* Convert microseconds to ticks */
128 0 : g_scheduler_period = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
129 0 : }
130 :
131 : void
132 2 : spdk_scheduler_register(struct spdk_scheduler *scheduler)
133 : {
134 2 : if (_scheduler_find(scheduler->name)) {
135 0 : SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name);
136 0 : assert(false);
137 : return;
138 : }
139 :
140 2 : TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link);
141 : }
142 :
143 : uint32_t
144 0 : spdk_scheduler_get_scheduling_lcore(void)
145 : {
146 0 : return g_scheduling_reactor->lcore;
147 : }
148 :
149 : static void
150 27 : reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
151 : {
152 27 : reactor->lcore = lcore;
153 27 : reactor->flags.is_valid = true;
154 :
155 27 : TAILQ_INIT(&reactor->threads);
156 27 : reactor->thread_count = 0;
157 27 : spdk_cpuset_zero(&reactor->notify_cpuset);
158 :
159 27 : reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_SOCKET_ID_ANY);
160 27 : if (reactor->events == NULL) {
161 0 : SPDK_ERRLOG("Failed to allocate events ring\n");
162 0 : assert(false);
163 : }
164 :
165 : /* Always initialize interrupt facilities for reactor */
166 27 : if (reactor_interrupt_init(reactor) != 0) {
167 : /* Reactor interrupt facilities are necessary if seting app to interrupt mode. */
168 0 : if (spdk_interrupt_mode_is_enabled()) {
169 0 : SPDK_ERRLOG("Failed to prepare intr facilities\n");
170 0 : assert(false);
171 : }
172 0 : return;
173 : }
174 :
175 : /* If application runs with full interrupt ability,
176 : * all reactors are going to run in interrupt mode.
177 : */
178 27 : if (spdk_interrupt_mode_is_enabled()) {
179 : uint32_t i;
180 :
181 0 : SPDK_ENV_FOREACH_CORE(i) {
182 0 : spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true);
183 : }
184 0 : reactor->in_interrupt = true;
185 : }
186 : }
187 :
188 : struct spdk_reactor *
189 405 : spdk_reactor_get(uint32_t lcore)
190 : {
191 : struct spdk_reactor *reactor;
192 :
193 405 : if (g_reactors == NULL) {
194 0 : SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n");
195 0 : return NULL;
196 : }
197 :
198 405 : if (lcore >= g_reactor_count) {
199 0 : return NULL;
200 : }
201 :
202 405 : reactor = &g_reactors[lcore];
203 :
204 405 : if (reactor->flags.is_valid == false) {
205 0 : return NULL;
206 : }
207 :
208 405 : return reactor;
209 : }
210 :
211 : static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op);
212 : static bool reactor_thread_op_supported(enum spdk_thread_op op);
213 :
214 : int
215 9 : spdk_reactors_init(size_t msg_mempool_size)
216 : {
217 : struct spdk_reactor *reactor;
218 : int rc;
219 : uint32_t i, current_core;
220 9 : char mempool_name[32];
221 :
222 9 : snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
223 9 : g_spdk_event_mempool = spdk_mempool_create(mempool_name,
224 : 262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
225 : sizeof(struct spdk_event),
226 : SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
227 : SPDK_ENV_SOCKET_ID_ANY);
228 :
229 9 : if (g_spdk_event_mempool == NULL) {
230 0 : SPDK_ERRLOG("spdk_event_mempool creation failed\n");
231 0 : return -1;
232 : }
233 :
234 : /* struct spdk_reactor must be aligned on 64 byte boundary */
235 9 : g_reactor_count = spdk_env_get_last_core() + 1;
236 9 : rc = posix_memalign((void **)&g_reactors, 64,
237 : g_reactor_count * sizeof(struct spdk_reactor));
238 9 : if (rc != 0) {
239 0 : SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
240 : g_reactor_count);
241 0 : spdk_mempool_free(g_spdk_event_mempool);
242 0 : return -1;
243 : }
244 :
245 9 : g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos));
246 9 : if (g_core_infos == NULL) {
247 0 : SPDK_ERRLOG("Could not allocate memory for g_core_infos\n");
248 0 : spdk_mempool_free(g_spdk_event_mempool);
249 0 : free(g_reactors);
250 0 : return -ENOMEM;
251 : }
252 :
253 9 : memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor));
254 :
255 9 : rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported,
256 : sizeof(struct spdk_lw_thread), msg_mempool_size);
257 9 : if (rc != 0) {
258 0 : SPDK_ERRLOG("Initialize spdk thread lib failed\n");
259 0 : spdk_mempool_free(g_spdk_event_mempool);
260 0 : free(g_reactors);
261 0 : free(g_core_infos);
262 0 : return rc;
263 : }
264 :
265 35 : SPDK_ENV_FOREACH_CORE(i) {
266 26 : reactor_construct(&g_reactors[i], i);
267 : }
268 :
269 9 : current_core = spdk_env_get_current_core();
270 9 : reactor = spdk_reactor_get(current_core);
271 9 : assert(reactor != NULL);
272 9 : g_scheduling_reactor = reactor;
273 :
274 9 : g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
275 :
276 9 : return 0;
277 : }
278 :
279 : void
280 9 : spdk_reactors_fini(void)
281 : {
282 : uint32_t i;
283 : struct spdk_reactor *reactor;
284 :
285 9 : if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) {
286 0 : return;
287 : }
288 :
289 9 : spdk_thread_lib_fini();
290 :
291 35 : SPDK_ENV_FOREACH_CORE(i) {
292 26 : reactor = spdk_reactor_get(i);
293 26 : assert(reactor != NULL);
294 26 : assert(reactor->thread_count == 0);
295 26 : if (reactor->events != NULL) {
296 26 : spdk_ring_free(reactor->events);
297 : }
298 :
299 26 : reactor_interrupt_fini(reactor);
300 :
301 26 : if (g_core_infos != NULL) {
302 26 : free(g_core_infos[i].thread_infos);
303 : }
304 : }
305 :
306 9 : spdk_mempool_free(g_spdk_event_mempool);
307 :
308 9 : free(g_reactors);
309 9 : g_reactors = NULL;
310 9 : free(g_core_infos);
311 9 : g_core_infos = NULL;
312 : }
313 :
314 : static void _reactor_set_interrupt_mode(void *arg1, void *arg2);
315 :
316 : static void
317 4 : _reactor_set_notify_cpuset(void *arg1, void *arg2)
318 : {
319 4 : struct spdk_reactor *target = arg1;
320 4 : struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());
321 :
322 4 : assert(reactor != NULL);
323 4 : spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt);
324 4 : }
325 :
326 : static void
327 20 : _event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
328 : {
329 : struct spdk_event *ev;
330 :
331 20 : ev = spdk_event_allocate(lcore, fn, arg1, arg2);
332 20 : assert(ev);
333 20 : spdk_event_call(ev);
334 20 : }
335 :
336 : static void
337 2 : _reactor_set_notify_cpuset_cpl(void *arg1, void *arg2)
338 : {
339 2 : struct spdk_reactor *target = arg1;
340 :
341 2 : if (target->new_in_interrupt == false) {
342 1 : target->set_interrupt_mode_in_progress = false;
343 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
344 : target->set_interrupt_mode_cb_arg);
345 : } else {
346 1 : _event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL);
347 : }
348 2 : }
349 :
350 : static void
351 0 : _reactor_set_thread_interrupt_mode(void *ctx)
352 : {
353 0 : struct spdk_reactor *reactor = ctx;
354 :
355 0 : spdk_thread_set_interrupt_mode(reactor->in_interrupt);
356 0 : }
357 :
358 : static void
359 2 : _reactor_set_interrupt_mode(void *arg1, void *arg2)
360 : {
361 2 : struct spdk_reactor *target = arg1;
362 : struct spdk_thread *thread;
363 : struct spdk_fd_group *grp;
364 : struct spdk_lw_thread *lw_thread, *tmp;
365 :
366 2 : assert(target == spdk_reactor_get(spdk_env_get_current_core()));
367 2 : assert(target != NULL);
368 2 : assert(target->in_interrupt != target->new_in_interrupt);
369 2 : SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n",
370 : target->lcore, target->in_interrupt ? "intr" : "poll", target->new_in_interrupt ? "intr" : "poll");
371 :
372 2 : target->in_interrupt = target->new_in_interrupt;
373 :
374 2 : if (spdk_interrupt_mode_is_enabled()) {
375 : /* Align spdk_thread with reactor to interrupt mode or poll mode */
376 0 : TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) {
377 0 : thread = spdk_thread_get_from_ctx(lw_thread);
378 0 : if (target->in_interrupt) {
379 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
380 0 : spdk_fd_group_nest(target->fgrp, grp);
381 : } else {
382 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
383 0 : spdk_fd_group_unnest(target->fgrp, grp);
384 : }
385 :
386 0 : spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target);
387 : }
388 : }
389 :
390 2 : if (target->new_in_interrupt == false) {
391 : /* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately
392 : * track reactor stats. */
393 1 : target->tsc_last = spdk_get_ticks();
394 1 : spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
395 : } else {
396 1 : uint64_t notify = 1;
397 1 : int rc = 0;
398 :
399 : /* Always trigger spdk_event and resched event in case of race condition */
400 1 : rc = write(target->events_fd, ¬ify, sizeof(notify));
401 1 : if (rc < 0) {
402 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
403 : }
404 1 : rc = write(target->resched_fd, ¬ify, sizeof(notify));
405 1 : if (rc < 0) {
406 0 : SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
407 : }
408 :
409 1 : target->set_interrupt_mode_in_progress = false;
410 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
411 : target->set_interrupt_mode_cb_arg);
412 : }
413 2 : }
414 :
415 : int
416 2 : spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt,
417 : spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg)
418 : {
419 : struct spdk_reactor *target;
420 :
421 2 : target = spdk_reactor_get(lcore);
422 2 : if (target == NULL) {
423 0 : return -EINVAL;
424 : }
425 :
426 : /* Eventfd has to be supported in order to use interrupt functionality. */
427 2 : if (target->fgrp == NULL) {
428 0 : return -ENOTSUP;
429 : }
430 :
431 2 : if (!spdk_thread_is_app_thread(NULL)) {
432 0 : SPDK_ERRLOG("It is only permitted within spdk application thread.\n");
433 0 : return -EPERM;
434 : }
435 :
436 2 : if (target->in_interrupt == new_in_interrupt) {
437 0 : cb_fn(cb_arg);
438 0 : return 0;
439 : }
440 :
441 2 : if (target->set_interrupt_mode_in_progress) {
442 0 : SPDK_NOTICELOG("Reactor(%u) is already in progress to set interrupt mode\n", lcore);
443 0 : return -EBUSY;
444 : }
445 2 : target->set_interrupt_mode_in_progress = true;
446 :
447 2 : target->new_in_interrupt = new_in_interrupt;
448 2 : target->set_interrupt_mode_cb_fn = cb_fn;
449 2 : target->set_interrupt_mode_cb_arg = cb_arg;
450 :
451 2 : SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n",
452 : spdk_env_get_current_core(), lcore);
453 :
454 2 : if (new_in_interrupt == false) {
455 : /* For potential race cases, when setting the reactor to poll mode,
456 : * first change the mode of the reactor and then clear the corresponding
457 : * bit of the notify_cpuset of each reactor.
458 : */
459 1 : _event_call(lcore, _reactor_set_interrupt_mode, target, NULL);
460 : } else {
461 : /* For race cases, when setting the reactor to interrupt mode, first set the
462 : * corresponding bit of the notify_cpuset of each reactor and then change the mode.
463 : */
464 1 : spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
465 : }
466 :
467 2 : return 0;
468 : }
469 :
470 : struct spdk_event *
471 51 : spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
472 : {
473 51 : struct spdk_event *event = NULL;
474 51 : struct spdk_reactor *reactor = spdk_reactor_get(lcore);
475 :
476 51 : if (!reactor) {
477 0 : assert(false);
478 : return NULL;
479 : }
480 :
481 51 : event = spdk_mempool_get(g_spdk_event_mempool);
482 51 : if (event == NULL) {
483 0 : assert(false);
484 : return NULL;
485 : }
486 :
487 51 : event->lcore = lcore;
488 51 : event->fn = fn;
489 51 : event->arg1 = arg1;
490 51 : event->arg2 = arg2;
491 :
492 51 : return event;
493 : }
494 :
495 : void
496 51 : spdk_event_call(struct spdk_event *event)
497 : {
498 : int rc;
499 : struct spdk_reactor *reactor;
500 51 : struct spdk_reactor *local_reactor = NULL;
501 51 : uint32_t current_core = spdk_env_get_current_core();
502 :
503 51 : reactor = spdk_reactor_get(event->lcore);
504 :
505 51 : assert(reactor != NULL);
506 51 : assert(reactor->events != NULL);
507 :
508 51 : rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL);
509 51 : if (rc != 1) {
510 0 : assert(false);
511 : }
512 :
513 51 : if (current_core != SPDK_ENV_LCORE_ID_ANY) {
514 51 : local_reactor = spdk_reactor_get(current_core);
515 : }
516 :
517 : /* If spdk_event_call isn't called on a reactor, always send a notification.
518 : * If it is called on a reactor, send a notification if the destination reactor
519 : * is indicated in interrupt mode state.
520 : */
521 51 : if (spdk_unlikely(local_reactor == NULL) ||
522 51 : spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) {
523 4 : uint64_t notify = 1;
524 :
525 4 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
526 4 : if (rc < 0) {
527 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
528 : }
529 : }
530 51 : }
531 :
532 : static inline int
533 118 : event_queue_run_batch(void *arg)
534 : {
535 118 : struct spdk_reactor *reactor = arg;
536 : size_t count, i;
537 118 : void *events[SPDK_EVENT_BATCH_SIZE];
538 : struct spdk_thread *thread;
539 : struct spdk_lw_thread *lw_thread;
540 :
541 : #ifdef DEBUG
542 : /*
543 : * spdk_ring_dequeue() fills events and returns how many entries it wrote,
544 : * so we will never actually read uninitialized data from events, but just to be sure
545 : * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
546 : */
547 118 : memset(events, 0, sizeof(events));
548 : #endif
549 :
550 : /* Operate event notification if this reactor currently runs in interrupt state */
551 118 : if (spdk_unlikely(reactor->in_interrupt)) {
552 3 : uint64_t notify = 1;
553 : int rc;
554 :
555 : /* There may be race between event_acknowledge and another producer's event_notify,
556 : * so event_acknowledge should be applied ahead. And then check for self's event_notify.
557 : * This can avoid event notification missing.
558 : */
559 3 : rc = read(reactor->events_fd, ¬ify, sizeof(notify));
560 3 : if (rc < 0) {
561 0 : SPDK_ERRLOG("failed to acknowledge event queue: %s.\n", spdk_strerror(errno));
562 0 : return -errno;
563 : }
564 :
565 3 : count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
566 :
567 3 : if (spdk_ring_count(reactor->events) != 0) {
568 : /* Trigger new notification if there are still events in event-queue waiting for processing. */
569 0 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
570 0 : if (rc < 0) {
571 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
572 0 : return -errno;
573 : }
574 : }
575 : } else {
576 115 : count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
577 : }
578 :
579 118 : if (count == 0) {
580 69 : return 0;
581 : }
582 :
583 : /* Execute the events. There are still some remaining events
584 : * that must occur on an SPDK thread. To accommodate those, try to
585 : * run them on the first thread in the list, if it exists. */
586 49 : lw_thread = TAILQ_FIRST(&reactor->threads);
587 49 : if (lw_thread) {
588 17 : thread = spdk_thread_get_from_ctx(lw_thread);
589 : } else {
590 32 : thread = NULL;
591 : }
592 :
593 100 : for (i = 0; i < count; i++) {
594 51 : struct spdk_event *event = events[i];
595 :
596 51 : assert(event != NULL);
597 51 : spdk_set_thread(thread);
598 :
599 : SPDK_DTRACE_PROBE3(event_exec, event->fn,
600 : event->arg1, event->arg2);
601 51 : event->fn(event->arg1, event->arg2);
602 51 : spdk_set_thread(NULL);
603 : }
604 :
605 49 : spdk_mempool_put_bulk(g_spdk_event_mempool, events, count);
606 :
607 49 : return (int)count;
608 : }
609 :
610 : /* 1s */
611 : #define CONTEXT_SWITCH_MONITOR_PERIOD 1000000
612 :
613 : static int
614 8 : get_rusage(struct spdk_reactor *reactor)
615 : {
616 8 : struct rusage rusage;
617 :
618 8 : if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
619 0 : return -1;
620 : }
621 :
622 8 : if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
623 8 : SPDK_INFOLOG(reactor,
624 : "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
625 : reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
626 : rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
627 : }
628 8 : reactor->rusage = rusage;
629 :
630 8 : return -1;
631 : }
632 :
633 : void
634 0 : spdk_framework_enable_context_switch_monitor(bool enable)
635 : {
636 : /* This global is being read by multiple threads, so this isn't
637 : * strictly thread safe. However, we're toggling between true and
638 : * false here, and if a thread sees the value update later than it
639 : * should, it's no big deal. */
640 0 : g_framework_context_switch_monitor_enabled = enable;
641 0 : }
642 :
643 : bool
644 0 : spdk_framework_context_switch_monitor_enabled(void)
645 : {
646 0 : return g_framework_context_switch_monitor_enabled;
647 : }
648 :
/* Set the name of the calling POSIX thread using the platform's native call. */
static void
_set_thread_name(const char *thread_name)
{
#ifdef __linux__
	prctl(PR_SET_NAME, thread_name, 0, 0, 0);
#else
#ifdef __FreeBSD__
	pthread_set_name_np(pthread_self(), thread_name);
#else
	pthread_setname_np(pthread_self(), thread_name);
#endif
#endif
}
660 :
661 : static void
662 15 : _init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
663 : {
664 15 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
665 : struct spdk_thread_stats prev_total_stats;
666 :
667 : /* Read total_stats before updating it to calculate stats during the last scheduling period. */
668 15 : prev_total_stats = lw_thread->total_stats;
669 :
670 15 : spdk_set_thread(thread);
671 15 : spdk_thread_get_stats(&lw_thread->total_stats);
672 15 : spdk_set_thread(NULL);
673 :
674 15 : lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc;
675 15 : lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc;
676 15 : }
677 :
678 : static void
679 8 : _threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info)
680 : {
681 : struct spdk_lw_thread *lw_thread;
682 : struct spdk_thread *thread;
683 :
684 8 : thread = spdk_thread_get_by_id(thread_info->thread_id);
685 8 : if (thread == NULL) {
686 : /* Thread no longer exists. */
687 0 : return;
688 : }
689 8 : lw_thread = spdk_thread_get_ctx(thread);
690 8 : assert(lw_thread != NULL);
691 :
692 8 : lw_thread->lcore = thread_info->lcore;
693 8 : lw_thread->resched = true;
694 : }
695 :
696 : static void
697 6 : _threads_reschedule(struct spdk_scheduler_core_info *cores_info)
698 : {
699 : struct spdk_scheduler_core_info *core;
700 : struct spdk_scheduler_thread_info *thread_info;
701 : uint32_t i, j;
702 :
703 21 : SPDK_ENV_FOREACH_CORE(i) {
704 15 : core = &cores_info[i];
705 30 : for (j = 0; j < core->threads_count; j++) {
706 15 : thread_info = &core->thread_infos[j];
707 15 : if (thread_info->lcore != i) {
708 8 : _threads_reschedule_thread(thread_info);
709 : }
710 : }
711 15 : core->threads_count = 0;
712 15 : free(core->thread_infos);
713 15 : core->thread_infos = NULL;
714 : }
715 6 : }
716 :
717 : static void
718 6 : _reactors_scheduler_fini(void)
719 : {
720 : /* Reschedule based on the balancing output */
721 6 : _threads_reschedule(g_core_infos);
722 :
723 6 : g_scheduling_in_progress = false;
724 6 : }
725 :
726 : static void
727 8 : _reactors_scheduler_update_core_mode(void *ctx)
728 : {
729 : struct spdk_reactor *reactor;
730 : uint32_t i;
731 8 : int rc = 0;
732 :
733 21 : for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) {
734 15 : reactor = spdk_reactor_get(i);
735 15 : assert(reactor != NULL);
736 15 : if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) {
737 : /* Switch next found reactor to new state */
738 2 : rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode,
739 : _reactors_scheduler_update_core_mode, NULL);
740 2 : if (rc == 0) {
741 : /* Set core to start with after callback completes */
742 2 : g_scheduler_core_number = spdk_env_get_next_core(i);
743 2 : return;
744 : }
745 : }
746 : }
747 6 : _reactors_scheduler_fini();
748 : }
749 :
750 : static void
751 0 : _reactors_scheduler_cancel(void *arg1, void *arg2)
752 : {
753 : struct spdk_scheduler_core_info *core;
754 : uint32_t i;
755 :
756 0 : SPDK_ENV_FOREACH_CORE(i) {
757 0 : core = &g_core_infos[i];
758 0 : core->threads_count = 0;
759 0 : free(core->thread_infos);
760 0 : core->thread_infos = NULL;
761 : }
762 :
763 0 : g_scheduling_in_progress = false;
764 0 : }
765 :
766 : static void
767 6 : _reactors_scheduler_balance(void *arg1, void *arg2)
768 : {
769 6 : struct spdk_scheduler *scheduler = spdk_scheduler_get();
770 :
771 6 : if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) {
772 0 : _reactors_scheduler_cancel(NULL, NULL);
773 0 : return;
774 : }
775 :
776 6 : scheduler->balance(g_core_infos, g_reactor_count);
777 :
778 6 : g_scheduler_core_number = spdk_env_get_first_core();
779 6 : _reactors_scheduler_update_core_mode(NULL);
780 : }
781 :
782 : /* Phase 1 of thread scheduling is to gather metrics on the existing threads */
783 : static void
784 15 : _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
785 : {
786 : struct spdk_scheduler_core_info *core_info;
787 : struct spdk_lw_thread *lw_thread;
788 : struct spdk_thread *thread;
789 : struct spdk_reactor *reactor;
790 : uint32_t next_core;
791 15 : uint32_t i = 0;
792 :
793 15 : reactor = spdk_reactor_get(spdk_env_get_current_core());
794 15 : assert(reactor != NULL);
795 15 : core_info = &g_core_infos[reactor->lcore];
796 15 : core_info->lcore = reactor->lcore;
797 15 : core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc;
798 15 : core_info->total_idle_tsc = reactor->idle_tsc;
799 15 : core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc;
800 15 : core_info->total_busy_tsc = reactor->busy_tsc;
801 15 : core_info->interrupt_mode = reactor->in_interrupt;
802 15 : core_info->threads_count = 0;
803 :
804 15 : SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);
805 :
806 15 : if (reactor->thread_count > 0) {
807 11 : core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos));
808 11 : if (core_info->thread_infos == NULL) {
809 0 : SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);
810 :
811 : /* Cancel this round of schedule work */
812 0 : _event_call(g_scheduling_reactor->lcore, _reactors_scheduler_cancel, NULL, NULL);
813 0 : return;
814 : }
815 :
816 26 : TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
817 15 : _init_thread_stats(reactor, lw_thread);
818 :
819 15 : core_info->thread_infos[i].lcore = lw_thread->lcore;
820 15 : thread = spdk_thread_get_from_ctx(lw_thread);
821 15 : assert(thread != NULL);
822 15 : core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread);
823 15 : core_info->thread_infos[i].total_stats = lw_thread->total_stats;
824 15 : core_info->thread_infos[i].current_stats = lw_thread->current_stats;
825 15 : core_info->threads_count++;
826 15 : assert(core_info->threads_count <= reactor->thread_count);
827 15 : i++;
828 : }
829 : }
830 :
831 15 : next_core = spdk_env_get_next_core(reactor->lcore);
832 15 : if (next_core == UINT32_MAX) {
833 6 : next_core = spdk_env_get_first_core();
834 : }
835 :
836 : /* If we've looped back around to the scheduler thread, move to the next phase */
837 15 : if (next_core == g_scheduling_reactor->lcore) {
838 : /* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
839 6 : _event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
840 6 : return;
841 : }
842 :
843 9 : _event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL);
844 : }
845 :
846 : static int _reactor_schedule_thread(struct spdk_thread *thread);
847 : static uint64_t g_rusage_period;
848 :
849 : static void
850 19 : _reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
851 : {
852 19 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
853 : struct spdk_fd_group *grp;
854 :
855 19 : TAILQ_REMOVE(&reactor->threads, lw_thread, link);
856 19 : assert(reactor->thread_count > 0);
857 19 : reactor->thread_count--;
858 :
859 : /* Operate thread intr if running with full interrupt ability */
860 19 : if (spdk_interrupt_mode_is_enabled()) {
861 0 : if (reactor->in_interrupt) {
862 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
863 0 : spdk_fd_group_unnest(reactor->fgrp, grp);
864 : }
865 : }
866 19 : }
867 :
868 : static bool
869 57 : reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
870 : {
871 57 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
872 :
873 57 : if (spdk_unlikely(spdk_thread_is_exited(thread) &&
874 : spdk_thread_is_idle(thread))) {
875 11 : _reactor_remove_lw_thread(reactor, lw_thread);
876 11 : spdk_thread_destroy(thread);
877 11 : return true;
878 : }
879 :
880 46 : if (spdk_unlikely(lw_thread->resched && !spdk_thread_is_bound(thread))) {
881 8 : lw_thread->resched = false;
882 8 : _reactor_remove_lw_thread(reactor, lw_thread);
883 8 : _reactor_schedule_thread(thread);
884 8 : return true;
885 : }
886 :
887 38 : return false;
888 : }
889 :
890 : static void
891 0 : reactor_interrupt_run(struct spdk_reactor *reactor)
892 : {
893 0 : int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */
894 :
895 0 : spdk_fd_group_wait(reactor->fgrp, block_timeout);
896 0 : }
897 :
898 : static void
899 43 : _reactor_run(struct spdk_reactor *reactor)
900 : {
901 : struct spdk_thread *thread;
902 : struct spdk_lw_thread *lw_thread, *tmp;
903 : uint64_t now;
904 : int rc;
905 :
906 43 : event_queue_run_batch(reactor);
907 :
908 : /* If no threads are present on the reactor,
909 : * tsc_last gets outdated. Update it to track
910 : * thread execution time correctly. */
911 43 : if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) {
912 4 : now = spdk_get_ticks();
913 4 : reactor->idle_tsc += now - reactor->tsc_last;
914 4 : reactor->tsc_last = now;
915 4 : return;
916 : }
917 :
918 96 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
919 57 : thread = spdk_thread_get_from_ctx(lw_thread);
920 57 : rc = spdk_thread_poll(thread, 0, reactor->tsc_last);
921 :
922 57 : now = spdk_thread_get_last_tsc(thread);
923 57 : if (rc == 0) {
924 49 : reactor->idle_tsc += now - reactor->tsc_last;
925 8 : } else if (rc > 0) {
926 8 : reactor->busy_tsc += now - reactor->tsc_last;
927 : }
928 57 : reactor->tsc_last = now;
929 :
930 57 : reactor_post_process_lw_thread(reactor, lw_thread);
931 : }
932 : }
933 :
/* Entry point of a reactor; `arg` is the struct spdk_reactor to run.
 *
 * Renames the calling thread to "reactor_<lcore>" and then loops, running the
 * reactor in interrupt or poll mode, until g_reactor_state is no longer
 * SPDK_REACTOR_STATE_RUNNING. On the way out it calls spdk_thread_exit() on
 * every thread that is still running and keeps servicing the remaining
 * threads until each one reports exited and has been destroyed.
 *
 * Always returns 0.
 */
934 : static int
935 9 : reactor_run(void *arg)
936 : {
937 9 : struct spdk_reactor *reactor = arg;
938 : struct spdk_thread *thread;
939 : struct spdk_lw_thread *lw_thread, *tmp;
940 9 : char thread_name[32];
941 9 : uint64_t last_sched = 0;
942 :
943 9 : SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);
944 :
945 : /* Rename the POSIX thread because the reactor is tied to the POSIX
946 : * thread in the SPDK event library.
947 : */
948 9 : snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore);
949 9 : _set_thread_name(thread_name);
950 :
951 9 : reactor->tsc_last = spdk_get_ticks();
952 :
953 : while (1) {
954 : /* Execute interrupt process fn if this reactor currently runs in interrupt state */
955 9 : if (spdk_unlikely(reactor->in_interrupt)) {
956 0 : reactor_interrupt_run(reactor);
957 : } else {
958 9 : _reactor_run(reactor);
959 : }
960 :
/* Call get_rusage() once more than g_rusage_period ticks have passed since the last call. */
961 9 : if (g_framework_context_switch_monitor_enabled) {
962 9 : if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) {
963 8 : get_rusage(reactor);
964 8 : reactor->last_rusage = reactor->tsc_last;
965 : }
966 : }
967 :
/* Only g_scheduling_reactor starts a scheduling round, and only when a
 * scheduler period is configured, that period has elapsed since the last
 * round started here, and no round is already in progress.
 */
968 9 : if (spdk_unlikely(g_scheduler_period > 0 &&
969 : (reactor->tsc_last - last_sched) > g_scheduler_period &&
970 : reactor == g_scheduling_reactor &&
971 : !g_scheduling_in_progress)) {
972 0 : last_sched = reactor->tsc_last;
973 0 : g_scheduling_in_progress = true;
974 0 : _reactors_scheduler_gather_metrics(NULL, NULL);
975 : }
976 :
/* The state is tested at the end of the body, so the body runs at least once. */
977 9 : if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
978 9 : break;
979 : }
980 : }
981 :
/* First shutdown pass: request exit of every thread that is still running. */
982 9 : TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
983 0 : thread = spdk_thread_get_from_ctx(lw_thread);
984 : /* All threads should have already had spdk_thread_exit() called on them, except
985 : * for the app thread.
986 : */
987 0 : if (spdk_thread_is_running(thread)) {
988 0 : if (!spdk_thread_is_app_thread(thread)) {
989 0 : SPDK_ERRLOG("spdk_thread_exit() was not called on thread '%s'\n",
990 : spdk_thread_get_name(thread));
991 0 : SPDK_ERRLOG("This will result in a non-zero exit code in a future release.\n");
992 : }
993 0 : spdk_set_thread(thread);
994 0 : spdk_thread_exit(thread);
995 : }
996 : }
997 :
/* Second shutdown pass: destroy threads that report exited and keep running
 * the others until the reactor's thread list is empty.
 */
998 9 : while (!TAILQ_EMPTY(&reactor->threads)) {
999 0 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
1000 0 : thread = spdk_thread_get_from_ctx(lw_thread);
1001 0 : spdk_set_thread(thread);
1002 0 : if (spdk_thread_is_exited(thread)) {
1003 0 : _reactor_remove_lw_thread(reactor, lw_thread);
1004 0 : spdk_thread_destroy(thread);
1005 : } else {
1006 0 : if (spdk_unlikely(reactor->in_interrupt)) {
1007 0 : reactor_interrupt_run(reactor);
1008 : } else {
1009 0 : spdk_thread_poll(thread, 0, 0);
1010 : }
1011 : }
1012 : }
1013 : }
1014 :
1015 9 : return 0;
1016 : }
1017 :
/* Parse the string `mask` into `cpumask` and restrict it to the application's cores.
 *
 * The string is parsed with spdk_cpuset_parse(); a negative result is returned
 * as is. Otherwise `cpumask` is ANDed with the mask returned by
 * spdk_app_get_core_mask() and 0 is returned.
 */
1018 : int
1019 0 : spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
1020 : {
1021 : int ret;
1022 : const struct spdk_cpuset *validmask;
1023 :
1024 0 : ret = spdk_cpuset_parse(cpumask, mask);
1025 0 : if (ret < 0) {
1026 0 : return ret;
1027 : }
1028 :
1029 0 : validmask = spdk_app_get_core_mask();
1030 0 : spdk_cpuset_and(cpumask, validmask);
1031 :
1032 0 : return 0;
1033 : }
1034 :
/* Return a read-only pointer to the file-level g_reactor_core_mask. */
1035 : const struct spdk_cpuset *
1036 0 : spdk_app_get_core_mask(void)
1037 : {
1038 0 : return &g_reactor_core_mask;
1039 : }
1040 :
/* Start all reactors and run the current core's reactor in the calling thread.
 *
 * Sets g_reactor_state to SPDK_REACTOR_STATE_RUNNING, launches reactor_run()
 * pinned on every other environment core that has a reactor, and records each
 * environment core in g_reactor_core_mask. The call returns only after the
 * local reactor_run() has returned and spdk_env_thread_wait_all() has
 * completed; the state is then set to SPDK_REACTOR_STATE_SHUTDOWN.
 */
1041 : void
1042 0 : spdk_reactors_start(void)
1043 : {
1044 : struct spdk_reactor *reactor;
1045 : uint32_t i, current_core;
1046 : int rc;
1047 :
1048 0 : g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC;
1049 0 : g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
1050 : /* Reinitialize to false, in case the app framework is restarting in the same process. */
1051 0 : g_stopping_reactors = false;
1052 :
1053 0 : current_core = spdk_env_get_current_core();
1054 0 : SPDK_ENV_FOREACH_CORE(i) {
/* The current core is not launched here; its reactor runs inline below. */
1055 0 : if (i != current_core) {
1056 0 : reactor = spdk_reactor_get(i);
1057 0 : if (reactor == NULL) {
1058 0 : continue;
1059 : }
1060 :
1061 0 : rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor);
/* On launch failure: log, assert, and return early without running the main reactor. */
1062 0 : if (rc < 0) {
1063 0 : SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
1064 0 : assert(false);
1065 : return;
1066 : }
1067 : }
1068 0 : spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
1069 : }
1070 :
1071 : /* Start the main reactor */
1072 0 : reactor = spdk_reactor_get(current_core);
1073 0 : assert(reactor != NULL);
1074 0 : reactor_run(reactor);
1075 :
1076 0 : spdk_env_thread_wait_all();
1077 :
1078 0 : g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
1079 : }
1080 :
1081 : static void
1082 0 : _reactors_stop(void *arg1, void *arg2)
1083 : {
1084 : uint32_t i;
1085 : int rc;
1086 : struct spdk_reactor *reactor;
1087 : struct spdk_reactor *local_reactor;
1088 0 : uint64_t notify = 1;
1089 :
1090 0 : g_reactor_state = SPDK_REACTOR_STATE_EXITING;
1091 0 : local_reactor = spdk_reactor_get(spdk_env_get_current_core());
1092 :
1093 0 : SPDK_ENV_FOREACH_CORE(i) {
1094 : /* If spdk_event_call isn't called on a reactor, always send a notification.
1095 : * If it is called on a reactor, send a notification if the destination reactor
1096 : * is indicated in interrupt mode state.
1097 : */
1098 0 : if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) {
1099 0 : reactor = spdk_reactor_get(i);
1100 0 : assert(reactor != NULL);
1101 0 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
1102 0 : if (rc < 0) {
1103 0 : SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno));
1104 0 : continue;
1105 : }
1106 : }
1107 : }
1108 0 : }
1109 :
/* Intentionally empty event function; spdk_reactors_stop() passes it as the
 * per-reactor function of its spdk_for_each_reactor() call.
 */
1110 : static void
1111 0 : nop(void *arg1, void *arg2)
1112 : {
1113 0 : }
1114 :
/* Begin reactor shutdown: run an spdk_for_each_reactor() pass that executes
 * nop() on each reactor and completes with _reactors_stop(). arg1 is unused.
 */
1115 : void
1116 0 : spdk_reactors_stop(void *arg1)
1117 : {
1118 0 : spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop);
1119 0 : }
1120 :
/* Held by _reactor_schedule_thread() while it picks a core and allocates the event. */
1121 : static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER;
/* Next core to try in round-robin placement. It is reset to
 * spdk_env_get_first_core() whenever it is >= g_reactor_count, which the
 * UINT32_MAX initial value triggers on first use.
 */
1122 : static uint32_t g_next_core = UINT32_MAX;
1123 :
/* Event handler that attaches a lightweight thread to the reactor of the core
 * it executes on. arg1 is the struct spdk_lw_thread; arg2 is unused.
 *
 * Snapshots the thread's stats into lw_thread->total_stats, records the
 * current core in lw_thread->lcore, and appends the lw_thread to the
 * reactor's thread list. When interrupt mode is enabled, it also sends the
 * thread a _reactor_set_thread_interrupt_mode message.
 */
1124 : static void
1125 21 : _schedule_thread(void *arg1, void *arg2)
1126 : {
1127 21 : struct spdk_lw_thread *lw_thread = arg1;
1128 : struct spdk_thread *thread;
1129 : struct spdk_reactor *reactor;
1130 : uint32_t current_core;
1131 : struct spdk_fd_group *grp;
1132 :
1133 21 : current_core = spdk_env_get_current_core();
1134 21 : reactor = spdk_reactor_get(current_core);
1135 21 : assert(reactor != NULL);
1136 :
1137 : /* Update total_stats to reflect state of thread
1138 : * at the end of the move. */
1139 21 : thread = spdk_thread_get_from_ctx(lw_thread);
/* spdk_thread_get_stats() takes no thread argument, so the thread is made
 * current for the call and cleared again afterwards.
 */
1140 21 : spdk_set_thread(thread);
1141 21 : spdk_thread_get_stats(&lw_thread->total_stats);
1142 21 : spdk_set_thread(NULL);
1143 :
1144 21 : lw_thread->lcore = current_core;
1145 :
1146 21 : TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link);
1147 21 : reactor->thread_count++;
1148 :
1149 : /* Operate thread intr if running with full interrupt ability */
1150 21 : if (spdk_interrupt_mode_is_enabled()) {
1151 : int rc;
1152 :
/* Nest the thread's interrupt fd group into the reactor's group; a failure is only logged. */
1153 0 : if (reactor->in_interrupt) {
1154 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
1155 0 : rc = spdk_fd_group_nest(reactor->fgrp, grp);
1156 0 : if (rc < 0) {
1157 0 : SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc));
1158 : }
1159 : }
1160 :
1161 : /* Align spdk_thread with reactor to interrupt mode or poll mode */
1162 0 : spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor);
1163 : }
1164 21 : }
1165 :
/* Choose a core for `thread` and send that core a _schedule_thread event.
 *
 * The requested core is read from the thread's lw_thread context
 * (lw_thread->lcore), after which the whole context is zeroed. When the core
 * is SPDK_ENV_LCORE_ID_ANY, cores are tried round-robin via g_next_core under
 * g_scheduler_mtx, and the first one present in the candidate cpumask is
 * used; if none matches within spdk_env_get_core_count() attempts, the last
 * core tried is used. The candidate cpumask starts as the thread's own
 * cpumask and may be replaced as described in the comments below.
 *
 * Returns 0 once the event has been sent, or -1 if no event could be allocated.
 */
1166 : static int
1167 21 : _reactor_schedule_thread(struct spdk_thread *thread)
1168 : {
1169 : uint32_t core;
1170 : struct spdk_lw_thread *lw_thread;
1171 21 : struct spdk_event *evt = NULL;
1172 : struct spdk_cpuset *cpumask;
1173 : uint32_t i;
1174 21 : struct spdk_reactor *local_reactor = NULL;
1175 21 : uint32_t current_lcore = spdk_env_get_current_core();
1176 21 : struct spdk_cpuset polling_cpumask;
1177 21 : struct spdk_cpuset valid_cpumask;
1178 :
1179 21 : cpumask = spdk_thread_get_cpumask(thread);
1180 :
1181 21 : lw_thread = spdk_thread_get_ctx(thread);
1182 21 : assert(lw_thread != NULL);
/* Save the requested core before the context is wiped. */
1183 21 : core = lw_thread->lcore;
1184 21 : memset(lw_thread, 0, sizeof(*lw_thread));
1185 :
1186 21 : if (current_lcore != SPDK_ENV_LCORE_ID_ANY) {
1187 21 : local_reactor = spdk_reactor_get(current_lcore);
1188 21 : assert(local_reactor);
1189 : }
1190 :
1191 : /* When interrupt ability of spdk_thread is not enabled and the current
1192 : * reactor runs on DPDK thread, skip reactors which are in interrupt mode.
1193 : */
1194 21 : if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) {
1195 : /* Get the cpumask of all reactors in polling */
/* Computed as: every environment core, XORed with the local reactor's notify_cpuset. */
1196 21 : spdk_cpuset_zero(&polling_cpumask);
1197 76 : SPDK_ENV_FOREACH_CORE(i) {
1198 55 : spdk_cpuset_set_cpu(&polling_cpumask, i, true);
1199 : }
1200 21 : spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);
1201 :
1202 21 : if (core == SPDK_ENV_LCORE_ID_ANY) {
1203 : /* Get the cpumask of all valid reactors which are suggested and also in polling */
1204 14 : spdk_cpuset_copy(&valid_cpumask, &polling_cpumask);
1205 14 : spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));
1206 :
1207 : /* If there are any valid reactors, spdk_thread should be scheduled
1208 : * into one of the valid reactors.
1209 : * If there are no valid reactors, spdk_thread should be scheduled
1210 : * into one of the polling reactors.
1211 : */
1212 14 : if (spdk_cpuset_count(&valid_cpumask) != 0) {
1213 14 : cpumask = &valid_cpumask;
1214 : } else {
1215 0 : cpumask = &polling_cpumask;
1216 : }
1217 7 : } else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) {
1218 : /* If specified reactor is not in polling, spdk_thread should be scheduled
1219 : * into one of the polling reactors.
1220 : */
1221 0 : core = SPDK_ENV_LCORE_ID_ANY;
1222 0 : cpumask = &polling_cpumask;
1223 : }
1224 : }
1225 :
1226 21 : pthread_mutex_lock(&g_scheduler_mtx);
1227 21 : if (core == SPDK_ENV_LCORE_ID_ANY) {
/* Round-robin over the cores, stopping at the first one contained in cpumask. */
1228 19 : for (i = 0; i < spdk_env_get_core_count(); i++) {
1229 19 : if (g_next_core >= g_reactor_count) {
1230 5 : g_next_core = spdk_env_get_first_core();
1231 : }
1232 19 : core = g_next_core;
1233 19 : g_next_core = spdk_env_get_next_core(g_next_core);
1234 :
1235 19 : if (spdk_cpuset_get_cpu(cpumask, core)) {
1236 14 : break;
1237 : }
1238 : }
1239 : }
1240 :
1241 21 : evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL);
1242 :
1243 21 : pthread_mutex_unlock(&g_scheduler_mtx);
1244 :
/* The assert catches a failed allocation in debug builds; the check below handles it otherwise. */
1245 21 : assert(evt != NULL);
1246 21 : if (evt == NULL) {
1247 0 : SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n");
1248 0 : return -1;
1249 : }
1250 :
1251 21 : lw_thread->tsc_start = spdk_get_ticks();
1252 :
1253 21 : spdk_event_call(evt);
1254 :
1255 21 : return 0;
1256 : }
1257 :
/* Flag `thread` for rescheduling; must be called on `thread` itself (asserted).
 *
 * Sets lw_thread->resched and resets lw_thread->lcore to
 * SPDK_ENV_LCORE_ID_ANY. If the current reactor's own lcore is set in its
 * notify_cpuset, the 64-bit value 1 is also written to the reactor's
 * resched_fd; a failed write is only logged.
 */
1258 : static void
1259 2 : _reactor_request_thread_reschedule(struct spdk_thread *thread)
1260 : {
1261 : struct spdk_lw_thread *lw_thread;
1262 : struct spdk_reactor *reactor;
1263 : uint32_t current_core;
1264 :
1265 2 : assert(thread == spdk_get_thread());
1266 :
1267 2 : lw_thread = spdk_thread_get_ctx(thread);
1268 :
1269 2 : assert(lw_thread != NULL);
1270 2 : lw_thread->resched = true;
1271 2 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1272 :
1273 2 : current_core = spdk_env_get_current_core();
1274 2 : reactor = spdk_reactor_get(current_core);
1275 2 : assert(reactor != NULL);
1276 :
1277 : /* Send a notification if the destination reactor is indicated in intr mode state */
1278 2 : if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) {
1279 0 : uint64_t notify = 1;
1280 :
1281 0 : if (write(reactor->resched_fd, &notify, sizeof(notify)) < 0) {
1282 0 : SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
1283 : }
1284 : }
1285 2 : }
1286 :
/* Perform thread operation `op` on `thread`.
 *
 * SPDK_THREAD_OP_NEW: sets lw_thread->lcore to SPDK_ENV_LCORE_ID_ANY and
 * returns the result of _reactor_schedule_thread().
 * SPDK_THREAD_OP_RESCHED: requests a reschedule and returns 0.
 * Any other op returns -ENOTSUP.
 */
1287 : static int
1288 15 : reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op)
1289 : {
1290 : struct spdk_lw_thread *lw_thread;
1291 :
1292 15 : switch (op) {
1293 13 : case SPDK_THREAD_OP_NEW:
1294 13 : lw_thread = spdk_thread_get_ctx(thread);
1295 13 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1296 13 : return _reactor_schedule_thread(thread);
1297 2 : case SPDK_THREAD_OP_RESCHED:
1298 2 : _reactor_request_thread_reschedule(thread);
1299 2 : return 0;
1300 0 : default:
1301 0 : return -ENOTSUP;
1302 : }
1303 : }
1304 :
/* Return true for SPDK_THREAD_OP_NEW and SPDK_THREAD_OP_RESCHED, the two
 * operations reactor_thread_op() handles, and false for any other op.
 */
1305 : static bool
1306 15 : reactor_thread_op_supported(enum spdk_thread_op op)
1307 : {
1308 15 : switch (op) {
1309 15 : case SPDK_THREAD_OP_NEW:
1310 : case SPDK_THREAD_OP_RESCHED:
1311 15 : return true;
1312 0 : default:
1313 0 : return false;
1314 : }
1315 : }
1316 :
/* State of one spdk_for_each_reactor() iteration. It is allocated by
 * spdk_for_each_reactor() and freed by on_reactor() when the iteration ends.
 */
1317 : struct call_reactor {
/* Core the iteration is currently visiting. */
1318 : uint32_t cur_core;
/* Function run on each visited core, called as fn(arg1, arg2). */
1319 : spdk_event_fn fn;
1320 : void *arg1;
1321 : void *arg2;
1322 :
/* Core on which spdk_for_each_reactor() was called; the completion event is sent there. */
1323 : uint32_t orig_core;
/* Completion function, dispatched with (arg1, arg2) after the last core. */
1324 : spdk_event_fn cpl;
1325 : };
1326 :
/* One step of a spdk_for_each_reactor() iteration. arg1 is the
 * struct call_reactor; arg2 is unused.
 *
 * Runs cr->fn on the current core and advances cr->cur_core. If the new
 * cur_core is >= g_reactor_count, the iteration is finished: the completion
 * is sent as an event to cr->orig_core and cr is freed. Otherwise the same cr
 * is forwarded to the next core as another on_reactor event.
 */
1327 : static void
1328 9 : on_reactor(void *arg1, void *arg2)
1329 : {
1330 9 : struct call_reactor *cr = arg1;
1331 : struct spdk_event *evt;
1332 :
1333 9 : cr->fn(cr->arg1, cr->arg2);
1334 :
1335 9 : cr->cur_core = spdk_env_get_next_core(cr->cur_core);
1336 :
1337 9 : if (cr->cur_core >= g_reactor_count) {
1338 3 : SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n");
1339 :
/* All fields needed by the completion are read from cr before it is freed. */
1340 3 : evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2);
1341 3 : free(cr);
1342 : } else {
1343 6 : SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n",
1344 : cr->cur_core);
1345 :
1346 6 : evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL);
1347 : }
/* NOTE(review): a failed allocation is only caught by this assert before evt is used. */
1348 9 : assert(evt != NULL);
1349 9 : spdk_event_call(evt);
1350 9 : }
1351 :
/* Run fn(arg1, arg2) on one core after another, starting at
 * spdk_env_get_first_core(), then dispatch cpl(arg1, arg2) as an event to the
 * core this function was called from.
 *
 * If reactor shutdown has already begun (g_stopping_reactors is set), the
 * call returns without invoking fn or cpl. If the iteration state cannot be
 * allocated, cpl is called directly and fn is never run.
 */
1352 : void
1353 3 : spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl)
1354 : {
1355 : struct call_reactor *cr;
1356 :
1357 : /* When the application framework is shutting down, we will send one
1358 : * final for_each_reactor operation with completion callback _reactors_stop,
1359 : * to flush any existing for_each_reactor operations to avoid any memory
1360 : * leaks. We use a mutex here to protect a boolean flag that will ensure
1361 : * we don't start any more operations once we've started shutting down.
1362 : */
1363 3 : pthread_mutex_lock(&g_stopping_reactors_mtx);
1364 3 : if (g_stopping_reactors) {
1365 0 : pthread_mutex_unlock(&g_stopping_reactors_mtx);
1366 0 : return;
1367 3 : } else if (cpl == _reactors_stop) {
1368 0 : g_stopping_reactors = true;
1369 : }
1370 3 : pthread_mutex_unlock(&g_stopping_reactors_mtx);
1371 :
1372 3 : cr = calloc(1, sizeof(*cr));
1373 3 : if (!cr) {
1374 0 : SPDK_ERRLOG("Unable to perform reactor iteration\n");
1375 0 : cpl(arg1, arg2);
1376 0 : return;
1377 : }
1378 :
1379 3 : cr->fn = fn;
1380 3 : cr->arg1 = arg1;
1381 3 : cr->arg2 = arg2;
1382 3 : cr->cpl = cpl;
1383 3 : cr->orig_core = spdk_env_get_current_core();
1384 3 : cr->cur_core = spdk_env_get_first_core();
1385 :
1386 3 : SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core);
1387 :
/* From here on cr is owned by the on_reactor() chain, which frees it after the last core. */
1388 3 : _event_call(cr->cur_core, on_reactor, cr, NULL);
1389 : }
1390 :
1391 : #ifdef __linux__
/* Handler for a reactor's resched_fd; `arg` is the struct spdk_reactor.
 * reactor_interrupt_init() registers it on the reactor's fd group.
 *
 * Reads the eventfd to acknowledge the notification, then calls
 * reactor_post_process_lw_thread() on every thread of the reactor.
 *
 * Returns the number of threads for which that call returned true, or -errno
 * if the read failed.
 */
1392 : static int
1393 0 : reactor_schedule_thread_event(void *arg)
1394 : {
1395 0 : struct spdk_reactor *reactor = arg;
1396 : struct spdk_lw_thread *lw_thread, *tmp;
1397 0 : uint32_t count = 0;
1398 0 : uint64_t notify = 1;
1399 :
1400 0 : assert(reactor->in_interrupt);
1401 :
1402 0 : if (read(reactor->resched_fd, &notify, sizeof(notify)) < 0) {
1403 0 : SPDK_ERRLOG("failed to acknowledge reschedule: %s.\n", spdk_strerror(errno));
1404 0 : return -errno;
1405 : }
1406 :
1407 0 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
1408 0 : count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0;
1409 : }
1410 :
1411 0 : return count;
1412 : }
1413 :
/* Set up the interrupt-mode plumbing of `reactor` (Linux build).
 *
 * Creates the reactor's fd group and two non-blocking, close-on-exec
 * eventfds, and registers them on the group: resched_fd with
 * reactor_schedule_thread_event() and events_fd with event_queue_run_batch().
 *
 * Returns 0 on success. On failure, whatever was set up so far is undone, the
 * fd group is destroyed, reactor->fgrp is set to NULL, and a non-zero code is
 * returned; a failed eventfd() is reported as -EBADF.
 */
1414 : static int
1415 27 : reactor_interrupt_init(struct spdk_reactor *reactor)
1416 : {
1417 : int rc;
1418 :
1419 27 : rc = spdk_fd_group_create(&reactor->fgrp);
1420 27 : if (rc != 0) {
1421 0 : return rc;
1422 : }
1423 :
1424 27 : reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1425 27 : if (reactor->resched_fd < 0) {
1426 0 : rc = -EBADF;
1427 0 : goto err;
1428 : }
1429 :
1430 27 : rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->resched_fd, reactor_schedule_thread_event,
1431 : reactor);
1432 27 : if (rc) {
1433 0 : close(reactor->resched_fd);
1434 0 : goto err;
1435 : }
1436 :
1437 27 : reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1438 27 : if (reactor->events_fd < 0) {
1439 0 : spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
1440 0 : close(reactor->resched_fd);
1441 :
1442 0 : rc = -EBADF;
1443 0 : goto err;
1444 : }
1445 :
1446 27 : rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->events_fd,
1447 : event_queue_run_batch, reactor);
1448 27 : if (rc) {
1449 0 : spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
1450 0 : close(reactor->resched_fd);
1451 0 : close(reactor->events_fd);
1452 0 : goto err;
1453 : }
1454 :
1455 27 : return 0;
1456 :
/* Common failure exit: each failing branch above has already released its own descriptors. */
1457 0 : err:
1458 0 : spdk_fd_group_destroy(reactor->fgrp);
1459 0 : reactor->fgrp = NULL;
1460 0 : return rc;
1461 : }
1462 : #else
/* Variant compiled when __linux__ is not defined: always returns -ENOTSUP. */
1463 : static int
1464 : reactor_interrupt_init(struct spdk_reactor *reactor)
1465 : {
1466 : return -ENOTSUP;
1467 : }
1468 : #endif
1469 :
/* Tear down what reactor_interrupt_init() set up: remove events_fd and
 * resched_fd from the reactor's fd group, close both descriptors, destroy
 * the group, and clear reactor->fgrp. Does nothing if reactor->fgrp is NULL.
 */
1470 : static void
1471 27 : reactor_interrupt_fini(struct spdk_reactor *reactor)
1472 : {
1473 27 : struct spdk_fd_group *fgrp = reactor->fgrp;
1474 :
1475 27 : if (!fgrp) {
1476 0 : return;
1477 : }
1478 :
1479 27 : spdk_fd_group_remove(fgrp, reactor->events_fd);
1480 27 : spdk_fd_group_remove(fgrp, reactor->resched_fd);
1481 :
1482 27 : close(reactor->events_fd);
1483 27 : close(reactor->resched_fd);
1484 :
1485 27 : spdk_fd_group_destroy(fgrp);
1486 27 : reactor->fgrp = NULL;
1487 : }
1488 :
/* Look up a registered governor by name: returns the first entry of
 * g_governor_list whose name compares equal to `name` (strcmp), or NULL if
 * there is none. `name` must not be NULL, as it is passed straight to strcmp().
 */
1489 : static struct spdk_governor *
1490 3 : _governor_find(const char *name)
1491 : {
1492 : struct spdk_governor *governor, *tmp;
1493 :
1494 3 : TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) {
1495 1 : if (strcmp(name, governor->name) == 0) {
1496 1 : return governor;
1497 : }
1498 : }
1499 :
1500 2 : return NULL;
1501 : }
1502 :
/* Select the active governor by name.
 *
 * A NULL `name` deinitializes the current governor (if any), clears
 * g_governor, and returns 0. An unregistered name returns -EINVAL. Selecting
 * the governor that is already active returns 0 without calling init() again.
 * Otherwise the new governor's init() is called and its result is returned;
 * only when that result is 0 is the previous governor deinitialized and
 * g_governor switched.
 */
1503 : int
1504 2 : spdk_governor_set(const char *name)
1505 : {
1506 : struct spdk_governor *governor;
1507 2 : int rc = 0;
1508 :
1509 : /* NULL governor was specifically requested */
1510 2 : if (name == NULL) {
1511 0 : if (g_governor) {
1512 0 : g_governor->deinit();
1513 : }
1514 0 : g_governor = NULL;
1515 0 : return 0;
1516 : }
1517 :
1518 2 : governor = _governor_find(name);
1519 2 : if (governor == NULL) {
1520 1 : return -EINVAL;
1521 : }
1522 :
1523 1 : if (g_governor == governor) {
1524 0 : return 0;
1525 : }
1526 :
/* Initialize the new governor first, so that a failed init() leaves the current one in place. */
1527 1 : rc = governor->init();
1528 1 : if (rc == 0) {
1529 1 : if (g_governor) {
1530 0 : g_governor->deinit();
1531 : }
1532 1 : g_governor = governor;
1533 : }
1534 :
1535 1 : return rc;
1536 : }
1537 :
/* Return the currently selected governor (g_governor), which is NULL when none is set. */
1538 : struct spdk_governor *
1539 6 : spdk_governor_get(void)
1540 : {
1541 6 : return g_governor;
1542 : }
1543 :
/* Append `governor` to g_governor_list. If a governor with the same name is
 * already registered, an error is logged, assert(false) fires, and the
 * function returns without inserting.
 */
1544 : void
1545 1 : spdk_governor_register(struct spdk_governor *governor)
1546 : {
1547 1 : if (_governor_find(governor->name)) {
1548 0 : SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name);
1549 0 : assert(false);
1550 : return;
1551 : }
1552 :
1553 1 : TAILQ_INSERT_TAIL(&g_governor_list, governor, link);
1554 : }
1555 :
/* Registers the `reactor` log component; the same identifier is passed to the
 * SPDK_DEBUGLOG(reactor, ...) calls in this file.
 */
1556 1 : SPDK_LOG_REGISTER_COMPONENT(reactor)
|