Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2015 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "env_internal.h"
7 : #include "pci_dpdk.h"
8 :
9 : #include <rte_alarm.h>
10 : #include <rte_devargs.h>
11 : #include <rte_pci.h>
12 : #include "spdk/env.h"
13 : #include "spdk/log.h"
14 : #include "spdk/string.h"
15 : #include "spdk/memory.h"
16 :
17 : #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"
18 :
19 : #define PCI_CFG_SIZE 256
20 : #define PCI_EXT_CAP_ID_SN 0x03
21 :
22 : /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
23 : * might cause the internal IPC to misbehave. Just retry in such case.
24 : */
25 : #define DPDK_HOTPLUG_RETRY_COUNT 4
26 :
/* Taken around updates to the two device lists below and around reads/writes
 * of the per-device attached / pending_removal / removed flags that are shared
 * with the DPDK alarm and device-event callbacks.
 */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* devices currently tracked by SPDK; entries are moved here from
 * g_pci_hotplugged_devices by cleanup_pci_devices()
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* drivers added by spdk_pci_driver_register() */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
/* providers added by spdk_pci_register_device_provider() */
static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers =
	TAILQ_HEAD_INITIALIZER(g_pci_device_providers);

/* Callbacks passed to dpdk_pci_driver_register() from pci_env_init(). */
int pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device);
int pci_device_fini(struct rte_pci_device *device);
39 :
/* SPDK-side record attached to a DPDK devargs entry, looked up by the
 * (bus, name) pair. Maintained by set_allowed_at() / get_allowed_at().
 */
struct env_devargs {
	/* bus the devargs entry belongs to (compared by pointer) */
	struct rte_bus *bus;
	/* device name copied from rte_devargs::name */
	char name[128];
	/* tick value after which scan_pci_bus() may unblock the device;
	 * 0 is treated the same as "no record"
	 */
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
/* all env_devargs records created so far */
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);
47 :
48 : static struct env_devargs *
49 0 : find_env_devargs(struct rte_bus *bus, const char *name)
50 : {
51 : struct env_devargs *da;
52 :
53 0 : TAILQ_FOREACH(da, &g_env_devargs, link) {
54 0 : if (bus == da->bus && !strcmp(name, da->name)) {
55 0 : return da;
56 : }
57 : }
58 :
59 0 : return NULL;
60 : }
61 :
62 : static int
63 0 : map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
64 : void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
65 : {
66 : struct rte_mem_resource *res;
67 :
68 0 : res = dpdk_pci_device_get_mem_resource(device->dev_handle, bar);
69 0 : *mapped_addr = res->addr;
70 0 : *phys_addr = (uint64_t)res->phys_addr;
71 0 : *size = (uint64_t)res->len;
72 :
73 0 : return 0;
74 : }
75 :
/* unmap_bar hook for DPDK-backed devices: nothing to undo here, always succeeds. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
81 :
82 : static int
83 0 : cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
84 : {
85 0 : return dpdk_pci_device_read_config(dev->dev_handle, value, len, offset);
86 : }
87 :
88 : static int
89 0 : cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
90 : {
91 0 : return dpdk_pci_device_write_config(dev->dev_handle, value, len, offset);
92 : }
93 :
94 : static void
95 0 : remove_rte_dev(struct rte_pci_device *rte_dev)
96 : {
97 0 : char bdf[32];
98 0 : int i = 0, rc;
99 :
100 0 : snprintf(bdf, sizeof(bdf), "%s", dpdk_pci_device_get_name(rte_dev));
101 : do {
102 0 : rc = rte_eal_hotplug_remove("pci", bdf);
103 0 : } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
104 0 : }
105 :
/* rte_eal_alarm callback: the argument is the rte_pci_device to hot-remove. */
static void
detach_rte_cb(void *_dev)
{
	struct rte_pci_device *rte_dev = _dev;

	remove_rte_dev(rte_dev);
}
111 :
112 : /* if it's a physical device we need to deal with DPDK on
113 : * a different process and we can't just unset one flag
114 : * here. We also want to stop using any device resources
115 : * so that the device isn't "in use" by the userspace driver
116 : * once we detach it. This would allow attaching the device
117 : * to a different process, or to a kernel driver like nvme.
118 : */
119 : static void
120 0 : detach_rte(struct spdk_pci_device *dev)
121 : {
122 0 : struct rte_pci_device *rte_dev = dev->dev_handle;
123 : int i;
124 : bool removed;
125 :
126 0 : if (!spdk_process_is_primary()) {
127 0 : return;
128 : }
129 :
130 0 : pthread_mutex_lock(&g_pci_mutex);
131 0 : dev->internal.attached = false;
132 : /* prevent the hotremove notification from removing this device */
133 0 : dev->internal.pending_removal = true;
134 0 : pthread_mutex_unlock(&g_pci_mutex);
135 :
136 0 : rte_eal_alarm_set(1, detach_rte_cb, rte_dev);
137 :
138 : /* wait up to 2s for the cb to execute */
139 0 : for (i = 2000; i > 0; i--) {
140 :
141 0 : spdk_delay_us(1000);
142 0 : pthread_mutex_lock(&g_pci_mutex);
143 0 : removed = dev->internal.removed;
144 0 : pthread_mutex_unlock(&g_pci_mutex);
145 :
146 0 : if (removed) {
147 0 : break;
148 : }
149 : }
150 :
151 : /* besides checking the removed flag, we also need to wait
152 : * for the dpdk detach function to unwind, as it's doing some
153 : * operations even after calling our detach callback. Simply
154 : * cancel the alarm - if it started executing already, this
155 : * call will block and wait for it to finish.
156 : */
157 0 : rte_eal_alarm_cancel(detach_rte_cb, rte_dev);
158 :
159 : /* the device could have been finally removed, so just check
160 : * it again.
161 : */
162 0 : pthread_mutex_lock(&g_pci_mutex);
163 0 : removed = dev->internal.removed;
164 0 : pthread_mutex_unlock(&g_pci_mutex);
165 0 : if (!removed) {
166 0 : SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
167 : dpdk_pci_device_get_name(rte_dev));
168 : /* If we reach this state, then the device couldn't be removed and most likely
169 : a subsequent hot add of a device in the same BDF will fail */
170 : }
171 : }
172 :
173 : void
174 0 : spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
175 : {
176 : struct spdk_pci_driver *driver;
177 :
178 0 : driver = calloc(1, sizeof(*driver));
179 0 : if (!driver) {
180 : /* we can't do any better than bailing atm */
181 0 : return;
182 : }
183 :
184 0 : driver->name = name;
185 0 : driver->id_table = id_table;
186 0 : driver->drv_flags = flags;
187 0 : driver->driver = (struct rte_pci_driver *)driver->driver_buf;
188 0 : TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
189 : }
190 :
/* Convenience lookup of the driver registered under the name "nvme". */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	struct spdk_pci_driver *nvme_drv = spdk_pci_get_driver("nvme");

	return nvme_drv;
}
196 :
197 : struct spdk_pci_driver *
198 0 : spdk_pci_get_driver(const char *name)
199 : {
200 : struct spdk_pci_driver *driver;
201 :
202 0 : TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
203 0 : if (strcmp(driver->name, name) == 0) {
204 0 : return driver;
205 : }
206 : }
207 :
208 0 : return NULL;
209 : }
210 :
211 : static void
212 0 : pci_device_rte_dev_event(const char *device_name,
213 : enum rte_dev_event_type event,
214 : void *cb_arg)
215 : {
216 : struct spdk_pci_device *dev;
217 0 : bool can_detach = false;
218 :
219 0 : switch (event) {
220 0 : default:
221 : case RTE_DEV_EVENT_ADD:
222 : /* Nothing to do here yet. */
223 0 : break;
224 0 : case RTE_DEV_EVENT_REMOVE:
225 0 : pthread_mutex_lock(&g_pci_mutex);
226 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
227 0 : struct rte_pci_device *rte_dev = dev->dev_handle;
228 :
229 0 : if (strcmp(dpdk_pci_device_get_name(rte_dev), device_name)) {
230 0 : continue;
231 : }
232 :
233 : /* Note: these ERRLOGs are useful for triaging issue #2983. */
234 0 : if (dev->internal.pending_removal || dev->internal.removed) {
235 0 : SPDK_ERRLOG("Received event for device SPDK already tried to remove\n");
236 0 : SPDK_ERRLOG("pending_removal=%d removed=%d\n", dev->internal.pending_removal,
237 : dev->internal.removed);
238 : }
239 :
240 0 : if (!dev->internal.pending_removal) {
241 0 : can_detach = !dev->internal.attached;
242 : /* prevent any further attaches */
243 0 : dev->internal.pending_removal = true;
244 0 : break;
245 : }
246 : }
247 0 : pthread_mutex_unlock(&g_pci_mutex);
248 :
249 0 : if (can_detach) {
250 : /* if device is not attached we can remove it right away.
251 : * Otherwise it will be removed at detach.
252 : *
253 : * Because the user's callback is invoked in eal interrupt
254 : * callback, the interrupt callback need to be finished before
255 : * it can be unregistered when detaching device. So finish
256 : * callback soon and use a deferred removal to detach device
257 : * is need. It is a workaround, once the device detaching be
258 : * moved into the eal in the future, the deferred removal could
259 : * be deleted.
260 : */
261 0 : assert(dev != NULL);
262 0 : rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
263 : }
264 0 : break;
265 : }
266 0 : }
267 :
268 : static void
269 0 : cleanup_pci_devices(void)
270 : {
271 : struct spdk_pci_device *dev, *tmp;
272 :
273 0 : pthread_mutex_lock(&g_pci_mutex);
274 : /* cleanup removed devices */
275 0 : TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
276 0 : if (!dev->internal.removed) {
277 0 : continue;
278 : }
279 :
280 0 : vtophys_pci_device_removed(dev->dev_handle);
281 0 : TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
282 0 : free(dev);
283 : }
284 :
285 : /* add newly-attached devices */
286 0 : TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
287 0 : TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
288 0 : TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
289 0 : vtophys_pci_device_added(dev->dev_handle);
290 : }
291 0 : pthread_mutex_unlock(&g_pci_mutex);
292 0 : }
293 :
294 : static int scan_pci_bus(bool delay_init);
295 :
296 : static inline void
297 0 : _pci_env_init(void)
298 : {
299 : /* We assume devices were present on the bus for more than 2 seconds
300 : * before initializing SPDK and there's no need to wait more. We scan
301 : * the bus, but we don't block any devices.
302 : */
303 0 : scan_pci_bus(false);
304 :
305 : /* Register a single hotremove callback for all devices. */
306 0 : if (spdk_process_is_primary()) {
307 0 : rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
308 : }
309 0 : }
310 :
311 : int
312 0 : pci_env_init(void)
313 : {
314 : struct spdk_pci_driver *driver;
315 : int rc;
316 :
317 0 : rc = dpdk_pci_init();
318 0 : if (rc) {
319 0 : return rc;
320 : }
321 :
322 0 : TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
323 0 : dpdk_pci_driver_register(driver, pci_device_init, pci_device_fini);
324 : }
325 :
326 0 : _pci_env_init();
327 0 : return 0;
328 : }
329 :
/* Re-run the common PCI init step (bus scan and, in the primary process,
 * device-event callback registration) without touching driver registration.
 */
void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}
339 :
340 : void
341 0 : pci_env_fini(void)
342 : {
343 : struct spdk_pci_device *dev;
344 0 : char bdf[32];
345 :
346 0 : cleanup_pci_devices();
347 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
348 0 : if (dev->internal.attached) {
349 0 : spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
350 0 : SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
351 : }
352 : }
353 :
354 0 : if (spdk_process_is_primary()) {
355 0 : rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
356 : }
357 0 : }
358 :
359 : int
360 0 : pci_device_init(struct rte_pci_driver *_drv,
361 : struct rte_pci_device *_dev)
362 : {
363 0 : struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
364 : struct spdk_pci_device *dev;
365 : struct rte_pci_addr *addr;
366 : struct rte_pci_id *id;
367 : int rc;
368 :
369 0 : dev = calloc(1, sizeof(*dev));
370 0 : if (dev == NULL) {
371 0 : return -1;
372 : }
373 :
374 0 : dev->dev_handle = _dev;
375 :
376 0 : addr = dpdk_pci_device_get_addr(_dev);
377 0 : dev->addr.domain = addr->domain;
378 0 : dev->addr.bus = addr->bus;
379 0 : dev->addr.dev = addr->devid;
380 0 : dev->addr.func = addr->function;
381 :
382 0 : id = dpdk_pci_device_get_id(_dev);
383 0 : dev->id.class_id = id->class_id;
384 0 : dev->id.vendor_id = id->vendor_id;
385 0 : dev->id.device_id = id->device_id;
386 0 : dev->id.subvendor_id = id->subsystem_vendor_id;
387 0 : dev->id.subdevice_id = id->subsystem_device_id;
388 :
389 0 : dev->socket_id = dpdk_pci_device_get_numa_node(_dev);
390 0 : dev->type = "pci";
391 :
392 0 : dev->map_bar = map_bar_rte;
393 0 : dev->unmap_bar = unmap_bar_rte;
394 0 : dev->cfg_read = cfg_read_rte;
395 0 : dev->cfg_write = cfg_write_rte;
396 :
397 0 : dev->internal.driver = driver;
398 0 : dev->internal.claim_fd = -1;
399 :
400 0 : if (driver->cb_fn != NULL) {
401 0 : rc = driver->cb_fn(driver->cb_arg, dev);
402 0 : if (rc != 0) {
403 0 : free(dev);
404 0 : return rc;
405 : }
406 0 : dev->internal.attached = true;
407 : }
408 :
409 0 : pthread_mutex_lock(&g_pci_mutex);
410 0 : TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
411 0 : pthread_mutex_unlock(&g_pci_mutex);
412 0 : return 0;
413 : }
414 :
415 : static void
416 0 : set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
417 : {
418 : struct env_devargs *env_da;
419 :
420 0 : env_da = find_env_devargs(rte_da->bus, rte_da->name);
421 0 : if (env_da == NULL) {
422 0 : env_da = calloc(1, sizeof(*env_da));
423 0 : if (env_da == NULL) {
424 0 : SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
425 0 : return;
426 : }
427 0 : env_da->bus = rte_da->bus;
428 0 : spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
429 0 : TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
430 : }
431 :
432 0 : env_da->allowed_at = tsc;
433 : }
434 :
435 : static uint64_t
436 0 : get_allowed_at(struct rte_devargs *rte_da)
437 : {
438 : struct env_devargs *env_da;
439 :
440 0 : env_da = find_env_devargs(rte_da->bus, rte_da->name);
441 0 : if (env_da) {
442 0 : return env_da->allowed_at;
443 : } else {
444 0 : return 0;
445 : }
446 : }
447 :
448 : int
449 0 : pci_device_fini(struct rte_pci_device *_dev)
450 : {
451 : struct spdk_pci_device *dev;
452 :
453 0 : pthread_mutex_lock(&g_pci_mutex);
454 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
455 0 : if (dev->dev_handle == _dev) {
456 0 : break;
457 : }
458 : }
459 :
460 0 : if (dev == NULL || dev->internal.attached) {
461 : /* The device might be still referenced somewhere in SPDK. */
462 0 : pthread_mutex_unlock(&g_pci_mutex);
463 0 : return -EBUSY;
464 : }
465 :
466 : /* remove our allowed_at option */
467 0 : if (dpdk_pci_device_get_devargs(_dev)) {
468 0 : set_allowed_at(dpdk_pci_device_get_devargs(_dev), 0);
469 : }
470 :
471 : /* It is possible that removed flag was already set when there is a race
472 : * between the remove notification for this process, and another process
473 : * that is also detaching from this same device (for example, when using
474 : * nvme driver in multi-process mode. So do not assert here. See
475 : * #2456 for additional details.
476 : */
477 0 : dev->internal.removed = true;
478 0 : pthread_mutex_unlock(&g_pci_mutex);
479 0 : return 0;
480 :
481 : }
482 :
483 : void
484 0 : spdk_pci_device_detach(struct spdk_pci_device *dev)
485 : {
486 : struct spdk_pci_device_provider *provider;
487 :
488 0 : assert(dev->internal.attached);
489 :
490 0 : if (dev->internal.claim_fd >= 0) {
491 0 : spdk_pci_device_unclaim(dev);
492 : }
493 :
494 0 : TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
495 0 : if (strcmp(dev->type, provider->name) == 0) {
496 0 : break;
497 : }
498 : }
499 :
500 0 : assert(provider != NULL);
501 0 : dev->internal.attached = false;
502 0 : provider->detach_cb(dev);
503 :
504 0 : cleanup_pci_devices();
505 0 : }
506 :
507 : static int
508 0 : scan_pci_bus(bool delay_init)
509 : {
510 0 : struct rte_dev_iterator it;
511 : struct rte_device *rte_dev;
512 : uint64_t now;
513 :
514 0 : dpdk_bus_scan();
515 0 : now = spdk_get_ticks();
516 :
517 0 : if (!TAILQ_FIRST(&g_pci_drivers)) {
518 0 : return 0;
519 : }
520 :
521 0 : RTE_DEV_FOREACH(rte_dev, "bus=pci", &it) {
522 0 : struct rte_devargs *da;
523 :
524 0 : da = dpdk_device_get_devargs(rte_dev);
525 0 : if (!da) {
526 0 : char devargs_str[128];
527 :
528 : /* the device was never blocked or allowed */
529 0 : da = calloc(1, sizeof(*da));
530 0 : if (!da) {
531 0 : return -1;
532 : }
533 :
534 0 : snprintf(devargs_str, sizeof(devargs_str), "pci:%s", dpdk_device_get_name(rte_dev));
535 0 : if (rte_devargs_parse(da, devargs_str) != 0) {
536 0 : free(da);
537 0 : return -1;
538 : }
539 :
540 0 : rte_devargs_insert(&da);
541 0 : dpdk_device_set_devargs(rte_dev, da);
542 : }
543 :
544 0 : if (get_allowed_at(da)) {
545 0 : uint64_t allowed_at = get_allowed_at(da);
546 :
547 : /* this device was seen by spdk before... */
548 0 : if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
549 0 : da->policy = RTE_DEV_ALLOWED;
550 : }
551 0 : } else if ((dpdk_device_scan_allowed(rte_dev) && da->policy == RTE_DEV_ALLOWED) ||
552 0 : da->policy != RTE_DEV_BLOCKED) {
553 : /* override the policy only if not permanently blocked */
554 :
555 0 : if (delay_init) {
556 0 : da->policy = RTE_DEV_BLOCKED;
557 0 : set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
558 : } else {
559 0 : da->policy = RTE_DEV_ALLOWED;
560 0 : set_allowed_at(da, now);
561 : }
562 : }
563 : }
564 :
565 0 : return 0;
566 : }
567 :
568 : static int
569 0 : pci_attach_rte(const struct spdk_pci_addr *addr)
570 : {
571 0 : char bdf[32];
572 0 : int rc, i = 0;
573 :
574 0 : spdk_pci_addr_fmt(bdf, sizeof(bdf), addr);
575 :
576 : do {
577 0 : rc = rte_eal_hotplug_add("pci", bdf, "");
578 0 : } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
579 :
580 0 : if (i > 1 && rc == -EEXIST) {
581 : /* Even though the previous request timed out, the device
582 : * was attached successfully.
583 : */
584 0 : rc = 0;
585 : }
586 :
587 0 : return rc;
588 : }
589 :
/* Device provider for DPDK-managed PCI devices. Its name matches the type
 * string ("pci") that pci_device_init() assigns to every device it creates,
 * which is how spdk_pci_device_detach() finds it.
 */
static struct spdk_pci_device_provider g_pci_rte_provider = {
	.name = "pci",
	.attach_cb = pci_attach_rte,
	.detach_cb = detach_rte,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider);
597 :
598 : int
599 0 : spdk_pci_device_attach(struct spdk_pci_driver *driver,
600 : spdk_pci_enum_cb enum_cb,
601 : void *enum_ctx, struct spdk_pci_addr *pci_address)
602 : {
603 : struct spdk_pci_device *dev;
604 : struct spdk_pci_device_provider *provider;
605 : struct rte_pci_device *rte_dev;
606 : struct rte_devargs *da;
607 : int rc;
608 :
609 0 : cleanup_pci_devices();
610 :
611 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
612 0 : if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
613 0 : break;
614 : }
615 : }
616 :
617 0 : if (dev != NULL && dev->internal.driver == driver) {
618 0 : pthread_mutex_lock(&g_pci_mutex);
619 0 : if (dev->internal.attached || dev->internal.pending_removal) {
620 0 : pthread_mutex_unlock(&g_pci_mutex);
621 0 : return -1;
622 : }
623 :
624 0 : rc = enum_cb(enum_ctx, dev);
625 0 : if (rc == 0) {
626 0 : dev->internal.attached = true;
627 : }
628 0 : pthread_mutex_unlock(&g_pci_mutex);
629 0 : return rc;
630 : }
631 :
632 0 : driver->cb_fn = enum_cb;
633 0 : driver->cb_arg = enum_ctx;
634 :
635 0 : rc = -ENODEV;
636 0 : TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
637 0 : rc = provider->attach_cb(pci_address);
638 0 : if (rc == 0) {
639 0 : break;
640 : }
641 : }
642 :
643 0 : driver->cb_arg = NULL;
644 0 : driver->cb_fn = NULL;
645 :
646 0 : cleanup_pci_devices();
647 :
648 0 : if (rc != 0) {
649 0 : return -1;
650 : }
651 :
652 : /* explicit attach ignores the allowlist, so if we blocked this
653 : * device before let's enable it now - just for clarity.
654 : */
655 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
656 0 : if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
657 0 : break;
658 : }
659 : }
660 0 : assert(dev != NULL);
661 :
662 0 : rte_dev = dev->dev_handle;
663 0 : if (rte_dev != NULL) {
664 0 : da = dpdk_pci_device_get_devargs(rte_dev);
665 0 : if (da && get_allowed_at(da)) {
666 0 : set_allowed_at(da, spdk_get_ticks());
667 0 : da->policy = RTE_DEV_ALLOWED;
668 : }
669 : }
670 :
671 0 : return 0;
672 : }
673 :
674 : /* Note: You can call spdk_pci_enumerate from more than one thread
675 : * simultaneously safely, but you cannot call spdk_pci_enumerate
676 : * and rte_eal_pci_probe simultaneously.
677 : */
678 : int
679 0 : spdk_pci_enumerate(struct spdk_pci_driver *driver,
680 : spdk_pci_enum_cb enum_cb,
681 : void *enum_ctx)
682 : {
683 : struct spdk_pci_device *dev;
684 : int rc;
685 :
686 0 : cleanup_pci_devices();
687 :
688 0 : pthread_mutex_lock(&g_pci_mutex);
689 0 : TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
690 0 : if (dev->internal.attached ||
691 0 : dev->internal.driver != driver ||
692 0 : dev->internal.pending_removal) {
693 0 : continue;
694 : }
695 :
696 0 : rc = enum_cb(enum_ctx, dev);
697 0 : if (rc == 0) {
698 0 : dev->internal.attached = true;
699 0 : } else if (rc < 0) {
700 0 : pthread_mutex_unlock(&g_pci_mutex);
701 0 : return -1;
702 : }
703 : }
704 0 : pthread_mutex_unlock(&g_pci_mutex);
705 :
706 0 : if (scan_pci_bus(true) != 0) {
707 0 : return -1;
708 : }
709 :
710 0 : driver->cb_fn = enum_cb;
711 0 : driver->cb_arg = enum_ctx;
712 :
713 0 : if (dpdk_bus_probe() != 0) {
714 0 : driver->cb_arg = NULL;
715 0 : driver->cb_fn = NULL;
716 0 : return -1;
717 : }
718 :
719 0 : driver->cb_arg = NULL;
720 0 : driver->cb_fn = NULL;
721 :
722 0 : cleanup_pci_devices();
723 0 : return 0;
724 : }
725 :
726 : void
727 0 : spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
728 : {
729 : struct spdk_pci_device *dev, *tmp;
730 :
731 0 : pthread_mutex_lock(&g_pci_mutex);
732 0 : TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
733 0 : fn(ctx, dev);
734 : }
735 0 : pthread_mutex_unlock(&g_pci_mutex);
736 0 : }
737 :
738 : int
739 0 : spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
740 : void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
741 : {
742 : int rc;
743 :
744 0 : rc = dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
745 0 : if (rc) {
746 0 : return rc;
747 : }
748 :
749 : #if VFIO_ENABLED
750 : /* Automatically map the BAR to the IOMMU */
751 0 : if (!spdk_iommu_is_enabled()) {
752 0 : return 0;
753 : }
754 :
755 0 : if (rte_eal_iova_mode() == RTE_IOVA_VA) {
756 : /* We'll use the virtual address as the iova to match DPDK. */
757 0 : rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), (uint64_t) * mapped_addr, *size);
758 0 : if (rc) {
759 0 : dev->unmap_bar(dev, bar, *mapped_addr);
760 0 : return -EFAULT;
761 : }
762 :
763 0 : *phys_addr = (uint64_t)(*mapped_addr);
764 : } else {
765 : /* We'll use the physical address as the iova to match DPDK. */
766 0 : rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), *phys_addr, *size);
767 0 : if (rc) {
768 0 : dev->unmap_bar(dev, bar, *mapped_addr);
769 0 : return -EFAULT;
770 : }
771 : }
772 : #endif
773 0 : return rc;
774 : }
775 :
776 : int
777 0 : spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
778 : {
779 : #if VFIO_ENABLED
780 : int rc;
781 :
782 0 : if (spdk_iommu_is_enabled()) {
783 0 : rc = vtophys_iommu_unmap_dma_bar((uint64_t)addr);
784 0 : if (rc) {
785 0 : return -EFAULT;
786 : }
787 : }
788 : #endif
789 :
790 0 : return dev->unmap_bar(dev, bar, addr);
791 : }
792 :
/* Forward to dpdk_pci_device_enable_interrupt() for the underlying DPDK
 * device and return its result unchanged.
 */
int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_enable_interrupt(dev->dev_handle);
}

/* Forward to dpdk_pci_device_disable_interrupt() for the underlying DPDK
 * device and return its result unchanged.
 */
int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_disable_interrupt(dev->dev_handle);
}

/* Forward to dpdk_pci_device_get_interrupt_efd() for the underlying DPDK
 * device and return its result unchanged.
 */
int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_get_interrupt_efd(dev->dev_handle);
}
810 :
/* Plain accessors for the address and identification fields stored in the
 * device structure. None of them takes a lock or touches the hardware.
 */

/* PCI domain of the device address. */
uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

/* PCI bus number of the device address. */
uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

/* PCI device number of the device address. */
uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

/* PCI function number of the device address. */
uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

/* Vendor ID. */
uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

/* Device ID. */
uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

/* Subsystem vendor ID. */
uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

/* Subsystem device ID. */
uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

/* Copy of the whole identification record. */
struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

/* Socket (NUMA node) id stored in the device structure. */
int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}
870 :
/* Config-space accessors. The generic read/write dispatch to the device's
 * cfg_read / cfg_write hook and return its result; the fixed-width variants
 * call the generic ones with a length of 1, 2 or 4 bytes.
 */

/* Read `len` bytes at `offset` of the config space into `value`. */
int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

/* Write `len` bytes from `value` at `offset` of the config space. */
int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

/* 1-byte read. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

/* 1-byte write; the value is passed by address of the local copy. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

/* 2-byte read. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

/* 2-byte write. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

/* 4-byte read. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

/* 4-byte write. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}
918 :
919 : int
920 0 : spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
921 : {
922 : int err;
923 0 : uint32_t pos, header = 0;
924 0 : uint32_t i, buf[2];
925 :
926 0 : if (len < 17) {
927 0 : return -1;
928 : }
929 :
930 0 : err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
931 0 : if (err || !header) {
932 0 : return -1;
933 : }
934 :
935 0 : pos = PCI_CFG_SIZE;
936 : while (1) {
937 0 : if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
938 0 : if (pos) {
939 : /* skip the header */
940 0 : pos += 4;
941 0 : for (i = 0; i < 2; i++) {
942 0 : err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
943 0 : if (err) {
944 0 : return -1;
945 : }
946 : }
947 0 : snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
948 0 : return 0;
949 : }
950 : }
951 0 : pos = (header >> 20) & 0xffc;
952 : /* 0 if no other items exist */
953 0 : if (pos < PCI_CFG_SIZE) {
954 0 : return -1;
955 : }
956 0 : err = spdk_pci_device_cfg_read32(dev, &header, pos);
957 0 : if (err) {
958 0 : return -1;
959 : }
960 : }
961 : return -1;
962 : }
963 :
/* Return a copy of the device's PCI address. */
struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

/* Report whether removal of the device has been requested. Note that this
 * reflects the pending_removal flag, not the removed flag, and is read
 * without taking g_pci_mutex.
 */
bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}
975 :
976 : int
977 0 : spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
978 : {
979 0 : if (a1->domain > a2->domain) {
980 0 : return 1;
981 0 : } else if (a1->domain < a2->domain) {
982 0 : return -1;
983 0 : } else if (a1->bus > a2->bus) {
984 0 : return 1;
985 0 : } else if (a1->bus < a2->bus) {
986 0 : return -1;
987 0 : } else if (a1->dev > a2->dev) {
988 0 : return 1;
989 0 : } else if (a1->dev < a2->dev) {
990 0 : return -1;
991 0 : } else if (a1->func > a2->func) {
992 0 : return 1;
993 0 : } else if (a1->func < a2->func) {
994 0 : return -1;
995 : }
996 :
997 0 : return 0;
998 : }
999 :
1000 : #ifdef __linux__
1001 : int
1002 0 : spdk_pci_device_claim(struct spdk_pci_device *dev)
1003 : {
1004 : int dev_fd;
1005 0 : char dev_name[64];
1006 : int pid;
1007 : void *dev_map;
1008 0 : struct flock pcidev_lock = {
1009 : .l_type = F_WRLCK,
1010 : .l_whence = SEEK_SET,
1011 : .l_start = 0,
1012 : .l_len = 0,
1013 : };
1014 :
1015 0 : snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
1016 0 : dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
1017 :
1018 0 : dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
1019 0 : if (dev_fd == -1) {
1020 0 : SPDK_ERRLOG("could not open %s\n", dev_name);
1021 0 : return -errno;
1022 : }
1023 :
1024 0 : if (ftruncate(dev_fd, sizeof(int)) != 0) {
1025 0 : SPDK_ERRLOG("could not truncate %s\n", dev_name);
1026 0 : close(dev_fd);
1027 0 : return -errno;
1028 : }
1029 :
1030 0 : dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
1031 : MAP_SHARED, dev_fd, 0);
1032 0 : if (dev_map == MAP_FAILED) {
1033 0 : SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
1034 0 : close(dev_fd);
1035 0 : return -errno;
1036 : }
1037 :
1038 0 : if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
1039 0 : pid = *(int *)dev_map;
1040 0 : SPDK_ERRLOG("Cannot create lock on device %s, probably"
1041 : " process %d has claimed it\n", dev_name, pid);
1042 0 : munmap(dev_map, sizeof(int));
1043 0 : close(dev_fd);
1044 : /* F_SETLK returns unspecified errnos, normalize them */
1045 0 : return -EACCES;
1046 : }
1047 :
1048 0 : *(int *)dev_map = (int)getpid();
1049 0 : munmap(dev_map, sizeof(int));
1050 0 : dev->internal.claim_fd = dev_fd;
1051 : /* Keep dev_fd open to maintain the lock. */
1052 0 : return 0;
1053 : }
1054 :
1055 : void
1056 0 : spdk_pci_device_unclaim(struct spdk_pci_device *dev)
1057 : {
1058 0 : char dev_name[64];
1059 :
1060 0 : snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
1061 0 : dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
1062 :
1063 0 : close(dev->internal.claim_fd);
1064 0 : dev->internal.claim_fd = -1;
1065 0 : unlink(dev_name);
1066 0 : }
1067 : #else /* !__linux__ */
/* Non-Linux placeholder: no claiming is implemented, always reports success. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

/* Non-Linux placeholder: nothing to release. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
1080 : #endif /* __linux__ */
1081 :
1082 : int
1083 0 : spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
1084 : {
1085 0 : unsigned domain, bus, dev, func;
1086 :
1087 0 : if (addr == NULL || bdf == NULL) {
1088 0 : return -EINVAL;
1089 : }
1090 :
1091 0 : if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
1092 0 : (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
1093 : /* Matched a full address - all variables are initialized */
1094 0 : } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
1095 0 : func = 0;
1096 0 : } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
1097 0 : (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
1098 0 : domain = 0;
1099 0 : } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
1100 0 : (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
1101 0 : domain = 0;
1102 0 : func = 0;
1103 : } else {
1104 0 : return -EINVAL;
1105 : }
1106 :
1107 0 : if (bus > 0xFF || dev > 0x1F || func > 7) {
1108 0 : return -EINVAL;
1109 : }
1110 :
1111 0 : addr->domain = domain;
1112 0 : addr->bus = bus;
1113 0 : addr->dev = dev;
1114 0 : addr->func = func;
1115 :
1116 0 : return 0;
1117 : }
1118 :
1119 : int
1120 0 : spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
1121 : {
1122 : int rc;
1123 :
1124 0 : rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
1125 0 : addr->domain, addr->bus,
1126 0 : addr->dev, addr->func);
1127 :
1128 0 : if (rc > 0 && (size_t)rc < sz) {
1129 0 : return 0;
1130 : }
1131 :
1132 0 : return -1;
1133 : }
1134 :
1135 : int
1136 0 : spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
1137 : {
1138 : int rc;
1139 :
1140 0 : assert(dev->map_bar != NULL);
1141 0 : assert(dev->unmap_bar != NULL);
1142 0 : assert(dev->cfg_read != NULL);
1143 0 : assert(dev->cfg_write != NULL);
1144 0 : dev->internal.driver = drv;
1145 :
1146 0 : if (drv->cb_fn != NULL) {
1147 0 : rc = drv->cb_fn(drv->cb_arg, dev);
1148 0 : if (rc != 0) {
1149 0 : return -ECANCELED;
1150 : }
1151 :
1152 0 : dev->internal.attached = true;
1153 : }
1154 :
1155 0 : TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
1156 :
1157 0 : return 0;
1158 : }
1159 :
/* Unlink a device from g_pci_devices. The device must not be attached; it is
 * not freed here.
 */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

/* Append a provider to g_pci_device_providers. The pointer is stored as-is. */
void
spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider)
{
	TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq);
}

/* Return the device's type string (e.g. "pci" for devices created by
 * pci_device_init()).
 */
const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}
1178 :
1179 : int
1180 0 : spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
1181 : {
1182 0 : struct rte_devargs *da;
1183 0 : char devargs_str[128];
1184 :
1185 0 : da = calloc(1, sizeof(*da));
1186 0 : if (da == NULL) {
1187 0 : SPDK_ERRLOG("could not allocate rte_devargs\n");
1188 0 : return -ENOMEM;
1189 : }
1190 :
1191 0 : snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
1192 0 : pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
1193 0 : if (rte_devargs_parse(da, devargs_str) != 0) {
1194 0 : SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
1195 0 : free(da);
1196 0 : return -EINVAL;
1197 : }
1198 0 : da->policy = RTE_DEV_ALLOWED;
1199 : /* Note: if a devargs already exists for this device address, it just gets
1200 : * overridden. So we do not need to check if the devargs already exists.
1201 : * DPDK will take care of memory management for the devargs structure after
1202 : * it has been inserted, so there's nothing SPDK needs to track.
1203 : */
1204 0 : if (rte_devargs_insert(&da) != 0) {
1205 0 : SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
1206 0 : free(da);
1207 0 : return -EINVAL;
1208 : }
1209 :
1210 0 : return 0;
1211 : }
|