/* SPDX-License-Identifier: LGPL-2.1-or-later
 * Copyright © 2020 VMware, Inc. */

#include "device-enumerator-private.h"
#include "device-util.h"
#include "fd-util.h"
#include "networkd-link.h"
#include "networkd-manager.h"
#include "networkd-queue.h"
#include "networkd-sriov.h"

/* Reply callback installed by link_request_sr_iov_vfs() for every queued SR-IOV request.
 *
 * Any error reported in the reply other than -EEXIST is logged and moves the link into the failed
 * state. Otherwise, if link->sr_iov_messages is zero, SR-IOV is flagged as configured and the link
 * readiness is re-evaluated. Always returns 1. */
static int sr_iov_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, Link *link, SRIOV *sr_iov) {
        int r;

        assert(m);
        assert(link);

        r = sd_netlink_message_get_errno(m);
        if (r >= 0 || r == -EEXIST) {
                /* Success, or -EEXIST which is tolerated. */
                if (link->sr_iov_messages == 0) {
                        log_link_debug(link, "SR-IOV configured");
                        link->sr_iov_configured = true;
                        link_check_ready(link);
                }
        } else {
                log_link_message_error_errno(link, m, r, "Could not set up SR-IOV");
                link_enter_failed(link);
        }

        return 1;
}

/* Builds an RTM_SETLINK message for the link, lets sr_iov_set_netlink_message() fill it in from the
 * given SR-IOV entry, and hands it to request_call_netlink_async().
 *
 * Returns a negative errno if the message cannot be created or filled in; otherwise the result of
 * request_call_netlink_async(). */
static int sr_iov_configure(SRIOV *sr_iov, Link *link, Request *req) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
        int r;

        assert(sr_iov);
        assert(link);
        assert(link->manager);
        assert(link->manager->rtnl);
        assert(link->ifindex > 0);
        assert(req);

        log_link_debug(link, "Setting SR-IOV virtual function %"PRIu32".", sr_iov->vf);

        r = sd_rtnl_message_new_link(link->manager->rtnl, &message, RTM_SETLINK, link->ifindex);
        if (r >= 0)
                r = sr_iov_set_netlink_message(sr_iov, message);
        if (r < 0)
                return r;

        return request_call_netlink_async(link->manager->rtnl, message, req);
}

/* Request processor passed to link_queue_request_safe().
 *
 * Returns 0 without doing anything while the link is neither in the configuring nor in the
 * configured state, 1 once the netlink message has been sent, and a negative errno (after logging
 * a warning) when sr_iov_configure() fails. */
static int sr_iov_process_request(Request *req, Link *link, SRIOV *sr_iov) {
        int r;

        assert(req);
        assert(link);
        assert(sr_iov);

        if (link->state != LINK_STATE_CONFIGURING &&
            link->state != LINK_STATE_CONFIGURED)
                return 0;

        r = sr_iov_configure(sr_iov, link, req);
        if (r < 0)
                return log_link_warning_errno(link, r,
                                              "Failed to configure SR-IOV virtual function %"PRIu32": %m",
                                              sr_iov->vf);

        return 1;
}

/* Queues one request for each SR-IOV entry in link->network->sr_iov_by_section.
 *
 * link->sr_iov_configured is reset first. If link->sr_iov_messages is zero after queueing, SR-IOV
 * is immediately flagged as configured and the link readiness is re-evaluated.
 *
 * Returns 0 on success, or a negative errno (after logging a warning) if queueing fails. */
int link_request_sr_iov_vfs(Link *link) {
        SRIOV *sr_iov;
        int r;

        assert(link);
        assert(link->network);

        link->sr_iov_configured = false;

        ORDERED_HASHMAP_FOREACH(sr_iov, link->network->sr_iov_by_section) {
                r = link_queue_request_safe(link, REQUEST_TYPE_SRIOV,
                                            sr_iov, NULL,
                                            sr_iov_hash_func,
                                            sr_iov_compare_func,
                                            sr_iov_process_request,
                                            &link->sr_iov_messages,
                                            sr_iov_handler,
                                            NULL);
                if (r < 0)
                        return log_link_warning_errno(link, r,
                                                      "Failed to request SR-IOV virtual function %"PRIu32": %m",
                                                      sr_iov->vf);
        }

        if (link->sr_iov_messages != 0) {
                log_link_debug(link, "Configuring SR-IOV");
                return 0;
        }

        /* Nothing is pending. */
        link->sr_iov_configured = true;
        link_check_ready(link);

        return 0;
}

/* Looks up the single "net" subsystem device below pci_dev whose "dev_port" sysattr equals
 * dev_port, and returns its ifindex.
 *
 * Returns a positive ifindex on success, -ENODEV if no device matches, -ENXIO if more than one
 * device matches, or another negative errno from the sd-device calls. */
static int find_ifindex_from_pci_dev_port(sd_device *pci_dev, const char *dev_port) {
        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
        sd_device *netdev;
        int ifindex, r;

        assert(pci_dev);
        assert(dev_port);

        /* Set up the enumerator; the first failing step short-circuits the remaining ones. */
        r = sd_device_enumerator_new(&enumerator);
        if (r >= 0)
                r = sd_device_enumerator_allow_uninitialized(enumerator);
        if (r >= 0)
                r = sd_device_enumerator_add_match_parent(enumerator, pci_dev);
        if (r >= 0)
                r = sd_device_enumerator_add_match_subsystem(enumerator, "net", true);
        if (r >= 0)
                r = sd_device_enumerator_add_match_sysattr(enumerator, "dev_port", dev_port, true);
        if (r < 0)
                return r;

        netdev = sd_device_enumerator_get_device_first(enumerator);
        if (!netdev)
                return -ENODEV; /* no device found */

        if (sd_device_enumerator_get_device_next(enumerator))
                return -ENXIO; /* multiple devices found */

        r = sd_device_get_ifindex(netdev, &ifindex);
        if (r < 0)
                return r;

        assert(ifindex > 0);
        return ifindex;
}

/* Records the PF <-> VF relation between two interfaces: the VF ifindex is added to the PF's
 * sr_iov_virt_port_ifindices set, and the PF ifindex is stored in the VF's sr_iov_phys_port_ifindex.
 * If the VF was previously attached to a different PF, it is removed from that PF's set.
 *
 * Returns 0 on success, or a negative errno if either link cannot be found or the set cannot be
 * updated. */
static int manager_update_sr_iov_ifindices(Manager *manager, int phys_port_ifindex, int virt_port_ifindex) {
        Link *pf_link = NULL, *vf_link = NULL;
        int previous_pf_ifindex, r;

        assert(manager);
        assert(phys_port_ifindex > 0);
        assert(virt_port_ifindex > 0);

        /* This sets ifindices only when both interfaces are already managed by us. */

        r = link_get_by_index(manager, phys_port_ifindex, &pf_link);
        if (r < 0)
                return r;

        r = link_get_by_index(manager, virt_port_ifindex, &vf_link);
        if (r < 0)
                return r;

        /* Register the VF in the PF. */
        r = set_ensure_put(&pf_link->sr_iov_virt_port_ifindices, NULL, INT_TO_PTR(virt_port_ifindex));
        if (r < 0)
                return r;

        log_link_debug(pf_link,
                       "Found SR-IOV VF port %s(%i).",
                       vf_link ? vf_link->ifname : "n/a", virt_port_ifindex);

        /* Register the PF in the VF, detaching the VF from a stale PF first. */
        previous_pf_ifindex = vf_link->sr_iov_phys_port_ifindex;
        if (previous_pf_ifindex > 0 && previous_pf_ifindex != phys_port_ifindex) {
                Link *previous_pf_link;

                if (link_get_by_index(manager, previous_pf_ifindex, &previous_pf_link) >= 0)
                        set_remove(previous_pf_link->sr_iov_virt_port_ifindices, INT_TO_PTR(virt_port_ifindex));
        }

        vf_link->sr_iov_phys_port_ifindex = phys_port_ifindex;

        log_link_debug(vf_link,
                       "Found SR-IOV PF port %s(%i).",
                       pf_link ? pf_link->ifname : "n/a", phys_port_ifindex);

        return 0;
}

/* Treats the link as a potential VF port: resolves the "physfn" child of its PCI parent, finds the
 * network interface with the same "dev_port" below it, and records the PF <-> VF relation.
 *
 * Returns 0 if the PF ifindex is already known or was recorded now, -ENODEV if the link has no
 * device, or another negative errno on failure. */
static int link_set_sr_iov_phys_port(Link *link) {
        _cleanup_(sd_device_unrefp) sd_device *physfn_dev = NULL;
        const char *dev_port;
        sd_device *pci_dev;
        int pf_ifindex, r;

        assert(link);
        assert(link->manager);

        if (link->sr_iov_phys_port_ifindex > 0)
                return 0;

        if (!link->dev)
                return -ENODEV;

        /* This may return -EINVAL or -ENODEV, instead of -ENOENT, if the device has been removed or is
         * being removed. Let's map -EINVAL to -ENODEV, as the caller will ignore -ENODEV. */
        r = sd_device_get_sysattr_value(link->dev, "dev_port", &dev_port);
        if (r == -EINVAL)
                return -ENODEV;
        if (r < 0)
                return r;

        r = sd_device_get_parent_with_subsystem_devtype(link->dev, "pci", NULL, &pci_dev);
        if (r < 0)
                return r;

        r = sd_device_new_child(&physfn_dev, pci_dev, "physfn");
        if (r < 0)
                return r;

        pf_ifindex = find_ifindex_from_pci_dev_port(physfn_dev, dev_port);
        if (pf_ifindex < 0)
                return pf_ifindex;

        return manager_update_sr_iov_ifindices(link->manager, pf_ifindex, link->ifindex);
}

/* Treats the link as a potential PF port: clears its set of VF ifindices, then walks the
 * "virtfn<N>" children of its PCI parent and records the PF <-> VF relation for each VF interface
 * that shares the link's "dev_port".
 *
 * Returns 0 on success (failures for individual VFs are skipped), -ENODEV if the link has no
 * device, or another negative errno from the initial sd-device lookups. */
static int link_set_sr_iov_virt_ports(Link *link) {
        const char *dev_port, *suffix;
        sd_device *pci_dev, *vf_dev;
        int r;

        assert(link);
        assert(link->manager);

        set_clear(link->sr_iov_virt_port_ifindices);

        if (!link->dev)
                return -ENODEV;

        /* As in link_set_sr_iov_phys_port(), -EINVAL is reported to the caller as -ENODEV. */
        r = sd_device_get_sysattr_value(link->dev, "dev_port", &dev_port);
        if (r == -EINVAL)
                return -ENODEV;
        if (r < 0)
                return r;

        r = sd_device_get_parent_with_subsystem_devtype(link->dev, "pci", NULL, &pci_dev);
        if (r < 0)
                return r;

        FOREACH_DEVICE_CHILD_WITH_SUFFIX(pci_dev, vf_dev, suffix) {
                const char *vf_number;
                int vf_ifindex;

                /* Accept name prefixed with "virtfn", but refuse "virtfn" itself. */
                vf_number = startswith(suffix, "virtfn");
                if (isempty(vf_number) || !in_charset(vf_number, DIGITS))
                        continue;

                vf_ifindex = find_ifindex_from_pci_dev_port(vf_dev, dev_port);
                if (vf_ifindex < 0)
                        continue;

                /* A failure for one VF does not stop the scan of the remaining ones. */
                (void) manager_update_sr_iov_ifindices(link->manager, link->ifindex, vf_ifindex);
        }

        return 0;
}

/* Refreshes both directions of the SR-IOV relation for the link: first its PF (if the link is a
 * VF), then its VFs (if the link is a PF).
 *
 * Errors matched by ERRNO_IS_DEVICE_ABSENT() are ignored; any other negative errno is returned.
 * Returns 0 otherwise. */
int link_set_sr_iov_ifindices(Link *link) {
        int r;

        assert(link);

        r = link_set_sr_iov_phys_port(link);
        if (r < 0 && !ERRNO_IS_DEVICE_ABSENT(r))
                return r;

        r = link_set_sr_iov_virt_ports(link);
        return r < 0 && !ERRNO_IS_DEVICE_ABSENT(r) ? r : 0;
}

/* Drops every SR-IOV relation the link takes part in: removes the link from its PF's VF set and
 * forgets the PF ifindex, then empties the link's own VF set, resetting the PF ifindex of each VF
 * link that can still be found. */
void link_clear_sr_iov_ifindices(Link *link) {
        assert(link);
        assert(link->manager);

        if (link->sr_iov_phys_port_ifindex > 0) {
                Link *pf_link;

                if (link_get_by_index(link->manager, link->sr_iov_phys_port_ifindex, &pf_link) >= 0)
                        set_remove(pf_link->sr_iov_virt_port_ifindices, INT_TO_PTR(link->ifindex));

                link->sr_iov_phys_port_ifindex = 0;
        }

        for (;;) {
                Link *vf_link;
                void *entry;

                entry = set_steal_first(link->sr_iov_virt_port_ifindices);
                if (!entry)
                        break;

                if (link_get_by_index(link->manager, PTR_TO_INT(entry), &vf_link) >= 0)
                        vf_link->sr_iov_phys_port_ifindex = 0;
        }
}

/* Runs check_one() on the link, on its PF port (if the link is a VF), and on every VF port of the
 * PF. The main target is checked with the caller's allow_unmanaged; all other ports are checked
 * with allow_unmanaged = true.
 *
 * Returns true only if every check passes and every referenced link can be found. */
bool check_ready_for_all_sr_iov_ports(
                Link *link,
                bool allow_unmanaged, /* for the main target */
                bool (check_one)(Link *link, bool allow_unmanaged)) {

        Link *pf_link;
        void *entry;

        assert(link);
        assert(link->manager);
        assert(check_one);

        /* Some drivers make VF ports become down when their PF port becomes down, and may fail to
         * configure VF ports. Also, when a VF port becomes up/down, its PF port and other VF ports may
         * become down. See issue #23315. */

        /* First, check the main target. */
        if (!check_one(link, allow_unmanaged))
                return false;

        if (link->sr_iov_phys_port_ifindex <= 0)
                /* Not a VF port; the VF set of the link itself is walked below. */
                pf_link = link;
        else {
                /* If this is a VF port, then also check the PF port. */
                if (link_get_by_index(link->manager, link->sr_iov_phys_port_ifindex, &pf_link) < 0)
                        return false;

                if (!check_one(pf_link, /* allow_unmanaged = */ true))
                        return false;
        }

        /* Also check all VF ports. */
        SET_FOREACH(entry, pf_link->sr_iov_virt_port_ifindices) {
                int ifindex = PTR_TO_INT(entry);
                Link *vf_link;

                if (ifindex == link->ifindex)
                        continue; /* The main target link is a VF port, and its state is already checked. */

                if (link_get_by_index(link->manager, ifindex, &vf_link) < 0 ||
                    !check_one(vf_link, /* allow_unmanaged = */ true))
                        return false;
        }

        return true;
}