summaryrefslogtreecommitdiffstats
path: root/tests/topotests/munet/native.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/topotests/munet/native.py')
-rw-r--r--tests/topotests/munet/native.py144
1 files changed, 112 insertions, 32 deletions
diff --git a/tests/topotests/munet/native.py b/tests/topotests/munet/native.py
index fecf709..4fbbb85 100644
--- a/tests/topotests/munet/native.py
+++ b/tests/topotests/munet/native.py
@@ -20,6 +20,8 @@ import socket
import subprocess
import time
+from pathlib import Path
+
from . import cli
from .base import BaseMunet
from .base import Bridge
@@ -38,6 +40,7 @@ from .config import config_to_dict_with_key
from .config import find_matching_net_config
from .config import find_with_kv
from .config import merge_kind_config
+from .watchlog import WatchLog
class L3ContainerNotRunningError(MunetError):
@@ -455,13 +458,14 @@ class NodeMixin:
bps = self.unet.cfgopt.getoption("--gdb-breakpoints", "").split(",")
for bp in bps:
- gdbcmd += f" '-ex=b {bp}'"
+ if bp:
+ gdbcmd += f" '-ex=b {bp}'"
- cmds = self.config.get("gdb-run-cmd", [])
+ cmds = self.config.get("gdb-run-cmds", [])
for cmd in cmds:
gdbcmd += f" '-ex={cmd}'"
- self.run_in_window(gdbcmd)
+ self.run_in_window(gdbcmd, ns_only=True)
elif should_gdb and use_emacs:
gdbcmd = gdbcmd.replace("gdb ", "gdb -i=mi ")
ecbin = self.get_exec_path("emacsclient")
@@ -664,6 +668,7 @@ class L3NodeMixin(NodeMixin):
self.phycount = 0
self.phy_odrivers = {}
self.tapmacs = {}
+ self.watched_logs = {}
self.intf_tc_count = 0
@@ -723,6 +728,26 @@ ff02::2\tip6-allrouters
if hasattr(self, "bind_mount"):
self.bind_mount(hosts_file, "/etc/hosts")
+ def add_watch_log(self, path, watchfor_re=None):
+ """Add a WatchLog to this node's watched logs.
+
+ Args:
+ path: If relative, it is taken relative to the node's ``rundir``
+ watchfor_re: Regular expression to watch the log for and raise an exception
+ if found.
+
+ Return:
+ The watching task if requested, or None otherwise.
+ """
+ path = Path(path)
+ if not path.is_absolute():
+ path = self.rundir.joinpath(path)
+
+ wl = WatchLog(path)
+ self.watched_logs[wl.path] = wl
+ task = wl.raise_if_match_task(watchfor_re) if watchfor_re else None
+ return task
+
async def console(
self,
concmd,
@@ -938,8 +963,32 @@ ff02::2\tip6-allrouters
if hname in self.host_intfs:
return
self.host_intfs[hname] = lname
- self.unet.rootcmd.cmd_nostatus(f"ip link set {hname} down ")
- self.unet.rootcmd.cmd_raises(f"ip link set {hname} netns {self.pid}")
+
+ # See if this interface is missing and needs to be fixed
+ rc, o, _ = self.unet.rootcmd.cmd_status("ip -o link show")
+ m = re.search(rf"\d+:\s+(\S+):.*altname {re.escape(hname)}\W", o)
+ if m:
+ # need to rename
+ dname = m.group(1)
+ self.logger.info("Fixing misnamed %s to %s", dname, hname)
+ self.unet.rootcmd.cmd_status(
+ f"ip link property del dev {dname} altname {hname}"
+ )
+ self.unet.rootcmd.cmd_status(f"ip link set {dname} name {hname}")
+
+ rc, o, _ = self.unet.rootcmd.cmd_status("ip -o link show")
+ m = re.search(rf"\d+:\s+{re.escape(hname)}:.*", o)
+ if m:
+ self.unet.rootcmd.cmd_nostatus(f"ip link set {hname} down ")
+ self.unet.rootcmd.cmd_raises(f"ip link set {hname} netns {self.pid}")
+ # Wait for interface to show up in namespace
+ for retry in range(0, 10):
+ rc, o, _ = self.cmd_status(f"ip -o link show {hname}")
+ if not rc:
+ if re.search(rf"\d+: {re.escape(hname)}:.*", o):
+ break
+ if retry > 0:
+ await asyncio.sleep(1)
self.cmd_raises(f"ip link set {hname} name {lname}")
if mtu:
self.cmd_raises(f"ip link set {lname} mtu {mtu}")
@@ -949,7 +998,12 @@ ff02::2\tip6-allrouters
lname = self.host_intfs[hname]
self.cmd_raises(f"ip link set {lname} down")
self.cmd_raises(f"ip link set {lname} name {hname}")
- self.cmd_raises(f"ip link set {hname} netns 1")
+ self.cmd_status(f"ip link set netns 1 dev {hname}")
+ # The above is failing sometimes and not sure why
+ # logging.error(
+ # "XXX after setns %s",
+ # self.unet.rootcmd.cmd_nostatus(f"ip link show {hname}"),
+ # )
del self.host_intfs[hname]
async def add_phy_intf(self, devaddr, lname):
@@ -1019,12 +1073,13 @@ ff02::2\tip6-allrouters
"Physical PCI device %s already bound to vfio-pci", devaddr
)
return
+
self.logger.info(
"Unbinding physical PCI device %s from driver %s", devaddr, driver
)
self.phy_odrivers[devaddr] = driver
self.unet.rootcmd.cmd_raises(
- f"echo {devaddr} > /sys/bus/pci/drivers/{driver}/unbind"
+ f"echo {devaddr} | timeout 10 tee /sys/bus/pci/drivers/{driver}/unbind"
)
# Add the device vendor and device id to vfio-pci in case it's the first time
@@ -1035,7 +1090,14 @@ ff02::2\tip6-allrouters
f"echo {vendor} {devid} > /sys/bus/pci/drivers/vfio-pci/new_id", warn=False
)
- if not self.unet.rootcmd.path_exists(f"/sys/bus/pci/driver/vfio-pci/{devaddr}"):
+ for retry in range(0, 10):
+ if self.unet.rootcmd.path_exists(
+ f"/sys/bus/pci/drivers/vfio-pci/{devaddr}"
+ ):
+ break
+ if retry > 0:
+ await asyncio.sleep(1)
+
# Bind to vfio-pci if wasn't added with new_id
self.logger.info("Binding physical PCI device %s to vfio-pci", devaddr)
ec, _, _ = self.unet.rootcmd.cmd_status(
@@ -1066,7 +1128,7 @@ ff02::2\tip6-allrouters
"Unbinding physical PCI device %s from driver vfio-pci", devaddr
)
self.unet.rootcmd.cmd_status(
- f"echo {devaddr} > /sys/bus/pci/drivers/vfio-pci/unbind"
+ f"echo {devaddr} | timeout 10 tee /sys/bus/pci/drivers/vfio-pci/unbind"
)
self.logger.info("Binding physical PCI device %s to driver %s", devaddr, driver)
@@ -1085,13 +1147,13 @@ ff02::2\tip6-allrouters
for hname in list(self.host_intfs):
await self.rem_host_intf(hname)
- # remove any hostintf interfaces
- for devaddr in list(self.phy_intfs):
- await self.rem_phy_intf(devaddr)
-
# delete the LinuxNamespace/InterfaceMixin
await super()._async_delete()
+ # remove any hostintf interfaces, needs to come after normal exits
+ for devaddr in list(self.phy_intfs):
+ await self.rem_phy_intf(devaddr)
+
class L3NamespaceNode(L3NodeMixin, LinuxNamespace):
"""A namespace L3 node."""
@@ -1123,6 +1185,7 @@ class L3ContainerNode(L3NodeMixin, LinuxNamespace):
assert self.container_image
self.cmd_p = None
+ self.cmd_pid = None
self.__base_cmd = []
self.__base_cmd_pty = []
@@ -1393,7 +1456,13 @@ class L3ContainerNode(L3NodeMixin, LinuxNamespace):
start_new_session=True, # keeps main tty signals away from podman
)
- self.logger.debug("%s: async_popen => %s", self, self.cmd_p.pid)
+ # If our process is actually the child of an nsenter fetch its pid.
+ if self.nsenter_fork:
+ self.cmd_pid = await self.get_proc_child_pid(self.cmd_p)
+
+ self.logger.debug(
+ "%s: async_popen => %s (%s)", self, self.cmd_p.pid, self.cmd_pid
+ )
self.pytest_hook_run_cmd(stdout, stderr)
@@ -1542,6 +1611,7 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
"""Create a Container Node."""
self.cont_exec_paths = {}
self.launch_p = None
+ self.launch_pid = None
self.qemu_config = config["qemu"]
self.extra_mounts = []
assert self.qemu_config
@@ -1968,8 +2038,9 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
con.cmd_raises(f"ip -6 route add default via {switch.ip6_address}")
con.cmd_raises("ip link set lo up")
- if self.unet.cfgopt.getoption("--coverage"):
- con.cmd_raises("mount -t debugfs none /sys/kernel/debug")
+ # This is already mounted now
+ # if self.unet.cfgopt.getoption("--coverage"):
+ # con.cmd_raises("mount -t debugfs none /sys/kernel/debug")
async def gather_coverage_data(self):
con = self.conrepl
@@ -2261,25 +2332,29 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
stdout = open(os.path.join(self.rundir, "qemu.out"), "wb")
stderr = open(os.path.join(self.rundir, "qemu.err"), "wb")
- self.launch_p = await self.async_popen(
+ self.launch_p = await self.async_popen_nsonly(
args,
stdin=subprocess.DEVNULL,
stdout=stdout,
stderr=stderr,
pass_fds=pass_fds,
- # We don't need this here b/c we are only ever running qemu and that's all
- # we need to kill for cleanup
- # XXX reconcile this
- start_new_session=True, # allows us to signal all children to exit
+ # Don't want Keyboard interrupt etc. to pass to child.
+ # start_new_session=True,
+ preexec_fn=os.setsid,
)
+ if self.nsenter_fork:
+ self.launch_pid = await self.get_proc_child_pid(self.launch_p)
+
self.pytest_hook_run_cmd(stdout, stderr)
# We've passed these on, so don't need these open here anymore.
for fd in pass_fds:
os.close(fd)
- self.logger.debug("%s: async_popen => %s", self, self.launch_p.pid)
+ self.logger.debug(
+ "%s: popen => %s (%s)", self, self.launch_p.pid, self.launch_pid
+ )
confiles = ["_console"]
if use_cmdcon:
@@ -2307,10 +2382,10 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
# the monitor output has super annoying ANSI escapes in it
output = self.monrepl.cmd_nostatus("info status")
- self.logger.info("VM status: %s", output)
+ self.logger.debug("VM status: %s", output)
output = self.monrepl.cmd_nostatus("info kvm")
- self.logger.info("KVM status: %s", output)
+ self.logger.debug("KVM status: %s", output)
#
# Set thread affinity
@@ -2348,11 +2423,6 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
"%s: node launch (qemu) cmd wait() canceled: %s", future, error
)
- async def cleanup_qemu(self):
- """Launch qemu."""
- if self.launch_p:
- await self.async_cleanup_proc(self.launch_p)
-
async def async_cleanup_cmd(self):
"""Run the configured cleanup commands for this node."""
self.cleanup_called = True
@@ -2372,7 +2442,7 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
# Need to cleanup early b/c it is running on the VM
if self.cmd_p:
- await self.async_cleanup_proc(self.cmd_p)
+ await self.async_cleanup_proc(self.cmd_p, self.cmd_pid)
self.cmd_p = None
try:
@@ -2388,9 +2458,9 @@ class L3QemuVM(L3NodeMixin, LinuxNamespace):
if not self.launch_p:
self.logger.warning("async_delete: qemu is not running")
else:
- await self.cleanup_qemu()
+ await self.async_cleanup_proc(self.launch_p, self.launch_pid)
except Exception as error:
- self.logger.warning("%s: failued to cleanup qemu process: %s", self, error)
+ self.logger.warning("%s: failed to cleanup qemu process: %s", self, error)
await super()._async_delete()
@@ -2814,6 +2884,8 @@ ff02::2\tip6-allrouters
logging.debug("Launching nodes")
await asyncio.gather(*[x.launch() for x in launch_nodes])
+ logging.debug("Launched nodes -- Queueing Waits")
+
# Watch for launched processes to exit
for node in launch_nodes:
task = asyncio.create_task(
@@ -2822,17 +2894,23 @@ ff02::2\tip6-allrouters
task.add_done_callback(node.launch_completed)
tasks.append(task)
+ logging.debug("Wait complete queued, running cmd")
+
if run_nodes:
# would like a info when verbose here.
logging.debug("Running `cmd` on nodes")
await asyncio.gather(*[x.run_cmd() for x in run_nodes])
+ logging.debug("Ran cmds -- Queueing Waits")
+
# Watch for run_cmd processes to exit
for node in run_nodes:
task = asyncio.create_task(node.cmd_p.wait(), name=f"Node-{node.name}-cmd")
task.add_done_callback(node.cmd_completed)
tasks.append(task)
+ logging.debug("Wait complete queued, waiting for ready")
+
# Wait for nodes to be ready
if ready_nodes:
@@ -2853,6 +2931,8 @@ ff02::2\tip6-allrouters
raise asyncio.TimeoutError()
logging.debug("All nodes ready")
+ logging.debug("All done returning tasks: %s", tasks)
+
return tasks
async def _async_delete(self):