1 files changed, 205 insertions, 0 deletions
diff --git a/qa/tasks/vip.py b/qa/tasks/vip.py
new file mode 100644
index 000000000..52114b104
--- /dev/null
+++ b/qa/tasks/vip.py
@@ -0,0 +1,205 @@
+import contextlib
+import ipaddress
+import logging
+import re
+
+from teuthology import misc as teuthology
+from teuthology.config import config as teuth_config
+
+log = logging.getLogger(__name__)
+
+
+def subst_vip(ctx, cmd):
+    """Expand {{VIPn}}, {{VIPPREFIXLEN}} and {{VIPSUBNET}} in cmd from ctx.vip.
+
+    An out-of-range {{VIPn}} is left in place and a warning is logged.
+    """
+    for token, digits in re.findall(r'({{VIP(\d+)}})', cmd):
+        index, vips = int(digits), ctx.vip["vips"]
+        if index < len(vips):
+            cmd = cmd.replace(token, str(vips[index]))
+        else:
+            log.warning(f'no VIP{index} (we have {len(vips)})')
+    for marker, attr in (('{{VIPPREFIXLEN}}', 'prefixlen'),
+                         ('{{VIPSUBNET}}', 'network_address')):
+        if marker in cmd:
+            cmd = cmd.replace(marker, str(getattr(ctx.vip["vnet"], attr)))
+    return cmd
+
+
+def echo(ctx, config):
+    """Debug aid: log config with VIP substitutions, once per cluster remote."""
+    # The remote itself is unused; it only determines how many times we log.
+    for _ in ctx.cluster.remotes.keys():
+        expanded = subst_vip(ctx, config)
+        log.info(expanded)
+
+
+def exec(ctx, config):
+    """
+    This is similar to the standard 'exec' task, but does the VIP substitutions.
+
+    config maps a role to a list of shell commands.  A lone 'all-roles' key
+    applies its list to every role not starting with 'host.'; a lone 'all-hosts'
+    key applies it to every role that does.  Each command has $TESTDIR and the
+    VIP placeholders expanded, then runs via 'sudo TESTDIR=... bash -ex -c'.
+    """
+    assert isinstance(config, dict), "task exec got invalid config"
+
+    testdir = teuthology.get_testdir(ctx)
+
+    if len(config) == 1 and ('all-roles' in config or 'all-hosts' in config):
+        selector, cmds = next(iter(config.items()))
+        config = {
+            role: cmds
+            for role in teuthology.all_roles(ctx.cluster)
+            if role.startswith('host.') == (selector == 'all-hosts')
+        }
+
+    for role, ls in config.items():
+        (remote,) = ctx.cluster.only(role).remotes.keys()
+        log.info('Running commands on role %s host %s', role, remote.name)
+        for c in ls:
+            # str.replace returns a new string, so the result must be assigned
+            c = c.replace('$TESTDIR', testdir)
+            remote.run(
+                args=['sudo', f'TESTDIR={testdir}', 'bash', '-ex', '-c',
+                      subst_vip(ctx, c)],
+            )
+
+
+def map_vips(mip, count):
+    """Return (vnet, IPs) for mip per the 'vip' config, stopping at count IPs; else None."""
+    for entry in teuth_config.get('vip', []):
+        mnet = ipaddress.ip_network(entry['machine_subnet'])
+        vnet = ipaddress.ip_network(entry['virtual_subnet'])
+        if vnet.prefixlen >= mnet.prefixlen:
+            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
+            return None
+        if mip in mnet:
+            pos = list(mnet.hosts()).index(mip)
+            log.info(f"{mip} in {mnet}, pos {pos}")
+            picked = []
+            for taken, sub in enumerate(vnet.subnets(new_prefix=mnet.prefixlen), 1):
+                picked.append(list(sub.hosts())[pos])
+                if taken == count:
+                    break
+            return vnet, picked
+    return None
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Set up a virtual network and allocate virtual IP(s) for each machine.
+
+    The strategy here is to set up a private virtual subnet that is larger than
+    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.
+
+    - The teuthology.yaml must include a section like::
+
+        vip:
+          - machine_subnet: 172.21.0.0/20
+            virtual_subnet: 10.0.0.0/16
+
+      At least one item's machine_subnet should map the subnet the test machine's
+      primary IP lives in (the one DNS resolves to).  The virtual_subnet must have a
+      shorter prefix (i.e., larger than the machine_subnet).  If there are multiple
+      machine_subnets, they cannot map into the same virtual_subnet.
+
+    - Each machine gets an IP in the virtual_subnet statically configured by the vip
+      task. This lets all test machines reach each other and (most importantly) any
+      virtual IPs.
+
+    - 1 or more virtual IPs (config 'count', default 1) are then mapped for the task.
+      These IPs are chosen based on one of the remotes.  This uses a lot of network
+      space but it avoids any conflicts between tests.
+
+    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.
+    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example) and
+    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example).
+
+    These substitutions work for vip.echo, vip.exec, and (at the time of writing)
+    cephadm.apply and cephadm.shell.
+
+    Raises RuntimeError if map_vips finds no mapping for a remote's IP.
+    """
+    if config is None:
+        config = {}
+    count = config.get('count', 1)
+
+    ctx.vip_static = {}
+    ctx.vip = {}
+    # matches '<dest> dev <iface> ... scope link ... src <ip>' routing entries
+    route_re = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
+
+    # Setup runs inside the try so that addresses already configured on earlier
+    # remotes are still removed if configuring a later remote fails.
+    try:
+        log.info("Allocating static IPs for each host...")
+        for remote in ctx.cluster.remotes.keys():
+            ip = remote.ssh.get_transport().getpeername()[0]
+            log.info(f'peername {ip}')
+            mip = ipaddress.ip_address(ip)
+            # one extra address: the first one becomes this host's static IP
+            mapped = map_vips(mip, count + 1)
+            if mapped is None:
+                # fail with a clear message instead of an unpacking TypeError
+                raise RuntimeError(
+                    f"no usable 'vip' mapping for {remote.hostname} ({ip})")
+            vnet, vips = mapped
+            static = vips.pop(0)
+            log.info(f"{remote.hostname} static {static}, vnet {vnet}")
+
+            if not ctx.vip:
+                # do this only once (use the first remote we see), since we only
+                # need 1 set of virtual IPs, regardless of how many remotes we have.
+                log.info(f"VIPs are {[str(v) for v in vips]}")
+                ctx.vip = {
+                    'vnet': vnet,
+                    'vips': vips,
+                }
+            else:
+                # all remotes must be in the same virtual network...
+                assert vnet == ctx.vip['vnet']
+
+            # pick the interface whose 'scope link' route has our IP as its src
+            iface = None
+            for line in remote.sh(['sudo', 'ip', 'route', 'ls']).splitlines():
+                route = route_re.match(line)
+                if route and route.group(5) == ip:
+                    iface = route.group(2)
+                    break
+
+            if not iface:
+                log.error(f"Unable to find {remote.hostname} interface for {ip}")
+                continue
+
+            # configure
+            log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
+            remote.sh(['sudo',
+                       'ip', 'addr', 'add',
+                       str(static) + '/' + str(vnet.prefixlen),
+                       'dev', iface])
+
+            ctx.vip_static[remote] = {
+                "iface": iface,
+                "static": static,
+            }
+
+        yield
+
+    finally:
+        for remote, m in ctx.vip_static.items():
+            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
+            prefixlen = str(ctx.vip['vnet'].prefixlen)
+            remote.sh(['sudo', 'ip', 'addr', 'del',
+                       str(m['static']) + '/' + prefixlen, 'dev', m['iface']])
+
+            # best effort: check_status=False, presumably so that a VIP which is
+            # not present on this host does not abort the rest of the cleanup
+            for vip in ctx.vip['vips']:
+                remote.sh(['sudo', 'ip', 'addr', 'del',
+                           str(vip) + '/' + prefixlen, 'dev', m['iface']],
+                          check_status=False)
+