diff options
Diffstat (limited to 'qa/tasks/vip.py')
-rw-r--r-- | qa/tasks/vip.py | 205 |
1 files changed, 205 insertions, 0 deletions
"""Teuthology tasks for allocating virtual IPs (VIPs) and using them in tests."""
import contextlib
import ipaddress
import logging
import re

from teuthology import misc as teuthology
from teuthology.config import config as teuth_config

log = logging.getLogger(__name__)


def subst_vip(ctx, cmd):
    """
    Expand the VIP placeholders in ``cmd`` and return the resulting string.

    Recognized placeholders:

    - ``{{VIP<n>}}``: the n-th entry of ``ctx.vip["vips"]``.  If ``n`` is out
      of range a warning is logged and the placeholder is left untouched.
    - ``{{VIPPREFIXLEN}}``: ``ctx.vip["vnet"].prefixlen``.
    - ``{{VIPSUBNET}}``: ``ctx.vip["vnet"].network_address``.
    """
    p = re.compile(r'({{VIP(\d+)}})')
    for m in p.findall(cmd):
        # m is (whole placeholder, index digits)
        n = int(m[1])
        if n >= len(ctx.vip["vips"]):
            log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})')
        else:
            cmd = cmd.replace(m[0], str(ctx.vip["vips"][n]))

    if '{{VIPPREFIXLEN}}' in cmd:
        cmd = cmd.replace('{{VIPPREFIXLEN}}', str(ctx.vip["vnet"].prefixlen))

    if '{{VIPSUBNET}}' in cmd:
        cmd = cmd.replace('{{VIPSUBNET}}', str(ctx.vip["vnet"].network_address))

    return cmd


def echo(ctx, config):
    """
    This is mostly for debugging.

    Logs ``config`` with the VIP substitutions applied, once per remote in
    the cluster.
    """
    for remote in ctx.cluster.remotes.keys():
        log.info(subst_vip(ctx, config))


def exec(ctx, config):
    """
    This is similar to the standard 'exec' task, but does the VIP substitutions.

    ``config`` maps a role to a list of shell commands.  The special single
    keys ``all-roles`` and ``all-hosts`` expand to every role that does not
    (respectively does) start with ``host.``.

    NOTE: the name shadows the ``exec`` builtin inside this module; it is kept
    because it is the task's public name.
    """
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if id_.startswith('host.'))

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            # str.replace returns a new string; the result must be kept or
            # the substitution silently does nothing.
            c = c.replace('$TESTDIR', testdir)
            remote.run(
                args=[
                    'sudo',
                    'TESTDIR={tdir}'.format(tdir=testdir),
                    'bash',
                    '-ex',
                    '-c',
                    subst_vip(ctx, c)],
                )


def map_vips(mip, count):
    """
    Map a machine IP onto addresses in the configured virtual subnet.

    Walks the ``vip`` list of the teuthology config.  For the first entry
    whose ``machine_subnet`` contains ``mip``, the position of ``mip`` among
    that subnet's hosts is used to pick the host at the same position in
    successive machine-sized slices of ``virtual_subnet``.

    :param mip:   machine address (an ``ipaddress`` address object)
    :param count: number of virtual addresses to collect
    :returns:     ``(virtual_network, [addresses])``, or ``None`` if no entry
                  matches or an entry's virtual_subnet is not larger than its
                  machine_subnet
    """
    for mapping in teuth_config.get('vip', []):
        mnet = ipaddress.ip_network(mapping['machine_subnet'])
        vnet = ipaddress.ip_network(mapping['virtual_subnet'])
        if vnet.prefixlen >= mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None
        if mip in mnet:
            pos = list(mnet.hosts()).index(mip)
            log.info(f"{mip} in {mnet}, pos {pos}")
            r = []
            for sub in vnet.subnets(new_prefix=mnet.prefixlen):
                r.append(list(sub.hosts())[pos])
                count -= 1
                if count == 0:
                    break
            return vnet, r
    return None


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up a virtual network and allocate virtual IP(s) for each machine.

    The strategy here is to set up a private virtual subnet that is larger than
    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.

    - The teuthology.yaml must include a section like::

        vip:
          - machine_subnet: 172.21.0.0/20
            virtual_subnet: 10.0.0.0/16

      At least one item's machine_subnet should match the subnet the test
      machine's primary IP lives in (the one DNS resolves to).  The
      virtual_subnet must have a shorter prefix (i.e., be larger than the
      machine_subnet).  If there are multiple machine_subnets, they cannot map
      into the same virtual_subnet.

    - Each machine gets an IP in the virtual_subnet statically configured by the
      vip task.  This lets all test machines reach each other and (most
      importantly) any virtual IPs.

    - 1 or more virtual IPs are then mapped for the task.  These IPs are chosen
      based on one of the remotes.  This uses a lot of network space but it
      avoids any conflicts between tests.

    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.

    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).

    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example).

    These substitutions work for vip.echo, and (at the time of writing)
    cephadm.apply and cephadm.shell.

    :raises RuntimeError: if no ``vip`` mapping in the teuthology config
                          covers a remote's address
    """
    if config is None:
        config = {}
    count = config.get('count', 1)

    ctx.vip_static = {}
    ctx.vip = {}

    # Setup happens inside the try so that addresses already configured on
    # earlier remotes are removed even if a later remote fails.
    try:
        log.info("Allocating static IPs for each host...")
        for remote in ctx.cluster.remotes.keys():
            ip = remote.ssh.get_transport().getpeername()[0]
            log.info(f'peername {ip}')
            mip = ipaddress.ip_address(ip)

            # one extra address: the first becomes this host's static IP
            mapped = map_vips(mip, count + 1)
            if mapped is None:
                raise RuntimeError(
                    f"no usable 'vip' mapping in teuthology config for "
                    f"{remote.hostname} ({ip})"
                )
            vnet, vips = mapped
            static = vips.pop(0)
            log.info(f"{remote.hostname} static {static}, vnet {vnet}")

            if not ctx.vip:
                # do this only once (use the first remote we see), since we only need 1
                # set of virtual IPs, regardless of how many remotes we have.
                vip_list = ', '.join(map(str, vips))
                log.info(f"VIPs are {vip_list}")
                ctx.vip = {
                    'vnet': vnet,
                    'vips': vips,
                }
            else:
                # all remotes must be in the same virtual network...
                assert vnet == ctx.vip['vnet']

            # pick interface: the one whose link-scope route has our IP as src
            p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
            iface = None
            for line in remote.sh(['sudo', 'ip', 'route', 'ls']).splitlines():
                m = p.findall(line)
                if not m:
                    continue
                route_iface = m[0][1]
                route_ip = m[0][4]
                if route_ip == ip:
                    iface = route_iface
                    break

            if not iface:
                log.error(f"Unable to find {remote.hostname} interface for {ip}")
                continue

            # configure
            log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'add',
                       str(static) + '/' + str(vnet.prefixlen),
                       'dev', iface])

            ctx.vip_static[remote] = {
                "iface": iface,
                "static": static,
            }

        yield

    finally:
        for remote, m in ctx.vip_static.items():
            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'del',
                       str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),
                       'dev', m['iface']])

            # VIPs may or may not be present on this host, so do not fail
            # if the delete reports an error.
            for vip in ctx.vip['vips']:
                remote.sh(
                    [
                        'sudo',
                        'ip', 'addr', 'del',
                        str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),
                        'dev', m['iface']
                    ],
                    check_status=False,
                )