summaryrefslogtreecommitdiffstats
path: root/qa/tasks/vip.py
blob: 52114b1042285268782a05f932b112bf8e1df14c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import contextlib
import ipaddress
import logging
import re

from teuthology import misc as teuthology
from teuthology.config import config as teuth_config

log = logging.getLogger(__name__)


def subst_vip(ctx, cmd):
    p = re.compile(r'({{VIP(\d+)}})')
    for m in p.findall(cmd):
        n = int(m[1])
        if n >= len(ctx.vip["vips"]):
            log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})')
        else:
            cmd = cmd.replace(m[0], str(ctx.vip["vips"][n]))

    if '{{VIPPREFIXLEN}}' in cmd:
        cmd = cmd.replace('{{VIPPREFIXLEN}}', str(ctx.vip["vnet"].prefixlen))

    if '{{VIPSUBNET}}' in cmd:
        cmd = cmd.replace('{{VIPSUBNET}}', str(ctx.vip["vnet"].network_address))

    return cmd


def echo(ctx, config):
    """
    This is mostly for debugging
    """
    for remote in ctx.cluster.remotes.keys():
        log.info(subst_vip(ctx, config))


def exec(ctx, config):
    """
    This is similar to the standard 'exec' task, but does the VIP substitutions.
    """
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if id_.startswith('host.'))

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            c.replace('$TESTDIR', testdir)
            remote.run(
                args=[
                    'sudo',
                    'TESTDIR={tdir}'.format(tdir=testdir),
                    'bash',
                    '-ex',
                    '-c',
                    subst_vip(ctx, c)],
                )


def map_vips(mip, count):
    for mapping in teuth_config.get('vip', []):
        mnet = ipaddress.ip_network(mapping['machine_subnet'])
        vnet = ipaddress.ip_network(mapping['virtual_subnet'])
        if vnet.prefixlen >= mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None
        if mip in mnet:
            pos = list(mnet.hosts()).index(mip)
            log.info(f"{mip} in {mnet}, pos {pos}")
            r = []
            for sub in vnet.subnets(new_prefix=mnet.prefixlen):
                r += [list(sub.hosts())[pos]]
                count -= 1
                if count == 0:
                    break
            return vnet, r
    return None


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up a virtual network and allocate virtual IP(s) for each machine.

    The strategy here is to set up a private virtual subnet that is larger than
    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.

    - The teuthology.yaml must include a section like::

        vip:
          - machine_subnet: 172.21.0.0/20
            virtual_subnet: 10.0.0.0/16

      At least one item's machine_subnet should map the subnet the test machine's
      primary IP lives in (the one DNS resolves to).  The virtual_subnet must have a
      shorter prefix (i.e., larger than the machine_subnet).  If there are multiple
      machine_subnets, they cannot map into the same virtual_subnet.

    - Each machine gets an IP in the virtual_subset statically configured by the vip
      task. This lets all test machines reach each other and (most importantly) any
      virtual IPs.

    - 1 or more virtual IPs are then mapped for the task.  These IPs are chosen based
      on one of the remotes.  This uses a lot of network space but it avoids any
      conflicts between tests.

    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.
    
    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).

    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example.

    These substitutions work for vip.echo, and (at the time of writing) cephadm.apply
    and cephadm.shell.
    """
    if config is None:
        config = {}
    count = config.get('count', 1)

    ctx.vip_static = {}
    ctx.vip = {}

    log.info("Allocating static IPs for each host...")
    for remote in ctx.cluster.remotes.keys():
        ip = remote.ssh.get_transport().getpeername()[0]
        log.info(f'peername {ip}')
        mip = ipaddress.ip_address(ip)
        vnet, vips = map_vips(mip, count + 1)
        static = vips.pop(0)
        log.info(f"{remote.hostname} static {static}, vnet {vnet}")

        if not ctx.vip:
            # do this only once (use the first remote we see), since we only need 1
            # set of virtual IPs, regardless of how many remotes we have.
            log.info("VIPs are {map(str, vips)}")
            ctx.vip = {
                'vnet': vnet,
                'vips': vips,
            }
        else:
            # all remotes must be in the same virtual network...
            assert vnet == ctx.vip['vnet']

        # pick interface
        p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
        iface = None
        for line in remote.sh(['sudo', 'ip','route','ls']).splitlines():
            m = p.findall(line)
            if not m:
                continue
            route_iface = m[0][1]
            route_ip = m[0][4]
            if route_ip == ip:
                iface = route_iface
                break

        if not iface:
            log.error(f"Unable to find {remote.hostname} interface for {ip}")
            continue

        # configure
        log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
        remote.sh(['sudo',
                   'ip', 'addr', 'add',
                   str(static) + '/' + str(vnet.prefixlen),
                   'dev', iface])

        ctx.vip_static[remote] = {
            "iface": iface,
            "static": static,
        }

    try:
        yield

    finally:
        for remote, m in ctx.vip_static.items():
            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'del',
                       str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),
                       'dev', m['iface']])

            for vip in ctx.vip['vips']:
                remote.sh(
                    [
                        'sudo',
                        'ip', 'addr', 'del',
                        str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),
                        'dev', m['iface']
                    ],
                    check_status=False,
                )