Diffstat
18 files changed, 10449 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/__init__.py diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py new file mode 100644 index 000000000..e8add2c7b --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + +from cephadm.services.osd import RemoveUtil, OSD +from tests import mock + +from .fixtures import with_cephadm_module + + +@pytest.fixture() +def cephadm_module(): + with with_cephadm_module({}) as m: + yield m + + +@pytest.fixture() +def rm_util(): + with with_cephadm_module({}) as m: + r = RemoveUtil.__new__(RemoveUtil) + r.__init__(m) + yield r + + +@pytest.fixture() +def osd_obj(): + with mock.patch("cephadm.services.osd.RemoveUtil"): + o = OSD(0, mock.MagicMock()) + yield o diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py new file mode 100644 index 000000000..6281283d7 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -0,0 +1,200 @@ +import fnmatch +import asyncio +import sys +from tempfile import NamedTemporaryFile +from contextlib import contextmanager + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm.serve import CephadmServe, cephadmNoImage + +try: + from typing import Any, Iterator, List, Callable, Dict +except ImportError: + pass + +from cephadm import CephadmOrchestrator +from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus +from tests import mock + + +def async_side_effect(result): + async def side_effect(*args, **kwargs): + return result + return side_effect + + +def get_ceph_option(_, key): + return __file__ + + +def get_module_option_ex(_, module, key, default=None): + if module == 'prometheus': + if key == 'server_port': + return 9283 + return None + + +def _run_cephadm(ret): + async def foo(s, host, entity, cmd, e, **kwargs): + if cmd == 'gather-facts': + return '{}', '', 0 + return [ret], '', 0 + return foo + + +def match_glob(val, pat): + ok = fnmatch.fnmatchcase(val, pat) + if not ok: + assert pat in val + + +class MockEventLoopThread: + def get_result(self, coro, timeout): + if sys.version_info >= (3, 7): + return asyncio.run(coro) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + asyncio.set_event_loop(None) + + +def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None: + to_update: Dict[str, Callable[[str, Any], None]] = { + 'ls': m._process_ls_output, + 'gather-facts': m.cache.update_host_facts, + 'list-networks': m.cache.update_host_networks, + } + if ops: + for op in ops: + out = m.wait_async(CephadmServe(m)._run_cephadm_json(host, cephadmNoImage, op, [])) + to_update[op](host, out) + m.cache.last_daemon_update[host] = datetime_now() + m.cache.last_facts_update[host] = datetime_now() + m.cache.last_network_update[host] = datetime_now() + m.cache.metadata_up_to_date[host] = True + + +def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None: + for host in m.cache.get_hosts(): + receive_agent_metadata(m, host) + + +@contextmanager +def with_cephadm_module(module_options=None, store=None): + """ + :param module_options: Set opts as if they were set before module.__init__ is called + :param store: Set the 
store before module.__init__ is called + """ + with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option), \ + mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \ + mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \ + mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \ + mock.patch("cephadm.module.CephadmOrchestrator.remote"), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \ + mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \ + mock.patch('cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles'), \ + mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \ + mock.patch('cephadm.http_server.CephadmHttpServer.run'): + + m = CephadmOrchestrator.__new__(CephadmOrchestrator) + if module_options is not None: + for k, v in module_options.items(): + m._ceph_set_module_option('cephadm', k, v) + if store is None: + store = {} + if '_ceph_get/mon_map' not in store: + m.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + if '_ceph_get/mgr_map' not in store: + m.mock_store_set('_ceph_get', 'mgr_map', { + 'services': { + 'dashboard': 'http://[::1]:8080', + 'prometheus': 'http://[::1]:8081' + }, + 'modules': ['dashboard', 'prometheus'], + }) + for k, v in store.items(): + m._ceph_set_store(k, v) + + m.__init__('cephadm', 0, 0) + m._cluster_fsid = "fsid" + + m.event_loop = MockEventLoopThread() + m.tkey = NamedTemporaryFile(prefix='test-cephadm-identity-') + + yield m + + +def wait(m: CephadmOrchestrator, c: OrchResult) -> Any: + return raise_if_exception(c) + + +@contextmanager +def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True): + with mock.patch("cephadm.utils.resolve_ip", return_value=addr): + wait(m, m.add_host(HostSpec(hostname=name))) + if refresh_hosts: + CephadmServe(m)._refresh_hosts_and_daemons() + receive_agent_metadata(m, name) + yield + wait(m, m.remove_host(name, force=rm_with_force)) + + +def assert_rm_service(cephadm: CephadmOrchestrator, srv_name): + mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr') + if mon_or_mgr: + assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name)) + return + assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}' + assert cephadm.spec_store[srv_name].deleted is not None + CephadmServe(cephadm)._check_daemons() + CephadmServe(cephadm)._apply_all_services() + assert cephadm.spec_store[srv_name].deleted + unmanaged = cephadm.spec_store[srv_name].spec.unmanaged + CephadmServe(cephadm)._purge_deleted_services() + if not unmanaged: # cause then we're not deleting daemons + assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}' + + +@contextmanager +def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]: + if spec.placement.is_empty() and host: + spec.placement = PlacementSpec(hosts=[host], count=1) + if meth is not None: + c = meth(cephadm_module, spec) + assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...' 
+ else: + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...'] + + specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] + assert spec in specs + + CephadmServe(cephadm_module)._apply_all_services() + + if status_running: + make_daemons_running(cephadm_module, spec.service_name()) + + dds = wait(cephadm_module, cephadm_module.list_daemons()) + own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()] + if host and spec.service_type != 'osd': + assert own_dds + + yield [dd.name() for dd in own_dds] + + assert_rm_service(cephadm_module, spec.service_name()) + + +def make_daemons_running(cephadm_module, service_name): + own_dds = cephadm_module.cache.get_daemons_by_service(service_name) + for dd in own_dds: + dd.status = DaemonDescriptionStatus.running # We're changing the reference diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py new file mode 100644 index 000000000..524da9c00 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_autotune.py @@ -0,0 +1,69 @@ +# Disable autopep8 for this file: + +# fmt: off + +import pytest + +from cephadm.autotune import MemoryAutotuner +from orchestrator import DaemonDescription + + +@pytest.mark.parametrize("total,daemons,config,result", + [ # noqa: E128 + ( + 128 * 1024 * 1024 * 1024, + [], + {}, + None, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 64 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + DaemonDescription('osd', '3', 'host1'), + ], + { + 'osd.3': 16 * 1024 * 1024 * 1024, + }, + 56 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 62 * 1024 * 1024 * 1024, + ) + ]) +def test_autotune(total, daemons, config, result): + def fake_getter(who, opt): + if opt == 'osd_memory_target_autotune': + if who in config: + return False + else: + return True + if opt == 'osd_memory_target': + return config.get(who, 4 * 1024 * 1024 * 1024) + if opt == 'mds_cache_memory_limit': + return 16 * 1024 * 1024 * 1024 + + a = MemoryAutotuner( + total_mem=total, + daemons=daemons, + config_get=fake_getter, + ) + val, osds = a.tune() + assert val == result diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py new file mode 100644 index 000000000..24fcb0280 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -0,0 +1,2709 @@ +import asyncio +import json +import logging + +from contextlib import contextmanager + +import pytest + +from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection +from cephadm.serve import CephadmServe +from cephadm.inventory import HostCacheStatus, ClientKeyringSpec +from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims +from cephadm.utils import SpecialHostLabels + +try: + from typing import List +except ImportError: + pass + +from ceph.deployment.service_spec import ( + CustomConfig, + CustomContainerSpec, + HostPlacementSpec, + IscsiServiceSpec, + MDSSpec, + NFSServiceSpec, + PlacementSpec, + RGWSpec, + ServiceSpec, +) +from ceph.deployment.drive_selection.selector import DriveSelection +from ceph.deployment.inventory import Devices, Device +from ceph.utils 
import datetime_to_str, datetime_now, str_to_datetime +from orchestrator import DaemonDescription, InventoryHost, \ + HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent +from tests import mock +from .fixtures import wait, _run_cephadm, match_glob, with_host, \ + with_cephadm_module, with_service, make_daemons_running, async_side_effect +from cephadm.module import CephadmOrchestrator + +""" +TODOs: + There is really room for improvement here. I just quickly assembled theses tests. + I general, everything should be testes in Teuthology as well. Reasons for + also testing this here is the development roundtrip time. +""" + + +def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host): + dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host)) + d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)] + assert d_names + # there should only be one daemon (if not match_glob will throw mismatch) + assert len(d_names) == 1 + + c = cephadm.remove_daemons(d_names) + [out] = wait(cephadm, c) + # picking the 1st element is needed, rather than passing the list when the daemon + # name contains '-' char. If not, the '-' is treated as a range i.e. cephadm-exporter + # is treated like a m-e range which is invalid. rbd-mirror (d-m) and node-exporter (e-e) + # are valid, so pass without incident! Also, match_gob acts on strings anyway! + match_glob(out, f"Removed {d_names[0]}* from host '{host}'") + + +@contextmanager +def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str): + spec.placement = PlacementSpec(hosts=[host], count=1) + + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'") + + dds = cephadm_module.cache.get_daemons_by_service(spec.service_name()) + for dd in dds: + if dd.hostname == host: + yield dd.daemon_id + assert_rm_daemon(cephadm_module, spec.service_name(), host) + return + + assert False, 'Daemon not found' + + +@contextmanager +def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None): + cephadm_module.mock_store_set('_ceph_get', 'osd_map', { + 'osds': [ + { + 'osd': 1, + 'up_from': 0, + 'up': True, + 'uuid': 'uuid' + } + ] + }) + + _run_cephadm.reset_mock(return_value=True, side_effect=True) + if ceph_volume_lvm_list: + _run_cephadm.side_effect = ceph_volume_lvm_list + else: + async def _ceph_volume_list(s, host, entity, cmd, **kwargs): + logging.info(f'ceph-volume cmd: {cmd}') + if 'raw' in cmd: + return json.dumps({ + "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": { + "ceph_fsid": cephadm_module._cluster_fsid, + "device": "/dev/loop0", + "osd_id": 21, + "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5", + "type": "bluestore" + }, + }), '', 0 + if 'lvm' in cmd: + return json.dumps({ + str(osd_id): [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }] + }), '', 0 + return '{}', '', 0 + + _run_cephadm.side_effect = _ceph_volume_list + + assert cephadm_module._osd_activate( + [host]).stdout == f"Created osd(s) 1 on host '{host}'" + assert _run_cephadm.mock_calls == [ + mock.call(host, 'osd', 'ceph-volume', + ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True), + mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY), + mock.call(host, 'osd', 'ceph-volume', + ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, 
error_ok=False, image='', log_output=True), + ] + dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host) + assert dd.name() == f'osd.{osd_id}' + yield dd + cephadm_module._remove_daemons([(f'osd.{osd_id}', host)]) + + +class TestCephadm(object): + + def test_get_unique_name(self, cephadm_module): + # type: (CephadmOrchestrator) -> None + existing = [ + DaemonDescription(daemon_type='mon', daemon_id='a') + ] + new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing) + match_glob(new_mon, 'myhost') + new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing) + match_glob(new_mgr, 'myhost.*') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host(self, cephadm_module): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + with with_host(cephadm_module, 'test'): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + + # Be careful with backward compatibility when changing things here: + assert json.loads(cephadm_module.get_store('inventory')) == \ + {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}} + + with with_host(cephadm_module, 'second', '1.2.3.5'): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '1::4'), + HostSpec('second', '1.2.3.5') + ] + + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + @mock.patch("cephadm.utils.resolve_ip") + def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module): + resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1'] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + cephadm_module._add_host(HostSpec('test', '192.168.122.1')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '192.168.122.1')] + cephadm_module._add_host(HostSpec('test')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '192.168.122.1')] + with pytest.raises(OrchestratorError): + cephadm_module._add_host(HostSpec('test2')) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_service_ls(self, cephadm_module): + with with_host(cephadm_module, 'test'): + c = cephadm_module.list_daemons(refresh=True) + assert wait(cephadm_module, c) == [] + with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \ + with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _: + + c = cephadm_module.list_daemons() + + def remove_id_events(dd): + out = dd.to_json() + del out['daemon_id'] + del out['events'] + del out['daemon_name'] + return out + + assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [ + { + 'service_name': 'mds.name', + 'daemon_type': 'mds', + 'hostname': 'test', + 'status': 2, + 'status_desc': 'starting', + 'is_active': False, + 'ports': [], + } + ] + + with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), + CephadmOrchestrator.apply_rgw, 'test', status_running=True): + make_daemons_running(cephadm_module, 'mds.name') + + c = cephadm_module.describe_service() + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + expected = [ + { + 'placement': {'count': 2}, + 'service_id': 'name', + 'service_name': 'mds.name', + 'service_type': 'mds', + 'status': {'created': mock.ANY, 'running': 1, 'size': 2}, + 'unmanaged': True + }, + { + 'placement': { + 
'count': 1, + 'hosts': ["test"] + }, + 'service_id': 'r.z', + 'service_name': 'rgw.r.z', + 'service_type': 'rgw', + 'status': {'created': mock.ANY, 'running': 1, 'size': 1, + 'ports': [80]}, + } + ] + for o in out: + if 'events' in o: + del o['events'] # delete it, as it contains a timestamp + assert out == expected + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_service_ls_service_type_flag(self, cephadm_module): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True): + with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mds, '', status_running=True): + + # with no service-type. Should provide info fot both services + c = cephadm_module.describe_service() + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + expected = [ + { + 'placement': {'count': 2}, + 'service_name': 'mgr', + 'service_type': 'mgr', + 'status': {'created': mock.ANY, + 'running': 2, + 'size': 2} + }, + { + 'placement': {'count': 2}, + 'service_id': 'test-id', + 'service_name': 'mds.test-id', + 'service_type': 'mds', + 'status': {'created': mock.ANY, + 'running': 2, + 'size': 2} + }, + ] + + for o in out: + if 'events' in o: + del o['events'] # delete it, as it contains a timestamp + assert out == expected + + # with service-type. Should provide info fot only mds + c = cephadm_module.describe_service(service_type='mds') + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + expected = [ + { + 'placement': {'count': 2}, + 'service_id': 'test-id', + 'service_name': 'mds.test-id', + 'service_type': 'mds', + 'status': {'created': mock.ANY, + 'running': 2, + 'size': 2} + }, + ] + + for o in out: + if 'events' in o: + del o['events'] # delete it, as it contains a timestamp + assert out == expected + + # service-type should not match with service names + c = cephadm_module.describe_service(service_type='mds.test-id') + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + assert out == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_device_ls(self, cephadm_module): + with with_host(cephadm_module, 'test'): + c = cephadm_module.get_inventory() + assert wait(cephadm_module, c) == [InventoryHost('test')] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='rgw.myrgw.foobar', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ), + dict( + name='something.foo.bar', + style='cephadm', + fsid='fsid', + ), + dict( + name='haproxy.test.bar', + style='cephadm', + fsid='fsid', + ), + + ]) + )) + def test_list_daemons(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + dds = wait(cephadm_module, cephadm_module.list_daemons()) + assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'} + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_daemon_action(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \ + 
with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + + d_name = 'rgw.' + daemon_id + + c = cephadm_module.daemon_action('redeploy', d_name) + assert wait(cephadm_module, + c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'" + + for what in ('start', 'stop', 'restart'): + c = cephadm_module.daemon_action(what, d_name) + assert wait(cephadm_module, + c) == F"Scheduled to {what} {d_name} on host 'test'" + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module._store['_ceph_get/mon_map'] = { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + } + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + assert cephadm_module.events.get_for_daemon(d_name) == [ + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"Deployed {d_name} on host \'test\'"), + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"stop {d_name} from host \'test\'"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \ + with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command: + + _ceph_send_command.side_effect = Exception("myerror") + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + evs = [e.message for e in cephadm_module.events.get_for_daemon( + f'rgw.{daemon_id}')] + + assert 'myerror' in ''.join(evs) + + @pytest.mark.parametrize( + "action", + [ + 'start', + 'stop', + 'restart', + 'reconfig', + 'redeploy' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.HostCache.save_host") + def test_daemon_check(self, _save_host, cephadm_module: CephadmOrchestrator, action): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names: + [daemon_name] = d_names + + cephadm_module._schedule_daemon_action(daemon_name, action) + + assert cephadm_module.cache.get_scheduled_daemon_action( + 'test', daemon_name) == action + + CephadmServe(cephadm_module)._check_daemons() + + assert _save_host.called_with('test') + assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + + # Also testing deploying mons without explicit network placement + cephadm_module.check_mon_command({ + 'prefix': 'config set', + 'who': 'mon', + 'name': 'public_network', + 'value': '127.0.0.0/8' + }) + + cephadm_module.cache.update_host_networks( + 'test', + { + "127.0.0.0/8": [ + "127.0.0.1" + ], + } + ) + + with with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names: + [daemon_name] 
= d_names + + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', + 'mon.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "mon.test", + "image": '', + "deploy_arguments": [], + "params": { + 'reconfig': True, + }, + "meta": { + 'service_name': 'mon', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "[mon]\nk=v\n[mon.test]\npublic network = 127.0.0.0/8\n", + "keyring": "", + "files": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n" + }, + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_mon_crush_location_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + cephadm_module.check_mon_command({ + 'prefix': 'config set', + 'who': 'mon', + 'name': 'public_network', + 'value': '127.0.0.0/8' + }) + + cephadm_module.cache.update_host_networks( + 'test', + { + "127.0.0.0/8": [ + "127.0.0.1" + ], + } + ) + + with with_service(cephadm_module, ServiceSpec(service_type='mon', crush_locations={'test': ['datacenter=a', 'rack=2']}), CephadmOrchestrator.apply_mon, 'test'): + _run_cephadm.assert_called_with( + 'test', + 'mon.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "mon.test", + "image": '', + "deploy_arguments": [], + "params": {}, + "meta": { + 'service_name': 'mon', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n", + "keyring": "", + "files": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n", + }, + "crush_location": "datacenter=a", + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash): + _run_cephadm.assert_called_with( + 'test', + 'crash.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "crash.test", + "image": '', + "deploy_arguments": [], + "params": { + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + }, + "meta": { + 'service_name': 'crash', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "", + "keyring": "[client.crash.test]\nkey = None\n", + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg']), + CephadmOrchestrator.apply_node_exporter): + 
_run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_and_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg'], + extra_container_args=['--cpus=2', '--quiet']), + CephadmOrchestrator.apply_node_exporter): + _run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_and_container_args_with_spaces(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--entrypoint-arg-with-value value', '--some-other-arg 3'], + extra_container_args=['--cpus 2', '--container-arg-with-value value']), + CephadmOrchestrator.apply_node_exporter): + _run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_container_args': [ + "--cpus", + "2", + "--container-arg-with-value", + "value", + ], + 'extra_entrypoint_args': [ + "--entrypoint-arg-with-value", + "value", + "--some-other-arg", + "3", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus 2", + "--container-arg-with-value value", + ], + 'extra_entrypoint_args': [ + "--entrypoint-arg-with-value value", + "--some-other-arg 3", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + 
def test_custom_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + test_cert = ['-----BEGIN PRIVATE KEY-----', + 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg', + 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=', + '-----END PRIVATE KEY-----', + '-----BEGIN CERTIFICATE-----', + 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg', + 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=', + '-----END CERTIFICATE-----'] + configs = [ + CustomConfig(content='something something something', + mount_path='/etc/test.conf'), + CustomConfig(content='\n'.join(test_cert), mount_path='/usr/share/grafana/thing.crt') + ] + tc_joined = '\n'.join(test_cert) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='crash', custom_configs=configs), CephadmOrchestrator.apply_crash): + _run_cephadm( + 'test', + 'crash.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "crash.test", + "image": "", + "deploy_arguments": [], + "params": {}, + "meta": { + "service_name": "crash", + "ports": [], + "ip": None, + "deployed_by": [], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "extra_entrypoint_args": None, + }, + "config_blobs": { + "config": "", + "keyring": "[client.crash.test]\nkey = None\n", + "custom_config_files": [ + { + "content": "something something something", + "mount_path": "/etc/test.conf", + }, + { + "content": tc_joined, + "mount_path": "/usr/share/grafana/thing.crt", + }, + ] + } + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'): + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + cephadm_module.mock_store_set('_ceph_get', 'mgr_map', { + 'modules': ['dashboard'] + }) + + with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + CephadmServe(cephadm_module)._check_daemons() + _mon_cmd.assert_any_call( + {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'}, + None) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4') + def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator): + # https://tracker.ceph.com/issues/52866 + with with_host(cephadm_module, 'test1'): + with with_host(cephadm_module, 'test2'): + with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'): + + CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2 + + # get a daemons from postaction list (ARRGH sets!!) 
+ tempset = cephadm_module.requires_post_actions.copy() + tempdaemon1 = tempset.pop() + tempdaemon2 = tempset.pop() + + # make sure post actions has 2 daemons in it + assert len(cephadm_module.requires_post_actions) == 2 + + # replicate a host cache that is not in sync when check_daemons is called + tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1) + tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2) + host = 'test1' + if 'test1' not in tempdaemon1: + host = 'test2' + cephadm_module.cache.rm_daemon(host, tempdaemon1) + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + cephadm_module.mock_store_set('_ceph_get', 'mgr_map', { + 'modules': ['dashboard'] + }) + + with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db: + CephadmServe(cephadm_module)._check_daemons() + _cfg_db.assert_called_once_with([tempdd2]) + + # post actions still has the other daemon in it and will run next _check_daemons + assert len(cephadm_module.requires_post_actions) == 1 + + # post actions was missed for a daemon + assert tempdaemon1 in cephadm_module.requires_post_actions + + # put the daemon back in the cache + cephadm_module.cache.add_daemon(host, tempdd1) + + _cfg_db.reset_mock() + # replicate serve loop running again + CephadmServe(cephadm_module)._check_daemons() + + # post actions should have been called again + _cfg_db.asset_called() + + # post actions is now empty + assert len(cephadm_module.requires_post_actions) == 0 + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_mon_add(self, cephadm_module): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps)) + assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"] + + with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"): + ps = PlacementSpec(hosts=['test'], count=1) + c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps)) + wait(cephadm_module, c) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_mgr_update(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert r + + assert_rm_daemon(cephadm_module, 'mgr.a', 'test') + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds(self, _mon_cmd, cephadm_module): + dict_out = { + "nodes": [ + { + "id": -1, + "name": "default", + "type": "root", + "type_id": 11, + "children": [ + -3 + ] + }, + { + "id": -3, + "name": "host1", + "type": "host", + "type_id": 1, + "pool_weights": {}, + "children": [ + 0 + ] + }, + { + "id": 0, + "device_class": "hdd", + "name": "osd.0", + "type": "osd", + "type_id": 0, + "crush_weight": 0.0243988037109375, + "depth": 2, + "pool_weights": {}, + "exists": 1, + "status": "destroyed", + "reweight": 1, + "primary_affinity": 1 + } + ], + "stray": [] + } + json_out = json.dumps(dict_out) + _mon_cmd.return_value = (0, json_out, '') + osd_claims = OsdIdClaims(cephadm_module) + assert osd_claims.get() == {'host1': ['0']} + assert 
osd_claims.filtered_by_host('host1') == ['0'] + assert osd_claims.filtered_by_host('host1.domain.com') == ['0'] + + @ pytest.mark.parametrize( + "ceph_services, cephadm_daemons, strays_expected, metadata", + # [ ([(daemon_type, daemon_id), ... ], [...], [...]), ... ] + [ + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0')], + [('mgr', 'x')], + {}, + ), + # https://tracker.ceph.com/issues/49573 + ( + [('rgw-nfs', '14649')], + [], + [('nfs', 'foo-rgw.host1')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')], + [], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1')], + [('nfs', 'foo2.host2')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ] + ) + def test_check_for_stray_daemons( + self, + cephadm_module, + ceph_services, + cephadm_daemons, + strays_expected, + metadata + ): + # mock ceph service-map + services = [] + for service in ceph_services: + s = {'type': service[0], 'id': service[1]} + services.append(s) + ls = [{'hostname': 'host1', 'services': services}] + + with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers: + list_servers.return_value = ls + list_servers.__iter__.side_effect = ls.__iter__ + + # populate cephadm daemon cache + dm = {} + for daemon_type, daemon_id in cephadm_daemons: + dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id) + dm[dd.name()] = dd + cephadm_module.cache.update_host_daemons('host1', dm) + + def get_metadata_mock(svc_type, svc_id, default): + return metadata[svc_id] + + with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock): + + # test + CephadmServe(cephadm_module)._check_for_strays() + + # verify + strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON') + if not strays: + assert len(strays_expected) == 0 + else: + for dt, di in strays_expected: + name = '%s.%s' % (dt, di) + for detail in strays['detail']: + if name in detail: + strays['detail'].remove(detail) + break + assert name in detail + assert len(strays['detail']) == 0 + assert strays['count'] == len(strays_expected) + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module): + _mon_cmd.return_value = (1, "", "fail_msg") + with pytest.raises(OrchestratorError): + OsdIdClaims(cephadm_module) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='foo', + placement=PlacementSpec( + host_pattern='*', + ), + data_devices=DeviceSelection( + all=True + ) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo update...'] + + inventory = Devices([ + Device( + '/dev/sdb', + available=True + ), + ]) + + cephadm_module.cache.update_host_devices('test', inventory.devices) + + _run_cephadm.side_effect = async_side_effect((['{}'], '', 0)) + + 
assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, + stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='noncollocated', + placement=PlacementSpec( + hosts=['test'] + ), + data_devices=DeviceSelection(paths=['/dev/sdb']), + db_devices=DeviceSelection(paths=['/dev/sdc']), + wal_devices=DeviceSelection(paths=['/dev/sdd']) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...'] + + inventory = Devices([ + Device('/dev/sdb', available=True), + Device('/dev/sdc', available=True), + Device('/dev/sdd', available=True) + ]) + + cephadm_module.cache.update_host_devices('test', inventory.devices) + + _run_cephadm.side_effect = async_side_effect((['{}'], '', 0)) + + assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc', + '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'], + error_ok=True, stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.SpecStore.save") + def test_apply_osd_save_placement(self, _save_spec, cephadm_module): + with with_host(cephadm_module, 'test'): + json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'}, + 'service_id': 'foo', 'data_devices': {'all': True}} + spec = ServiceSpec.from_json(json_spec) + assert isinstance(spec, DriveGroupSpec) + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo update...'] + _save_spec.assert_called_with(spec) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_create_osds(self, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(paths=[''])) + c = cephadm_module.create_osds(dg) + out = wait(cephadm_module, c) + assert out == "Created no osd(s) on host test; already created?" 
+ bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_host'), + data_devices=DeviceSelection(paths=[''])) + c = cephadm_module.create_osds(bad_dg) + out = wait(cephadm_module, c) + assert "Invalid 'host:device' spec: host not found in cluster" in out + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_create_noncollocated_osd(self, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(paths=[''])) + c = cephadm_module.create_osds(dg) + out = wait(cephadm_module, c) + assert out == "Created no osd(s) on host test; already created?" + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command') + @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume') + @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None) + @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {}) + def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(limit=5, rotational=1), + service_id='not_enough') + + disks_found = [ + '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]'] + d_to_cv.return_value = 'foo' + _run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0)) + preview = cephadm_module.osd_service.generate_previews([dg], 'test') + + for osd in preview: + assert 'notes' in osd + assert osd['notes'] == [ + 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_prepare_drivegroup(self, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(paths=[''])) + out = cephadm_module.osd_service.prepare_drivegroup(dg) + assert len(out) == 1 + f1 = out[0] + assert f1[0] == 'test' + assert isinstance(f1[1], DriveSelection) + + @pytest.mark.parametrize( + "devices, preview, exp_commands", + [ + # no preview and only one disk, prepare is used due the hack that is in place. 
+ (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]), + # no preview and multiple disks, uses batch + (['/dev/sda', '/dev/sdb'], False, + ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]), + # preview and only one disk needs to use batch again to generate the preview + (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]), + # preview and multiple disks work the same + (['/dev/sda', '/dev/sdb'], True, + ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec( + host_pattern='test'), data_devices=DeviceSelection(paths=devices)) + ds = DriveSelection(dg, Devices([Device(path) for path in devices])) + preview = preview + out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview) + assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) + for cmd in out), f'Expected cmds from f{out} in {exp_commands}' + + @pytest.mark.parametrize( + "devices, preview, exp_commands", + [ + # one data device, no preview + (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]), + # multiple data devices, no preview + (['/dev/sda', '/dev/sdb'], False, + ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]), + # one data device, preview + (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]), + # multiple data devices, preview + (['/dev/sda', '/dev/sdb'], True, + ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec( + host_pattern='test'), data_devices=DeviceSelection(paths=devices)) + ds = DriveSelection(dg, Devices([Device(path) for path in devices])) + preview = preview + out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview) + assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) + for cmd in out), f'Expected cmds from f{out} in {exp_commands}' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='osd.0', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ) + ]) + )) + @mock.patch("cephadm.services.osd.OSD.exists", True) + @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0) + def test_remove_osds(self, cephadm_module): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + c = cephadm_module.list_daemons() + wait(cephadm_module, c) + + c = cephadm_module.remove_daemons(['osd.0']) + out = wait(cephadm_module, c) + assert out == ["Removed osd.0 from host 'test'"] + + cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0, + replace=False, + force=False, + hostname='test', + process_started_at=datetime_now(), + 
remove_util=cephadm_module.to_remove_osds.rm_util + )) + cephadm_module.to_remove_osds.process_removal_queue() + assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module) + + c = cephadm_module.remove_osds_status() + out = wait(cephadm_module, c) + assert out == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_update(self, cephadm_module): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)): + ps = PlacementSpec(hosts=['host1'], count=1) + c = cephadm_module.add_daemon( + RGWSpec(service_id="foo", placement=ps)) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed rgw.foo.* on host 'host1'") + + ps = PlacementSpec(hosts=['host1', 'host2'], count=2) + r = CephadmServe(cephadm_module)._apply_service( + RGWSpec(service_id="foo", placement=ps)) + assert r + + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1') + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='rgw.myrgw.myhost.myid', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ) + ]) + )) + def test_remove_daemon(self, cephadm_module): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + c = cephadm_module.list_daemons() + wait(cephadm_module, c) + c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid']) + out = wait(cephadm_module, c) + assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription + with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription + CephadmServe(cephadm_module)._check_for_moved_osds() + # both are in status "starting" + assert len(cephadm_module.cache.get_daemons()) == 2 + + dd1.status = DaemonDescriptionStatus.running + dd2.status = DaemonDescriptionStatus.error + cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1}) + cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2}) + CephadmServe(cephadm_module)._check_for_moved_osds() + assert len(cephadm_module.cache.get_daemons()) == 1 + + assert cephadm_module.events.get_for_daemon('osd.1') == [ + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host1'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host2'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Removed duplicated daemon on host 'host2'"), + ] + + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @pytest.mark.parametrize( + "spec", + [ + ServiceSpec('crash'), + ServiceSpec('prometheus'), + ServiceSpec('grafana'), + ServiceSpec('node-exporter'), + ServiceSpec('alertmanager'), + ServiceSpec('rbd-mirror'), + ServiceSpec('cephfs-mirror'), + ServiceSpec('mds', service_id='fsname'), + 
RGWSpec(rgw_realm='realm', rgw_zone='zone'), + RGWSpec(service_id="foo"), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_add(self, spec: ServiceSpec, cephadm_module): + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, unmanaged_spec): + with with_daemon(cephadm_module, spec, 'test'): + pass + + @pytest.mark.parametrize( + "entity,success,spec", + [ + ('mgr.x', True, ServiceSpec( + service_type='mgr', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.rgw.x', True, ServiceSpec( + service_type='rgw', + service_id='id', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.nfs.x', True, ServiceSpec( + service_type='nfs', + service_id='id', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('mon.', False, ServiceSpec( + service_type='mon', + placement=PlacementSpec( + hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.side_effect = OrchestratorError('fail') + with pytest.raises(OrchestratorError): + wait(cephadm_module, cephadm_module.add_daemon(spec)) + if success: + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + else: + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + assert cephadm_module.events.get_for_service(spec.service_name()) == [ + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO', + "service was created"), + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR', + "fail"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + _run_cephadm.side_effect = OrchestratorError('fail') + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1 + assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['summary'] + assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['detail'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with 
with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._apply_all_services() + ps = PlacementSpec(hosts=['fail'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.apply_spec_fails + assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1 + assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks[ + 'CEPHADM_APPLY_SPEC_FAIL']['summary'] + + @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.save_host_devices") + def test_invalid_config_option_health_warning(self, _save_devs, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator): + _save_devs.return_value = None + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + get_foreign_ceph_option.side_effect = KeyError + CephadmServe(cephadm_module)._apply_service_config( + ServiceSpec('mgr', placement=ps, config={'test': 'foo'})) + assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None + assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1 + assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['summary'] + assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['detail'] + + @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_save_devices(self, _set_store, _run_cephadm, _get_foreign_ceph_option, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + entry_size = 65536 # default 64k size + _get_foreign_ceph_option.return_value = entry_size + + class FakeDev(): + def __init__(self, c: str = 'a'): + # using 1015 here makes the serialized string exactly 1024 bytes if c is one char + self.content = {c: c * 1015} + self.path = 'dev/vdc' + + def to_json(self): + return self.content + + def from_json(self, stuff): + return json.loads(stuff) + + def byte_len(s): + return len(s.encode('utf-8')) + + with with_host(cephadm_module, 'test'): + fake_devices = [FakeDev()] * 100 # should be ~100k + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 34], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 34]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 32]})), + ] + _set_store.assert_has_calls(expected_calls) + + fake_devices = [FakeDev()] * 300 # should be ~300k + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size * 4 + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 5 + cephadm_module.cache.update_host_devices('test', 
fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50], 'entries': 6})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.3', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.4', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.5', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + ] + _set_store.assert_has_calls(expected_calls) + + fake_devices = [FakeDev()] * 62 # should be ~62k, just under cache size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 62], 'entries': 1})), + ] + _set_store.assert_has_calls(expected_calls) + + # should be ~64k but just over so it requires more entries + fake_devices = [FakeDev()] * 64 + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 22], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 22]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 20]})), + ] + _set_store.assert_has_calls(expected_calls) + + # test for actual content being correct using differing devices + entry_size = 3072 + _get_foreign_ceph_option.return_value = entry_size + fake_devices = [FakeDev('a'), FakeDev('b'), FakeDev('c'), FakeDev('d'), FakeDev('e')] + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('a'), FakeDev('b')]], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('c'), FakeDev('d')]]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('e')]]})), + ] + _set_store.assert_has_calls(expected_calls) + + @mock.patch("cephadm.module.CephadmOrchestrator.get_store") + def test_load_devices(self, _get_store, cephadm_module: CephadmOrchestrator): + def _fake_store(key): + if key == 'host.test.devices.0': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 9], 'entries': 3}) + elif key == 'host.test.devices.1': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 7]}) + elif key == 'host.test.devices.2': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 4]}) + else: + raise Exception(f'Get store with unexpected value {key}') + + 
_get_store.side_effect = _fake_store + devs = cephadm_module.cache.load_host_devices('test') + assert devs == [Device('/path')] * 20 + + @mock.patch("cephadm.module.Inventory.__contains__") + def test_check_stray_host_cache_entry(self, _contains, cephadm_module: CephadmOrchestrator): + def _fake_inv(key): + if key in ['host1', 'node02', 'host.something.com']: + return True + return False + + _contains.side_effect = _fake_inv + assert cephadm_module.cache._get_host_cache_entry_status('host1') == HostCacheStatus.host + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.something.com') == HostCacheStatus.host + assert cephadm_module.cache._get_host_cache_entry_status( + 'node02.devices.37') == HostCacheStatus.devices + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.something.com.devices.0') == HostCacheStatus.devices + assert cephadm_module.cache._get_host_cache_entry_status('hostXXX') == HostCacheStatus.stray + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.nothing.com') == HostCacheStatus.stray + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_nfs(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = NFSServiceSpec( + service_id='name', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed nfs.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock()) + @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + def test_iscsi(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed iscsi.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test') + + @pytest.mark.parametrize( + "on_bool", + [ + True, + False + ] + ) + @pytest.mark.parametrize( + "fault_ident", + [ + 'fault', + 'ident' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')]) + on_off = 'on' if on_bool else 'off' + assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True) + + 
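The blink-device-light tests above and below assert the exact argument list handed to `cephadm ... shell`. A minimal sketch, assuming a hypothetical helper name and not the actual cephadm implementation, of how the default lsmcli invocation checked by test_blink_device_light could be composed:

from typing import List

def build_blink_cmd(ident_fault: str, on: bool, path: str) -> List[str]:
    # ident_fault is 'ident' or 'fault'; the test expects the shape
    # ['--', 'lsmcli', 'local-disk-<ident|fault>-led-<on|off>', '--path', <dev>]
    on_off = 'on' if on else 'off'
    return ['--', 'lsmcli', f'local-disk-{ident_fault}-led-{on_off}', '--path', path]

# mirrors the assert_called_with arguments for ('fault', True, 'dev')
assert build_blink_cmd('fault', True, 'dev') == [
    '--', 'lsmcli', 'local-disk-fault-led-on', '--path', 'dev']

The custom-command tests that follow replace this default with a template stored under `blink_device_light_cmd` (optionally per host), which is why they assert a completely different argument list for the same API call.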
@mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + cephadm_module.set_store('blink_device_light_cmd', 'echo hello') + c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')]) + assert wait(cephadm_module, c) == ['Set ident light for test: on'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'echo', 'hello'], error_ok=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'mgr0'): + cephadm_module.set_store('mgr0/blink_device_light_cmd', + 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'') + c = cephadm_module.blink_device_light( + 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')]) + assert wait(cephadm_module, c) == [ + 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on'] + _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [ + '--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784' + ], error_ok=True) + + @pytest.mark.parametrize( + "spec, meth", + [ + (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr), + (ServiceSpec('crash'), CephadmOrchestrator.apply_crash), + (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus), + (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana), + (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter), + (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager), + (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds), + (ServiceSpec( + 'mds', service_id='fsname', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='fsname', + network='' + )] + ) + ), CephadmOrchestrator.apply_mds), + (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw), + (RGWSpec( + service_id='bar', + rgw_realm='realm', rgw_zone='zone', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='bar', + network='' + )] + ) + ), CephadmOrchestrator.apply_rgw), + (NFSServiceSpec( + service_id='name', + ), CephadmOrchestrator.apply_nfs), + (IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password' + ), CephadmOrchestrator.apply_iscsi), + (CustomContainerSpec( + service_id='hello-world', + image='docker.io/library/hello-world:latest', + uid=65534, + gid=65534, + dirs=['foo/bar'], + files={ + 'foo/bar/xyz.conf': 'aaa\nbbb' + }, + bind_mounts=[[ + 'type=bind', + 'source=lib/modules', + 'destination=/lib/modules', + 'ro=true' + ]], + volume_mounts={ + 'foo/bar': '/foo/bar:Z' + }, + args=['--no-healthcheck'], + envs=['SECRET=password'], + ports=[8080, 8443] + ), CephadmOrchestrator.apply_container), + ] + ) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + 
@mock.patch("subprocess.run", mock.MagicMock()) + def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, meth, 'test'): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator): + spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'}) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, host='test'): + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert out == 'fsname' + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert not out + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop") + def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator): + spec = MDSSpec( + 'mds', + service_id='fsname', + placement=PlacementSpec(hosts=['host1', 'host2']), + config={'test': 'foo'} + ) + with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'): + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname update...") + CephadmServe(cephadm_module)._apply_all_services() + + [daemon] = cephadm_module.cache.daemons['host1'].keys() + + spec.placement.set_hosts(['host2']) + + ok_to_stop.side_effect = False + + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname update...") + CephadmServe(cephadm_module)._apply_all_services() + + ok_to_stop.assert_called_with([daemon[4:]], force=True) + + assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop + assert_rm_daemon(cephadm_module, spec.service_name(), 'host2') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module): + # test daemons on offline/maint hosts not removed when applying specs + # test daemons not added to hosts in maint/offline state + with with_host(cephadm_module, 'test1'): + with with_host(cephadm_module, 'test2'): + with with_host(cephadm_module, 'test3'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))): + # should get a mgr on all 3 hosts + # CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + + # put one host in offline state and one host in maintenance state + cephadm_module.offline_hosts = {'test2'} + cephadm_module.inventory._inventory['test3']['status'] = 'maintenance' + cephadm_module.inventory.save() + + # being in offline/maint mode should disqualify hosts from being + # candidates for scheduling + assert cephadm_module.cache.is_host_schedulable('test2') + assert cephadm_module.cache.is_host_schedulable('test3') + + assert cephadm_module.cache.is_host_unreachable('test2') + assert cephadm_module.cache.is_host_unreachable('test3') + + with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))): + # re-apply services. 
No mgr should be removed from maint/offline hosts + # crash daemon should only be on host not in maint/offline mode + CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1 + + cephadm_module.offline_hosts = {} + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nsuccess - systemd target xxx disabled'], 0)) + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + # should not raise an error + retval = cephadm_module.enter_host_maintenance(hostname) + assert retval.result_str().startswith('Daemons for Ceph cluster') + assert not retval.exception_str + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - disable the target'], 0)) + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + + with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'): + cephadm_module.enter_host_maintenance(hostname) + + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_i_really_mean_it(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + err_str = 'some kind of error' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - disable the target'], 0)) + _host_ok.return_value = 1, err_str + _get_daemon_types.return_value = ['mon'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + + with pytest.raises(OrchestratorError, match=err_str): + cephadm_module.enter_host_maintenance(hostname) + assert not cephadm_module.inventory._inventory[hostname]['status'] + + with pytest.raises(OrchestratorError, match=err_str): + cephadm_module.enter_host_maintenance(hostname, force=True) + assert not cephadm_module.inventory._inventory[hostname]['status'] + + retval = cephadm_module.enter_host_maintenance(hostname, force=True, yes_i_really_mean_it=True) + assert retval.result_str().startswith('Daemons for Ceph cluster') + assert not retval.exception_str + assert 
cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect(([''], [ + 'something\nsuccess - systemd target xxx enabled and started'], 0)) + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + # should not raise an error + retval = cephadm_module.exit_host_maintenance(hostname) + assert retval.result_str().startswith('Ceph cluster') + assert not retval.exception_str + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - unable to enable the target'], 0)) + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + + with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'): + cephadm_module.exit_host_maintenance(hostname) + + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.ssh.SSHManager._remote_connection") + @mock.patch("cephadm.ssh.SSHManager._execute_command") + @mock.patch("cephadm.ssh.SSHManager._check_execute_command") + @mock.patch("cephadm.ssh.SSHManager._write_remote_file") + def test_etc_ceph(self, _write_file, check_execute_command, execute_command, remote_connection, cephadm_module): + _write_file.side_effect = async_side_effect(None) + check_execute_command.side_effect = async_side_effect('') + execute_command.side_effect = async_side_effect(('{}', '', 0)) + remote_connection.side_effect = async_side_effect(mock.Mock()) + + assert cephadm_module.manage_etc_ceph_ceph_conf is False + + with with_host(cephadm_module, 'test'): + assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test') + + with with_host(cephadm_module, 'test'): + cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True) + cephadm_module.config_notify() + assert cephadm_module.manage_etc_ceph_ceph_conf is True + + CephadmServe(cephadm_module)._write_all_client_files() + # Make sure both ceph conf locations (default and per fsid) are called + _write_file.assert_has_calls([mock.call('test', '/etc/ceph/ceph.conf', b'', + 0o644, 0, 0, None), + mock.call('test', '/var/lib/ceph/fsid/config/ceph.conf', b'', + 0o644, 0, 0, None)] + ) + ceph_conf_files = cephadm_module.cache.get_host_client_files('test') + assert len(ceph_conf_files) == 2 + assert '/etc/ceph/ceph.conf' in ceph_conf_files + assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files + + # set extra config and expect that we deploy another ceph.conf + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + CephadmServe(cephadm_module)._write_all_client_files() + _write_file.assert_has_calls([mock.call('test', + 
'/etc/ceph/ceph.conf', + b'[mon]\nk=v\n', 0o644, 0, 0, None), + mock.call('test', + '/var/lib/ceph/fsid/config/ceph.conf', + b'[mon]\nk=v\n', 0o644, 0, 0, None)]) + # reload + cephadm_module.cache.last_client_files = {} + cephadm_module.cache.load() + + ceph_conf_files = cephadm_module.cache.get_host_client_files('test') + assert len(ceph_conf_files) == 2 + assert '/etc/ceph/ceph.conf' in ceph_conf_files + assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files + + # Make sure, _check_daemons does a redeploy due to monmap change: + f1_before_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + f2_before_digest = cephadm_module.cache.get_host_client_files( + 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0] + cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2') + CephadmServe(cephadm_module)._write_all_client_files() + f1_after_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + f2_after_digest = cephadm_module.cache.get_host_client_files( + 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0] + assert f1_before_digest != f1_after_digest + assert f2_before_digest != f2_after_digest + + @mock.patch("cephadm.inventory.HostCache.get_host_client_files") + def test_dont_write_client_files_to_unreachable_hosts(self, _get_client_files, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) # online + cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) # maintenance + cephadm_module.inventory.add_host(HostSpec('host3', '1.2.3.3')) # offline + + # mark host2 as maintenance and host3 as offline + cephadm_module.inventory._inventory['host2']['status'] = 'maintenance' + cephadm_module.offline_hosts.add('host3') + + # verify host2 and host3 are correctly marked as unreachable but host1 is not + assert not cephadm_module.cache.is_host_unreachable('host1') + assert cephadm_module.cache.is_host_unreachable('host2') + assert cephadm_module.cache.is_host_unreachable('host3') + + _get_client_files.side_effect = Exception('Called _get_client_files') + + # with the online host, should call _get_client_files which + # we have setup to raise an Exception + with pytest.raises(Exception, match='Called _get_client_files'): + CephadmServe(cephadm_module)._write_client_files({}, 'host1') + + # for the maintenance and offline host, _get_client_files should + # not be called and it should just return immediately with nothing + # having been raised + CephadmServe(cephadm_module)._write_client_files({}, 'host2') + CephadmServe(cephadm_module)._write_client_files({}, 'host3') + + def test_etc_ceph_init(self): + with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m: + assert m.manage_etc_ceph_ceph_conf is True + + @mock.patch("cephadm.CephadmOrchestrator.check_mon_command") + @mock.patch("cephadm.CephadmOrchestrator.extra_ceph_conf") + def test_extra_ceph_conf(self, _extra_ceph_conf, _check_mon_cmd, cephadm_module: CephadmOrchestrator): + # settings put into the [global] section in the extra conf + # need to be appended to existing [global] section in given + # minimal ceph conf, but anything in another section (e.g. 
[mon]) + # needs to continue to be its own section + + # this is the conf "ceph generate-minimal-conf" will return in this test + _check_mon_cmd.return_value = (0, """[global] +global_k1 = global_v1 +global_k2 = global_v2 +[mon] +mon_k1 = mon_v1 +[osd] +osd_k1 = osd_v1 +osd_k2 = osd_v2 +""", '') + + # test with extra ceph conf that has some of the sections from minimal conf + _extra_ceph_conf.return_value = CephadmOrchestrator.ExtraCephConf(conf="""[mon] +mon_k2 = mon_v2 +[global] +global_k3 = global_v3 +""", last_modified=datetime_now()) + + expected_combined_conf = """[global] +global_k1 = global_v1 +global_k2 = global_v2 +global_k3 = global_v3 + +[mon] +mon_k1 = mon_v1 +mon_k2 = mon_v2 + +[osd] +osd_k1 = osd_v1 +osd_k2 = osd_v2 +""" + + assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf + + def test_client_keyrings_special_host_labels(self, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1'])) + cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS])) + cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING])) + # hosts need to be marked as having had refresh to be available for placement + # so "refresh" with empty daemon list + cephadm_module.cache.update_host_daemons('host1', {}) + cephadm_module.cache.update_host_daemons('host2', {}) + cephadm_module.cache.update_host_daemons('host3', {}) + + assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + + assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + + cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1'))) + + with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + _mon_cmd.return_value = (0, 'real-keyring', '') + client_files = CephadmServe(cephadm_module)._calc_client_files() + assert 'host1' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys() + assert 'host2' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys() + assert 'host3' not in client_files.keys() + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + def check_registry_credentials(url, username, password): + assert json.loads(cephadm_module.get_store('registry_credentials')) == { + 'url': url, 'username': username, 'password': password} + + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + # test successful login with valid args + code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password') + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login attempt with invalid args + code, out, err = cephadm_module.registry_login('bad-args') + assert err == ("Invalid arguments. 
Please provide arguments <url> <username> <password> " + "or -i <login credentials json file>") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login using invalid json file + code, out, err = cephadm_module.registry_login( + None, None, None, '{"bad-json": "bad-json"}') + assert err == ("json provided for custom registry login did not include all necessary fields. " + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test good login using valid json file + good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", " + " \"password\": \"" + "json-pass" + "\"}") + code, out, err = cephadm_module.registry_login(None, None, None, good_json) + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + # test bad login where args are valid but login command fails + _run_cephadm.side_effect = async_side_effect(('{}', 'error', 1)) + code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password') + assert err == 'Host test failed to login to fail-url as fail-user with given password' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['image@repo_digest'], + }))) + @pytest.mark.parametrize("use_repo_digest", + [ + False, + True + ]) + def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator): + cephadm_module.use_repo_digest = use_repo_digest + + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.set_container_image('global', 'image') + + if use_repo_digest: + + CephadmServe(cephadm_module).convert_tags_to_repo_digest() + + _, image, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': 'global', + 'key': 'container_image', + }) + if use_repo_digest: + assert image == 'image@repo_digest' + else: + assert image == 'image' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch +Traceback (most recent call last): + File "<stdin>", line 6112, in <module> + File "<stdin>", line 1299, in _infer_fsid + File "<stdin>", line 1382, in _infer_image + File "<stdin>", line 3612, in command_ceph_volume + File "<stdin>", line 1061, in call_throws""" + + with with_host(cephadm_module, 'test'): + _run_cephadm.reset_mock() + _run_cephadm.side_effect = OrchestratorError(error_message) + + s = CephadmServe(cephadm_module)._refresh_host_devices('test') + assert s == 'host test `cephadm ceph-volume` failed: ' + error_message + + assert _run_cephadm.mock_calls == [ + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='', + no_fsid=False, error_ok=False, log_output=False), + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty'], image='', + 
no_fsid=False, error_ok=False, log_output=False), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.mock_store_set('_ceph_get', 'osd_map', { + 'osds': [ + { + 'osd': 1, + 'up_from': 0, + 'uuid': 'uuid' + } + ] + }) + + ceph_volume_lvm_list = { + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }] + } + _run_cephadm.reset_mock(return_value=True, side_effect=True) + + async def _r_c(*args, **kwargs): + if 'ceph-volume' in args: + return (json.dumps(ceph_volume_lvm_list), '', 0) + else: + assert ['_orch', 'deploy'] in args + raise OrchestratorError("let's fail somehow") + _run_cephadm.side_effect = _r_c + assert cephadm_module._osd_activate( + ['test']).stderr == "let's fail somehow" + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + + async def _ceph_volume_list(s, host, entity, cmd, **kwargs): + logging.info(f'ceph-volume cmd: {cmd}') + if 'raw' in cmd: + return json.dumps({ + "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": { + "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8", + "device": "/dev/loop0", + "osd_id": 21, + "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5", + "type": "bluestore" + }, + }), '', 0 + if 'lvm' in cmd: + return json.dumps({ + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }, { + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'db' + }] + }), '', 0 + return '{}', '', 0 + + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_service(cephadm_module, dg, host='test'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1): + assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1 + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator): + with pytest.raises(OrchestratorError): + with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN) + pass + assert False + with with_host(cephadm_module, 'test1', 
refresh_hosts=False, rm_with_force=True): + with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN) + + @pytest.mark.parametrize("facts, settings, expected_value", + [ + # All options are available on all hosts + ( + { + "host1": + { + "sysctl_options": + { + 'opt1': 'val1', + 'opt2': 'val2', + } + }, + "host2": + { + "sysctl_options": + { + 'opt1': '', + 'opt2': '', + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': [], 'host2': []} # expected_value + ), + # opt1 is missing on host 1, opt2 is missing on host2 + ({ + "host1": + { + "sysctl_options": + { + 'opt2': '', + 'optX': '', + } + }, + "host2": + { + "sysctl_options": + { + 'opt1': '', + 'opt3': '', + 'opt4': '', + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': ['opt1'], 'host2': ['opt2']} # expected_value + ), + # All options are missing on all hosts + ({ + "host1": + { + "sysctl_options": + { + } + }, + "host2": + { + "sysctl_options": + { + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': ['opt1', 'opt2'], 'host2': [ + 'opt1', 'opt2']} # expected_value + ), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_tuned_profiles_settings_validation(self, facts, settings, expected_value, cephadm_module): + with with_host(cephadm_module, 'test'): + spec = mock.Mock() + spec.settings = sorted(settings) + spec.placement.filter_matching_hostspecs = mock.Mock() + spec.placement.filter_matching_hostspecs.return_value = ['host1', 'host2'] + cephadm_module.cache.facts = facts + assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_tuned_profiles_validation(self, cephadm_module): + with with_host(cephadm_module, 'test'): + + with pytest.raises(OrchestratorError, match="^Invalid placement specification.+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(hosts=[]) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="Invalid spec: settings section cannot be empty."): + spec = mock.Mock() + spec.settings = {} + spec.placement = PlacementSpec(hosts=['host1', 'host2']) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Placement 'count' field is no supported .+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(count=1) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Placement 'count_per_host' field is no supported .+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(count_per_host=1, label='foo') + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Found invalid host"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(hosts=['host1', 'host2']) + cephadm_module.inventory = mock.Mock() + cephadm_module.inventory.all_specs = mock.Mock( + return_value=[mock.Mock().hostname, mock.Mock().hostname]) + cephadm_module._validate_tuned_profile_spec(spec) + + def test_set_unmanaged(self, cephadm_module): + cephadm_module.spec_store._specs['crash'] = ServiceSpec('crash', unmanaged=False) + assert not cephadm_module.spec_store._specs['crash'].unmanaged + cephadm_module.spec_store.set_unmanaged('crash', True) + assert 
cephadm_module.spec_store._specs['crash'].unmanaged + cephadm_module.spec_store.set_unmanaged('crash', False) + assert not cephadm_module.spec_store._specs['crash'].unmanaged + + def test_inventory_known_hostnames(self, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) + cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) + cephadm_module.inventory.add_host(HostSpec('host3.domain', '1.2.3.3')) + cephadm_module.inventory.add_host(HostSpec('host4.domain', '1.2.3.4')) + cephadm_module.inventory.add_host(HostSpec('host5', '1.2.3.5')) + + # update_known_hostname expects args to be <hostname, shortname, fqdn> + # as are gathered from cephadm gather-facts. Although, passing the + # names in the wrong order should actually have no effect on functionality + cephadm_module.inventory.update_known_hostnames('host1', 'host1', 'host1.domain') + cephadm_module.inventory.update_known_hostnames('host2.domain', 'host2', 'host2.domain') + cephadm_module.inventory.update_known_hostnames('host3', 'host3', 'host3.domain') + cephadm_module.inventory.update_known_hostnames('host4.domain', 'host4', 'host4.domain') + cephadm_module.inventory.update_known_hostnames('host5', 'host5', 'host5') + + assert 'host1' in cephadm_module.inventory + assert 'host1.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host1') == '1.2.3.1' + assert cephadm_module.inventory.get_addr('host1.domain') == '1.2.3.1' + + assert 'host2' in cephadm_module.inventory + assert 'host2.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host2') == '1.2.3.2' + assert cephadm_module.inventory.get_addr('host2.domain') == '1.2.3.2' + + assert 'host3' in cephadm_module.inventory + assert 'host3.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host3') == '1.2.3.3' + assert cephadm_module.inventory.get_addr('host3.domain') == '1.2.3.3' + + assert 'host4' in cephadm_module.inventory + assert 'host4.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host4') == '1.2.3.4' + assert cephadm_module.inventory.get_addr('host4.domain') == '1.2.3.4' + + assert 'host4.otherdomain' not in cephadm_module.inventory + with pytest.raises(OrchestratorError): + cephadm_module.inventory.get_addr('host4.otherdomain') + + assert 'host5' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host5') == '1.2.3.5' + with pytest.raises(OrchestratorError): + cephadm_module.inventory.get_addr('host5.domain') + + def test_async_timeout_handler(self, cephadm_module): + cephadm_module.default_cephadm_command_timeout = 900 + + async def _timeout(): + raise asyncio.TimeoutError + + with pytest.raises(OrchestratorError, match=r'Command timed out \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler(): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command timed out on host hostA \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler('hostA'): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "testing" timed out \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler(cmd='testing'): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "testing" timed out on host hostB \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler('hostB', 'testing'): + cephadm_module.wait_async(_timeout()) + + 
with pytest.raises(OrchestratorError, match=r'Command timed out \(non-default 111 second timeout\)'): + with cephadm_module.async_timeout_handler(timeout=111): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'): + with cephadm_module.async_timeout_handler('hostC', 'very slow', 999): + cephadm_module.wait_async(_timeout()) + + @mock.patch("cephadm.CephadmOrchestrator.remove_osds") + @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None) + @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: []) + def test_host_drain_zap(self, _rm_osds, cephadm_module): + # pass force=true in these tests to bypass _admin label check + cephadm_module.drain_host('host1', force=True, zap_osd_devices=False) + assert _rm_osds.called_with([], zap=False) + + cephadm_module.drain_host('host1', force=True, zap_osd_devices=True) + assert _rm_osds.called_with([], zap=True) + + def test_process_ls_output(self, cephadm_module): + sample_ls_output = """[ + { + "style": "cephadm:v1", + "name": "mon.vm-00", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mon.vm-00", + "enabled": true, + "state": "running", + "service_name": "mon", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "b170b964a6e2918955362eb36195627c6086d3f859d4ebce2ee13f3ee4738733", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 56214159, + "cpu_percentage": "2.32%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:31:11.752300Z", + "created": "2023-09-22T22:15:24.121387Z", + "deployed": "2023-09-22T22:31:10.383431Z", + "configured": "2023-09-22T22:31:11.859440Z" + }, + { + "style": "cephadm:v1", + "name": "mgr.vm-00.mpexeg", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mgr.vm-00.mpexeg", + "enabled": true, + "state": "running", + "service_name": "mgr", + "ports": [ + 8443, + 9283, + 8765 + ], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "6e7756cef553a25a2a84227e8755d3d25046b9cd8758b23c698d34b3af895242", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 529740595, + "cpu_percentage": "8.35%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:30:18.587021Z", + "created": 
"2023-09-22T22:15:29.101409Z", + "deployed": "2023-09-22T22:30:17.339114Z", + "configured": "2023-09-22T22:30:18.758122Z" + }, + { + "style": "cephadm:v1", + "name": "agent.vm-00", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@agent.vm-00", + "enabled": true, + "state": "running", + "service_name": "agent", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "container_id": null, + "container_image_name": null, + "container_image_id": null, + "container_image_digests": null, + "version": null, + "started": null, + "created": "2023-09-22T22:33:34.708289Z", + "deployed": null, + "configured": "2023-09-22T22:33:34.722289Z" + }, + { + "style": "cephadm:v1", + "name": "osd.0", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@osd.0", + "enabled": true, + "state": "running", + "service_name": "osd.foo", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "93f71c60820b86901a45b3b1fe3dba3e3e677b37fd22310b7e7da3f67bb8ccd6", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 73410805, + "cpu_percentage": "6.54%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:41:29.019587Z", + "created": "2023-09-22T22:41:03.615080Z", + "deployed": "2023-09-22T22:41:24.965222Z", + "configured": "2023-09-22T22:41:29.119250Z" + } +]""" + + now = str_to_datetime('2023-09-22T22:45:29.119250Z') + cephadm_module._cluster_fsid = '588f83ba-5995-11ee-9e94-52540057a206' + with mock.patch("cephadm.module.datetime_now", lambda: now): + cephadm_module._process_ls_output('vm-00', json.loads(sample_ls_output)) + assert 'vm-00' in cephadm_module.cache.daemons + assert 'mon.vm-00' in cephadm_module.cache.daemons['vm-00'] + assert 'mgr.vm-00.mpexeg' in cephadm_module.cache.daemons['vm-00'] + assert 'agent.vm-00' in cephadm_module.cache.daemons['vm-00'] + assert 'osd.0' in cephadm_module.cache.daemons['vm-00'] + + daemons = cephadm_module.cache.get_daemons_by_host('vm-00') + c_img_ids = [dd.container_image_id for dd in daemons if dd.daemon_type != 'agent'] + assert all(c_img_id == '674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55' for c_img_id in c_img_ids) + last_refreshes = [dd.last_refresh for dd in daemons] + assert all(lrf == now for lrf in last_refreshes) + versions = [dd.version for dd in daemons if dd.daemon_type != 'agent'] + assert all(version == '18.0.0-5185-g7b3a4f2b' for version in versions) + + osd = cephadm_module.cache.get_daemons_by_type('osd', 'vm-00')[0] + assert osd.cpu_percentage == '6.54%' + assert osd.memory_usage == 73410805 + assert osd.created == str_to_datetime('2023-09-22T22:41:03.615080Z') diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py 
b/src/pybind/mgr/cephadm/tests/test_completion.py new file mode 100644 index 000000000..327c12d2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_completion.py @@ -0,0 +1,40 @@ +import pytest + +from ..module import forall_hosts + + +class TestCompletion(object): + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map(self, input, expected, cephadm_module): + @forall_hosts + def run_forall(*args): + return str(args) + assert run_forall(input) == expected + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map_self(self, input, expected, cephadm_module): + class Run(object): + def __init__(self): + self.attr = 1 + + @forall_hosts + def run_forall(self, *args): + assert self.attr == 1 + return str(args) + + assert Run().run_forall(input) == expected diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py b/src/pybind/mgr/cephadm/tests/test_configchecks.py new file mode 100644 index 000000000..3cae0a27d --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py @@ -0,0 +1,668 @@ +import copy +import json +import logging +import ipaddress +import pytest +import uuid + +from time import time as now + +from ..configchecks import CephadmConfigChecks +from ..inventory import HostCache +from ..upgrade import CephadmUpgrade, UpgradeState +from orchestrator import DaemonDescription + +from typing import List, Dict, Any, Optional + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +host_sample = { + "arch": "x86_64", + "bios_date": "04/01/2014", + "bios_version": "F2", + "cpu_cores": 16, + "cpu_count": 2, + "cpu_load": { + "15min": 0.0, + "1min": 0.01, + "5min": 0.01 + }, + "cpu_model": "Intel® Xeon® Processor E5-2698 v3", + "cpu_threads": 64, + "flash_capacity": "4.0TB", + "flash_capacity_bytes": 4000797868032, + "flash_count": 2, + "flash_list": [ + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sda", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sdb", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + ], + "hdd_capacity": "16.0TB", + "hdd_capacity_bytes": 16003148120064, + "hdd_count": 4, + "hdd_list": [ + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdc", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdd", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sde", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdf", + 
"disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + ], + "hostname": "dummy", + "interfaces": { + "eth0": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.17.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth1": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.18.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth2": { + "driver": "r8169", + "iftype": "physical", + "ipv4_address": "10.7.19.1/24", + "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64", + "lower_devs_list": [], + "mtu": 1500, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [] + }, + }, + "kernel": "4.18.0-240.10.1.el8_3.x86_64", + "kernel_parameters": { + "net.ipv4.ip_nonlocal_bind": "0", + }, + "kernel_security": { + "SELINUX": "enforcing", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(enforcing, targeted)", + "type": "SELinux" + }, + "memory_available_kb": 19489212, + "memory_free_kb": 245164, + "memory_total_kb": 32900916, + "model": "StorageHeavy", + "nic_count": 3, + "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)", + "subscribed": "Yes", + "system_uptime": 777600.0, + "timestamp": now(), + "vendor": "Ceph Servers Inc", +} + + +def role_list(n: int) -> List[str]: + if n == 1: + return ['mon', 'mgr', 'osd'] + if n in [2, 3]: + return ['mon', 'mds', 'osd'] + + return ['osd'] + + +def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'): + # public network = eth0, cluster_network = eth1 + assert count > 3 + assert public_network + num_disks = host_sample['hdd_count'] + hosts = {} + daemons = {} + daemon_to_host = {} + osd_num = 0 + public_netmask = public_network.split('/')[1] + cluster_ip_list = [] + cluster_netmask = '' + + public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())] + if cluster_network: + cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())] + cluster_netmask = cluster_network.split('/')[1] + + for n in range(1, count + 1, 1): + + new_host = copy.deepcopy(host_sample) + hostname = f"node-{n}.ceph.com" + + new_host['hostname'] = hostname + new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}" + if cluster_ip_list: + new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}" + else: + new_host['interfaces']['eth1']['ipv4_address'] = '' + + hosts[hostname] = new_host + daemons[hostname] = {} + for r in role_list(n): + name = '' + if r == 'osd': + for n in range(num_disks): + osd = DaemonDescription( + hostname=hostname, daemon_type='osd', daemon_id=osd_num) + name = f"osd.{osd_num}" + daemons[hostname][name] = osd + daemon_to_host[name] = hostname + osd_num += 1 + else: + name = f"{r}.{hostname}" + daemons[hostname][name] = DaemonDescription( + hostname=hostname, daemon_type=r, daemon_id=hostname) + daemon_to_host[name] = hostname + + logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}") + return hosts, daemons, daemon_to_host + + +@pytest.fixture() +def mgr(): + """Provide a fake ceph mgr object preloaded with 
a configuration""" + mgr = FakeMgr() + mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \ + generate_testdata(public_network='10.9.64.0/24', cluster_network='') + mgr.module_option.update({ + "config_checks_enabled": True, + }) + yield mgr + + +class FakeMgr: + + def __init__(self): + self.datastore = {} + self.module_option = {} + self.health_checks = {} + self.default_version = 'quincy' + self.version_overrides = {} + self.daemon_to_host = {} + + self.cache = HostCache(self) + self.upgrade = CephadmUpgrade(self) + + def set_health_checks(self, checks: dict): + return + + def get_module_option(self, keyname: str) -> Optional[str]: + return self.module_option.get(keyname, None) + + def set_module_option(self, keyname: str, value: str) -> None: + return None + + def get_store(self, keyname: str, default=None) -> Optional[str]: + return self.datastore.get(keyname, None) + + def set_store(self, keyname: str, value: str) -> None: + self.datastore[keyname] = value + return None + + def _ceph_get_server(self) -> None: + pass + + def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]: + key = f"{daemon_type}.{daemon_id}" + if key in self.version_overrides: + logger.debug(f"override applied for {key}") + version_str = self.version_overrides[key] + else: + version_str = self.default_version + + return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]} + + def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]: + num_disks = host_sample['hdd_count'] + osd_num = 0 + service_map = [] + + for hostname in self.cache.facts: + + host_num = int(hostname.split('.')[0].split('-')[1]) + svc_list = [] + for r in role_list(host_num): + if r == 'osd': + for _n in range(num_disks): + svc_list.append({ + "type": "osd", + "id": osd_num, + }) + osd_num += 1 + else: + svc_list.append({ + "type": r, + "id": hostname, + }) + + service_map.append({"services": svc_list}) + logger.debug(f"services map - {json.dumps(service_map)}") + return service_map + + def use_repo_digest(self) -> None: + return None + + +class TestConfigCheck: + + def test_to_json(self, mgr): + checker = CephadmConfigChecks(mgr) + out = checker.to_json() + assert out + assert len(out) == len(checker.health_checks) + + def test_lookup_check(self, mgr): + checker = CephadmConfigChecks(mgr) + check = checker.lookup_check('osd_mtu_size') + logger.debug(json.dumps(check.to_json())) + assert check + assert check.healthcheck_name == "CEPHADM_CHECK_MTU" + + def test_old_checks_removed(self, mgr): + mgr.datastore.update({ + "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", ' + '"kernel_security": "enabled", "public_network": "enabled", ' + '"kernel_version": "enabled", "network_missing": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", ' + '"os_subscription": "enabled", "ceph_release": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert "bogus_one" not in checks + assert "bogus_two" not in checks + assert len(checks) == len(checker.health_checks) + + def test_new_checks(self, mgr): + mgr.datastore.update({ + "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert len(checks) == len(checker.health_checks) + + def test_no_issues(self, mgr): + checker = CephadmConfigChecks(mgr) + 
checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + + assert not mgr.health_checks + + def test_no_public_network(self, mgr): + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24" + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + logger.debug(mgr.health_checks) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \ + 'node-1.ceph.com does not have an interface on any public network' + + def test_missing_networks(self, mgr): + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.66.0/24'] + checker.run_checks() + + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \ + "10.9.66.0/24 not found on any host in the cluster" + + def test_bad_mtu_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \ + 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000' + + def test_bad_mtu_multiple(self, mgr): + + for n in [1, 5]: + bad_node = mgr.cache.facts[f'node-{n}.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2 + + def test_bad_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 100 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \ + 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000' + + def test_super_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 10000 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert not mgr.health_checks + + def test_release_mismatch_single(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } 
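+ # osd.1 now reports an older release than the FakeMgr default (quincy), so the release check should flag it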
+ + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \ + 'osd.1 is running pacific (majority of cluster is using quincy)' + + def test_release_mismatch_multi(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + "osd.5": "octopus", + } + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2 + + def test_kernel_mismatch(self, mgr): + + bad_host = mgr.cache.facts['node-1.ceph.com'] + bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \ + "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18" + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1 + + def test_inconsistent_subscription(self, mgr): + + bad_host = mgr.cache.facts['node-5.ceph.com'] + bad_host['subscribed'] = "no" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \ + "node-5.ceph.com does not have an active subscription" + + def test_kernel_security_inconsistent(self, mgr): + + bad_node = mgr.cache.facts['node-3.ceph.com'] + bad_node['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \ + "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster" + + def test_release_and_bad_mtu(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 2 + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks + + def 
test_release_mtu_LSM(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 3 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks + + def test_release_mtu_LSM_subscription(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node1['subscribed'] = "no" + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 4 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \ + "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + + def test_skip_release_during_upgrade(self, mgr): + mgr.upgrade.upgrade_state = UpgradeState.from_json({ + 'target_name': 'wah', + 'progress_id': str(uuid.uuid4()), + 'target_id': 'wah', + 'error': '', + 'paused': False, + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}") + assert checker.skipped_checks_count == 1 + assert 'ceph_release' in checker.skipped_checks + + def test_skip_when_disabled(self, mgr): + mgr.module_option.update({ + "config_checks_enabled": "false" + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert checker.active_checks_count == 0 + + def test_skip_mtu_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 7 + + def test_skip_mtu_lsm_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + 
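+ # with osd_mtu_size and kernel_security disabled in the store, only 6 of the 8 defined checks should stay active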
checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks and \ + 'kernel_security' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 6 + assert not mgr.health_checks diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py new file mode 100644 index 000000000..7838ee5d4 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_facts.py @@ -0,0 +1,31 @@ +from ..import CephadmOrchestrator + +from .fixtures import wait + +from tests import mock + + +def test_facts(cephadm_module: CephadmOrchestrator): + facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}} + cephadm_module.cache.facts = facts + ret_facts = cephadm_module.get_facts('node-1.ceph.com') + assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}] + + +@mock.patch("cephadm.inventory.Inventory.update_known_hostnames") +def test_known_hostnames(_update_known_hostnames, cephadm_module: CephadmOrchestrator): + host_facts = {'hostname': 'host1.domain', + 'shortname': 'host1', + 'fqdn': 'host1.domain', + 'memory_free_kb': 37383384, + 'memory_total_kb': 40980612, + 'nic_count': 2} + cephadm_module.cache.update_host_facts('host1', host_facts) + _update_known_hostnames.assert_called_with('host1.domain', 'host1', 'host1.domain') + + host_facts = {'hostname': 'host1.domain', + 'memory_free_kb': 37383384, + 'memory_total_kb': 40980612, + 'nic_count': 2} + cephadm_module.cache.update_host_facts('host1', host_facts) + _update_known_hostnames.assert_called_with('host1.domain', '', '') diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py new file mode 100644 index 000000000..1f1d32e8b --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_migration.py @@ -0,0 +1,340 @@ +import json +import pytest + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm import CephadmOrchestrator +from cephadm.inventory import SPEC_STORE_PREFIX +from cephadm.migrations import LAST_MIGRATION +from cephadm.tests.fixtures import _run_cephadm, wait, with_host, receive_agent_metadata_all_hosts +from cephadm.serve import CephadmServe +from tests import mock + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_scheduler(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1', refresh_hosts=False): + with with_host(cephadm_module, 'host2', refresh_hosts=False): + + # emulate the old scheduler: + c = cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."): + CephadmServe(cephadm_module)._apply_all_services() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + # assert we need all daemons. 
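+ # (daemon metadata has not been refreshed for all hosts yet, so the migration cannot proceed and stays at 0)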
+ assert cephadm_module.migration_current == 0 + + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + receive_agent_metadata_all_hosts(cephadm_module) + cephadm_module.migration.migrate() + + CephadmServe(cephadm_module)._apply_all_services() + + out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())} + assert out == {'host1', 'host2'} + + c = cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # Sorry for this hack, but I need to make sure Migration thinks + # we have updated all daemons already. + cephadm_module.cache.last_daemon_update['host1'] = datetime_now() + cephadm_module.cache.last_daemon_update['host2'] = datetime_now() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + out = [o.spec.placement for o in wait( + cephadm_module, cephadm_module.describe_service())] + assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec( + hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])] + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(hosts=['host1']) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'placement': { + 'count': 5, + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 2 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(count=5) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mds_one(cephadm_module: 
CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({ + 'spec': { + 'service_type': 'mds', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + # there is nothing to migrate, as the spec is gone now. + assert len(cephadm_module.spec_store.all_specs) == 0 + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'ganesha-foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['ganesha-foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator): + assert 'client.admin' not in cephadm_module.keys.keys + + cephadm_module.migration_current = 3 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator): + cephadm_module.migration_current = 0 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = LAST_MIGRATION + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = None + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = LAST_MIGRATION + 1 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = None + ongoing = cephadm_module.migration.is_migration_ongoing() + assert 
not ongoing + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = LAST_MIGRATION + 1 + ongoing = cephadm_module.migration.is_migration_ongoing() + assert ongoing + assert cephadm_module.migration_current == 0 + + +@pytest.mark.parametrize( + "rgw_spec_store_entry, should_migrate", + [ + ({ + 'spec': { + 'service_type': 'rgw', + 'service_name': 'rgw.foo', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512', + 'rgw_frontend_port': '5000', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, True), + ({ + 'spec': { + 'service_type': 'rgw', + 'service_name': 'rgw.foo', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + }, + 'created': datetime_to_str(datetime_now()), + }, False), + ] +) +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_entry, should_migrate): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'rgw', + json.dumps(rgw_spec_store_entry, sort_keys=True), + ) + + # make sure rgw_migration_queue is populated accordingly + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + ls = json.loads(cephadm_module.get_store('rgw_migration_queue')) + assert 'rgw' == ls[0]['spec']['service_type'] + + # shortcut rgw_migration_queue loading by directly assigning + # ls output to rgw_migration_queue list + cephadm_module.migration.rgw_migration_queue = ls + + # skip other migrations and go directly to 5_6 migration (RGW spec) + cephadm_module.migration_current = 5 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + if should_migrate: + # make sure the spec has been migrated and the param=value entries + # that were part of the rgw_frontend_type are now in the new + # 'rgw_frontend_extra_args' list + assert 'rgw.foo' in cephadm_module.spec_store.all_specs + rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo'] + assert dict(rgw_spec.to_json()) == {'service_type': 'rgw', + 'service_id': 'foo', + 'service_name': 'rgw.foo', + 'placement': {'hosts': ['host1']}, + 'spec': { + 'rgw_frontend_extra_args': ['tcp_nodelay=1', + 'request_timeout_ms=65000', + 'rgw_thread_pool_size=512'], + 'rgw_frontend_port': '5000', + 'rgw_frontend_type': 'beast', + }} + else: + # in a real environment, we still expect the spec to be there, + # just untouched by the migration. 
For this test specifically + # though, the spec will only have ended up in the spec store + # if it was migrated, so we can use this to test the spec + # was untouched + assert 'rgw.foo' not in cephadm_module.spec_store.all_specs diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py new file mode 100644 index 000000000..6685fcb2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py @@ -0,0 +1,298 @@ +import json + +from cephadm.services.osd import OSDRemovalQueue, OSD +import pytest +from tests import mock +from .fixtures import with_cephadm_module +from datetime import datetime + + +class MockOSD: + + def __init__(self, osd_id): + self.osd_id = osd_id + + +class TestOSDRemoval: + + @pytest.mark.parametrize( + "osd_id, osd_df, expected", + [ + # missing 'nodes' key + (1, dict(nodes=[]), -1), + # missing 'pgs' key + (1, dict(nodes=[dict(id=1)]), -1), + # id != osd_id + (1, dict(nodes=[dict(id=999, pgs=1)]), -1), + # valid + (1, dict(nodes=[dict(id=1, pgs=1)]), 1), + ] + ) + def test_get_pg_count(self, rm_util, osd_id, osd_df, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df): + assert rm_util.get_pg_count(osd_id) == expected + + @pytest.mark.parametrize( + "osds, ok_to_stop, expected", + [ + # no osd_ids provided + ([], [False], []), + # all osds are ok_to_stop + ([1, 2], [True], [1, 2]), + # osds are ok_to_stop after the second iteration + ([1, 2], [False, True], [2]), + # osds are never ok_to_stop, (taking the sample size `(len(osd_ids))` into account), + # expected to get False + ([1, 2], [False, False], []), + ] + ) + def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + assert rm_util.find_osd_stop_threshold(osds) == expected + + def test_process_removal_queue(self, rm_util): + # TODO: ! 
+ # rm_util.process_removal_queue() + pass + + @pytest.mark.parametrize( + "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected", + [ + # drain one at a time, one already draining + (1, [1], [1], [True], 0), + # drain one at a time, none draining yet + (1, [], [1, 2, 3], [True, True, True], 1), + # drain one at a time, one already draining, none ok-to-stop + (1, [1], [1], [False], 0), + # drain one at a time, none draining, one ok-to-stop + (1, [], [1, 2, 3], [False, False, True], 1), + # drain three at a time, one already draining, all ok-to-stop + (3, [1], [1, 2, 3], [True, True, True], 2), + # drain two at a time, none already draining, none ok-to-stop + (2, [], [1, 2, 3], [False, False, False], 0), + # drain two at a time, none already draining, none idling + (2, [], [], [], 0), + ] + ) + def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected): + with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m: + with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds): + with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + removal_queue = OSDRemovalQueue(m) + assert len(removal_queue._ready_to_drain_osds()) == expected + + def test_ok_to_stop(self, rm_util): + rm_util.ok_to_stop([MockOSD(1)]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']}, + error_ok=True) + + def test_safe_to_destroy(self, rm_util): + rm_util.safe_to_destroy([1]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy', + 'ids': ['1']}, error_ok=True) + + def test_destroy_osd(self, rm_util): + rm_util.destroy_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_purge_osd(self, rm_util): + rm_util.purge_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_load(self, cephadm_module, rm_util): + data = json.dumps([ + { + "osd_id": 35, + "started": True, + "draining": True, + "stopped": False, + "replace": False, + "force": False, + "zap": False, + "nodename": "node2", + "drain_started_at": "2020-09-14T11:41:53.960463", + "drain_stopped_at": None, + "drain_done_at": None, + "process_started_at": "2020-09-14T11:41:52.245832" + } + ]) + cephadm_module.set_store('osd_remove_queue', data) + cephadm_module.to_remove_osds.load_from_store() + + expected = OSDRemovalQueue(cephadm_module) + expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True)) + assert cephadm_module.to_remove_osds == expected + + +class TestOSD: + + def test_start(self, osd_obj): + assert osd_obj.started is False + osd_obj.start() + assert osd_obj.started is True + assert osd_obj.stopped is False + + def test_start_draining_purge(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + ret = osd_obj.start_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0) + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is False + assert ret is True + + def test_start_draining_replace(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + osd_obj.replace = True + ret = osd_obj.start_draining() + 
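+ # the plain-removal path drains by reweighting the OSD to 0.0; the replace path in the next test marks it 'out' instead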
osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out') + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is True + assert ret is True + + def test_start_draining_stopped(self, osd_obj): + osd_obj.stopped = True + ret = osd_obj.start_draining() + assert osd_obj.drain_started_at is None + assert ret is False + assert osd_obj.draining is False + + def test_stop_draining_replace(self, osd_obj): + osd_obj.replace = True + ret = osd_obj.stop_draining() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in') + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + def test_stop_draining_purge(self, osd_obj): + osd_obj.original_weight = 1.0 + ret = osd_obj.stop_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0) + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + @mock.patch('cephadm.services.osd.OSD.stop_draining') + def test_stop(self, stop_draining_mock, osd_obj): + osd_obj.stop() + assert osd_obj.started is False + assert osd_obj.stopped is True + stop_draining_mock.assert_called_once() + + @pytest.mark.parametrize( + "draining, empty, expected", + [ + # must be !draining! and !not empty! to yield True + (True, not True, True), + # not draining and not empty + (False, not True, False), + # not draining and empty + (False, True, False), + # draining and empty + (True, True, False), + ] + ) + def test_is_draining(self, osd_obj, draining, empty, expected): + with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)): + osd_obj.draining = draining + assert osd_obj.is_draining is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop") + def test_is_ok_to_stop(self, _, osd_obj): + osd_obj.is_ok_to_stop + osd_obj.rm_util.ok_to_stop.assert_called_once() + + @pytest.mark.parametrize( + "pg_count, expected", + [ + (0, True), + (1, False), + (9999, False), + (-1, False), + ] + ) + def test_is_empty(self, osd_obj, pg_count, expected): + with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count): + assert osd_obj.is_empty is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy") + def test_safe_to_destroy(self, _, osd_obj): + osd_obj.safe_to_destroy() + osd_obj.rm_util.safe_to_destroy.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag") + def test_down(self, _, osd_obj): + osd_obj.down() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down') + + @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd") + def test_destroy_osd(self, _, osd_obj): + osd_obj.destroy() + osd_obj.rm_util.destroy_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd") + def test_purge(self, _, osd_obj): + osd_obj.purge() + osd_obj.rm_util.purge_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count") + def test_pg_count(self, _, osd_obj): + osd_obj.get_pg_count() + osd_obj.rm_util.get_pg_count.assert_called_once() + + def test_drain_status_human_not_started(self, osd_obj): + assert osd_obj.drain_status_human() == 'not started' + + def test_drain_status_human_started(self, osd_obj): + osd_obj.started = True + assert osd_obj.drain_status_human() == 'started' + + def test_drain_status_human_draining(self, osd_obj): + osd_obj.started = True + osd_obj.draining = True + assert 
osd_obj.drain_status_human() == 'draining' + + def test_drain_status_human_done(self, osd_obj): + osd_obj.started = True + osd_obj.draining = False + osd_obj.drain_done_at = datetime.utcnow() + assert osd_obj.drain_status_human() == 'done, waiting for purge' + + +class TestOSDRemovalQueue: + + def test_queue_size(self, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + assert q.queue_size() == 0 + q.osds.add(osd_obj) + assert q.queue_size() == 1 + + @mock.patch("cephadm.services.osd.OSD.start") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_enqueue(self, exist, start, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + q.enqueue(osd_obj) + osd_obj.start.assert_called_once() + + @mock.patch("cephadm.services.osd.OSD.stop") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_rm_raise(self, exist, stop, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + with pytest.raises(KeyError): + q.rm(osd_obj) + osd_obj.stop.assert_called_once() + + @mock.patch("cephadm.services.osd.OSD.stop") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_rm(self, exist, stop, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + q.osds.add(osd_obj) + q.rm(osd_obj) + osd_obj.stop.assert_called_once() diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py new file mode 100644 index 000000000..067cd5028 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py @@ -0,0 +1,1699 @@ +# Disable autopep8 for this file: + +# fmt: off + +from typing import NamedTuple, List, Dict, Optional +import pytest + +from ceph.deployment.hostspec import HostSpec +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec +from ceph.deployment.hostspec import SpecValidationError + +from cephadm.module import HostAssignment +from cephadm.schedule import DaemonPlacement +from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError + + +def wrapper(func): + # some odd thingy to reverse the order of arguments + def inner(*args): + def inner2(expected): + func(expected, *args) + return inner2 + return inner + + +@wrapper +def none(expected): + assert expected == [] + + +@wrapper +def one_of(expected, *hosts): + if not isinstance(expected, list): + assert False, str(expected) + assert len(expected) == 1, f'one_of failed len({expected}) != 1' + assert expected[0] in hosts + + +@wrapper +def two_of(expected, *hosts): + if not isinstance(expected, list): + assert False, str(expected) + assert len(expected) == 2, f'two_of failed len({expected}) != 2' + matches = 0 + for h in hosts: + matches += int(h in expected) + if matches != 2: + assert False, f'two of {hosts} not in {expected}' + + +@wrapper +def exactly(expected, *hosts): + assert expected == list(hosts) + + +@wrapper +def error(expected, kind, match): + assert isinstance(expected, kind), (str(expected), match) + assert str(expected) == match, (str(expected), match) + + +@wrapper +def _or(expected, *inners): + def catch(inner): + try: + inner(expected) + except AssertionError as e: + return e + result = [catch(i) for i in inners] + if None not in result: + assert False, f"_or failed: {expected}" + + +def _always_true(_): + pass + + +def k(s): + return [e for e in s.split(' ') if e] + + +def get_result(key, results): + def match(one): + for o, k in zip(one, key): + if o != k and o != '*': + return False + return True + return [v for k, v in results if match(k)][0] + + +def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count): + + if spec_section == 
'hosts': + mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731 + hosts=explicit, + count=count, + )) + elif spec_section == 'label': + mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731 + label='mylabel', + count=count, + )) + elif spec_section == 'host_pattern': + pattern = { + 'e': 'notfound', + '1': '1', + '12': '[1-2]', + '123': '*', + }[explicit_key] + mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731 + host_pattern=pattern, + count=count, + )) + else: + assert False + + hosts = [ + HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h) + for h in hosts + ] + + return mk_spec, hosts + + +def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems): + key = ' '.join('N' if e is None else str(e) for e in key_elems) + try: + assert_res = get_result(k(key), results) + except IndexError: + try: + spec = mk_spec() + host_res, to_add, to_remove = HostAssignment( + spec=spec, + hosts=hosts, + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + if isinstance(host_res, list): + e = ', '.join(repr(h.hostname) for h in host_res) + assert False, f'`(k("{key}"), exactly({e})),` not found' + assert False, f'`(k("{key}"), ...),` not found' + except OrchestratorError as e: + assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found' + + for _ in range(10): # scheduler has a random component + try: + spec = mk_spec() + host_res, to_add, to_remove = HostAssignment( + spec=spec, + hosts=hosts, + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons + ).place() + + assert_res(sorted([h.hostname for h in host_res])) + except Exception as e: + assert_res(e) + + +@pytest.mark.parametrize("dp,n,result", + [ # noqa: E128 + ( + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]), + 0, + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]), + ), + ( + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]), + 2, + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]), + ), + ( + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]), + 2, + DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]), + ), + ]) +def test_daemon_placement_renumber(dp, n, result): + assert dp.renumber_ports(n) == result + + +@pytest.mark.parametrize( + 'dp,dd,result', + [ + ( + DaemonPlacement(daemon_type='mgr', hostname='host1'), + DaemonDescription('mgr', 'a', 'host1'), + True + ), + ( + DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'), + DaemonDescription('mgr', 'a', 'host1'), + True + ), + ( + DaemonPlacement(daemon_type='mon', hostname='host1', name='a'), + DaemonDescription('mgr', 'a', 'host1'), + False + ), + ( + DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'), + DaemonDescription('mgr', 'b', 'host1'), + False + ), + ]) +def test_daemon_placement_match(dp, dd, result): + assert dp.matches_daemon(dd) == result + + +# * first match from the top wins +# * where e=[], *=any +# +# + list of known hosts available for scheduling (host_key) +# | + hosts used for explicit placement (explicit_key) +# | | + count +# | | | + section (host, label, pattern) +# | | | | + expected result +# | | | | | +test_explicit_scheduler_results = [ + (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')), + (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')), + (k("* e N p"), error(OrchestratorValidationError, 
'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')), + (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')), + (k("* e * *"), none), + (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")), + (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")), + (k("1 * * *"), exactly('1')), + (k("12 1 * *"), exactly('1')), + (k("12 12 1 *"), one_of('1', '2')), + (k("12 12 * *"), exactly('1', '2')), + (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")), + (k("12 123 1 *"), one_of('1', '2', '3')), + (k("12 123 * *"), two_of('1', '2', '3')), + (k("123 1 * *"), exactly('1')), + (k("123 12 1 *"), one_of('1', '2')), + (k("123 12 * *"), exactly('1', '2')), + (k("123 123 1 *"), one_of('1', '2', '3')), + (k("123 123 2 *"), two_of('1', '2', '3')), + (k("123 123 * *"), exactly('1', '2', '3')), +] + + +@pytest.mark.parametrize("spec_section_key,spec_section", + [ # noqa: E128 + ('h', 'hosts'), + ('l', 'label'), + ('p', 'host_pattern'), + ]) +@pytest.mark.parametrize("count", + [ # noqa: E128 + None, + 0, + 1, + 2, + 3, + ]) +@pytest.mark.parametrize("explicit_key, explicit", + [ # noqa: E128 + ('e', []), + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("host_key, hosts", + [ # noqa: E128 + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +def test_explicit_scheduler(host_key, hosts, + explicit_key, explicit, + count, + spec_section_key, spec_section): + + mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count) + run_scheduler_test( + results=test_explicit_scheduler_results, + mk_spec=mk_spec, + hosts=hosts, + daemons=[], + key_elems=(host_key, explicit_key, count, spec_section_key) + ) + + +# * first match from the top wins +# * where e=[], *=any +# +# + list of known hosts available for scheduling (host_key) +# | + hosts used for explicit placement (explicit_key) +# | | + count +# | | | + existing daemons +# | | | | + section (host, label, pattern) +# | | | | | + expected result +# | | | | | | +test_scheduler_daemons_results = [ + (k("* 1 * * *"), exactly('1')), + (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')), + (k("1 123 * * *"), exactly('1')), + (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')), + (k("12 123 N * *"), exactly('1', '2')), + (k("12 123 1 * *"), one_of('1', '2')), + (k("12 123 2 * *"), exactly('1', '2')), + (k("12 123 3 * *"), exactly('1', '2')), + (k("123 123 N * *"), exactly('1', '2', '3')), + (k("123 123 1 e *"), one_of('1', '2', '3')), + (k("123 123 1 1 *"), exactly('1')), + (k("123 123 1 3 *"), exactly('3')), + (k("123 123 1 12 *"), one_of('1', '2')), + (k("123 123 1 112 *"), one_of('1', '2')), + (k("123 123 1 23 *"), one_of('2', '3')), + (k("123 123 1 123 *"), one_of('1', '2', '3')), + (k("123 123 2 e *"), two_of('1', '2', '3')), + (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))), + (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))), + (k("123 123 2 12 *"), exactly('1', '2')), + (k("123 123 2 112 *"), exactly('1', '2')), + (k("123 123 2 23 *"), exactly('2', '3')), + (k("123 123 2 123 *"), two_of('1', '2', '3')), + 
(k("123 123 3 * *"), exactly('1', '2', '3')), +] + + +@pytest.mark.parametrize("spec_section_key,spec_section", + [ # noqa: E128 + ('h', 'hosts'), + ('l', 'label'), + ('p', 'host_pattern'), + ]) +@pytest.mark.parametrize("daemons_key, daemons", + [ # noqa: E128 + ('e', []), + ('1', ['1']), + ('3', ['3']), + ('12', ['1', '2']), + ('112', ['1', '1', '2']), # deal with existing co-located daemons + ('23', ['2', '3']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("count", + [ # noqa: E128 + None, + 1, + 2, + 3, + ]) +@pytest.mark.parametrize("explicit_key, explicit", + [ # noqa: E128 + ('1', ['1']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("host_key, hosts", + [ # noqa: E128 + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +def test_scheduler_daemons(host_key, hosts, + explicit_key, explicit, + count, + daemons_key, daemons, + spec_section_key, spec_section): + mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count) + dds = [ + DaemonDescription('mgr', d, d) + for d in daemons + ] + run_scheduler_test( + results=test_scheduler_daemons_results, + mk_spec=mk_spec, + hosts=hosts, + daemons=dds, + key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key) + ) + + +# ========================= + + +class NodeAssignmentTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + expected: List[str] + expected_add: List[str] + expected_remove: List[DaemonDescription] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove", + [ # noqa: E128 + # just hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(hosts=['smithi060']), + ['smithi060'], + [], + None, None, + ['mgr:smithi060'], ['mgr:smithi060'], [] + ), + # all_hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(host_pattern='*'), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host3'], + [] + ), + # all_hosts + count_per_host + NodeAssignmentTest( + 'mds', + PlacementSpec(host_pattern='*', count_per_host=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mds', 'a', 'host1'), + DaemonDescription('mds', 'b', 'host2'), + ], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # count that is bigger than the amount of hosts. Truncate to len(hosts) + # mgr should not be co-located to each other. + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # count that is bigger than the amount of hosts; wrap around. 
+ NodeAssignmentTest( + 'mds', + PlacementSpec(count=6), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # count + partial host list + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=3, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a', 'mgr.b'] + ), + # count + partial host list (with colo) + NodeAssignmentTest( + 'mds', + PlacementSpec(count=3, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mds', 'a', 'host1'), + DaemonDescription('mds', 'b', 'host2'), + ], + None, None, + ['mds:host3', 'mds:host3', 'mds:host3'], + ['mds:host3', 'mds:host3', 'mds:host3'], + ['mds.a', 'mds.b'] + ), + # count 1 + partial host list + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=1, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a', 'mgr.b'] + ), + # count + partial host list + existing + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + ], + None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a'] + ), + # count + partial host list + existing (deterministic) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host1']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + ], + None, None, + ['mgr:host1'], + [], + [] + ), + # count + partial host list + existing (deterministic) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host1']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'a', 'host2'), + ], + None, None, + ['mgr:host1'], + ['mgr:host1'], + ['mgr.a'] + ), + # label only + NodeAssignmentTest( + 'mgr', + PlacementSpec(label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # label + count (truncate to host list) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=4, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # label + count (with colo) + NodeAssignmentTest( + 'mds', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # label only + count_per_host + NodeAssignmentTest( + 'mds', + PlacementSpec(label='foo', count_per_host=3), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3', + 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3', + 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # host_pattern + NodeAssignmentTest( + 'mgr', + PlacementSpec(host_pattern='mgr*'), + 'mgrhost1 mgrhost2 datahost'.split(), + [], + None, None, + ['mgr:mgrhost1', 'mgr:mgrhost2'], + ['mgr:mgrhost1', 'mgr:mgrhost2'], + [] + ), + # host_pattern + count_per_host + NodeAssignmentTest( + 
'mds', + PlacementSpec(host_pattern='mds*', count_per_host=3), + 'mdshost1 mdshost2 datahost'.split(), + [], + None, None, + ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], + ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], + [] + ), + # label + count_per_host + ports + NodeAssignmentTest( + 'rgw', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + [] + ), + # label + count_per_host + ports (+ existing) + NodeAssignmentTest( + 'rgw', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [ + DaemonDescription('rgw', 'a', 'host1', ports=[81]), + DaemonDescription('rgw', 'b', 'host2', ports=[80]), + DaemonDescription('rgw', 'c', 'host1', ports=[82]), + ], + None, None, + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw:host1(*:80)', 'rgw:host3(*:80)', + 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw.c'] + ), + # cephadm.py teuth case + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=3, hosts=['host1=y', 'host2=x']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'y', 'host1'), + DaemonDescription('mgr', 'x', 'host2'), + ], + None, None, + ['mgr:host1(name=y)', 'mgr:host2(name=x)'], + [], [] + ), + + # note: host -> rank mapping is psuedo-random based on svc name, so these + # host/rank pairs may seem random but they match the nfs.mynfs seed used by + # the test. + + # ranked, fresh + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [], + {}, + {0: {0: None}, 1: {0: None}, 2: {0: None}}, + ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'], + ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'], + [] + ), + # 21: ranked, exist + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + ], + {0: {1: '0.1'}}, + {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'], + ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'], + [] + ), + # ranked, exist, different ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, exist, different ranks (2) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + [] + ), + # ranked, exist, extra ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + 
DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + ['nfs.4.5'] + ), + # 25: ranked, exist, extra ranks (scale down: kill off high rank) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host3 host2 host1'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'], + [], + ['nfs.2.5'] + ), + # ranked, exist, extra ranks (scale down hosts) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'], + ['nfs:host3(rank=1.6)'], + ['nfs.2.5', 'nfs.1.5'] + ), + # ranked, exist, duplicate rank + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2), + ], + {0: {0: '0.0'}, 1: {2: '1.2'}}, + {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + ['nfs.1.1'] + ), + # 28: ranked, all gens stale (failure during update cycle) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}}, + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'], + ['nfs:host3(rank=1.4)'], + ['nfs.1.2'] + ), + # ranked, not enough hosts + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}}, + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, scale down + NodeAssignmentTest( + 'nfs', + PlacementSpec(hosts=['host2']), + 'host1 host2'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + ['nfs:host2(rank=0.3)'], + ['nfs:host2(rank=0.3)'], + ['nfs.0.2', 'nfs.1.2', 
'nfs.2.2'] + ), + + ]) +def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map, + expected, expected_add, expected_remove): + spec = None + service_id = None + allow_colo = False + if service_type == 'rgw': + service_id = 'realm.zone' + allow_colo = True + elif service_type == 'mds': + service_id = 'myfs' + allow_colo = True + elif service_type == 'nfs': + service_id = 'mynfs' + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + if not spec: + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + all_slots, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + allow_colo=allow_colo, + rank_map=rank_map, + ).place() + + assert rank_map == post_rank_map + + got = [str(p) for p in all_slots] + num_wildcard = 0 + for i in expected: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + got = [str(p) for p in to_add] + num_wildcard = 0 + for i in expected_add: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + assert sorted([d.name() for d in to_remove]) == sorted(expected_remove) + + +class NodeAssignmentTest5(NamedTuple): + service_type: str + placement: PlacementSpec + available_hosts: List[str] + candidates_hosts: List[str] + + +@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates", + [ # noqa: E128 + NodeAssignmentTest5( + 'alertmanager', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host1 host4 host2'.split(), + ), + NodeAssignmentTest5( + 'prometheus', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host2 host4 host1'.split(), + ), + NodeAssignmentTest5( + 'grafana', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host2 host4 host3'.split(), + ), + NodeAssignmentTest5( + 'mgr', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host2 host1 host3'.split(), + ), + NodeAssignmentTest5( + 'mon', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host4 host2'.split(), + ), + NodeAssignmentTest5( + 'rgw', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host2 host4'.split(), + ), + NodeAssignmentTest5( + 'cephfs-mirror', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host3 host1 host2'.split(), + ), + ]) +def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates): + spec = None + service_id = None + allow_colo = False + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + candidates = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in available_hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=[], + allow_colo=allow_colo, + ).get_candidates() + + candidates_hosts = [h.hostname for h in candidates] + assert candidates_hosts == expected_candidates + + +class NodeAssignmentTest2(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: 
List[DaemonDescription] + expected_len: int + in_set: List[str] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set", + [ # noqa: E128 + # just count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'], + ), + + # hosts + (smaller) count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2'.split()), + 'host1 host2'.split(), + [], + 1, + ['host1', 'host2'], + ), + # hosts + (smaller) count, existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [DaemonDescription('mgr', 'mgr.a', 'host1')], + 1, + ['host1', 'host2', 'host3'], + ), + # hosts + (smaller) count, (more) existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + 1, + ['host1', 'host2'] + ), + # count + partial host list + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'] + ), + # label + count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, label='foo'), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'] + ), + ]) +def test_node_assignment2(service_type, placement, hosts, + daemons, expected_len, in_set): + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert len(hosts) == expected_len + for h in [h.hostname for h in hosts]: + assert h in in_set + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have", + [ # noqa: E128 + # hosts + (smaller) count, (more) existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=3, hosts='host3'.split()), + 'host1 host2 host3'.split(), + [], + 1, + ['host3'] + ), + # count + partial host list + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [], + 1, + ['host3'] + ), + ]) +def test_node_assignment3(service_type, placement, hosts, + daemons, expected_len, must_have): + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert len(hosts) == expected_len + for h in must_have: + assert h in [h.hostname for h in hosts] + + +class NodeAssignmentTest4(NamedTuple): + spec: ServiceSpec + networks: Dict[str, Dict[str, Dict[str, List[str]]]] + daemons: List[DaemonDescription] + expected: List[str] + expected_add: List[str] + expected_remove: List[DaemonDescription] + + +@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove", + [ # noqa: E128 + NodeAssignmentTest4( + ServiceSpec( + service_type='rgw', + service_id='foo', + placement=PlacementSpec(count=6, label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}}, + }, + [], + ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)', + 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)', + 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'], + 
['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)', + 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)', + 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'], + [] + ), + NodeAssignmentTest4( + IngressSpec( + service_type='ingress', + service_id='rgw.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='rgw.foo', + placement=PlacementSpec(label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}}, + }, + [], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + [] + ), + NodeAssignmentTest4( + IngressSpec( + service_type='ingress', + service_id='rgw.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='rgw.foo', + placement=PlacementSpec(label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}}, + }, + [ + DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1', + ports=[443, 8888]), + DaemonDescription('keepalived', 'b', 'host2'), + DaemonDescription('keepalived', 'c', 'host3'), + ], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + ['haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1'], + ['keepalived.c'] + ), + ]) +def test_node_assignment4(spec, networks, daemons, + expected, expected_add, expected_remove): + all_slots, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + allow_colo=True, + networks=networks, + primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type, + per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None, + ).place() + + got = [str(p) for p in all_slots] + num_wildcard = 0 + for i in expected: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + got = [str(p) for p in to_add] + num_wildcard = 0 + for i in expected_add: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + assert sorted([d.name() for d in to_remove]) == sorted(expected_remove) + + +@pytest.mark.parametrize("placement", + [ # noqa: E128 + ('1 *'), + ('* label:foo'), + ('* host1 host2'), + ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'), # > 63 chars + ]) +def test_bad_placements(placement): + try: + PlacementSpec.from_string(placement.split(' ')) + assert False + except SpecValidationError: + pass + + +class NodeAssignmentTestBadSpec(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + expected: str + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected", + [ # noqa: E128 + # unknown host + NodeAssignmentTestBadSpec( + 'mgr', + PlacementSpec(hosts=['unknownhost']), + ['knownhost'], + [], + "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts" + ), + # unknown host pattern + NodeAssignmentTestBadSpec( + 'mgr', + 
PlacementSpec(host_pattern='unknownhost'), + ['knownhost'], + [], + "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts" + ), + # unknown label + NodeAssignmentTestBadSpec( + 'mgr', + PlacementSpec(label='unknownlabel'), + [], + [], + "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel" + ), + ]) +def test_bad_specs(service_type, placement, hosts, daemons, expected): + with pytest.raises(OrchestratorValidationError) as e: + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert str(e.value) == expected + + +class ActiveAssignmentTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + expected: List[List[str]] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove", + [ + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host1', 'host2'], ['host1', 'host3']], + [[]], + [['mgr.b'], ['mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1', 'host3'], ['host2', 'host3']], + [[]], + [['mgr.a'], ['mgr.b']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host2']], + [[]], + [['mgr.a', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host3']], + [[]], + [['mgr.a', 'mgr.b']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1'], ['host3']], + [[]], + [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host2', 'host3']], + [[]], + [['mgr.a']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1'], ['host2'], ['host3']], + [[]], + [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'a2', 'host1'), + 
DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host1']], + [[]], + [['mgr.a2', 'mgr.b', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'a2', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host1']], + [[]], + [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'a2', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1', 'host3']], + [[]], + [['mgr.a2', 'mgr.b']] + ), + # Explicit placement should override preference for active daemon + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1, hosts=['host1']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1']], + [[]], + [['mgr.b', 'mgr.c']] + ), + + ]) +def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove): + + spec = ServiceSpec(service_type=service_type, + service_id=None, + placement=placement) + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in hosts]) in expected + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove + + +class UnreachableHostsTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + unreachables_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove", + [ + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + ['host2'], + [], + [['host1', 'host3']], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(hosts=['host3']), + 'host1 host2 host3'.split(), + ['host1'], + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [[]], + [['mgr.b']], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + ['host1'], + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [[]], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3 host4'.split(), + 'host1 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [[]], + [['mgr.b']], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + ['host2'], + [], + [['host1', 'host3', 'host4']], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + 'host1 host4'.split(), + [], + [['host2', 'host3']], + [[]], + ), + 
+ ]) +def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove): + + spec = ServiceSpec(service_type=service_type, + service_id=None, + placement=placement) + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[HostSpec(h) for h in unreachable_hosts], + draining_hosts=[], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove + + +class RescheduleFromOfflineTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + maintenance_hosts: List[str] + offline_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove", + [ + RescheduleFromOfflineTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [], + ['host2'], + [ + DaemonDescription('nfs', 'a', 'host1'), + DaemonDescription('nfs', 'b', 'host2'), + ], + [['host3']], + [[]], + ), + RescheduleFromOfflineTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + ['host2'], + [], + [ + DaemonDescription('nfs', 'a', 'host1'), + DaemonDescription('nfs', 'b', 'host2'), + ], + [[]], + [[]], + ), + RescheduleFromOfflineTest( + 'mon', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [], + ['host2'], + [ + DaemonDescription('mon', 'a', 'host1'), + DaemonDescription('mon', 'b', 'host2'), + ], + [[]], + [[]], + ), + RescheduleFromOfflineTest( + 'ingress', + PlacementSpec(count=1), + 'host1 host2'.split(), + [], + ['host2'], + [ + DaemonDescription('haproxy', 'b', 'host2'), + DaemonDescription('keepalived', 'b', 'host2'), + ], + [['host1']], + [[]], + ), + ]) +def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove): + + if service_type == 'ingress': + spec = \ + IngressSpec( + service_type='ingress', + service_id='nfs-ha.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='nfs-ha.foo', + placement=placement, + ) + else: + spec = \ + ServiceSpec( + service_type=service_type, + service_id='test', + placement=placement, + ) + + host_specs = [HostSpec(h) for h in hosts] + for h in host_specs: + if h.hostname in offline_hosts: + h.status = 'offline' + if h.hostname in maintenance_hosts: + h.status = 'maintenance' + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=host_specs, + unreachable_hosts=[h for h in host_specs if h.status], + draining_hosts=[], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove + + +class DrainExplicitPlacementTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + maintenance_hosts: List[str] + offline_hosts: List[str] + draining_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,draining_hosts,daemons,expected_add,expected_remove", + [ + DrainExplicitPlacementTest( + 'crash', + PlacementSpec(hosts='host1 host2 host3'.split()), + 'host1 host2 host3 host4'.split(), + [], + [], + ['host3'], + [ + DaemonDescription('crash', 
'host1', 'host1'), + DaemonDescription('crash', 'host2', 'host2'), + DaemonDescription('crash', 'host3', 'host3'), + ], + [[]], + [['crash.host3']], + ), + DrainExplicitPlacementTest( + 'crash', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + [], + [], + ['host1', 'host4'], + [ + DaemonDescription('crash', 'host1', 'host1'), + DaemonDescription('crash', 'host3', 'host3'), + ], + [['host2']], + [['crash.host1']], + ), + ]) +def test_drain_from_explict_placement(service_type, placement, hosts, maintenance_hosts, offline_hosts, draining_hosts, daemons, expected_add, expected_remove): + + spec = ServiceSpec(service_type=service_type, + service_id='test', + placement=placement) + + host_specs = [HostSpec(h) for h in hosts] + draining_host_specs = [HostSpec(h) for h in draining_hosts] + for h in host_specs: + if h.hostname in offline_hosts: + h.status = 'offline' + if h.hostname in maintenance_hosts: + h.status = 'maintenance' + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=host_specs, + unreachable_hosts=[h for h in host_specs if h.status], + draining_hosts=draining_host_specs, + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py new file mode 100644 index 000000000..ff98a1388 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py @@ -0,0 +1,178 @@ +from unittest.mock import MagicMock +from cephadm.service_discovery import Root + + +class FakeDaemonDescription: + def __init__(self, ip, ports, hostname, service_name='', daemon_type=''): + self.ip = ip + self.ports = ports + self.hostname = hostname + self._service_name = service_name + self.daemon_type = daemon_type + + def service_name(self): + return self._service_name + + +class FakeCache: + def get_daemons_by_service(self, service_type): + if service_type == 'ceph-exporter': + return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'), + FakeDaemonDescription('1.2.3.5', [9926], 'node1')] + + return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'), + FakeDaemonDescription('1.2.3.5', [9200], 'node1')] + + def get_daemons_by_type(self, daemon_type): + return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'), + FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')] + + +class FakeInventory: + def get_addr(self, name: str): + return '1.2.3.4' + + +class FakeServiceSpec: + def __init__(self, port): + self.monitor_port = port + + +class FakeSpecDescription: + def __init__(self, port): + self.spec = FakeServiceSpec(port) + + +class FakeSpecStore(): + def __init__(self, mgr): + self.mgr = mgr + self._specs = {'ingress': FakeSpecDescription(9049)} + + def __contains__(self, name): + return name in self._specs + + def __getitem__(self, name): + return self._specs['ingress'] + + +class FakeMgr: + def __init__(self): + self.config = '' + self.check_mon_command = MagicMock(side_effect=self._check_mon_command) + self.mon_command = MagicMock(side_effect=self._check_mon_command) + self.template = MagicMock() + self.log = MagicMock() + self.inventory = FakeInventory() + self.cache = FakeCache() + self.spec_store = FakeSpecStore(self) + + def get_mgr_id(self): + return 'mgr-1' + + def list_servers(self): + + servers = [ + {'hostname': 'node0', + 'ceph_version': '16.2', + 'services': [{'type': 'mgr', 'id': 
'mgr-1'}, {'type': 'mon'}]}, + {'hostname': 'node1', + 'ceph_version': '16.2', + 'services': [{'type': 'mgr', 'id': 'mgr-2'}, {'type': 'mon'}]} + ] + + return servers + + def _check_mon_command(self, cmd_dict, inbuf=None): + prefix = cmd_dict.get('prefix') + if prefix == 'get-cmd': + return 0, self.config, '' + if prefix == 'set-cmd': + self.config = cmd_dict.get('value') + return 0, 'value set', '' + return -1, '', 'error' + + def get_module_option_ex(self, module, option, default_value): + return "9283" + + +class TestServiceDiscovery: + + def test_get_sd_config_prometheus(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('mgr-prometheus') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['node0:9283'] + + def test_get_sd_config_node_exporter(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('node-exporter') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9100'] + assert cfg[0]['labels'] == {'instance': 'node0'} + assert cfg[1]['targets'] == ['1.2.3.5:9200'] + assert cfg[1]['labels'] == {'instance': 'node1'} + + def test_get_sd_config_alertmgr(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('alertmanager') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9100', '1.2.3.5:9200'] + + def test_get_sd_config_haproxy(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('haproxy') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9049'] + assert cfg[0]['labels'] == {'instance': 'ingress'} + + def test_get_sd_config_ceph_exporter(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('ceph-exporter') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9926'] + + def test_get_sd_config_invalid_service(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('invalid-service') + assert cfg == [] diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py new file mode 100644 index 000000000..2300b288d --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -0,0 +1,2725 @@ +from textwrap import dedent +import json +import urllib.parse +import yaml +from mgr_util import build_url + +import pytest + +from unittest.mock import MagicMock, call, patch, ANY + +from cephadm.serve import CephadmServe +from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ + RbdMirrorService, CrashService, CephadmDaemonDeploySpec +from cephadm.services.iscsi import IscsiService +from cephadm.services.nfs import NFSService +from cephadm.services.nvmeof import NvmeofService +from cephadm.services.osd import OSDService +from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ + NodeExporterService, LokiService, PromtailService +from cephadm.module import 
CephadmOrchestrator +from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \ + ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, TracingSpec, \ + PrometheusSpec, CephExporterSpec, NFSServiceSpec, NvmeofServiceSpec +from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect + +from ceph.utils import datetime_now + +from orchestrator import OrchestratorError +from orchestrator._interface import DaemonDescription + +from typing import Dict, List + +grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" + +grafana_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" + + +class FakeInventory: + def get_addr(self, name: str) -> str: + return '1.2.3.4' + + +class FakeMgr: + def __init__(self): + self.config = '' + self.set_mon_crush_locations: 
Dict[str, List[str]] = {} + self.check_mon_command = MagicMock(side_effect=self._check_mon_command) + self.mon_command = MagicMock(side_effect=self._check_mon_command) + self.template = MagicMock() + self.log = MagicMock() + self.inventory = FakeInventory() + + def _check_mon_command(self, cmd_dict, inbuf=None): + prefix = cmd_dict.get('prefix') + if prefix == 'get-cmd': + return 0, self.config, '' + if prefix == 'set-cmd': + self.config = cmd_dict.get('value') + return 0, 'value set', '' + if prefix in ['auth get']: + return 0, '[foo]\nkeyring = asdf\n', '' + if prefix == 'quorum_status': + # actual quorum status output from testing + # note in this output all of the mons have blank crush locations + return 0, """{"election_epoch": 14, "quorum": [0, 1, 2], "quorum_names": ["vm-00", "vm-01", "vm-02"], "quorum_leader_name": "vm-00", "quorum_age": 101, "features": {"quorum_con": "4540138322906710015", "quorum_mon": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"]}, "monmap": {"epoch": 3, "fsid": "9863e1b8-6f24-11ed-8ad8-525400c13ad2", "modified": "2022-11-28T14:00:29.972488Z", "created": "2022-11-28T13:57:55.847497Z", "min_mon_release": 18, "min_mon_release_name": "reef", "election_strategy": 1, "disallowed_leaders: ": "", "stretch_mode": false, "tiebreaker_mon": "", "features": {"persistent": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"], "optional": []}, "mons": [{"rank": 0, "name": "vm-00", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.61:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.61:6789", "nonce": 0}]}, "addr": "192.168.122.61:6789/0", "public_addr": "192.168.122.61:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 1, "name": "vm-01", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.63:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.63:6789", "nonce": 0}]}, "addr": "192.168.122.63:6789/0", "public_addr": "192.168.122.63:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 2, "name": "vm-02", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.82:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.82:6789", "nonce": 0}]}, "addr": "192.168.122.82:6789/0", "public_addr": "192.168.122.82:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}]}}""", '' + if prefix == 'mon set_location': + self.set_mon_crush_locations[cmd_dict.get('name')] = cmd_dict.get('args') + return 0, '', '' + return -1, '', 'error' + + def get_minimal_ceph_conf(self) -> str: + return '' + + def get_mgr_ip(self) -> str: + return '1.2.3.4' + + +class TestCephadmService: + def test_set_service_url_on_dashboard(self): + # pylint: disable=protected-access + mgr = FakeMgr() + service_url = 'http://svc:1000' + service = GrafanaService(mgr) + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + assert mgr.config == service_url + + # set-cmd should not be called if value doesn't change + mgr.check_mon_command.reset_mock() + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'}) + + def _get_services(self, mgr): + # services: + osd_service = OSDService(mgr) + nfs_service = NFSService(mgr) + mon_service = MonService(mgr) + mgr_service = MgrService(mgr) + mds_service = MdsService(mgr) + rgw_service = RgwService(mgr) + rbd_mirror_service = 
RbdMirrorService(mgr) + grafana_service = GrafanaService(mgr) + alertmanager_service = AlertmanagerService(mgr) + prometheus_service = PrometheusService(mgr) + node_exporter_service = NodeExporterService(mgr) + loki_service = LokiService(mgr) + promtail_service = PromtailService(mgr) + crash_service = CrashService(mgr) + iscsi_service = IscsiService(mgr) + nvmeof_service = NvmeofService(mgr) + cephadm_services = { + 'mon': mon_service, + 'mgr': mgr_service, + 'osd': osd_service, + 'mds': mds_service, + 'rgw': rgw_service, + 'rbd-mirror': rbd_mirror_service, + 'nfs': nfs_service, + 'grafana': grafana_service, + 'alertmanager': alertmanager_service, + 'prometheus': prometheus_service, + 'node-exporter': node_exporter_service, + 'loki': loki_service, + 'promtail': promtail_service, + 'crash': crash_service, + 'iscsi': iscsi_service, + 'nvmeof': nvmeof_service, + } + return cephadm_services + + def test_get_auth_entity(self): + mgr = FakeMgr() + cephadm_services = self._get_services(mgr) + + for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]: + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1", "host") + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1", "") + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1") + + assert "client.crash.host" == \ + cephadm_services["crash"].get_auth_entity("id1", "host") + with pytest.raises(OrchestratorError): + cephadm_services["crash"].get_auth_entity("id1", "") + cephadm_services["crash"].get_auth_entity("id1") + + assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host") + assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "") + assert "mon." 
== cephadm_services["mon"].get_auth_entity("id1") + + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host") + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "") + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1") + + for daemon_type in ["osd", "mds"]: + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1", "host") + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1", "") + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1") + + # services based on CephadmService shouldn't have get_auth_entity + with pytest.raises(AttributeError): + for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']: + cephadm_services[daemon_type].get_auth_entity("id1", "host") + cephadm_services[daemon_type].get_auth_entity("id1", "") + cephadm_services[daemon_type].get_auth_entity("id1") + + +class TestISCSIService: + + mgr = FakeMgr() + iscsi_service = IscsiService(mgr) + + iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a") + iscsi_spec.daemon_type = "iscsi" + iscsi_spec.daemon_id = "a" + iscsi_spec.spec = MagicMock() + iscsi_spec.spec.daemon_type = "iscsi" + iscsi_spec.spec.ssl_cert = '' + iscsi_spec.api_user = "user" + iscsi_spec.api_password = "password" + iscsi_spec.api_port = 5000 + iscsi_spec.api_secure = False + iscsi_spec.ssl_cert = "cert" + iscsi_spec.ssl_key = "key" + + mgr.spec_store = MagicMock() + mgr.spec_store.all_specs.get.return_value = iscsi_spec + + def test_iscsi_client_caps(self): + + iscsi_daemon_spec = CephadmDaemonDeploySpec( + host='host', daemon_id='a', service_name=self.iscsi_spec.service_name()) + + self.iscsi_service.prepare_create(iscsi_daemon_spec) + + expected_caps = ['mon', + 'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"', + 'mgr', 'allow command "service status"', + 'osd', 'allow rwx'] + + expected_call = call({'prefix': 'auth get-or-create', + 'entity': 'client.iscsi.a', + 'caps': expected_caps}) + expected_call2 = call({'prefix': 'auth caps', + 'entity': 'client.iscsi.a', + 'caps': expected_caps}) + expected_call3 = call({'prefix': 'auth get', + 'entity': 'client.iscsi.a'}) + + assert expected_call in self.mgr.mon_command.mock_calls + assert expected_call2 in self.mgr.mon_command.mock_calls + assert expected_call3 in self.mgr.mon_command.mock_calls + + @patch('cephadm.utils.resolve_ip') + def test_iscsi_dashboard_config(self, mock_resolve_ip): + + self.mgr.check_mon_command = MagicMock() + self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '') + + # Case 1: use IPV4 address + id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1", + daemon_id="a", ip='192.168.1.1') + daemon_list = [id1] + mock_resolve_ip.return_value = '192.168.1.1' + + self.iscsi_service.config_dashboard(daemon_list) + + dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add', + 'name': 'testhost1'}, + 'http://user:password@192.168.1.1:5000') + + assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls + + # Case 2: use IPV6 address + self.mgr.check_mon_command.reset_mock() + + id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1", + daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210') + mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210' + + self.iscsi_service.config_dashboard(daemon_list) + + dashboard_expected_call = 
call({'prefix': 'dashboard iscsi-gateway-add', + 'name': 'testhost1'}, + 'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000') + + assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls + + # Case 3: IPV6 Address . Secure protocol + self.mgr.check_mon_command.reset_mock() + + self.iscsi_spec.api_secure = True + + self.iscsi_service.config_dashboard(daemon_list) + + dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add', + 'name': 'testhost1'}, + 'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000') + + assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_unique_name") + @patch("cephadm.services.iscsi.IscsiService.get_trusted_ips") + def test_iscsi_config(self, _get_trusted_ips, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator): + + iscsi_daemon_id = 'testpool.test.qwert' + trusted_ips = '1.1.1.1,2.2.2.2' + api_port = 3456 + api_user = 'test-user' + api_password = 'test-password' + pool = 'testpool' + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + _get_name.return_value = iscsi_daemon_id + _get_trusted_ips.return_value = trusted_ips + + iscsi_gateway_conf = f"""# This file is generated by cephadm. +[config] +cluster_client_name = client.iscsi.{iscsi_daemon_id} +pool = {pool} +trusted_ip_list = {trusted_ips} +minimum_gateways = 1 +api_port = {api_port} +api_user = {api_user} +api_password = {api_password} +api_secure = False +log_to_stderr = True +log_to_stderr_prefix = debug +log_to_file = False""" + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, IscsiServiceSpec(service_id=pool, + api_port=api_port, + api_user=api_user, + api_password=api_password, + pool=pool, + trusted_ip_list=trusted_ips)): + _run_cephadm.assert_called_with( + 'test', + f'iscsi.{iscsi_daemon_id}', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": f'iscsi.{iscsi_daemon_id}', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [api_port], + }, + "meta": { + 'service_name': f'iscsi.{pool}', + 'ports': [api_port], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "", + "keyring": f"[client.iscsi.{iscsi_daemon_id}]\nkey = None\n", + "files": { + "iscsi-gateway.cfg": iscsi_gateway_conf, + }, + } + }), + ) + + +class TestNVMEOFService: + + mgr = FakeMgr() + nvmeof_service = NvmeofService(mgr) + + nvmeof_spec = NvmeofServiceSpec(service_type='nvmeof', service_id="a") + nvmeof_spec.daemon_type = 'nvmeof' + nvmeof_spec.daemon_id = "a" + nvmeof_spec.spec = MagicMock() + nvmeof_spec.spec.daemon_type = 'nvmeof' + + mgr.spec_store = MagicMock() + mgr.spec_store.all_specs.get.return_value = nvmeof_spec + + def test_nvmeof_client_caps(self): + pass + + @patch('cephadm.utils.resolve_ip') + def test_nvmeof_dashboard_config(self, mock_resolve_ip): + pass + + @patch("cephadm.inventory.Inventory.get_addr", lambda _, __: '192.168.100.100') + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_unique_name") + def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator): + + nvmeof_daemon_id = 'testpool.test.qwert' + pool = 'testpool' + tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288' + default_port = 5500 + group = 'mygroup' + 
_run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + _get_name.return_value = nvmeof_daemon_id + + nvmeof_gateway_conf = f"""# This file is generated by cephadm. +[gateway] +name = client.nvmeof.{nvmeof_daemon_id} +group = {group} +addr = 192.168.100.100 +port = {default_port} +enable_auth = False +state_update_notify = True +state_update_interval_sec = 5 + +[ceph] +pool = {pool} +config_file = /etc/ceph/ceph.conf +id = nvmeof.{nvmeof_daemon_id} + +[mtls] +server_key = ./server.key +client_key = ./client.key +server_cert = ./server.crt +client_cert = ./client.crt + +[spdk] +tgt_path = /usr/local/bin/nvmf_tgt +rpc_socket = /var/tmp/spdk.sock +timeout = 60 +log_level = WARN +conn_retries = 10 +transports = tcp +transport_tcp_options = {{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}} +tgt_cmd_extra_args = {tgt_cmd_extra_args}\n""" + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool, + tgt_cmd_extra_args=tgt_cmd_extra_args, + group=group, + pool=pool)): + _run_cephadm.assert_called_with( + 'test', + f'nvmeof.{nvmeof_daemon_id}', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "nvmeof.testpool.test.qwert", + "image": "", + "deploy_arguments": [], + "params": { + "tcp_ports": [5500, 4420, 8009] + }, + "meta": { + "service_name": "nvmeof.testpool", + "ports": [5500, 4420, 8009], + "ip": None, + "deployed_by": [], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "extra_entrypoint_args": None + }, + "config_blobs": { + "config": "", + "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n", + "files": { + "ceph-nvmeof.conf": nvmeof_gateway_conf + } + } + }), + ) + + +class TestMonitoring: + def _get_config(self, url: str) -> str: + + return f""" + # This file is generated by cephadm. + # See https://prometheus.io/docs/alerting/configuration/ for documentation. 
+ + global: + resolve_timeout: 5m + http_config: + tls_config: + insecure_skip_verify: true + + route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' + + receivers: + - name: 'default' + webhook_configs: + - name: 'ceph-dashboard' + webhook_configs: + - url: '{url}/api/prometheus_receiver' + """ + + @pytest.mark.parametrize( + "dashboard_url,expected_yaml_url", + [ + # loopback address + ("http://[::1]:8080", "http://localhost:8080"), + # IPv6 + ( + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + ), + # IPv6 to FQDN + ( + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + "http://mgr.fqdn.test:8080", + ), + # IPv4 + ( + "http://192.168.0.123:8080", + "http://192.168.0.123:8080", + ), + # IPv4 to FQDN + ( + "http://192.168.0.123:8080", + "http://mgr.fqdn.test:8080", + ), + ], + ) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("mgr_module.MgrModule.get") + @patch("socket.getfqdn") + def test_alertmanager_config( + self, + mock_getfqdn, + mock_get, + _run_cephadm, + cephadm_module: CephadmOrchestrator, + dashboard_url, + expected_yaml_url, + ): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + mock_get.return_value = {"services": {"dashboard": dashboard_url}} + purl = urllib.parse.urlparse(expected_yaml_url) + mock_getfqdn.return_value = purl.hostname + + with with_host(cephadm_module, "test"): + with with_service(cephadm_module, AlertManagerSpec()): + y = dedent(self._get_config(expected_yaml_url)).lstrip() + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9093, 9094], + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [9093, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "alertmanager.yml": y, + }, + "peers": [], + } + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("socket.getfqdn") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash') + def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + fqdn = 'host1.test' + _get_fqdn.return_value = fqdn + + def gen_cert(host, addr): + return ('mycert', 'mykey') + + def get_root_cert(): + return 'my_root_cert' + + with with_host(cephadm_module, 'test'): + cephadm_module.secure_monitoring_stack = True + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert) + cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert) + with with_service(cephadm_module, AlertManagerSpec()): + + y = dedent(f""" + # This file is generated by cephadm. + # See https://prometheus.io/docs/alerting/configuration/ for documentation. 
+ + global: + resolve_timeout: 5m + http_config: + tls_config: + ca_file: root_cert.pem + + route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' + + receivers: + - name: 'default' + webhook_configs: + - name: 'ceph-dashboard' + webhook_configs: + - url: 'http://{fqdn}:8080/api/prometheus_receiver' + """).lstrip() + + web_config = dedent(""" + tls_server_config: + cert_file: alertmanager.crt + key_file: alertmanager.key + basic_auth_users: + alertmanager_user: alertmanager_password_hash""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9093, 9094], + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [9093, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "alertmanager.yml": y, + 'alertmanager.crt': 'mycert', + 'alertmanager.key': 'mykey', + 'web.yml': web_config, + 'root_cert.pem': 'my_root_cert' + }, + 'peers': [], + 'web_config': '/etc/alertmanager/web.yml', + } + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') + with with_host(cephadm_module, 'test'): + # host "test" needs to have networks for keepalive to be placed + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ + with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \ + with_service(cephadm_module, s) as _, \ + with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \ + with_service(cephadm_module, IngressSpec(service_id='ingress', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip="1.2.3.4/32", + backend_service='rgw.foo')) as _, \ + with_service(cephadm_module, PrometheusSpec('prometheus')) as _: + + y = dedent(""" + # This file is generated by cephadm. 
+ global: + scrape_interval: 10s + evaluation_interval: 10s + rule_files: + - /etc/prometheus/alerting/* + + alerting: + alertmanagers: + - scheme: http + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager + + scrape_configs: + - job_name: 'ceph' + honor_labels: true + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + + - job_name: 'node' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + + - job_name: 'haproxy' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy + + - job_name: 'ceph-exporter' + honor_labels: true + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter + """).lstrip() + + _run_cephadm.assert_called_with( + 'test', + "prometheus.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'prometheus.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9095], + }, + "meta": { + 'service_name': 'prometheus', + 'ports': [9095], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "prometheus.yml": y, + "/etc/prometheus/alerting/custom_alerts.yml": "", + }, + 'retention_time': '15d', + 'retention_size': '0', + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash') + def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') + + def gen_cert(host, addr): + return ('mycert', 'mykey') + + with with_host(cephadm_module, 'test'): + cephadm_module.secure_monitoring_stack = True + cephadm_module.set_store(PrometheusService.USER_CFG_KEY, 'prometheus_user') + cephadm_module.set_store(PrometheusService.PASS_CFG_KEY, 'prometheus_plain_password') + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + cephadm_module.http_server.service_discovery.username = 'sd_user' + cephadm_module.http_server.service_discovery.password = 'sd_password' + cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock( + side_effect=gen_cert) + # host "test" needs to have networks for keepalive to be placed + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ + with_service(cephadm_module, s) as _, \ + with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \ + with_service(cephadm_module, IngressSpec(service_id='ingress', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip="1.2.3.4/32", + backend_service='rgw.foo')) as _, \ + with_service(cephadm_module, PrometheusSpec('prometheus')) as _: + + web_config = dedent(""" + tls_server_config: + cert_file: prometheus.crt + key_file: prometheus.key + basic_auth_users: + prometheus_user: prometheus_password_hash""").lstrip() + + y = dedent(""" + # This file is 
generated by cephadm. + global: + scrape_interval: 10s + evaluation_interval: 10s + rule_files: + - /etc/prometheus/alerting/* + + alerting: + alertmanagers: + - scheme: https + basic_auth: + username: alertmanager_user + password: alertmanager_plain_password + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + scrape_configs: + - job_name: 'ceph' + scheme: https + tls_config: + ca_file: mgr_prometheus_cert.pem + honor_labels: true + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'node' + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'haproxy' + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'ceph-exporter' + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + """).lstrip() + + _run_cephadm.assert_called_with( + 'test', + "prometheus.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'prometheus.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9095], + }, + "meta": { + 'service_name': 'prometheus', + 'ports': [9095], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + 'files': { + 'prometheus.yml': y, + 'root_cert.pem': '', + 'mgr_prometheus_cert.pem': '', + 'web.yml': web_config, + 'prometheus.crt': 'mycert', + 'prometheus.key': 'mykey', + "/etc/prometheus/alerting/custom_alerts.yml": "", + }, + 'retention_time': '15d', + 'retention_size': '0', + 'web_config': '/etc/prometheus/web.yml', + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, MonitoringSpec('loki')) as _: + + y = dedent(""" + # This file is generated by cephadm. 
+ auth_enabled: false + + server: + http_listen_port: 3100 + grpc_listen_port: 8080 + + common: + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + + schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "loki.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'loki.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3100], + }, + "meta": { + 'service_name': 'loki', + 'ports': [3100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "loki.yml": y + }, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, MonitoringSpec('promtail')) as _: + + y = dedent(""" + # This file is generated by cephadm. + server: + http_listen_port: 9080 + grpc_listen_port: 0 + + positions: + filename: /tmp/positions.yaml + + clients: + - url: http://:3100/loki/api/v1/push + + scrape_configs: + - job_name: system + static_configs: + - labels: + job: Cluster Logs + __path__: /var/log/ceph/**/*.log""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "promtail.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'promtail.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9080], + }, + "meta": { + 'service_name': 'promtail', + 'ports': [9080], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "promtail.yml": y + }, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) + def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + + with with_host(cephadm_module, "test"): + cephadm_module.set_store("test/grafana_crt", grafana_cert) + cephadm_module.set_store("test/grafana_key", grafana_key) + with with_service( + cephadm_module, PrometheusSpec("prometheus") + ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( + cephadm_module, GrafanaSpec("grafana") + ) as _: + files = { + 'grafana.ini': dedent(""" + # This file is generated by cephadm. + [users] + default_theme = light + [auth.anonymous] + enabled = true + org_name = 'Main Org.' 
+ org_role = 'Viewer' + [server] + domain = 'bootstrap.storage.lab' + protocol = https + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = 3000 + http_addr = + [snapshots] + external_enabled = false + [security] + disable_initial_admin_creation = true + cookie_secure = true + cookie_samesite = none + allow_embedding = true""").lstrip(), # noqa: W291 + 'provisioning/datasources/ceph-dashboard.yml': dedent(""" + # This file is generated by cephadm. + apiVersion: 1 + + deleteDatasources: + - name: 'Dashboard1' + orgId: 1 + + datasources: + - name: 'Dashboard1' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: 'http://[1::4]:9095' + basicAuth: false + isDefault: true + editable: false + + - name: 'Loki' + type: 'loki' + access: 'proxy' + url: '' + basicAuth: false + isDefault: false + editable: false""").lstrip(), + 'certs/cert_file': dedent(f""" + # generated by cephadm\n{grafana_cert}""").lstrip(), + 'certs/cert_key': dedent(f""" + # generated by cephadm\n{grafana_key}""").lstrip(), + } + + _run_cephadm.assert_called_with( + 'test', + "grafana.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'grafana.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3000], + }, + "meta": { + 'service_name': 'grafana', + 'ports': [3000], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": files, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')): + out = cephadm_module.cephadm_services['grafana'].generate_config( + CephadmDaemonDeploySpec('test', 'daemon', 'grafana')) + assert out == ( + { + 'files': + { + 'grafana.ini': + '# This file is generated by cephadm.\n' + '[users]\n' + ' default_theme = light\n' + '[auth.anonymous]\n' + ' enabled = true\n' + " org_name = 'Main Org.'\n" + " org_role = 'Viewer'\n" + '[server]\n' + " domain = 'bootstrap.storage.lab'\n" + ' protocol = https\n' + ' cert_file = /etc/grafana/certs/cert_file\n' + ' cert_key = /etc/grafana/certs/cert_key\n' + ' http_port = 3000\n' + ' http_addr = \n' + '[snapshots]\n' + ' external_enabled = false\n' + '[security]\n' + ' admin_user = admin\n' + ' admin_password = secure\n' + ' cookie_secure = true\n' + ' cookie_samesite = none\n' + ' allow_embedding = true', + 'provisioning/datasources/ceph-dashboard.yml': + "# This file is generated by cephadm.\n" + "apiVersion: 1\n\n" + 'deleteDatasources:\n\n' + 'datasources:\n\n' + " - name: 'Loki'\n" + " type: 'loki'\n" + " access: 'proxy'\n" + " url: ''\n" + ' basicAuth: false\n' + ' isDefault: false\n' + ' editable: false', + 'certs/cert_file': ANY, + 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False']) + + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_grafana_no_anon_access(self, cephadm_module: CephadmOrchestrator): + # with anonymous_access set to False, expecting the [auth.anonymous] section + # to not be present in the grafana config. 
Note that we require an initial_admin_password + # to be provided when anonymous_access is False + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, GrafanaSpec(anonymous_access=False, initial_admin_password='secure')): + out = cephadm_module.cephadm_services['grafana'].generate_config( + CephadmDaemonDeploySpec('test', 'daemon', 'grafana')) + assert out == ( + { + 'files': + { + 'grafana.ini': + '# This file is generated by cephadm.\n' + '[users]\n' + ' default_theme = light\n' + '[server]\n' + " domain = 'bootstrap.storage.lab'\n" + ' protocol = https\n' + ' cert_file = /etc/grafana/certs/cert_file\n' + ' cert_key = /etc/grafana/certs/cert_key\n' + ' http_port = 3000\n' + ' http_addr = \n' + '[snapshots]\n' + ' external_enabled = false\n' + '[security]\n' + ' admin_user = admin\n' + ' admin_password = secure\n' + ' cookie_secure = true\n' + ' cookie_samesite = none\n' + ' allow_embedding = true', + 'provisioning/datasources/ceph-dashboard.yml': + "# This file is generated by cephadm.\n" + "apiVersion: 1\n\n" + 'deleteDatasources:\n\n' + 'datasources:\n\n' + " - name: 'Loki'\n" + " type: 'loki'\n" + " access: 'proxy'\n" + " url: ''\n" + ' basicAuth: false\n' + ' isDefault: false\n' + ' editable: false', + 'certs/cert_file': ANY, + 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False']) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + + yaml_str = """service_type: alertmanager +service_name: alertmanager +placement: + count: 1 +spec: + port: 4200 +""" + yaml_file = yaml.safe_load(yaml_str) + spec = ServiceSpec.from_json(yaml_file) + + with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])): + with with_service(cephadm_module, spec): + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [4200, 9094], + 'reconfig': True, + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [4200, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": {}, + }), + ) + + +class TestRGWService: + + @pytest.mark.parametrize( + "frontend, ssl, extra_args, expected", + [ + ('beast', False, ['tcp_nodelay=1'], + 'beast endpoint=[fd00:fd00:fd00:3000::1]:80 tcp_nodelay=1'), + ('beast', True, ['tcp_nodelay=0', 'max_header_size=65536'], + 'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo tcp_nodelay=0 max_header_size=65536'), + ('civetweb', False, [], 'civetweb port=[fd00:fd00:fd00:3000::1]:80'), + ('civetweb', True, None, + 'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'), + ] + ) + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_update(self, frontend, ssl, extra_args, expected, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.cache.update_host_networks('host1', { + 'fd00:fd00:fd00:3000::/64': { + 'if0': ['fd00:fd00:fd00:3000::1'] + } + }) + s = RGWSpec(service_id="foo", + 
networks=['fd00:fd00:fd00:3000::/64'], + ssl=ssl, + rgw_frontend_type=frontend, + rgw_frontend_extra_args=extra_args) + with with_service(cephadm_module, s) as dds: + _, f, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': f'client.{dds[0]}', + 'key': 'rgw_frontends', + }) + assert f == expected + + +class TestMonService: + + def test_set_crush_locations(self, cephadm_module: CephadmOrchestrator): + mgr = FakeMgr() + mon_service = MonService(mgr) + mon_spec = ServiceSpec(service_type='mon', crush_locations={'vm-00': ['datacenter=a', 'rack=1'], 'vm-01': ['datacenter=a'], 'vm-02': ['datacenter=b', 'rack=3']}) + + mon_daemons = [ + DaemonDescription(daemon_type='mon', daemon_id='vm-00', hostname='vm-00'), + DaemonDescription(daemon_type='mon', daemon_id='vm-01', hostname='vm-01'), + DaemonDescription(daemon_type='mon', daemon_id='vm-02', hostname='vm-02') + ] + mon_service.set_crush_locations(mon_daemons, mon_spec) + assert 'vm-00' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-00'] == ['datacenter=a', 'rack=1'] + assert 'vm-01' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-01'] == ['datacenter=a'] + assert 'vm-02' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-02'] == ['datacenter=b', 'rack=3'] + + +class TestSNMPGateway: + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V2c', + snmp_destination='192.168.1.1:162', + credentials={ + 'snmp_community': 'public' + }) + + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + "snmp_community": spec.credentials.get('snmp_community') + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V2c', + snmp_destination='192.168.1.1:162', + credentials={ + 'snmp_community': 'public' + }, + port=9465) + + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + "snmp_community": spec.credentials.get('snmp_community') + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9465], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9465], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + 
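+    # The two V3 tests below cover auth-only (no privacy) and auth+privacy
+    # deployments: with no auth_protocol given, SHA is expected as the default,
+    # an explicit MD5/AES choice is passed through, and the engine_id always
+    # ends up in the gateway's config blob. As a rough sketch only (field names
+    # taken from the specs below), the equivalent user-facing YAML would look
+    # something like:
+    #   service_type: snmp-gateway
+    #   placement:
+    #     count: 1
+    #   spec:
+    #     snmp_version: V3
+    #     snmp_destination: 192.168.1.1:162
+    #     engine_id: 8000C53F00000000
+    #     credentials:
+    #       snmp_v3_auth_username: myuser
+    #       snmp_v3_auth_password: mypassword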
@patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V3', + snmp_destination='192.168.1.1:162', + engine_id='8000C53F00000000', + credentials={ + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword' + }) + + config = { + 'destination': spec.snmp_destination, + 'snmp_version': spec.snmp_version, + 'snmp_v3_auth_protocol': 'SHA', + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_engine_id': '8000C53F00000000' + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V3', + snmp_destination='192.168.1.1:162', + engine_id='8000C53F00000000', + auth_protocol='MD5', + privacy_protocol='AES', + credentials={ + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_priv_password': 'mysecret', + }) + + config = { + 'destination': spec.snmp_destination, + 'snmp_version': spec.snmp_version, + 'snmp_v3_auth_protocol': 'MD5', + 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'), + 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'), + 'snmp_v3_engine_id': '8000C53F00000000', + 'snmp_v3_priv_protocol': spec.privacy_protocol, + 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'), + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + +class TestIngressService: + + @pytest.mark.parametrize( + "enable_haproxy_protocol", + [False, True], + ) + @patch("cephadm.inventory.Inventory.get_addr") + @patch("cephadm.utils.resolve_ip") + @patch("cephadm.inventory.HostCache.get_daemons_by_service") + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_nfs_multiple_nfs_same_rank( + self, + _run_cephadm, + _get_daemons_by_service, + _resolve_ip, _get_addr, + cephadm_module: CephadmOrchestrator, + enable_haproxy_protocol: bool, + ): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + def fake_resolve_ip(hostname: str) -> str: + if hostname == 'host1': + return '192.168.122.111' + elif hostname == 'host2': + return '192.168.122.222' + else: + 
return 'xxx.xxx.xxx.xxx' + _resolve_ip.side_effect = fake_resolve_ip + + def fake_get_addr(hostname: str) -> str: + return hostname + _get_addr.side_effect = fake_get_addr + + nfs_service = NFSServiceSpec( + service_id="foo", + placement=PlacementSpec( + count=1, + hosts=['host1', 'host2']), + port=12049, + enable_haproxy_protocol=enable_haproxy_protocol, + ) + + ispec = IngressSpec( + service_type='ingress', + service_id='nfs.foo', + backend_service='nfs.foo', + frontend_port=2049, + monitor_port=9049, + virtual_ip='192.168.122.100/24', + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + enable_haproxy_protocol=enable_haproxy_protocol, + ) + + cephadm_module.spec_store._specs = { + 'nfs.foo': nfs_service, + 'ingress.nfs.foo': ispec + } + cephadm_module.spec_store.spec_created = { + 'nfs.foo': datetime_now(), + 'ingress.nfs.foo': datetime_now() + } + + # in both test cases we'll do here, we want only the ip + # for the host1 nfs daemon as we'll end up giving that + # one higher rank_generation but the same rank as the one + # on host2 + haproxy_txt = ( + '# This file is generated by cephadm.\n' + 'global\n' + ' log 127.0.0.1 local2\n' + ' chroot /var/lib/haproxy\n' + ' pidfile /var/lib/haproxy/haproxy.pid\n' + ' maxconn 8000\n' + ' daemon\n' + ' stats socket /var/lib/haproxy/stats\n\n' + 'defaults\n' + ' mode tcp\n' + ' log global\n' + ' timeout queue 1m\n' + ' timeout connect 10s\n' + ' timeout client 1m\n' + ' timeout server 1m\n' + ' timeout check 10s\n' + ' maxconn 8000\n\n' + 'frontend stats\n' + ' mode http\n' + ' bind 192.168.122.100:9049\n' + ' bind host1:9049\n' + ' stats enable\n' + ' stats uri /stats\n' + ' stats refresh 10s\n' + ' stats auth admin:12345\n' + ' http-request use-service prometheus-exporter if { path /metrics }\n' + ' monitor-uri /health\n\n' + 'frontend frontend\n' + ' bind 192.168.122.100:2049\n' + ' default_backend backend\n\n' + 'backend backend\n' + ' mode tcp\n' + ' balance source\n' + ' hash-type consistent\n' + ) + if enable_haproxy_protocol: + haproxy_txt += ' default-server send-proxy-v2\n' + haproxy_txt += ' server nfs.foo.0 192.168.122.111:12049\n' + haproxy_expected_conf = { + 'files': {'haproxy.cfg': haproxy_txt} + } + + # verify we get the same cfg regardless of the order in which the nfs daemons are returned + # in this case both nfs are rank 0, so it should only take the one with rank_generation 1 a.k.a + # the one on host1 + nfs_daemons = [ + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]), + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]) + ] + _get_daemons_by_service.return_value = nfs_daemons + + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name())) + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + # swapping order now, should still pick out the one with the higher rank_generation + # in this case both nfs are rank 0, so it should only take the one with rank_generation 1 a.k.a + # the one on host1 + nfs_daemons = [ + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]), + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]) + ] + 
_get_daemons_by_service.return_value = nfs_daemons + + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name())) + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4'] + } + }) + + # the ingress backend + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ip="1.2.3.4/32") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.4\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind 1.2.3.4:8999\n ' + 'bind 1.2.3.7:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind 1.2.3.4:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # the ingress backend + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast', rgw_frontend_port=443, ssl=True) + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ip="1.2.3.4/32") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://[1::4]:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind 1.2.3.4:8999\n ' + 'bind 1::4:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind 1.2.3.4:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'default-server ssl\n ' + 'default-server verify none\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1::4:443 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ips_list=["1.2.3.4/32"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + # Test with only 1 IP on the list, as it will fail with more VIPS but only one host. 
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' + '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind *:8999\n ' + 'bind 1.2.3.7:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind *:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_haproxy_port_ips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4/32'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ip = '1.2.3.100' + frontend_port = 8089 + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=frontend_port, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip=f"{ip}/24") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the haproxy conf based on the specified spec + haproxy_daemon_spec = 
cephadm_module.cephadm_services['ingress'].prepare_create( + CephadmDaemonDeploySpec( + host='test', + daemon_type='haproxy', + daemon_id='ingress', + service_name=ispec.service_name())) + + assert haproxy_daemon_spec.port_ips == {str(frontend_port): ip} + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.1'): + with with_host(cephadm_module, 'test2', addr='1.2.3.2'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.1'] + } + }) + cephadm_module.cache.update_host_networks('test2', { + '1.2.3.0/24': { + 'if0': ['1.2.3.2'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.2'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + placement=PlacementSpec(hosts=['test', 'test2']), + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.1:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '1.2.3.2\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.100/24 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + 'vrrp_instance VI_1 {\n ' + 'state BACKUP\n ' + 'priority 90\n ' + 'interface if1\n ' + 'virtual_router_id 51\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 100.100.100.1\n ' + 'unicast_peer {\n ' + '100.100.100.2\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '100.100.100.100/24 dev if1\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_keepalive_interface_host_filtering(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + # we need to make sure keepalive daemons will have an interface + # on the hosts we deploy them on in order to set up their VIP. 
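+        # A keepalived daemon can only hold a VIP on a host that has an interface
+        # in that VIP's subnet, so placement is expected to be filtered to hosts
+        # whose cached networks cover every subnet in virtual_ips_list.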
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.1'): + with with_host(cephadm_module, 'test2', addr='1.2.3.2'): + with with_host(cephadm_module, 'test3', addr='1.2.3.3'): + with with_host(cephadm_module, 'test4', addr='1.2.3.3'): + # setup "test" and "test4" to have all the necessary interfaces, + # "test2" to have one of them (should still be filtered) + # and "test3" to have none of them + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.1'] + } + }) + cephadm_module.cache.update_host_networks('test2', { + '1.2.3.0/24': { + 'if0': ['1.2.3.2'] + }, + }) + cephadm_module.cache.update_host_networks('test4', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.4'] + } + }) + + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + placement=PlacementSpec(hosts=['test', 'test2', 'test3', 'test4']), + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # since we're never actually going to refresh the host here, + # check the tmp daemons to see what was placed during the apply + daemons = cephadm_module.cache._get_tmp_daemons() + keepalive_daemons = [d for d in daemons if d.daemon_type == 'keepalived'] + hosts_deployed_on = [d.hostname for d in keepalive_daemons] + assert 'test' in hosts_deployed_on + assert 'test2' not in hosts_deployed_on + assert 'test3' not in hosts_deployed_on + assert 'test4' in hosts_deployed_on + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock()) + @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock()) + @patch("cephadm.services.nfs.NFSService.purge", MagicMock()) + @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock()) + def test_keepalive_only_nfs_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # Check the ingress with multiple VIPs + s = NFSServiceSpec(service_id="foo", placement=PlacementSpec(count=1), + virtual_ip='1.2.3.0/24') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='nfs.foo', + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip='1.2.3.0/24', + keepalive_only=True) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + nfs_generated_conf, _ = cephadm_module.cephadm_services['nfs'].generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='foo.test.0.0', service_name=s.service_name())) + ganesha_conf = nfs_generated_conf['files']['ganesha.conf'] + assert "Bind_addr = 1.2.3.0/24" in ganesha_conf + + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + 
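+            # With keepalive_only=True there is no haproxy frontend to monitor;
+            # the expected config below therefore uses /usr/bin/false as the
+            # vrrp check script and binds the spec's virtual_ip (1.2.3.0/24)
+            # on the matching interface if0.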
keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/false"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.0/24 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock()) + @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock()) + @patch("cephadm.services.nfs.NFSService.purge", MagicMock()) + @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock()) + @patch("cephadm.inventory.Inventory.keys") + @patch("cephadm.inventory.Inventory.get_addr") + @patch("cephadm.utils.resolve_ip") + @patch("cephadm.inventory.HostCache.get_daemons_by_service") + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_nfs_proxy_protocol( + self, + _run_cephadm, + _get_daemons_by_service, + _resolve_ip, + _get_addr, + _inventory_keys, + cephadm_module: CephadmOrchestrator, + ): + """Verify that setting enable_haproxy_protocol for both ingress and + nfs services sets the desired configuration parameters in both + the haproxy config and nfs ganesha config. + """ + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + def fake_resolve_ip(hostname: str) -> str: + if hostname in ('host1', "192.168.122.111"): + return '192.168.122.111' + elif hostname in ('host2', '192.168.122.222'): + return '192.168.122.222' + else: + raise KeyError(hostname) + _resolve_ip.side_effect = fake_resolve_ip + _get_addr.side_effect = fake_resolve_ip + + def fake_keys(): + return ['host1', 'host2'] + _inventory_keys.side_effect = fake_keys + + nfs_service = NFSServiceSpec( + service_id="foo", + placement=PlacementSpec( + count=1, + hosts=['host1', 'host2']), + port=12049, + enable_haproxy_protocol=True, + ) + + ispec = IngressSpec( + service_type='ingress', + service_id='nfs.foo', + backend_service='nfs.foo', + frontend_port=2049, + monitor_port=9049, + virtual_ip='192.168.122.100/24', + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + enable_haproxy_protocol=True, + ) + + cephadm_module.spec_store._specs = { + 'nfs.foo': nfs_service, + 'ingress.nfs.foo': ispec + } + cephadm_module.spec_store.spec_created = { + 'nfs.foo': datetime_now(), + 'ingress.nfs.foo': datetime_now() + } + + haproxy_txt = ( + '# This file is generated by cephadm.\n' + 'global\n' + ' log 127.0.0.1 local2\n' + ' chroot /var/lib/haproxy\n' + ' pidfile /var/lib/haproxy/haproxy.pid\n' + ' maxconn 8000\n' + ' daemon\n' + ' stats socket /var/lib/haproxy/stats\n\n' + 'defaults\n' + ' mode tcp\n' + ' log global\n' + ' timeout queue 1m\n' + ' timeout connect 10s\n' + ' timeout client 1m\n' + ' timeout server 1m\n' + ' timeout check 10s\n' + ' maxconn 8000\n\n' + 'frontend stats\n' + ' mode http\n' + ' bind 192.168.122.100:9049\n' + ' bind 192.168.122.111:9049\n' + ' stats enable\n' + ' stats uri /stats\n' + ' stats refresh 10s\n' + ' stats auth admin:12345\n' + ' http-request use-service prometheus-exporter if { path /metrics }\n' + ' monitor-uri 
/health\n\n' + 'frontend frontend\n' + ' bind 192.168.122.100:2049\n' + ' default_backend backend\n\n' + 'backend backend\n' + ' mode tcp\n' + ' balance source\n' + ' hash-type consistent\n' + ' default-server send-proxy-v2\n' + ' server nfs.foo.0 192.168.122.111:12049\n' + ) + haproxy_expected_conf = { + 'files': {'haproxy.cfg': haproxy_txt} + } + + nfs_ganesha_txt = ( + "# This file is generated by cephadm.\n" + 'NFS_CORE_PARAM {\n' + ' Enable_NLM = false;\n' + ' Enable_RQUOTA = false;\n' + ' Protocols = 4;\n' + ' NFS_Port = 2049;\n' + ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n' + '}\n' + '\n' + 'NFSv4 {\n' + ' Delegations = false;\n' + " RecoveryBackend = 'rados_cluster';\n" + ' Minor_Versions = 1, 2;\n' + '}\n' + '\n' + 'RADOS_KV {\n' + ' UserId = "nfs.foo.test.0.0";\n' + ' nodeid = "nfs.foo.None";\n' + ' pool = ".nfs";\n' + ' namespace = "foo";\n' + '}\n' + '\n' + 'RADOS_URLS {\n' + ' UserId = "nfs.foo.test.0.0";\n' + ' watch_url = ' + '"rados://.nfs/foo/conf-nfs.foo";\n' + '}\n' + '\n' + 'RGW {\n' + ' cluster = "ceph";\n' + ' name = "client.nfs.foo.test.0.0-rgw";\n' + '}\n' + '\n' + "%url rados://.nfs/foo/conf-nfs.foo" + ) + nfs_expected_conf = { + 'files': {'ganesha.conf': nfs_ganesha_txt}, + 'config': '', + 'extra_args': ['-N', 'NIV_EVENT'], + 'keyring': ( + '[client.nfs.foo.test.0.0]\n' + 'key = None\n' + ), + 'namespace': 'foo', + 'pool': '.nfs', + 'rgw': { + 'cluster': 'ceph', + 'keyring': ( + '[client.nfs.foo.test.0.0-rgw]\n' + 'key = None\n' + ), + 'user': 'nfs.foo.test.0.0-rgw', + }, + 'userid': 'nfs.foo.test.0.0', + } + + nfs_daemons = [ + DaemonDescription( + daemon_type='nfs', + daemon_id='foo.0.1.host1.qwerty', + hostname='host1', + rank=0, + rank_generation=1, + ports=[12049], + ), + DaemonDescription( + daemon_type='nfs', + daemon_id='foo.0.0.host2.abcdef', + hostname='host2', + rank=0, + rank_generation=0, + ports=[12049], + ), + ] + _get_daemons_by_service.return_value = nfs_daemons + + ingress_svc = cephadm_module.cephadm_services['ingress'] + nfs_svc = cephadm_module.cephadm_services['nfs'] + + # add host network info to one host to test the behavior of + # adding all known-good addresses of the host to the list. 
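+        # (The additional, redundant and link-local entries below are what produce the
+        # "HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222" line in the
+        # expected ganesha config above.)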
+ cephadm_module.cache.update_host_networks('host1', { + # this one is additional + '10.10.2.0/24': { + 'eth1': ['10.10.2.20'] + }, + # this is redundant and will be skipped + '192.168.122.0/24': { + 'eth0': ['192.168.122.111'] + }, + # this is a link-local address and will be ignored + "fe80::/64": { + "veth0": [ + "fe80::8cf5:25ff:fe1c:d963" + ], + "eth0": [ + "fe80::c7b:cbff:fef6:7370" + ], + "eth1": [ + "fe80::7201:25a7:390b:d9a7" + ] + }, + }) + + haproxy_generated_conf, _ = ingress_svc.haproxy_generate_config( + CephadmDaemonDeploySpec( + host='host1', + daemon_id='ingress', + service_name=ispec.service_name(), + ), + ) + assert haproxy_generated_conf == haproxy_expected_conf + + nfs_generated_conf, _ = nfs_svc.generate_config( + CephadmDaemonDeploySpec( + host='test', + daemon_id='foo.test.0.0', + service_name=nfs_service.service_name(), + ), + ) + assert nfs_generated_conf == nfs_expected_conf + + +class TestCephFsMirror: + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('cephfs-mirror')): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'mgr module enable', + 'module': 'mirroring' + }) + + +class TestJaeger: + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_query(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = TracingSpec(es_nodes="192.168.0.1:9200", + service_type="jaeger-query") + + config = {"elasticsearch_nodes": "http://192.168.0.1:9200"} + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "jaeger-query.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-query.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [16686], + }, + "meta": { + 'service_name': 'jaeger-query', + 'ports': [16686], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_collector_es_deploy(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + collector_spec = TracingSpec(service_type="jaeger-collector") + es_spec = TracingSpec(service_type="elasticsearch") + es_config = {} + + with with_host(cephadm_module, 'test'): + collector_config = { + "elasticsearch_nodes": f'http://{build_url(host=cephadm_module.inventory.get_addr("test"), port=9200).lstrip("/")}'} + with with_service(cephadm_module, es_spec): + _run_cephadm.assert_called_with( + "test", + "elasticsearch.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'elasticsearch.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9200], + }, + "meta": { + 'service_name': 'elasticsearch', + 'ports': [9200], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": es_config, + }), + ) + with with_service(cephadm_module, collector_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-collector.test", + ['_orch', 'deploy'], + [], + 
stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-collector.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [14250], + }, + "meta": { + 'service_name': 'jaeger-collector', + 'ports': [14250], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": collector_config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_agent(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + collector_spec = TracingSpec(service_type="jaeger-collector", es_nodes="192.168.0.1:9200") + collector_config = {"elasticsearch_nodes": "http://192.168.0.1:9200"} + + agent_spec = TracingSpec(service_type="jaeger-agent") + agent_config = {"collector_nodes": "test:14250"} + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, collector_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-collector.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-collector.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [14250], + }, + "meta": { + 'service_name': 'jaeger-collector', + 'ports': [14250], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": collector_config, + }), + ) + with with_service(cephadm_module, agent_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-agent.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-agent.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [6799], + }, + "meta": { + 'service_name': 'jaeger-agent', + 'ports': [6799], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": agent_config, + }), + ) diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py new file mode 100644 index 000000000..78a2d7311 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -0,0 +1,590 @@ +# Disable autopep8 for this file: + +# fmt: off + +import json + +import pytest + +from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \ + IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec +from orchestrator import DaemonDescription, OrchestratorError + + +@pytest.mark.parametrize( + "spec_json", + json.loads("""[ +{ + "placement": { + "count": 1 + }, + "service_type": "alertmanager" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "crash" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "grafana", + "protocol": "https" +}, +{ + "placement": { + "count": 2 + }, + "service_type": "mgr" +}, +{ + "placement": { + "count": 5 + }, + "service_type": "mon" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "node-exporter" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "prometheus" +}, +{ + "placement": { + "hosts": [ + { + "hostname": "ceph-001", + "network": "", + "name": "" + } + ] + }, + "service_type": "rgw", + "service_id": "default-rgw-realm.eu-central-1.1", + "rgw_realm": "default-rgw-realm", + "rgw_zone": "eu-central-1" +}, +{ + "service_type": "osd", + "service_id": "osd_spec_default", + "placement": { + "host_pattern": "*" + }, + 
"data_devices": { + "model": "MC-55-44-XZ" + }, + "db_devices": { + "model": "SSD-123-foo" + }, + "wal_devices": { + "model": "NVME-QQQQ-987" + } +} +] +""") +) +def test_spec_octopus(spec_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. + + spec = ServiceSpec.from_json(spec_json) + + # just some verification that we can sill read old octopus specs + def convert_to_old_style_json(j): + j_c = dict(j.copy()) + j_c.pop('service_name', None) + if 'spec' in j_c: + spec = j_c.pop('spec') + j_c.update(spec) + if 'placement' in j_c: + if 'hosts' in j_c['placement']: + j_c['placement']['hosts'] = [ + { + 'hostname': HostPlacementSpec.parse(h).hostname, + 'network': HostPlacementSpec.parse(h).network, + 'name': HostPlacementSpec.parse(h).name + } + for h in j_c['placement']['hosts'] + ] + j_c.pop('objectstore', None) + j_c.pop('filter_logic', None) + j_c.pop('anonymous_access', None) + return j_c + + assert spec_json == convert_to_old_style_json(spec.to_json()) + + +@pytest.mark.parametrize( + "dd_json", + json.loads("""[ + { + "hostname": "ceph-001", + "container_id": "d94d7969094d", + "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0", + "container_image_name": "docker.io/prom/alertmanager:latest", + "daemon_id": "ceph-001", + "daemon_type": "alertmanager", + "version": "0.20.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725856", + "created": "2020-04-02T19:23:08.829543", + "started": "2020-04-03T07:29:16.932838", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "c4b036202241", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "crash", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725903", + "created": "2020-04-02T19:23:11.390694", + "started": "2020-04-03T07:29:16.910897", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "5b7b94b48f31", + "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4", + "container_image_name": "docker.io/ceph/ceph-grafana:latest", + "daemon_id": "ceph-001", + "daemon_type": "grafana", + "version": "6.6.2", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725950", + "created": "2020-04-02T19:23:52.025088", + "started": "2020-04-03T07:29:16.847972", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "9ca007280456", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001.gkjwqp", + "daemon_type": "mgr", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725807", + "created": "2020-04-02T19:22:18.648584", + "started": "2020-04-03T07:29:16.856153", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "3d1ba9a2b697", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "mon", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725715", + "created": "2020-04-02T19:22:13.863300", + "started": 
"2020-04-03T07:29:17.206024", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "36d026c68ba1", + "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87", + "container_image_name": "docker.io/prom/node-exporter:latest", + "daemon_id": "ceph-001", + "daemon_type": "node-exporter", + "version": "0.18.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725996", + "created": "2020-04-02T19:23:53.880197", + "started": "2020-04-03T07:29:16.880044", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "faf76193cbfe", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "0", + "daemon_type": "osd", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726088", + "created": "2020-04-02T20:35:02.991435", + "started": "2020-04-03T07:29:19.373956", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "f82505bae0f1", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "1", + "daemon_type": "osd", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726134", + "created": "2020-04-02T20:35:17.142272", + "started": "2020-04-03T07:29:19.374002", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "2708d84cd484", + "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653", + "container_image_name": "docker.io/prom/prometheus:latest", + "daemon_id": "ceph-001", + "daemon_type": "prometheus", + "version": "2.17.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726042", + "created": "2020-04-02T19:24:10.281163", + "started": "2020-04-03T07:29:16.926292", + "is_active": false + }, + { + "hostname": "ceph-001", + "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo", + "daemon_type": "rgw", + "status": 1, + "status_desc": "starting", + "is_active": false + } +]""") +) +def test_dd_octopus(dd_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. + + # Convert datetime properties to old style. 
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292 + def convert_to_old_style_json(j): + for k in ['last_refresh', 'created', 'started', 'last_deployed', + 'last_configured']: + if k in j: + j[k] = j[k].rstrip('Z') + del j['daemon_name'] + return j + + assert dd_json == convert_to_old_style_json( + DaemonDescription.from_json(dd_json).to_json()) + + +@pytest.mark.parametrize("spec,dd,valid", +[ # noqa: E128 + # https://tracker.ceph.com/issues/44934 + ( + RGWSpec( + service_id="foo", + rgw_realm="default-rgw-realm", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm + RGWSpec( + service_id="foo.bar", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.bar.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm or zone + RGWSpec( + service_id="bar", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="bar.host.domain.tld.ytywjo", + hostname="host.domain.tld", + ), + True + ), + ( + # explicit naming + RGWSpec( + service_id="realm.zone", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="realm.zone.a", + hostname="smithi028", + ), + True + ), + ( + # without host + RGWSpec( + service_type='rgw', + service_id="foo", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # without host (2) + RGWSpec( + service_type='rgw', + service_id="default-rgw-realm.eu-central-1.1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # service_id contains hostname + # (sort of) https://tracker.ceph.com/issues/45294 + RGWSpec( + service_id="default.rgw.realm.ceph.001", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo", + hostname="ceph.001", + ), + True + ), + + # https://tracker.ceph.com/issues/45293 + ( + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + ServiceSpec( + service_type='mds', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45617 + ( + # daemon_id does not contain hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a", + hostname="host1", + ), + True + ), + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="host1", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="host1", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45399 + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1.site", + ), + True + ), + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1.site", + ), + True + ), + + # https://tracker.ceph.com/issues/45293 + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1", + ), + True + ), + ( + # service_id contains a '.' 
char + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1", + hostname="host1", + ), + True + ), + ( + # trailing chars after hostname + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # chars after hostname without '.' + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1abc123", + hostname="host1", + ), + False + ), + ( + # chars before hostname without '.' + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="ahost1.abc123", + hostname="host1", + ), + False + ), + + # https://tracker.ceph.com/issues/45293 + ( + IscsiServiceSpec( + service_type='iscsi', + service_id="a", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + IscsiServiceSpec( + service_type='iscsi', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # fixed daemon id for teuthology. + IscsiServiceSpec( + service_type='iscsi', + service_id='iscsi', + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="iscsi.a", + hostname="host1", + ), + True + ), + + ( + CustomContainerSpec( + service_type='container', + service_id='hello-world', + image='docker.io/library/hello-world:latest', + ), + DaemonDescription( + daemon_type='container', + daemon_id='hello-world.mgr0', + hostname='mgr0', + ), + True + ), + +]) +def test_daemon_description_service_name(spec: ServiceSpec, + dd: DaemonDescription, + valid: bool): + if valid: + assert spec.service_name() == dd.service_name() + else: + with pytest.raises(OrchestratorError): + dd.service_name() diff --git a/src/pybind/mgr/cephadm/tests/test_ssh.py b/src/pybind/mgr/cephadm/tests/test_ssh.py new file mode 100644 index 000000000..29f01b6c7 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_ssh.py @@ -0,0 +1,105 @@ +import asyncssh +from asyncssh.process import SSHCompletedProcess +from unittest import mock +try: + # AsyncMock was not added until python 3.8 + from unittest.mock import AsyncMock +except ImportError: + from asyncmock import AsyncMock +except ImportError: + AsyncMock = None +import pytest + + +try: + from asyncssh.misc import ConnectionLost +except ImportError: + ConnectionLost = None + +from ceph.deployment.hostspec import HostSpec + +from cephadm import CephadmOrchestrator +from cephadm.serve import CephadmServe +from cephadm.tests.fixtures import with_host, wait, async_side_effect +from orchestrator import OrchestratorError + + +@pytest.mark.skipif(ConnectionLost is None, reason='no asyncssh') +class TestWithSSH: + @mock.patch("cephadm.ssh.SSHManager._execute_command") + @mock.patch("cephadm.ssh.SSHManager._check_execute_command") + def test_offline(self, check_execute_command, execute_command, cephadm_module): + check_execute_command.side_effect = async_side_effect('') + execute_command.side_effect = async_side_effect(('', '', 0)) + + if not AsyncMock: + # can't run this test if we could not import AsyncMock + return + mock_connect = AsyncMock(return_value='') + with mock.patch("asyncssh.connect", new=mock_connect) as asyncssh_connect: + with with_host(cephadm_module, 'test'): + asyncssh_connect.side_effect = ConnectionLost('reason') + code, out, err = cephadm_module.check_host('test') + assert out == '' + assert 
"Failed to connect to test at address (1::4)" in err + + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4', status='Offline').to_json() + + asyncssh_connect.return_value = mock.MagicMock() + asyncssh_connect.side_effect = None + assert CephadmServe(cephadm_module)._check_host('test') is None + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4').to_json() + + def test_ssh_remote_cmds_execution(self, cephadm_module): + + if not AsyncMock: + # can't run this test if we could not import AsyncMock + return + + class FakeConn: + def __init__(self, exception=None, returncode=0): + self.exception = exception + self.returncode = returncode + + async def run(self, *args, **kwargs): + if self.exception: + raise self.exception + else: + return SSHCompletedProcess(returncode=self.returncode, stdout="", stderr="") + + async def close(self): + pass + + def run_test(host, conn, expected_error): + mock_connect = AsyncMock(return_value=conn) + with pytest.raises(OrchestratorError, match=expected_error): + with mock.patch("asyncssh.connect", new=mock_connect): + with with_host(cephadm_module, host): + CephadmServe(cephadm_module)._check_host(host) + + # Test case 1: command failure + run_test('test1', FakeConn(returncode=1), "Command .+ failed") + + # Test case 2: connection error + run_test('test2', FakeConn(exception=asyncssh.ChannelOpenError(1, "", "")), "Unable to reach remote host test2.") + + # Test case 3: asyncssh ProcessError + stderr = "my-process-stderr" + run_test('test3', FakeConn(exception=asyncssh.ProcessError(returncode=3, + env="", + command="", + subsystem="", + exit_status="", + exit_signal="", + stderr=stderr, + stdout="")), f"Cannot execute the command.+{stderr}") + # Test case 4: generic error + run_test('test4', FakeConn(exception=Exception), "Generic error while executing command.+") + + +@pytest.mark.skipif(ConnectionLost is not None, reason='asyncssh') +class TestWithoutSSH: + def test_can_run(self, cephadm_module: CephadmOrchestrator): + assert cephadm_module.can_run() == (False, "loading asyncssh library:No module named 'asyncssh'") diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py new file mode 100644 index 000000000..f67304348 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_template.py @@ -0,0 +1,33 @@ +import pathlib + +import pytest + +from cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError + + +def test_render(cephadm_module, fs): + template_base = (pathlib.Path(__file__).parent / '../templates').resolve() + fake_template = template_base / 'foo/bar' + fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}') + + template_mgr = TemplateMgr(cephadm_module) + value = 'test' + + # with base context + expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value) + assert template_mgr.render('foo/bar', {'var': value}) == expected_text + + # without base context + with pytest.raises(UndefinedError): + template_mgr.render('foo/bar', {'var': value}, managed_context=False) + + # override the base context + context = { + 'cephadm_managed': 'abc', + 'var': value + } + assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value) + + # template not found + with pytest.raises(TemplateNotFoundError): + template_mgr.render('foo/bar/2', {}) diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py 
b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py new file mode 100644 index 000000000..66feaee31 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py @@ -0,0 +1,256 @@ +import pytest +import json +from tests import mock +from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR +from cephadm.inventory import TunedProfileStore +from ceph.utils import datetime_now +from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec +from cephadm.ssh import SSHManager +from orchestrator import HostSpec + +from typing import List, Dict + + +class SaveError(Exception): + pass + + +class FakeCache: + def __init__(self, + hosts, + schedulable_hosts, + unreachable_hosts): + self.hosts = hosts + self.unreachable_hosts = [HostSpec(h) for h in unreachable_hosts] + self.schedulable_hosts = [HostSpec(h) for h in schedulable_hosts] + self.last_tuned_profile_update = {} + + def get_hosts(self): + return self.hosts + + def get_schedulable_hosts(self): + return self.schedulable_hosts + + def get_unreachable_hosts(self): + return self.unreachable_hosts + + def get_draining_hosts(self): + return [] + + def is_host_unreachable(self, hostname: str): + return hostname in [h.hostname for h in self.get_unreachable_hosts()] + + def is_host_schedulable(self, hostname: str): + return hostname in [h.hostname for h in self.get_schedulable_hosts()] + + def is_host_draining(self, hostname: str): + return hostname in [h.hostname for h in self.get_draining_hosts()] + + @property + def networks(self): + return {h: {'a': {'b': ['c']}} for h in self.hosts} + + def host_needs_tuned_profile_update(self, host, profile_name): + return profile_name == 'p2' + + +class FakeMgr: + def __init__(self, + hosts: List[str], + schedulable_hosts: List[str], + unreachable_hosts: List[str], + profiles: Dict[str, TunedProfileSpec]): + self.cache = FakeCache(hosts, schedulable_hosts, unreachable_hosts) + self.tuned_profiles = TunedProfileStore(self) + self.tuned_profiles.profiles = profiles + self.ssh = SSHManager(self) + self.offline_hosts = [] + self.log_refresh_metadata = False + + def set_store(self, what: str, value: str): + raise SaveError(f'{what}: {value}') + + def get_store(self, what: str): + if what == 'tuned_profiles': + return json.dumps({'x': TunedProfileSpec('x', + PlacementSpec(hosts=['x']), + {'x': 'x'}).to_json(), + 'y': TunedProfileSpec('y', + PlacementSpec(hosts=['y']), + {'y': 'y'}).to_json()}) + return '' + + +class TestTunedProfiles: + tspec1 = TunedProfileSpec('p1', + PlacementSpec(hosts=['a', 'b', 'c']), + {'setting1': 'value1', + 'setting2': 'value2', + 'setting with space': 'value with space'}) + tspec2 = TunedProfileSpec('p2', + PlacementSpec(hosts=['a', 'c']), + {'something': 'something_else', + 'high': '5'}) + tspec3 = TunedProfileSpec('p3', + PlacementSpec(hosts=['c']), + {'wow': 'wow2', + 'setting with space': 'value with space', + 'down': 'low'}) + + def profiles_to_calls(self, tp: TunedProfileUtils, profiles: List[TunedProfileSpec]) -> List[Dict[str, str]]: + # this function takes a list of tuned profiles and returns a mapping from + # profile names to the string that will be written to the actual config file on the host. 
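+        # (concretely, a list of single-key dicts, e.g. [{'p1': '<rendered settings>'}], which is the per-host shape passed to _write_tuned_profiles and _remove_stray_tuned_profiles in these tests)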
+ res = [] + for p in profiles: + p_str = tp._profile_to_str(p) + res.append({p.profile_name: p_str}) + return res + + @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles") + @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._write_tuned_profiles") + def test_write_all_tuned_profiles(self, _write_profiles, _rm_profiles): + profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + profiles) + tp = TunedProfileUtils(mgr) + tp._write_all_tuned_profiles() + # need to check that _write_tuned_profiles is correctly called with the + # profiles that match the tuned profile placements and with the correct + # strings that should be generated from the settings the profiles have. + # the _profiles_to_calls helper allows us to generated the input we + # should check against + calls = [ + mock.call('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])), + mock.call('b', self.profiles_to_calls(tp, [self.tspec1])), + mock.call('c', self.profiles_to_calls(tp, [self.tspec1, self.tspec2, self.tspec3])) + ] + _write_profiles.assert_has_calls(calls, any_order=True) + + @mock.patch('cephadm.ssh.SSHManager.check_execute_command') + def test_rm_stray_tuned_profiles(self, _check_execute_command): + profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} + # for this test, going to use host "a" and put 4 cephadm generated + # profiles "p1" "p2", "p3" and "who" only two of which should be there ("p1", "p2") + # as well as a file not generated by cephadm. Only the "p3" and "who" + # profiles should be removed from the host. This should total to 4 + # calls to check_execute_command, 1 "ls", 2 "rm", and 1 "sysctl --system" + _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf', + 'p2-cephadm-tuned-profile.conf', + 'p3-cephadm-tuned-profile.conf', + 'who-cephadm-tuned-profile.conf', + 'dont-touch-me']) + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + profiles) + tp = TunedProfileUtils(mgr) + tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) + calls = [ + mock.call('a', ['ls', SYSCTL_DIR], log_command=False), + mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']), + mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']), + mock.call('a', ['sysctl', '--system']) + ] + _check_execute_command.assert_has_calls(calls, any_order=True) + + @mock.patch('cephadm.ssh.SSHManager.check_execute_command') + @mock.patch('cephadm.ssh.SSHManager.write_remote_file') + def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command): + profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} + # for this test we will use host "a" and have it so host_needs_tuned_profile_update + # returns True for p2 and False for p1 (see FakeCache class). 
So we should see + # 2 ssh calls, one to write p2, one to run sysctl --system + _check_execute_command.return_value = 'success' + _write_remote_file.return_value = 'success' + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + profiles) + tp = TunedProfileUtils(mgr) + tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) + _check_execute_command.assert_called_with('a', ['sysctl', '--system']) + _write_remote_file.assert_called_with( + 'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8')) + + def test_dont_write_to_unreachable_hosts(self): + profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} + + # list host "a" and "b" as hosts that exist, "a" will be + # a normal, schedulable host and "b" is considered unreachable + mgr = FakeMgr(['a', 'b'], + ['a'], + ['b'], + profiles) + tp = TunedProfileUtils(mgr) + + assert 'a' not in tp.mgr.cache.last_tuned_profile_update + assert 'b' not in tp.mgr.cache.last_tuned_profile_update + + # with an online host, should proceed as normal. Providing + # no actual profiles here though so the only actual action taken + # is updating the entry in the last_tuned_profile_update dict + tp._write_tuned_profiles('a', {}) + assert 'a' in tp.mgr.cache.last_tuned_profile_update + + # trying to write to an unreachable host should be a no-op + # and return immediately. No entry for 'b' should be added + # to the last_tuned_profile_update dict + tp._write_tuned_profiles('b', {}) + assert 'b' not in tp.mgr.cache.last_tuned_profile_update + + def test_store(self): + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + {}) + tps = TunedProfileStore(mgr) + save_str_p1 = 'tuned_profiles: ' + json.dumps({'p1': self.tspec1.to_json()}) + tspec1_updated = self.tspec1.copy() + tspec1_updated.settings.update({'new-setting': 'new-value'}) + save_str_p1_updated = 'tuned_profiles: ' + json.dumps({'p1': tspec1_updated.to_json()}) + save_str_p1_updated_p2 = 'tuned_profiles: ' + \ + json.dumps({'p1': tspec1_updated.to_json(), 'p2': self.tspec2.to_json()}) + tspec2_updated = self.tspec2.copy() + tspec2_updated.settings.pop('something') + save_str_p1_updated_p2_updated = 'tuned_profiles: ' + \ + json.dumps({'p1': tspec1_updated.to_json(), 'p2': tspec2_updated.to_json()}) + save_str_p2_updated = 'tuned_profiles: ' + json.dumps({'p2': tspec2_updated.to_json()}) + with pytest.raises(SaveError) as e: + tps.add_profile(self.tspec1) + assert str(e.value) == save_str_p1 + assert 'p1' in tps + with pytest.raises(SaveError) as e: + tps.add_setting('p1', 'new-setting', 'new-value') + assert str(e.value) == save_str_p1_updated + assert 'new-setting' in tps.list_profiles()[0].settings + with pytest.raises(SaveError) as e: + tps.add_profile(self.tspec2) + assert str(e.value) == save_str_p1_updated_p2 + assert 'p2' in tps + assert 'something' in tps.list_profiles()[1].settings + with pytest.raises(SaveError) as e: + tps.rm_setting('p2', 'something') + assert 'something' not in tps.list_profiles()[1].settings + assert str(e.value) == save_str_p1_updated_p2_updated + with pytest.raises(SaveError) as e: + tps.rm_profile('p1') + assert str(e.value) == save_str_p2_updated + assert 'p1' not in tps + assert 'p2' in tps + assert len(tps.list_profiles()) == 1 + assert tps.list_profiles()[0].profile_name == 'p2' + + cur_last_updated = tps.last_updated('p2') + new_last_updated = datetime_now() + assert cur_last_updated != new_last_updated + tps.set_last_updated('p2', new_last_updated) + assert 
tps.last_updated('p2') == new_last_updated + + # check FakeMgr get_store func to see what is expected to be found in Key Store here + tps.load() + assert 'x' in tps + assert 'y' in tps + assert [p for p in tps.list_profiles() if p.profile_name == 'x'][0].settings == {'x': 'x'} + assert [p for p in tps.list_profiles() if p.profile_name == 'y'][0].settings == {'y': 'y'} diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py new file mode 100644 index 000000000..3b5c305b5 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -0,0 +1,481 @@ +import json +from unittest import mock + +import pytest + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec +from cephadm import CephadmOrchestrator +from cephadm.upgrade import CephadmUpgrade, UpgradeState +from cephadm.ssh import HostConnectionError +from cephadm.utils import ContainerInspectInfo +from orchestrator import OrchestratorError, DaemonDescription +from .fixtures import _run_cephadm, wait, with_host, with_service, \ + receive_agent_metadata, async_side_effect + +from typing import List, Tuple, Optional + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_start(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True): + assert wait(cephadm_module, cephadm_module.upgrade_start( + 'image_id', None)) == 'Initiating upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_status() + ).target_image == 'image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_pause() + ) == 'Paused upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_resume() + ) == 'Resumed upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_stop() + ) == 'Stopped upgrade to image_id' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.offline_hosts = set(['test2']) + with pytest.raises(OrchestratorError, match=r"Upgrade aborted - Some host\(s\) are currently offline: {'test2'}"): + cephadm_module.upgrade_start('image_id', None) + cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_daemons_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0) + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", side_effect=HostConnectionError('connection failure reason', 'test2', '192.168.122.1')): + _to_upgrade = [(DaemonDescription(daemon_type='crash', daemon_id='test2', hostname='test2'), True)] + with pytest.raises(HostConnectionError, match=r"connection failure reason"): + cephadm_module.upgrade._upgrade_daemons(_to_upgrade, 'target_image', ['digest1']) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_do_upgrade_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + 
cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0) + cephadm_module.offline_hosts = set(['test2']) + with pytest.raises(HostConnectionError, match=r"Host\(s\) were marked offline: {'test2'}"): + cephadm_module.upgrade._do_upgrade() + cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.module.CephadmOrchestrator.remove_health_warning") +def test_upgrade_resume_clear_health_warnings(_rm_health_warning, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0, paused=True) + _rm_health_warning.return_value = None + assert wait(cephadm_module, cephadm_module.upgrade_resume() + ) == 'Resumed upgrade to target_image' + calls_list = [mock.call(alert_id) for alert_id in cephadm_module.upgrade.UPGRADE_ERRORS] + _rm_health_warning.assert_has_calls(calls_list, any_order=True) + + +@mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', lambda _: (17, 2, 6)) +@mock.patch("cephadm.serve.CephadmServe._get_container_image_info") +def test_upgrade_check_with_ceph_version(_get_img_info, cephadm_module: CephadmOrchestrator): + # This test was added to avoid screwing up the image base so that + # when the version was added to it it made an incorrect image + # The issue caused the image to come out as + # quay.io/ceph/ceph:v18:v18.2.0 + # see https://tracker.ceph.com/issues/63150 + _img = '' + + def _fake_get_img_info(img_name): + nonlocal _img + _img = img_name + return ContainerInspectInfo( + 'image_id', + '18.2.0', + 'digest' + ) + + _get_img_info.side_effect = _fake_get_img_info + cephadm_module.upgrade_check('', '18.2.0') + assert _img == 'quay.io/ceph/ceph:v18.2.0' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@pytest.mark.parametrize("use_repo_digest", + [ + False, + True + ]) +def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + cephadm_module.set_container_image('global', 'from_image') + cephadm_module.use_repo_digest = use_repo_digest + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True), \ + mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name", + return_value='foo'), \ + mock.patch("cephadm.module.CephadmOrchestrator.version", + new_callable=mock.PropertyMock) as version_mock, \ + mock.patch("cephadm.module.CephadmOrchestrator.get", + return_value={ + # capture fields in both mon and osd maps + "require_osd_release": "pacific", + "min_mon_release": 16, + }): + version_mock.return_value = 'ceph version 18.2.1 (somehash)' + assert wait(cephadm_module, cephadm_module.upgrade_start( + 'to_image', None)) == 'Initiating upgrade to to_image' + + assert wait(cephadm_module, cephadm_module.upgrade_status() + ).target_image == 'to_image' + + def _versions_mock(cmd): + return json.dumps({ + 'mgr': { + 'ceph version 1.2.3 (asdf) blah': 1 + } + }) + + cephadm_module._mon_command_mock_versions = _versions_mock + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['to_image@repo_digest'], + 'ceph_version': 'ceph version 18.2.3 (hash)', + }))): + + 
cephadm_module.upgrade._do_upgrade() + + assert cephadm_module.upgrade_status is not None + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name=list(cephadm_module.cache.daemons['host1'].keys())[0], + style='cephadm', + fsid='fsid', + container_id='container_id', + container_image_name='to_image', + container_image_id='image_id', + container_image_digests=['to_image@repo_digest'], + deployed_by=['to_image@repo_digest'], + version='version', + state='running', + ) + ]) + )): + receive_agent_metadata(cephadm_module, 'host1', ['ls']) + receive_agent_metadata(cephadm_module, 'host2', ['ls']) + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['to_image@repo_digest'], + 'ceph_version': 'ceph version 18.2.3 (hash)', + }))): + cephadm_module.upgrade._do_upgrade() + + _, image, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': 'global', + 'key': 'container_image', + }) + if use_repo_digest: + assert image == 'to_image@repo_digest' + else: + assert image == 'to_image' + + +def test_upgrade_state_null(cephadm_module: CephadmOrchestrator): + # This test validates https://tracker.ceph.com/issues/47580 + cephadm_module.set_store('upgrade_state', 'null') + CephadmUpgrade(cephadm_module) + assert CephadmUpgrade(cephadm_module).upgrade_state is None + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''): + with pytest.raises(OrchestratorError): + wait(cephadm_module, cephadm_module.upgrade_start('image_id', None)) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.CephadmOrchestrator.check_mon_command") +def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator): + # only 2 monitors, not enough for ok-to-stop to ever pass + check_mon_command.return_value = ( + 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '') + assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop() + + # 3 monitors, ok-to-stop should work fine + check_mon_command.return_value = ( + 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '') + assert cephadm_module.upgrade._enough_mons_for_ok_to_stop() + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.module.HostCache.get_daemons_by_service") +@mock.patch("cephadm.CephadmOrchestrator.get") +def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator): + get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription()]] + assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test')) + + get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]] + assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test')) + + get.side_effect = [{'filesystems': [{'mdsmap': 
{'fs_name': 'myfs.test', 'max_mds': 1}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]] + assert cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test')) + + +@pytest.mark.parametrize("current_version, use_tags, show_all_versions, tags, result", + [ + # several candidate versions (from different major versions) + ( + (16, 1, '16.1.0'), + False, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v16.2.5', + 'v16.1.4', + 'v16.1.3', + 'v15.2.0', + ], + ['17.1.0', '16.2.7', '16.2.6', '16.2.5', '16.1.4', '16.1.3'] + ), + # candidate minor versions are available + ( + (16, 1, '16.1.0'), + False, # use_tags + False, # show_all_versions + [ + 'v16.2.2', + 'v16.2.1', + 'v16.1.6', + ], + ['16.2.2', '16.2.1', '16.1.6'] + ), + # all versions are less than the current version + ( + (17, 2, '17.2.0'), + False, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + ], + [] + ), + # show all versions (regardless of the current version) + ( + (16, 1, '16.1.0'), + False, # use_tags + True, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v15.1.0', + 'v14.2.0', + ], + ['17.1.0', '16.2.7', '16.2.6', '15.1.0', '14.2.0'] + ), + # show all tags (regardless of the current version and show_all_versions flag) + ( + (16, 1, '16.1.0'), + True, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v16.2.5', + 'v16.1.4', + 'v16.1.3', + 'v15.2.0', + ], + ['v15.2.0', 'v16.1.3', 'v16.1.4', 'v16.2.5', + 'v16.2.6', 'v16.2.7', 'v17.1.0'] + ), + ]) +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_ls(current_version, use_tags, show_all_versions, tags, result, cephadm_module: CephadmOrchestrator): + with mock.patch('cephadm.upgrade.Registry.get_tags', return_value=tags): + with mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', return_value=current_version): + out = cephadm_module.upgrade.upgrade_ls(None, use_tags, show_all_versions) + if use_tags: + assert out['tags'] == result + else: + assert out['versions'] == result + + +@pytest.mark.parametrize( + "upgraded, not_upgraded, daemon_types, hosts, services, should_block", + # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... 
] + [ + ( # valid, upgrade mgr daemons + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + ['mgr'], + None, + None, + False + ), + ( # invalid, can't upgrade mons until mgr is upgraded + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + ['mon'], + None, + None, + True + ), + ( # invalid, can't upgrade mon service until all mgr daemons are upgraded + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + None, + None, + ['mon'], + True + ), + ( # valid, upgrade mgr service + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + None, + None, + ['mgr'], + False + ), + ( # valid, mgr is already upgraded so can upgrade mons + [('mgr', 'a', 'a.x')], + [('mon', 'a', 'a')], + ['mon'], + None, + None, + False + ), + ( # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a + [], + [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['a'], + None, + True + ), + ( # valid, only daemon on b is a mgr + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['b'], + None, + False + ), + ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['a'], + None, + True + ), + ( # valid, only upgrading the mgr on a + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + ['mgr'], + ['a'], + None, + False + ), + ( # valid, mgr daemon not on b are upgraded + [('mgr', 'a', 'a.x')], + [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['b'], + None, + False + ), + ( # valid, all the necessary hosts are covered, mgr on c is already upgraded + [('mgr', 'c', 'c.z')], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')], + None, + ['a', 'b'], + None, + False + ), + ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + ['mgr', 'mon'], + ['a'], + None, + True + ), + ( # valid, only mon not on "b" is upgraded already. 
Case hit while making teuthology test + [('mon', 'a', 'a')], + [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')], + ['mon', 'osd'], + ['b'], + None, + False + ), + ] +) +@mock.patch("cephadm.module.HostCache.get_daemons") +@mock.patch("cephadm.serve.CephadmServe._get_container_image_info") +@mock.patch('cephadm.module.SpecStore.__getitem__') +def test_staggered_upgrade_validation( + get_spec, + get_image_info, + get_daemons, + upgraded: List[Tuple[str, str, str]], + not_upgraded: List[Tuple[str, str, str, str]], + daemon_types: Optional[str], + hosts: Optional[str], + services: Optional[str], + should_block: bool, + cephadm_module: CephadmOrchestrator, +): + def to_dds(ts: List[Tuple[str, str]], upgraded: bool) -> List[DaemonDescription]: + dds = [] + digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest' + for t in ts: + dds.append(DaemonDescription(daemon_type=t[0], + hostname=t[1], + daemon_id=t[2], + container_image_digests=[digest], + deployed_by=[digest],)) + return dds + get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False) + get_image_info.side_effect = async_side_effect( + ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest'])) + + class FakeSpecDesc(): + def __init__(self, spec): + self.spec = spec + + def _get_spec(s): + return FakeSpecDesc(ServiceSpec(s)) + + get_spec.side_effect = _get_spec + if should_block: + with pytest.raises(OrchestratorError): + cephadm_module.upgrade._validate_upgrade_filters( + 'new_image_name', daemon_types, hosts, services) + else: + cephadm_module.upgrade._validate_upgrade_filters( + 'new_image_name', daemon_types, hosts, services) |