Diffstat (limited to 'src/pybind/mgr/cephadm/tests')
-rw-r--r--  src/pybind/mgr/cephadm/tests/__init__.py                  |    0
-rw-r--r--  src/pybind/mgr/cephadm/tests/conftest.py                  |   27
-rw-r--r--  src/pybind/mgr/cephadm/tests/fixtures.py                  |  200
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_autotune.py             |   69
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_cephadm.py              | 2709
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_completion.py           |   40
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_configchecks.py         |  668
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_facts.py                |   31
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_migration.py            |  340
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_osd_removal.py          |  298
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_scheduling.py           | 1699
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_service_discovery.py    |  178
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_services.py             | 2725
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_spec.py                 |  590
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_ssh.py                  |  105
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_template.py             |   33
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_tuned_profiles.py       |  256
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_upgrade.py              |  481
18 files changed, 10449 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/__init__.py
diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py
new file mode 100644
index 000000000..e8add2c7b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/conftest.py
@@ -0,0 +1,27 @@
+import pytest
+
+from cephadm.services.osd import RemoveUtil, OSD
+from tests import mock
+
+from .fixtures import with_cephadm_module
+
+
+@pytest.fixture()
+def cephadm_module():
+ with with_cephadm_module({}) as m:
+ yield m
+
+
+@pytest.fixture()
+def rm_util():
+ with with_cephadm_module({}) as m:
+ r = RemoveUtil.__new__(RemoveUtil)
+ r.__init__(m)
+ yield r
+
+
+@pytest.fixture()
+def osd_obj():
+ with mock.patch("cephadm.services.osd.RemoveUtil"):
+ o = OSD(0, mock.MagicMock())
+ yield o
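A minimal sketch of how pytest wires these fixtures into a test by parameter name (hypothetical test, not part of this changeset):

    def test_fixture_wiring(cephadm_module, rm_util, osd_obj):
        # pytest resolves the arguments against the fixtures defined above:
        # cephadm_module is a CephadmOrchestrator with its I/O mocked out,
        # rm_util wraps that module, and osd_obj is an OSD built against a MagicMock.
        assert rm_util is not None
        assert osd_obj is not None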
diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py
new file mode 100644
index 000000000..6281283d7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/fixtures.py
@@ -0,0 +1,200 @@
+import fnmatch
+import asyncio
+import sys
+from tempfile import NamedTemporaryFile
+from contextlib import contextmanager
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm.serve import CephadmServe, cephadmNoImage
+
+try:
+ from typing import Any, Iterator, List, Callable, Dict
+except ImportError:
+ pass
+
+from cephadm import CephadmOrchestrator
+from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus
+from tests import mock
+
+
+def async_side_effect(result):
+ async def side_effect(*args, **kwargs):
+ return result
+ return side_effect
+
+
+def get_ceph_option(_, key):
+ return __file__
+
+
+def get_module_option_ex(_, module, key, default=None):
+ if module == 'prometheus':
+ if key == 'server_port':
+ return 9283
+ return None
+
+
+def _run_cephadm(ret):
+ async def foo(s, host, entity, cmd, e, **kwargs):
+ if cmd == 'gather-facts':
+ return '{}', '', 0
+ return [ret], '', 0
+ return foo
+
+
+def match_glob(val, pat):
+ ok = fnmatch.fnmatchcase(val, pat)
+ if not ok:
+ assert pat in val
+
+
+class MockEventLoopThread:
+ def get_result(self, coro, timeout):
+ if sys.version_info >= (3, 7):
+ return asyncio.run(coro)
+
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ try:
+ return loop.run_until_complete(coro)
+ finally:
+ loop.close()
+ asyncio.set_event_loop(None)
+
+
+def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None:
+ to_update: Dict[str, Callable[[str, Any], None]] = {
+ 'ls': m._process_ls_output,
+ 'gather-facts': m.cache.update_host_facts,
+ 'list-networks': m.cache.update_host_networks,
+ }
+ if ops:
+ for op in ops:
+ out = m.wait_async(CephadmServe(m)._run_cephadm_json(host, cephadmNoImage, op, []))
+ to_update[op](host, out)
+ m.cache.last_daemon_update[host] = datetime_now()
+ m.cache.last_facts_update[host] = datetime_now()
+ m.cache.last_network_update[host] = datetime_now()
+ m.cache.metadata_up_to_date[host] = True
+
+
+def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None:
+ for host in m.cache.get_hosts():
+ receive_agent_metadata(m, host)
+
+
+@contextmanager
+def with_cephadm_module(module_options=None, store=None):
+ """
+ :param module_options: Set opts as if they were set before module.__init__ is called
+ :param store: Set the store before module.__init__ is called
+ """
+ with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option), \
+ mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \
+ mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \
+ mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
+ mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \
+ mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \
+ mock.patch('cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles'), \
+ mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \
+ mock.patch('cephadm.http_server.CephadmHttpServer.run'):
+
+ m = CephadmOrchestrator.__new__(CephadmOrchestrator)
+ if module_options is not None:
+ for k, v in module_options.items():
+ m._ceph_set_module_option('cephadm', k, v)
+ if store is None:
+ store = {}
+ if '_ceph_get/mon_map' not in store:
+ m.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ if '_ceph_get/mgr_map' not in store:
+ m.mock_store_set('_ceph_get', 'mgr_map', {
+ 'services': {
+ 'dashboard': 'http://[::1]:8080',
+ 'prometheus': 'http://[::1]:8081'
+ },
+ 'modules': ['dashboard', 'prometheus'],
+ })
+ for k, v in store.items():
+ m._ceph_set_store(k, v)
+
+ m.__init__('cephadm', 0, 0)
+ m._cluster_fsid = "fsid"
+
+ m.event_loop = MockEventLoopThread()
+ m.tkey = NamedTemporaryFile(prefix='test-cephadm-identity-')
+
+ yield m
+
+
+def wait(m: CephadmOrchestrator, c: OrchResult) -> Any:
+ return raise_if_exception(c)
+
+
+@contextmanager
+def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True):
+ with mock.patch("cephadm.utils.resolve_ip", return_value=addr):
+ wait(m, m.add_host(HostSpec(hostname=name)))
+ if refresh_hosts:
+ CephadmServe(m)._refresh_hosts_and_daemons()
+ receive_agent_metadata(m, name)
+ yield
+ wait(m, m.remove_host(name, force=rm_with_force))
+
+
+def assert_rm_service(cephadm: CephadmOrchestrator, srv_name):
+ mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr')
+ if mon_or_mgr:
+ assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name))
+ return
+ assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}'
+ assert cephadm.spec_store[srv_name].deleted is not None
+ CephadmServe(cephadm)._check_daemons()
+ CephadmServe(cephadm)._apply_all_services()
+ assert cephadm.spec_store[srv_name].deleted
+ unmanaged = cephadm.spec_store[srv_name].spec.unmanaged
+ CephadmServe(cephadm)._purge_deleted_services()
+ if not unmanaged: # because then we're not deleting daemons
+ assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}'
+
+
+@contextmanager
+def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]:
+ if spec.placement.is_empty() and host:
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+ if meth is not None:
+ c = meth(cephadm_module, spec)
+ assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
+ else:
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']
+
+ specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
+ assert spec in specs
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ if status_running:
+ make_daemons_running(cephadm_module, spec.service_name())
+
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
+ own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
+ if host and spec.service_type != 'osd':
+ assert own_dds
+
+ yield [dd.name() for dd in own_dds]
+
+ assert_rm_service(cephadm_module, spec.service_name())
+
+
+def make_daemons_running(cephadm_module, service_name):
+ own_dds = cephadm_module.cache.get_daemons_by_service(service_name)
+ for dd in own_dds:
+ dd.status = DaemonDescriptionStatus.running # We're changing the reference
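A sketch of how the helpers above compose in practice, mirroring the pattern used throughout test_cephadm.py below (the host name and spec are illustrative):

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_composition_sketch(cephadm_module):
        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, ServiceSpec('crash'),
                              CephadmOrchestrator.apply_crash, 'test') as daemon_names:
                # the service is scheduled, applied and its daemons listed here;
                # on exit the service and then the host are removed again.
                assert daemon_names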
diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py
new file mode 100644
index 000000000..524da9c00
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_autotune.py
@@ -0,0 +1,69 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import pytest
+
+from cephadm.autotune import MemoryAutotuner
+from orchestrator import DaemonDescription
+
+
+@pytest.mark.parametrize("total,daemons,config,result",
+ [ # noqa: E128
+ (
+ 128 * 1024 * 1024 * 1024,
+ [],
+ {},
+ None,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 64 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ DaemonDescription('osd', '3', 'host1'),
+ ],
+ {
+ 'osd.3': 16 * 1024 * 1024 * 1024,
+ },
+ 56 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 62 * 1024 * 1024 * 1024,
+ )
+ ])
+def test_autotune(total, daemons, config, result):
+ def fake_getter(who, opt):
+ if opt == 'osd_memory_target_autotune':
+ if who in config:
+ return False
+ else:
+ return True
+ if opt == 'osd_memory_target':
+ return config.get(who, 4 * 1024 * 1024 * 1024)
+ if opt == 'mds_cache_memory_limit':
+ return 16 * 1024 * 1024 * 1024
+
+ a = MemoryAutotuner(
+ total_mem=total,
+ daemons=daemons,
+ config_get=fake_getter,
+ )
+ val, osds = a.tune()
+ assert val == result
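The expected results above can be read directly from the inputs: with two autotuned OSDs on a 128 GiB host each gets 128 / 2 = 64 GiB; when osd.3 opts out of autotuning with a fixed 16 GiB target, the remaining (128 - 16) / 2 = 56 GiB go to each autotuned OSD; and with a mgr colocated, the result of (128 - 4) / 2 = 62 GiB suggests roughly 4 GiB is set aside for the mgr (an inference from the expected value, not something stated in this patch).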
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
new file mode 100644
index 000000000..24fcb0280
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -0,0 +1,2709 @@
+import asyncio
+import json
+import logging
+
+from contextlib import contextmanager
+
+import pytest
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
+from cephadm.serve import CephadmServe
+from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
+from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+from cephadm.utils import SpecialHostLabels
+
+try:
+ from typing import List
+except ImportError:
+ pass
+
+from ceph.deployment.service_spec import (
+ CustomConfig,
+ CustomContainerSpec,
+ HostPlacementSpec,
+ IscsiServiceSpec,
+ MDSSpec,
+ NFSServiceSpec,
+ PlacementSpec,
+ RGWSpec,
+ ServiceSpec,
+)
+from ceph.deployment.drive_selection.selector import DriveSelection
+from ceph.deployment.inventory import Devices, Device
+from ceph.utils import datetime_to_str, datetime_now, str_to_datetime
+from orchestrator import DaemonDescription, InventoryHost, \
+ HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent
+from tests import mock
+from .fixtures import wait, _run_cephadm, match_glob, with_host, \
+ with_cephadm_module, with_service, make_daemons_running, async_side_effect
+from cephadm.module import CephadmOrchestrator
+
+"""
+TODOs:
+ There is real room for improvement here. I just quickly assembled these tests.
+ In general, everything should be tested in Teuthology as well. The reason for
+ also testing here is the shorter development roundtrip time.
+"""
+
+
+def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host):
+ dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host))
+ d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)]
+ assert d_names
+ # there should only be one daemon (if not, match_glob will throw a mismatch)
+ assert len(d_names) == 1
+
+ c = cephadm.remove_daemons(d_names)
+ [out] = wait(cephadm, c)
+ # picking the 1st element is needed, rather than passing the whole list, when the daemon
+ # name contains a '-' char. Otherwise the '-' is treated as a range, i.e. cephadm-exporter
+ # is treated like an m-e range, which is invalid. rbd-mirror (d-m) and node-exporter (e-e)
+ # are valid, so they pass without incident! Also, match_glob acts on strings anyway!
+ match_glob(out, f"Removed {d_names[0]}* from host '{host}'")
+
+
+@contextmanager
+def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str):
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")
+
+ dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
+ for dd in dds:
+ if dd.hostname == host:
+ yield dd.daemon_id
+ assert_rm_daemon(cephadm_module, spec.service_name(), host)
+ return
+
+ assert False, 'Daemon not found'
+
+
+@contextmanager
+def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'up': True,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ _run_cephadm.reset_mock(return_value=True, side_effect=True)
+ if ceph_volume_lvm_list:
+ _run_cephadm.side_effect = ceph_volume_lvm_list
+ else:
+ async def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": cephadm_module._cluster_fsid,
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ str(osd_id): [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ _run_cephadm.side_effect = _ceph_volume_list
+
+ assert cephadm_module._osd_activate(
+ [host]).stdout == f"Created osd(s) 1 on host '{host}'"
+ assert _run_cephadm.mock_calls == [
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True),
+ mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY),
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True),
+ ]
+ dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
+ assert dd.name() == f'osd.{osd_id}'
+ yield dd
+ cephadm_module._remove_daemons([(f'osd.{osd_id}', host)])
+
+
+class TestCephadm(object):
+
+ def test_get_unique_name(self, cephadm_module):
+ # type: (CephadmOrchestrator) -> None
+ existing = [
+ DaemonDescription(daemon_type='mon', daemon_id='a')
+ ]
+ new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing)
+ match_glob(new_mon, 'myhost')
+ new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing)
+ match_glob(new_mgr, 'myhost.*')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host(self, cephadm_module):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ with with_host(cephadm_module, 'test'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+
+ # Be careful with backward compatibility when changing things here:
+ assert json.loads(cephadm_module.get_store('inventory')) == \
+ {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}}
+
+ with with_host(cephadm_module, 'second', '1.2.3.5'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '1::4'),
+ HostSpec('second', '1.2.3.5')
+ ]
+
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ @mock.patch("cephadm.utils.resolve_ip")
+ def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module):
+ resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1']
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ cephadm_module._add_host(HostSpec('test', '192.168.122.1'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '192.168.122.1')]
+ cephadm_module._add_host(HostSpec('test'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '192.168.122.1')]
+ with pytest.raises(OrchestratorError):
+ cephadm_module._add_host(HostSpec('test2'))
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.list_daemons(refresh=True)
+ assert wait(cephadm_module, c) == []
+ with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _:
+
+ c = cephadm_module.list_daemons()
+
+ def remove_id_events(dd):
+ out = dd.to_json()
+ del out['daemon_id']
+ del out['events']
+ del out['daemon_name']
+ return out
+
+ assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [
+ {
+ 'service_name': 'mds.name',
+ 'daemon_type': 'mds',
+ 'hostname': 'test',
+ 'status': 2,
+ 'status_desc': 'starting',
+ 'is_active': False,
+ 'ports': [],
+ }
+ ]
+
+ with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'),
+ CephadmOrchestrator.apply_rgw, 'test', status_running=True):
+ make_daemons_running(cephadm_module, 'mds.name')
+
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'name',
+ 'service_name': 'mds.name',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 2},
+ 'unmanaged': True
+ },
+ {
+ 'placement': {
+ 'count': 1,
+ 'hosts': ["test"]
+ },
+ 'service_id': 'r.z',
+ 'service_name': 'rgw.r.z',
+ 'service_type': 'rgw',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 1,
+ 'ports': [80]},
+ }
+ ]
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls_service_type_flag(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True):
+ with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mds, '', status_running=True):
+
+ # with no service-type. Should provide info for both services
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_name': 'mgr',
+ 'service_type': 'mgr',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ # with service-type. Should provide info for only mds
+ c = cephadm_module.describe_service(service_type='mds')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ # service-type should not match with service names
+ c = cephadm_module.describe_service(service_type='mds.test-id')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_device_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.get_inventory()
+ assert wait(cephadm_module, c) == [InventoryHost('test')]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.foobar',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ ),
+ dict(
+ name='something.foo.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+ dict(
+ name='haproxy.test.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+
+ ])
+ ))
+ def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
+ assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+
+ d_name = 'rgw.' + daemon_id
+
+ c = cephadm_module.daemon_action('redeploy', d_name)
+ assert wait(cephadm_module,
+ c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'"
+
+ for what in ('start', 'stop', 'restart'):
+ c = cephadm_module.daemon_action(what, d_name)
+ assert wait(cephadm_module,
+ c) == F"Scheduled to {what} {d_name} on host 'test'"
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module._store['_ceph_get/mon_map'] = {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ }
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert cephadm_module.events.get_for_daemon(d_name) == [
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"Deployed {d_name} on host \'test\'"),
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"stop {d_name} from host \'test\'"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+ with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command:
+
+ _ceph_send_command.side_effect = Exception("myerror")
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ evs = [e.message for e in cephadm_module.events.get_for_daemon(
+ f'rgw.{daemon_id}')]
+
+ assert 'myerror' in ''.join(evs)
+
+ @pytest.mark.parametrize(
+ "action",
+ [
+ 'start',
+ 'stop',
+ 'restart',
+ 'reconfig',
+ 'redeploy'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.HostCache.save_host")
+ def test_daemon_check(self, _save_host, cephadm_module: CephadmOrchestrator, action):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._schedule_daemon_action(daemon_name, action)
+
+ assert cephadm_module.cache.get_scheduled_daemon_action(
+ 'test', daemon_name) == action
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert _save_host.called_with('test')
+ assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+
+ # Also testing deploying mons without explicit network placement
+ cephadm_module.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mon',
+ 'name': 'public_network',
+ 'value': '127.0.0.0/8'
+ })
+
+ cephadm_module.cache.update_host_networks(
+ 'test',
+ {
+ "127.0.0.0/8": [
+ "127.0.0.1"
+ ],
+ }
+ )
+
+ with with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'mon.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "mon.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'reconfig': True,
+ },
+ "meta": {
+ 'service_name': 'mon',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "[mon]\nk=v\n[mon.test]\npublic network = 127.0.0.0/8\n",
+ "keyring": "",
+ "files": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n"
+ },
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_mon_crush_location_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mon',
+ 'name': 'public_network',
+ 'value': '127.0.0.0/8'
+ })
+
+ cephadm_module.cache.update_host_networks(
+ 'test',
+ {
+ "127.0.0.0/8": [
+ "127.0.0.1"
+ ],
+ }
+ )
+
+ with with_service(cephadm_module, ServiceSpec(service_type='mon', crush_locations={'test': ['datacenter=a', 'rack=2']}), CephadmOrchestrator.apply_mon, 'test'):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'mon.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "mon.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {},
+ "meta": {
+ 'service_name': 'mon',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n",
+ "keyring": "",
+ "files": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n",
+ },
+ "crush_location": "datacenter=a",
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'crash.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "crash.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ },
+ "meta": {
+ 'service_name': 'crash',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.crash.test]\nkey = None\n",
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_and_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg'],
+ extra_container_args=['--cpus=2', '--quiet']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_and_container_args_with_spaces(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--entrypoint-arg-with-value value', '--some-other-arg 3'],
+ extra_container_args=['--cpus 2', '--container-arg-with-value value']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_container_args': [
+ "--cpus",
+ "2",
+ "--container-arg-with-value",
+ "value",
+ ],
+ 'extra_entrypoint_args': [
+ "--entrypoint-arg-with-value",
+ "value",
+ "--some-other-arg",
+ "3",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus 2",
+ "--container-arg-with-value value",
+ ],
+ 'extra_entrypoint_args': [
+ "--entrypoint-arg-with-value value",
+ "--some-other-arg 3",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_custom_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ test_cert = ['-----BEGIN PRIVATE KEY-----',
+ 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+ 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+ '-----END PRIVATE KEY-----',
+ '-----BEGIN CERTIFICATE-----',
+ 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+ 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+ '-----END CERTIFICATE-----']
+ configs = [
+ CustomConfig(content='something something something',
+ mount_path='/etc/test.conf'),
+ CustomConfig(content='\n'.join(test_cert), mount_path='/usr/share/grafana/thing.crt')
+ ]
+ tc_joined = '\n'.join(test_cert)
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='crash', custom_configs=configs), CephadmOrchestrator.apply_crash):
+ _run_cephadm(
+ 'test',
+ 'crash.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "crash.test",
+ "image": "",
+ "deploy_arguments": [],
+ "params": {},
+ "meta": {
+ "service_name": "crash",
+ "ports": [],
+ "ip": None,
+ "deployed_by": [],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "extra_entrypoint_args": None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.crash.test]\nkey = None\n",
+ "custom_config_files": [
+ {
+ "content": "something something something",
+ "mount_path": "/etc/test.conf",
+ },
+ {
+ "content": tc_joined,
+ "mount_path": "/usr/share/grafana/thing.crt",
+ },
+ ]
+ }
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'):
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+ CephadmServe(cephadm_module)._check_daemons()
+ _mon_cmd.assert_any_call(
+ {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'},
+ None)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
+ def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
+ # https://tracker.ceph.com/issues/52866
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):
+
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
+
+ # get daemons from the post-action list (ARRGH sets!!)
+ tempset = cephadm_module.requires_post_actions.copy()
+ tempdaemon1 = tempset.pop()
+ tempdaemon2 = tempset.pop()
+
+ # make sure post actions has 2 daemons in it
+ assert len(cephadm_module.requires_post_actions) == 2
+
+ # replicate a host cache that is not in sync when check_daemons is called
+ tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1)
+ tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2)
+ host = 'test1'
+ if 'test1' not in tempdaemon1:
+ host = 'test2'
+ cephadm_module.cache.rm_daemon(host, tempdaemon1)
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
+ CephadmServe(cephadm_module)._check_daemons()
+ _cfg_db.assert_called_once_with([tempdd2])
+
+ # post actions still has the other daemon in it and will run next _check_daemons
+ assert len(cephadm_module.requires_post_actions) == 1
+
+ # the post action was missed for a daemon
+ assert tempdaemon1 in cephadm_module.requires_post_actions
+
+ # put the daemon back in the cache
+ cephadm_module.cache.add_daemon(host, tempdd1)
+
+ _cfg_db.reset_mock()
+ # replicate serve loop running again
+ CephadmServe(cephadm_module)._check_daemons()
+
+ # post actions should have been called again
+ _cfg_db.assert_called()
+
+ # post actions is now empty
+ assert len(cephadm_module.requires_post_actions) == 0
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mon_add(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"]
+
+ with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ wait(cephadm_module, c)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mgr_update(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'mgr.a', 'test')
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds(self, _mon_cmd, cephadm_module):
+ dict_out = {
+ "nodes": [
+ {
+ "id": -1,
+ "name": "default",
+ "type": "root",
+ "type_id": 11,
+ "children": [
+ -3
+ ]
+ },
+ {
+ "id": -3,
+ "name": "host1",
+ "type": "host",
+ "type_id": 1,
+ "pool_weights": {},
+ "children": [
+ 0
+ ]
+ },
+ {
+ "id": 0,
+ "device_class": "hdd",
+ "name": "osd.0",
+ "type": "osd",
+ "type_id": 0,
+ "crush_weight": 0.0243988037109375,
+ "depth": 2,
+ "pool_weights": {},
+ "exists": 1,
+ "status": "destroyed",
+ "reweight": 1,
+ "primary_affinity": 1
+ }
+ ],
+ "stray": []
+ }
+ json_out = json.dumps(dict_out)
+ _mon_cmd.return_value = (0, json_out, '')
+ osd_claims = OsdIdClaims(cephadm_module)
+ assert osd_claims.get() == {'host1': ['0']}
+ assert osd_claims.filtered_by_host('host1') == ['0']
+ assert osd_claims.filtered_by_host('host1.domain.com') == ['0']
+
+ @pytest.mark.parametrize(
+ "ceph_services, cephadm_daemons, strays_expected, metadata",
+ # [ ([(daemon_type, daemon_id), ... ], [cephadm daemons], [expected strays], {metadata}), ... ]
+ [
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0')],
+ [('mgr', 'x')],
+ {},
+ ),
+ # https://tracker.ceph.com/issues/49573
+ (
+ [('rgw-nfs', '14649')],
+ [],
+ [('nfs', 'foo-rgw.host1')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')],
+ [],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1')],
+ [('nfs', 'foo2.host2')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ ]
+ )
+ def test_check_for_stray_daemons(
+ self,
+ cephadm_module,
+ ceph_services,
+ cephadm_daemons,
+ strays_expected,
+ metadata
+ ):
+ # mock ceph service-map
+ services = []
+ for service in ceph_services:
+ s = {'type': service[0], 'id': service[1]}
+ services.append(s)
+ ls = [{'hostname': 'host1', 'services': services}]
+
+ with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers:
+ list_servers.return_value = ls
+ list_servers.__iter__.side_effect = ls.__iter__
+
+ # populate cephadm daemon cache
+ dm = {}
+ for daemon_type, daemon_id in cephadm_daemons:
+ dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id)
+ dm[dd.name()] = dd
+ cephadm_module.cache.update_host_daemons('host1', dm)
+
+ def get_metadata_mock(svc_type, svc_id, default):
+ return metadata[svc_id]
+
+ with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock):
+
+ # test
+ CephadmServe(cephadm_module)._check_for_strays()
+
+ # verify
+ strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON')
+ if not strays:
+ assert len(strays_expected) == 0
+ else:
+ for dt, di in strays_expected:
+ name = '%s.%s' % (dt, di)
+ for detail in strays['detail']:
+ if name in detail:
+ strays['detail'].remove(detail)
+ break
+ assert name in detail
+ assert len(strays['detail']) == 0
+ assert strays['count'] == len(strays_expected)
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module):
+ _mon_cmd.return_value = (1, "", "fail_msg")
+ with pytest.raises(OrchestratorError):
+ OsdIdClaims(cephadm_module)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='foo',
+ placement=PlacementSpec(
+ host_pattern='*',
+ ),
+ data_devices=DeviceSelection(
+ all=True
+ )
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+
+ inventory = Devices([
+ Device(
+ '/dev/sdb',
+ available=True
+ ),
+ ])
+
+ cephadm_module.cache.update_host_devices('test', inventory.devices)
+
+ _run_cephadm.side_effect = async_side_effect((['{}'], '', 0))
+
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True,
+ stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='noncollocated',
+ placement=PlacementSpec(
+ hosts=['test']
+ ),
+ data_devices=DeviceSelection(paths=['/dev/sdb']),
+ db_devices=DeviceSelection(paths=['/dev/sdc']),
+ wal_devices=DeviceSelection(paths=['/dev/sdd'])
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...']
+
+ inventory = Devices([
+ Device('/dev/sdb', available=True),
+ Device('/dev/sdc', available=True),
+ Device('/dev/sdd', available=True)
+ ])
+
+ cephadm_module.cache.update_host_devices('test', inventory.devices)
+
+ _run_cephadm.side_effect = async_side_effect((['{}'], '', 0))
+
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc',
+ '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
+ error_ok=True, stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.SpecStore.save")
+ def test_apply_osd_save_placement(self, _save_spec, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'},
+ 'service_id': 'foo', 'data_devices': {'all': True}}
+ spec = ServiceSpec.from_json(json_spec)
+ assert isinstance(spec, DriveGroupSpec)
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+ _save_spec.assert_called_with(spec)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+ bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_host'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(bad_dg)
+ out = wait(cephadm_module, c)
+ assert "Invalid 'host:device' spec: host not found in cluster" in out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_noncollocated_osd(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command')
+ @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
+ @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
+ @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
+ def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(limit=5, rotational=1),
+ service_id='not_enough')
+
+ disks_found = [
+ '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
+ d_to_cv.return_value = 'foo'
+ _run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+ preview = cephadm_module.osd_service.generate_previews([dg], 'test')
+
+ for osd in preview:
+ assert 'notes' in osd
+ assert osd['notes'] == [
+ 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_prepare_drivegroup(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ out = cephadm_module.osd_service.prepare_drivegroup(dg)
+ assert len(out) == 1
+ f1 = out[0]
+ assert f1[0] == 'test'
+ assert isinstance(f1[1], DriveSelection)
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+ # no preview and only one disk, prepare is used due to the hack that is in place.
+ (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]),
+ # no preview and multiple disks, uses batch
+ (['/dev/sda', '/dev/sdb'], False,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]),
+ # preview and only one disk needs to use batch again to generate the preview
+ (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]),
+ # preview and multiple disks work the same
+ (['/dev/sda', '/dev/sdb'], True,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+ assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+ for cmd in out), f'Expected cmds from f{out} in {exp_commands}'
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+ # one data device, no preview
+ (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]),
+ # multiple data devices, no preview
+ (['/dev/sda', '/dev/sdb'], False,
+ ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]),
+ # one data device, preview
+ (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]),
+ # multiple data devices, preview
+ (['/dev/sda', '/dev/sdb'], True,
+ ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+ assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+ for cmd in out), f'Expected cmds from f{out} in {exp_commands}'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='osd.0',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ @mock.patch("cephadm.services.osd.OSD.exists", True)
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
+ def test_remove_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+
+ c = cephadm_module.remove_daemons(['osd.0'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed osd.0 from host 'test'"]
+
+ cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0,
+ replace=False,
+ force=False,
+ hostname='test',
+ process_started_at=datetime_now(),
+ remove_util=cephadm_module.to_remove_osds.rm_util
+ ))
+ cephadm_module.to_remove_osds.process_removal_queue()
+ assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module)
+
+ c = cephadm_module.remove_osds_status()
+ out = wait(cephadm_module, c)
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)):
+ ps = PlacementSpec(hosts=['host1'], count=1)
+ c = cephadm_module.add_daemon(
+ RGWSpec(service_id="foo", placement=ps))
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed rgw.foo.* on host 'host1'")
+
+ ps = PlacementSpec(hosts=['host1', 'host2'], count=2)
+ r = CephadmServe(cephadm_module)._apply_service(
+ RGWSpec(service_id="foo", placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1')
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.myhost.myid',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ def test_remove_daemon(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+ c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ # both are in status "starting"
+ assert len(cephadm_module.cache.get_daemons()) == 2
+
+ dd1.status = DaemonDescriptionStatus.running
+ dd2.status = DaemonDescriptionStatus.error
+ cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1})
+ cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2})
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ assert len(cephadm_module.cache.get_daemons()) == 1
+
+ assert cephadm_module.events.get_for_daemon('osd.1') == [
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host1'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host2'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Removed duplicated daemon on host 'host2'"),
+ ]
+
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @pytest.mark.parametrize(
+ "spec",
+ [
+ ServiceSpec('crash'),
+ ServiceSpec('prometheus'),
+ ServiceSpec('grafana'),
+ ServiceSpec('node-exporter'),
+ ServiceSpec('alertmanager'),
+ ServiceSpec('rbd-mirror'),
+ ServiceSpec('cephfs-mirror'),
+ ServiceSpec('mds', service_id='fsname'),
+ RGWSpec(rgw_realm='realm', rgw_zone='zone'),
+ RGWSpec(service_id="foo"),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_add(self, spec: ServiceSpec, cephadm_module):
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, unmanaged_spec):
+ with with_daemon(cephadm_module, spec, 'test'):
+ pass
+
+ @pytest.mark.parametrize(
+ "entity,success,spec",
+ [
+ ('mgr.x', True, ServiceSpec(
+ service_type='mgr',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.rgw.x', True, ServiceSpec(
+ service_type='rgw',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.nfs.x', True, ServiceSpec(
+ service_type='nfs',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('mon.', False, ServiceSpec(
+ service_type='mon',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.side_effect = OrchestratorError('fail')
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.add_daemon(spec))
+ if success:
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ else:
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ assert cephadm_module.events.get_for_service(spec.service_name()) == [
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO',
+ "service was created"),
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR',
+ "fail"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.side_effect = OrchestratorError('fail')
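+ # 'test:0.0.0.0=a' uses the '<host>:<network>=<name>' placement form, pinning the
+ # daemon id to 'a' (hence the 'mgr.a' expected in the health check detail below)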
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1
+ assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks[
+ 'CEPHADM_DAEMON_PLACE_FAIL']['summary']
+ assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks[
+ 'CEPHADM_DAEMON_PLACE_FAIL']['detail']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._apply_all_services()
+ ps = PlacementSpec(hosts=['fail'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.apply_spec_fails
+ assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1
+ assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks[
+ 'CEPHADM_APPLY_SPEC_FAIL']['summary']
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option")
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.save_host_devices")
+ def test_invalid_config_option_health_warning(self, _save_devs, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator):
+ _save_devs.return_value = None
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ get_foreign_ceph_option.side_effect = KeyError
+ CephadmServe(cephadm_module)._apply_service_config(
+ ServiceSpec('mgr', placement=ps, config={'test': 'foo'}))
+ assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None
+ assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1
+ assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['summary']
+ assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['detail']
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option")
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.CephadmOrchestrator.set_store")
+ def test_save_devices(self, _set_store, _run_cephadm, _get_foreign_ceph_option, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ entry_size = 65536 # default 64k size
+ _get_foreign_ceph_option.return_value = entry_size
+
+ class FakeDev():
+ def __init__(self, c: str = 'a'):
+ # using 1015 here makes the serialized string exactly 1024 bytes if c is one char
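+ # i.e. len(json.dumps({'a': 'a' * 1015})) == 7 + 1015 + 2 == 1024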
+ self.content = {c: c * 1015}
+ self.path = 'dev/vdc'
+
+ def to_json(self):
+ return self.content
+
+ def from_json(self, stuff):
+ return json.loads(stuff)
+
+ def byte_len(s):
+ return len(s.encode('utf-8'))
+
+ with with_host(cephadm_module, 'test'):
+ fake_devices = [FakeDev()] * 100 # should be ~100k
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
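+ # the ~100k serialized device list exceeds the 64k per-entry limit, so it is
+ # expected to be split across three store keys, with the first key recording
+ # the total entry count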
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 34], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 34]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 32]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ fake_devices = [FakeDev()] * 300 # should be ~300k
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size * 4
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 5
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50], 'entries': 6})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.3', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.4', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.5', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ fake_devices = [FakeDev()] * 62 # should be ~62k, just under cache size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 62], 'entries': 1})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ # should be ~64k but just over so it requires more entries
+ fake_devices = [FakeDev()] * 64
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 22], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 22]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 20]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ # test for actual content being correct using differing devices
+ entry_size = 3072
+ _get_foreign_ceph_option.return_value = entry_size
+ fake_devices = [FakeDev('a'), FakeDev('b'), FakeDev('c'), FakeDev('d'), FakeDev('e')]
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
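+ # each distinct FakeDev still serializes to ~1024 bytes, so with the 3072 byte
+ # limit the five devices are expected to land two per entry, with 'e' alone in
+ # the final one (see expected_calls below)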
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('a'), FakeDev('b')]], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('c'), FakeDev('d')]]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('e')]]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_store")
+ def test_load_devices(self, _get_store, cephadm_module: CephadmOrchestrator):
+ def _fake_store(key):
+ if key == 'host.test.devices.0':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 9], 'entries': 3})
+ elif key == 'host.test.devices.1':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 7]})
+ elif key == 'host.test.devices.2':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 4]})
+ else:
+ raise Exception(f'Get store with unexpected value {key}')
+
+ _get_store.side_effect = _fake_store
+ devs = cephadm_module.cache.load_host_devices('test')
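+ # the 9 + 7 + 4 devices spread across the three store entries should be
+ # reassembled into a single list of 20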
+ assert devs == [Device('/path')] * 20
+
+ @mock.patch("cephadm.module.Inventory.__contains__")
+ def test_check_stray_host_cache_entry(self, _contains, cephadm_module: CephadmOrchestrator):
+ def _fake_inv(key):
+ if key in ['host1', 'node02', 'host.something.com']:
+ return True
+ return False
+
+ _contains.side_effect = _fake_inv
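+ # keys that exactly match an inventory host are host entries,
+ # '<known host>.devices.<N>' keys are device entries, and anything
+ # else is treated as stray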
+ assert cephadm_module.cache._get_host_cache_entry_status('host1') == HostCacheStatus.host
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.something.com') == HostCacheStatus.host
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'node02.devices.37') == HostCacheStatus.devices
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.something.com.devices.0') == HostCacheStatus.devices
+ assert cephadm_module.cache._get_host_cache_entry_status('hostXXX') == HostCacheStatus.stray
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.nothing.com') == HostCacheStatus.stray
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_nfs(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = NFSServiceSpec(
+ service_id='name',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed nfs.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock())
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ def test_iscsi(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed iscsi.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test')
+
+ @pytest.mark.parametrize(
+ "on_bool",
+ [
+ True,
+ False
+ ]
+ )
+ @pytest.mark.parametrize(
+ "fault_ident",
+ [
+ 'fault',
+ 'ident'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')])
+ on_off = 'on' if on_bool else 'off'
+ assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_store('blink_device_light_cmd', 'echo hello')
+ c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')])
+ assert wait(cephadm_module, c) == ['Set ident light for test: on']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'echo', 'hello'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'mgr0'):
+ cephadm_module.set_store('mgr0/blink_device_light_cmd',
+ 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'')
+ c = cephadm_module.blink_device_light(
+ 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')])
+ assert wait(cephadm_module, c) == [
+ 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on']
+ _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [
+ '--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784'
+ ], error_ok=True)
+
+ @pytest.mark.parametrize(
+ "spec, meth",
+ [
+ (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr),
+ (ServiceSpec('crash'), CephadmOrchestrator.apply_crash),
+ (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus),
+ (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana),
+ (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter),
+ (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager),
+ (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds),
+ (ServiceSpec(
+ 'mds', service_id='fsname',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='fsname',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_mds),
+ (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw),
+ (RGWSpec(
+ service_id='bar',
+ rgw_realm='realm', rgw_zone='zone',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='bar',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_rgw),
+ (NFSServiceSpec(
+ service_id='name',
+ ), CephadmOrchestrator.apply_nfs),
+ (IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password'
+ ), CephadmOrchestrator.apply_iscsi),
+ (CustomContainerSpec(
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ uid=65534,
+ gid=65534,
+ dirs=['foo/bar'],
+ files={
+ 'foo/bar/xyz.conf': 'aaa\nbbb'
+ },
+ bind_mounts=[[
+ 'type=bind',
+ 'source=lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true'
+ ]],
+ volume_mounts={
+ 'foo/bar': '/foo/bar:Z'
+ },
+ args=['--no-healthcheck'],
+ envs=['SECRET=password'],
+ ports=[8080, 8443]
+ ), CephadmOrchestrator.apply_container),
+ ]
+ )
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("subprocess.run", mock.MagicMock())
+ def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, meth, 'test'):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'})
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, host='test'):
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert out == 'fsname'
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert not out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop")
+ def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec(
+ 'mds',
+ service_id='fsname',
+ placement=PlacementSpec(hosts=['host1', 'host2']),
+ config={'test': 'foo'}
+ )
+ with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'):
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ [daemon] = cephadm_module.cache.daemons['host1'].keys()
+
+ spec.placement.set_hosts(['host2'])
+
+ ok_to_stop.side_effect = False
+
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
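+ # daemon is a name like 'mds.fsname.<host>.<random>'; [4:] strips the
+ # 'mds.' prefix so ok_to_stop receives just the daemon id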
+ ok_to_stop.assert_called_with([daemon[4:]], force=True)
+
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host2')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module):
+ # test daemons on offline/maint hosts not removed when applying specs
+ # test daemons not added to hosts in maint/offline state
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_host(cephadm_module, 'test3'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))):
+ # should get a mgr on all 3 hosts
+ # CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+
+ # put one host in offline state and one host in maintenance state
+ cephadm_module.offline_hosts = {'test2'}
+ cephadm_module.inventory._inventory['test3']['status'] = 'maintenance'
+ cephadm_module.inventory.save()
+
+ # hosts in offline/maint mode stay in the schedulable list (so their existing
+ # daemons are left in place) but are reported as unreachable, which keeps the
+ # scheduler from placing anything new on them
+ assert cephadm_module.cache.is_host_schedulable('test2')
+ assert cephadm_module.cache.is_host_schedulable('test3')
+
+ assert cephadm_module.cache.is_host_unreachable('test2')
+ assert cephadm_module.cache.is_host_unreachable('test3')
+
+ with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
+ # re-apply services. No mgr should be removed from maint/offline hosts
+ # crash daemon should only be on host not in maint/offline mode
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+ assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1
+
+ cephadm_module.offline_hosts = {}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nsuccess - systemd target xxx disabled'], 0))
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+ # should not raise an error
+ retval = cephadm_module.enter_host_maintenance(hostname)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - disable the target'], 0))
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
+ with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'):
+ cephadm_module.enter_host_maintenance(hostname)
+
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_i_really_mean_it(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ err_str = 'some kind of error'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - disable the target'], 0))
+ _host_ok.return_value = 1, err_str
+ _get_daemon_types.return_value = ['mon']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname, force=True)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ retval = cephadm_module.enter_host_maintenance(hostname, force=True, yes_i_really_mean_it=True)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(([''], [
+ 'something\nsuccess - systemd target xxx enabled and started'], 0))
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+ # should not raise an error
+ retval = cephadm_module.exit_host_maintenance(hostname)
+ assert retval.result_str().startswith('Ceph cluster')
+ assert not retval.exception_str
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - unable to enable the target'], 0))
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+
+ with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'):
+ cephadm_module.exit_host_maintenance(hostname)
+
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.ssh.SSHManager._remote_connection")
+ @mock.patch("cephadm.ssh.SSHManager._execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._check_execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._write_remote_file")
+ def test_etc_ceph(self, _write_file, check_execute_command, execute_command, remote_connection, cephadm_module):
+ _write_file.side_effect = async_side_effect(None)
+ check_execute_command.side_effect = async_side_effect('')
+ execute_command.side_effect = async_side_effect(('{}', '', 0))
+ remote_connection.side_effect = async_side_effect(mock.Mock())
+
+ assert cephadm_module.manage_etc_ceph_ceph_conf is False
+
+ with with_host(cephadm_module, 'test'):
+ assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test')
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True)
+ cephadm_module.config_notify()
+ assert cephadm_module.manage_etc_ceph_ceph_conf is True
+
+ CephadmServe(cephadm_module)._write_all_client_files()
+ # Make sure both ceph conf locations (default and per-fsid) are written
+ _write_file.assert_has_calls([mock.call('test', '/etc/ceph/ceph.conf', b'',
+ 0o644, 0, 0, None),
+ mock.call('test', '/var/lib/ceph/fsid/config/ceph.conf', b'',
+ 0o644, 0, 0, None)]
+ )
+ ceph_conf_files = cephadm_module.cache.get_host_client_files('test')
+ assert len(ceph_conf_files) == 2
+ assert '/etc/ceph/ceph.conf' in ceph_conf_files
+ assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files
+
+ # set extra config and expect that we deploy another ceph.conf
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+ CephadmServe(cephadm_module)._write_all_client_files()
+ _write_file.assert_has_calls([mock.call('test',
+ '/etc/ceph/ceph.conf',
+ b'[mon]\nk=v\n', 0o644, 0, 0, None),
+ mock.call('test',
+ '/var/lib/ceph/fsid/config/ceph.conf',
+ b'[mon]\nk=v\n', 0o644, 0, 0, None)])
+ # reload
+ cephadm_module.cache.last_client_files = {}
+ cephadm_module.cache.load()
+
+ ceph_conf_files = cephadm_module.cache.get_host_client_files('test')
+ assert len(ceph_conf_files) == 2
+ assert '/etc/ceph/ceph.conf' in ceph_conf_files
+ assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files
+
+ # Make sure a change to the extra ceph conf results in new client file digests:
+ f1_before_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ f2_before_digest = cephadm_module.cache.get_host_client_files(
+ 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0]
+ cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2')
+ CephadmServe(cephadm_module)._write_all_client_files()
+ f1_after_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ f2_after_digest = cephadm_module.cache.get_host_client_files(
+ 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0]
+ assert f1_before_digest != f1_after_digest
+ assert f2_before_digest != f2_after_digest
+
+ @mock.patch("cephadm.inventory.HostCache.get_host_client_files")
+ def test_dont_write_client_files_to_unreachable_hosts(self, _get_client_files, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) # online
+ cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) # maintenance
+ cephadm_module.inventory.add_host(HostSpec('host3', '1.2.3.3')) # offline
+
+ # mark host2 as maintenance and host3 as offline
+ cephadm_module.inventory._inventory['host2']['status'] = 'maintenance'
+ cephadm_module.offline_hosts.add('host3')
+
+ # verify host2 and host3 are correctly marked as unreachable but host1 is not
+ assert not cephadm_module.cache.is_host_unreachable('host1')
+ assert cephadm_module.cache.is_host_unreachable('host2')
+ assert cephadm_module.cache.is_host_unreachable('host3')
+
+ _get_client_files.side_effect = Exception('Called _get_client_files')
+
+ # with the online host, should call _get_client_files which
+ # we have setup to raise an Exception
+ with pytest.raises(Exception, match='Called _get_client_files'):
+ CephadmServe(cephadm_module)._write_client_files({}, 'host1')
+
+ # for the maintenance and offline host, _get_client_files should
+ # not be called and it should just return immediately with nothing
+ # having been raised
+ CephadmServe(cephadm_module)._write_client_files({}, 'host2')
+ CephadmServe(cephadm_module)._write_client_files({}, 'host3')
+
+ def test_etc_ceph_init(self):
+ with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m:
+ assert m.manage_etc_ceph_ceph_conf is True
+
+ @mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
+ @mock.patch("cephadm.CephadmOrchestrator.extra_ceph_conf")
+ def test_extra_ceph_conf(self, _extra_ceph_conf, _check_mon_cmd, cephadm_module: CephadmOrchestrator):
+ # settings put into the [global] section of the extra conf
+ # need to be appended to the existing [global] section in the given
+ # minimal ceph conf, but anything in another section (e.g. [mon])
+ # needs to continue to be its own section
+
+ # this is the conf "ceph generate-minimal-conf" will return in this test
+ _check_mon_cmd.return_value = (0, """[global]
+global_k1 = global_v1
+global_k2 = global_v2
+[mon]
+mon_k1 = mon_v1
+[osd]
+osd_k1 = osd_v1
+osd_k2 = osd_v2
+""", '')
+
+ # test with extra ceph conf that has some of the sections from minimal conf
+ _extra_ceph_conf.return_value = CephadmOrchestrator.ExtraCephConf(conf="""[mon]
+mon_k2 = mon_v2
+[global]
+global_k3 = global_v3
+""", last_modified=datetime_now())
+
+ expected_combined_conf = """[global]
+global_k1 = global_v1
+global_k2 = global_v2
+global_k3 = global_v3
+
+[mon]
+mon_k1 = mon_v1
+mon_k2 = mon_v2
+
+[osd]
+osd_k1 = osd_v1
+osd_k2 = osd_v2
+"""
+
+ assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf
+
+ def test_client_keyrings_special_host_labels(self, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
+ cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS]))
+ cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING]))
+ # hosts need to be marked as having had a daemon refresh to be available for
+ # placement, so "refresh" them with an empty daemon list
+ cephadm_module.cache.update_host_daemons('host1', {})
+ cephadm_module.cache.update_host_daemons('host2', {})
+ cephadm_module.cache.update_host_daemons('host3', {})
+
+ assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+ assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+ assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+
+ assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+ assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+ assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+
+ cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1')))
+
+ with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+ _mon_cmd.return_value = (0, 'real-keyring', '')
+ client_files = CephadmServe(cephadm_module)._calc_client_files()
+ assert 'host1' in client_files.keys()
+ assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys()
+ assert 'host2' in client_files.keys()
+ assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys()
+ assert 'host3' not in client_files.keys()
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ def check_registry_credentials(url, username, password):
+ assert json.loads(cephadm_module.get_store('registry_credentials')) == {
+ 'url': url, 'username': username, 'password': password}
+
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ # test successful login with valid args
+ code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password')
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login attempt with invalid args
+ code, out, err = cephadm_module.registry_login('bad-args')
+ assert err == ("Invalid arguments. Please provide arguments <url> <username> <password> "
+ "or -i <login credentials json file>")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login using invalid json file
+ code, out, err = cephadm_module.registry_login(
+ None, None, None, '{"bad-json": "bad-json"}')
+ assert err == ("json provided for custom registry login did not include all necessary fields. "
+ "Please setup json file as\n"
+ "{\n"
+ " \"url\": \"REGISTRY_URL\",\n"
+ " \"username\": \"REGISTRY_USERNAME\",\n"
+ " \"password\": \"REGISTRY_PASSWORD\"\n"
+ "}\n")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test good login using valid json file
+ good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", "
+ " \"password\": \"" + "json-pass" + "\"}")
+ code, out, err = cephadm_module.registry_login(None, None, None, good_json)
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ # test bad login where args are valid but login command fails
+ _run_cephadm.side_effect = async_side_effect(('{}', 'error', 1))
+ code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password')
+ assert err == 'Host test failed to login to fail-url as fail-user with given password'
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['image@repo_digest'],
+ })))
+ @pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+ def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator):
+ cephadm_module.use_repo_digest = use_repo_digest
+
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.set_container_image('global', 'image')
+
+ if use_repo_digest:
+
+ CephadmServe(cephadm_module).convert_tags_to_repo_digest()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'image@repo_digest'
+ else:
+ assert image == 'image'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch
+Traceback (most recent call last):
+ File "<stdin>", line 6112, in <module>
+ File "<stdin>", line 1299, in _infer_fsid
+ File "<stdin>", line 1382, in _infer_image
+ File "<stdin>", line 3612, in command_ceph_volume
+ File "<stdin>", line 1061, in call_throws"""
+
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.reset_mock()
+ _run_cephadm.side_effect = OrchestratorError(error_message)
+
+ s = CephadmServe(cephadm_module)._refresh_host_devices('test')
+ assert s == 'host test `cephadm ceph-volume` failed: ' + error_message
+
+ assert _run_cephadm.mock_calls == [
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
+ no_fsid=False, error_ok=False, log_output=False),
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty'], image='',
+ no_fsid=False, error_ok=False, log_output=False),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ ceph_volume_lvm_list = {
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }
+ _run_cephadm.reset_mock(return_value=True, side_effect=True)
+
+ async def _r_c(*args, **kwargs):
+ if 'ceph-volume' in args:
+ return (json.dumps(ceph_volume_lvm_list), '', 0)
+ else:
+ assert ['_orch', 'deploy'] in args
+ raise OrchestratorError("let's fail somehow")
+ _run_cephadm.side_effect = _r_c
+ assert cephadm_module._osd_activate(
+ ['test']).stderr == "let's fail somehow"
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+
+ async def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8",
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }, {
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'db'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_service(cephadm_module, dg, host='test'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
+ with pytest.raises(OrchestratorError):
+ with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN)
+ pass
+ assert False
+ with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
+ with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN)
+
+ @pytest.mark.parametrize("facts, settings, expected_value",
+ [
+ # All options are available on all hosts
+ (
+ {
+ "host1":
+ {
+ "sysctl_options":
+ {
+ 'opt1': 'val1',
+ 'opt2': 'val2',
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ 'opt1': '',
+ 'opt2': '',
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': [], 'host2': []} # expected_value
+ ),
+ # opt1 is missing on host 1, opt2 is missing on host2
+ ({
+ "host1":
+ {
+ "sysctl_options":
+ {
+ 'opt2': '',
+ 'optX': '',
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ 'opt1': '',
+ 'opt3': '',
+ 'opt4': '',
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': ['opt1'], 'host2': ['opt2']} # expected_value
+ ),
+ # All options are missing on all hosts
+ ({
+ "host1":
+ {
+ "sysctl_options":
+ {
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': ['opt1', 'opt2'], 'host2': [
+ 'opt1', 'opt2']} # expected_value
+ ),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_tuned_profiles_settings_validation(self, facts, settings, expected_value, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ spec = mock.Mock()
+ spec.settings = sorted(settings)
+ spec.placement.filter_matching_hostspecs = mock.Mock()
+ spec.placement.filter_matching_hostspecs.return_value = ['host1', 'host2']
+ cephadm_module.cache.facts = facts
+ assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_tuned_profiles_validation(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+
+ with pytest.raises(OrchestratorError, match="^Invalid placement specification.+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(hosts=[])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="Invalid spec: settings section cannot be empty."):
+ spec = mock.Mock()
+ spec.settings = {}
+ spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Placement 'count' field is no supported .+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(count=1)
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Placement 'count_per_host' field is no supported .+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(count_per_host=1, label='foo')
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Found invalid host"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+ cephadm_module.inventory = mock.Mock()
+ cephadm_module.inventory.all_specs = mock.Mock(
+ return_value=[mock.Mock().hostname, mock.Mock().hostname])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ def test_set_unmanaged(self, cephadm_module):
+ cephadm_module.spec_store._specs['crash'] = ServiceSpec('crash', unmanaged=False)
+ assert not cephadm_module.spec_store._specs['crash'].unmanaged
+ cephadm_module.spec_store.set_unmanaged('crash', True)
+ assert cephadm_module.spec_store._specs['crash'].unmanaged
+ cephadm_module.spec_store.set_unmanaged('crash', False)
+ assert not cephadm_module.spec_store._specs['crash'].unmanaged
+
+ def test_inventory_known_hostnames(self, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1'))
+ cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2'))
+ cephadm_module.inventory.add_host(HostSpec('host3.domain', '1.2.3.3'))
+ cephadm_module.inventory.add_host(HostSpec('host4.domain', '1.2.3.4'))
+ cephadm_module.inventory.add_host(HostSpec('host5', '1.2.3.5'))
+
+ # update_known_hostnames expects args to be <hostname, shortname, fqdn>,
+ # as gathered from cephadm gather-facts. That said, passing the names in
+ # the wrong order should have no effect on functionality
+ cephadm_module.inventory.update_known_hostnames('host1', 'host1', 'host1.domain')
+ cephadm_module.inventory.update_known_hostnames('host2.domain', 'host2', 'host2.domain')
+ cephadm_module.inventory.update_known_hostnames('host3', 'host3', 'host3.domain')
+ cephadm_module.inventory.update_known_hostnames('host4.domain', 'host4', 'host4.domain')
+ cephadm_module.inventory.update_known_hostnames('host5', 'host5', 'host5')
+
+ assert 'host1' in cephadm_module.inventory
+ assert 'host1.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host1') == '1.2.3.1'
+ assert cephadm_module.inventory.get_addr('host1.domain') == '1.2.3.1'
+
+ assert 'host2' in cephadm_module.inventory
+ assert 'host2.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host2') == '1.2.3.2'
+ assert cephadm_module.inventory.get_addr('host2.domain') == '1.2.3.2'
+
+ assert 'host3' in cephadm_module.inventory
+ assert 'host3.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host3') == '1.2.3.3'
+ assert cephadm_module.inventory.get_addr('host3.domain') == '1.2.3.3'
+
+ assert 'host4' in cephadm_module.inventory
+ assert 'host4.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host4') == '1.2.3.4'
+ assert cephadm_module.inventory.get_addr('host4.domain') == '1.2.3.4'
+
+ assert 'host4.otherdomain' not in cephadm_module.inventory
+ with pytest.raises(OrchestratorError):
+ cephadm_module.inventory.get_addr('host4.otherdomain')
+
+ assert 'host5' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host5') == '1.2.3.5'
+ with pytest.raises(OrchestratorError):
+ cephadm_module.inventory.get_addr('host5.domain')
+
+ def test_async_timeout_handler(self, cephadm_module):
+ cephadm_module.default_cephadm_command_timeout = 900
+
+ async def _timeout():
+ raise asyncio.TimeoutError
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler():
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out on host hostA \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostA'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "testing" timed out \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler(cmd='testing'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "testing" timed out on host hostB \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostB', 'testing'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out \(non-default 111 second timeout\)'):
+ with cephadm_module.async_timeout_handler(timeout=111):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostC', 'very slow', 999):
+ cephadm_module.wait_async(_timeout())
+
+ @mock.patch("cephadm.CephadmOrchestrator.remove_osds")
+ @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None)
+ @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: [])
+ def test_host_drain_zap(self, _rm_osds, cephadm_module):
+ # pass force=true in these tests to bypass _admin label check
+ cephadm_module.drain_host('host1', force=True, zap_osd_devices=False)
+ _rm_osds.assert_called_with([], zap=False)
+
+ cephadm_module.drain_host('host1', force=True, zap_osd_devices=True)
+ _rm_osds.assert_called_with([], zap=True)
+
+ def test_process_ls_output(self, cephadm_module):
+ sample_ls_output = """[
+ {
+ "style": "cephadm:v1",
+ "name": "mon.vm-00",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mon.vm-00",
+ "enabled": true,
+ "state": "running",
+ "service_name": "mon",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "b170b964a6e2918955362eb36195627c6086d3f859d4ebce2ee13f3ee4738733",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 56214159,
+ "cpu_percentage": "2.32%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:31:11.752300Z",
+ "created": "2023-09-22T22:15:24.121387Z",
+ "deployed": "2023-09-22T22:31:10.383431Z",
+ "configured": "2023-09-22T22:31:11.859440Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "mgr.vm-00.mpexeg",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mgr.vm-00.mpexeg",
+ "enabled": true,
+ "state": "running",
+ "service_name": "mgr",
+ "ports": [
+ 8443,
+ 9283,
+ 8765
+ ],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "6e7756cef553a25a2a84227e8755d3d25046b9cd8758b23c698d34b3af895242",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 529740595,
+ "cpu_percentage": "8.35%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:30:18.587021Z",
+ "created": "2023-09-22T22:15:29.101409Z",
+ "deployed": "2023-09-22T22:30:17.339114Z",
+ "configured": "2023-09-22T22:30:18.758122Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "agent.vm-00",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@agent.vm-00",
+ "enabled": true,
+ "state": "running",
+ "service_name": "agent",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "container_id": null,
+ "container_image_name": null,
+ "container_image_id": null,
+ "container_image_digests": null,
+ "version": null,
+ "started": null,
+ "created": "2023-09-22T22:33:34.708289Z",
+ "deployed": null,
+ "configured": "2023-09-22T22:33:34.722289Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "osd.0",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@osd.0",
+ "enabled": true,
+ "state": "running",
+ "service_name": "osd.foo",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "93f71c60820b86901a45b3b1fe3dba3e3e677b37fd22310b7e7da3f67bb8ccd6",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 73410805,
+ "cpu_percentage": "6.54%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:41:29.019587Z",
+ "created": "2023-09-22T22:41:03.615080Z",
+ "deployed": "2023-09-22T22:41:24.965222Z",
+ "configured": "2023-09-22T22:41:29.119250Z"
+ }
+]"""
+
+ now = str_to_datetime('2023-09-22T22:45:29.119250Z')
+ cephadm_module._cluster_fsid = '588f83ba-5995-11ee-9e94-52540057a206'
+ with mock.patch("cephadm.module.datetime_now", lambda: now):
+ cephadm_module._process_ls_output('vm-00', json.loads(sample_ls_output))
+ assert 'vm-00' in cephadm_module.cache.daemons
+ assert 'mon.vm-00' in cephadm_module.cache.daemons['vm-00']
+ assert 'mgr.vm-00.mpexeg' in cephadm_module.cache.daemons['vm-00']
+ assert 'agent.vm-00' in cephadm_module.cache.daemons['vm-00']
+ assert 'osd.0' in cephadm_module.cache.daemons['vm-00']
+
+ daemons = cephadm_module.cache.get_daemons_by_host('vm-00')
+ c_img_ids = [dd.container_image_id for dd in daemons if dd.daemon_type != 'agent']
+ assert all(c_img_id == '674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55' for c_img_id in c_img_ids)
+ last_refreshes = [dd.last_refresh for dd in daemons]
+ assert all(lrf == now for lrf in last_refreshes)
+ versions = [dd.version for dd in daemons if dd.daemon_type != 'agent']
+ assert all(version == '18.0.0-5185-g7b3a4f2b' for version in versions)
+
+ osd = cephadm_module.cache.get_daemons_by_type('osd', 'vm-00')[0]
+ assert osd.cpu_percentage == '6.54%'
+ assert osd.memory_usage == 73410805
+ assert osd.created == str_to_datetime('2023-09-22T22:41:03.615080Z')
diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py b/src/pybind/mgr/cephadm/tests/test_completion.py
new file mode 100644
index 000000000..327c12d2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_completion.py
@@ -0,0 +1,40 @@
+import pytest
+
+from ..module import forall_hosts
+
+
+class TestCompletion(object):
+
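+ # forall_hosts maps the wrapped function over the input sequence; each element
+ # supplies the positional args (tuples are unpacked) and the results are
+ # gathered into a list, hence the stringified arg tuples expected below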
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map(self, input, expected, cephadm_module):
+ @forall_hosts
+ def run_forall(*args):
+ return str(args)
+ assert run_forall(input) == expected
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map_self(self, input, expected, cephadm_module):
+ class Run(object):
+ def __init__(self):
+ self.attr = 1
+
+ @forall_hosts
+ def run_forall(self, *args):
+ assert self.attr == 1
+ return str(args)
+
+ assert Run().run_forall(input) == expected
diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py b/src/pybind/mgr/cephadm/tests/test_configchecks.py
new file mode 100644
index 000000000..3cae0a27d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py
@@ -0,0 +1,668 @@
+import copy
+import json
+import logging
+import ipaddress
+import pytest
+import uuid
+
+from time import time as now
+
+from ..configchecks import CephadmConfigChecks
+from ..inventory import HostCache
+from ..upgrade import CephadmUpgrade, UpgradeState
+from orchestrator import DaemonDescription
+
+from typing import List, Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+host_sample = {
+ "arch": "x86_64",
+ "bios_date": "04/01/2014",
+ "bios_version": "F2",
+ "cpu_cores": 16,
+ "cpu_count": 2,
+ "cpu_load": {
+ "15min": 0.0,
+ "1min": 0.01,
+ "5min": 0.01
+ },
+ "cpu_model": "Intel® Xeon® Processor E5-2698 v3",
+ "cpu_threads": 64,
+ "flash_capacity": "4.0TB",
+ "flash_capacity_bytes": 4000797868032,
+ "flash_count": 2,
+ "flash_list": [
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sda",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sdb",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ ],
+ "hdd_capacity": "16.0TB",
+ "hdd_capacity_bytes": 16003148120064,
+ "hdd_count": 4,
+ "hdd_list": [
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdc",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdd",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sde",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdf",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ ],
+ "hostname": "dummy",
+ "interfaces": {
+ "eth0": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.17.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth1": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.18.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth2": {
+ "driver": "r8169",
+ "iftype": "physical",
+ "ipv4_address": "10.7.19.1/24",
+ "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64",
+ "lower_devs_list": [],
+ "mtu": 1500,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": []
+ },
+ },
+ "kernel": "4.18.0-240.10.1.el8_3.x86_64",
+ "kernel_parameters": {
+ "net.ipv4.ip_nonlocal_bind": "0",
+ },
+ "kernel_security": {
+ "SELINUX": "enforcing",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(enforcing, targeted)",
+ "type": "SELinux"
+ },
+ "memory_available_kb": 19489212,
+ "memory_free_kb": 245164,
+ "memory_total_kb": 32900916,
+ "model": "StorageHeavy",
+ "nic_count": 3,
+ "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)",
+ "subscribed": "Yes",
+ "system_uptime": 777600.0,
+ "timestamp": now(),
+ "vendor": "Ceph Servers Inc",
+}
+
+
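+# node-1 runs mon/mgr/osd, nodes 2 and 3 run mon/mds/osd, all other nodes are osd-only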
+def role_list(n: int) -> List[str]:
+ if n == 1:
+ return ['mon', 'mgr', 'osd']
+ if n in [2, 3]:
+ return ['mon', 'mds', 'osd']
+
+ return ['osd']
+
+
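+# Build `count` host fact dicts on the given networks, plus the matching
+# DaemonDescription objects and a daemon-name -> hostname lookup used by
+# FakeMgr.get_metadata().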
+def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'):
+ # public network = eth0, cluster_network = eth1
+ assert count > 3
+ assert public_network
+ num_disks = host_sample['hdd_count']
+ hosts = {}
+ daemons = {}
+ daemon_to_host = {}
+ osd_num = 0
+ public_netmask = public_network.split('/')[1]
+ cluster_ip_list = []
+ cluster_netmask = ''
+
+ public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())]
+ if cluster_network:
+ cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())]
+ cluster_netmask = cluster_network.split('/')[1]
+
+ for n in range(1, count + 1, 1):
+
+ new_host = copy.deepcopy(host_sample)
+ hostname = f"node-{n}.ceph.com"
+
+ new_host['hostname'] = hostname
+ new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}"
+ if cluster_ip_list:
+ new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}"
+ else:
+ new_host['interfaces']['eth1']['ipv4_address'] = ''
+
+ hosts[hostname] = new_host
+ daemons[hostname] = {}
+ for r in role_list(n):
+ name = ''
+ if r == 'osd':
+                for _n in range(num_disks):
+ osd = DaemonDescription(
+ hostname=hostname, daemon_type='osd', daemon_id=osd_num)
+ name = f"osd.{osd_num}"
+ daemons[hostname][name] = osd
+ daemon_to_host[name] = hostname
+ osd_num += 1
+ else:
+ name = f"{r}.{hostname}"
+ daemons[hostname][name] = DaemonDescription(
+ hostname=hostname, daemon_type=r, daemon_id=hostname)
+ daemon_to_host[name] = hostname
+
+ logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}")
+ return hosts, daemons, daemon_to_host
+
+
+@pytest.fixture()
+def mgr():
+ """Provide a fake ceph mgr object preloaded with a configuration"""
+ mgr = FakeMgr()
+ mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \
+ generate_testdata(public_network='10.9.64.0/24', cluster_network='')
+ mgr.module_option.update({
+ "config_checks_enabled": True,
+ })
+ yield mgr
+
+
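+# Minimal stand-in for the mgr module: keeps store and module-option state in
+# plain dicts and fakes get_metadata()/list_servers() so CephadmConfigChecks
+# can run without a real cluster.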
+class FakeMgr:
+
+ def __init__(self):
+ self.datastore = {}
+ self.module_option = {}
+ self.health_checks = {}
+ self.default_version = 'quincy'
+ self.version_overrides = {}
+ self.daemon_to_host = {}
+
+ self.cache = HostCache(self)
+ self.upgrade = CephadmUpgrade(self)
+
+ def set_health_checks(self, checks: dict):
+ return
+
+ def get_module_option(self, keyname: str) -> Optional[str]:
+ return self.module_option.get(keyname, None)
+
+ def set_module_option(self, keyname: str, value: str) -> None:
+ return None
+
+ def get_store(self, keyname: str, default=None) -> Optional[str]:
+ return self.datastore.get(keyname, None)
+
+ def set_store(self, keyname: str, value: str) -> None:
+ self.datastore[keyname] = value
+ return None
+
+ def _ceph_get_server(self) -> None:
+ pass
+
+ def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]:
+ key = f"{daemon_type}.{daemon_id}"
+ if key in self.version_overrides:
+ logger.debug(f"override applied for {key}")
+ version_str = self.version_overrides[key]
+ else:
+ version_str = self.default_version
+
+ return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]}
+
+ def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]:
+ num_disks = host_sample['hdd_count']
+ osd_num = 0
+ service_map = []
+
+ for hostname in self.cache.facts:
+
+ host_num = int(hostname.split('.')[0].split('-')[1])
+ svc_list = []
+ for r in role_list(host_num):
+ if r == 'osd':
+ for _n in range(num_disks):
+ svc_list.append({
+ "type": "osd",
+ "id": osd_num,
+ })
+ osd_num += 1
+ else:
+ svc_list.append({
+ "type": r,
+ "id": hostname,
+ })
+
+ service_map.append({"services": svc_list})
+ logger.debug(f"services map - {json.dumps(service_map)}")
+ return service_map
+
+ def use_repo_digest(self) -> None:
+ return None
+
+
+class TestConfigCheck:
+
+ def test_to_json(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ out = checker.to_json()
+ assert out
+ assert len(out) == len(checker.health_checks)
+
+ def test_lookup_check(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ check = checker.lookup_check('osd_mtu_size')
+ logger.debug(json.dumps(check.to_json()))
+ assert check
+ assert check.healthcheck_name == "CEPHADM_CHECK_MTU"
+
+ def test_old_checks_removed(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", '
+ '"kernel_security": "enabled", "public_network": "enabled", '
+ '"kernel_version": "enabled", "network_missing": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", '
+ '"os_subscription": "enabled", "ceph_release": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert "bogus_one" not in checks
+ assert "bogus_two" not in checks
+ assert len(checks) == len(checker.health_checks)
+
+ def test_new_checks(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert len(checks) == len(checker.health_checks)
+
+ def test_no_issues(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+
+ assert not mgr.health_checks
+
+ def test_no_public_network(self, mgr):
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24"
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+ logger.debug(mgr.health_checks)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \
+ 'node-1.ceph.com does not have an interface on any public network'
+
+ def test_missing_networks(self, mgr):
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.66.0/24']
+ checker.run_checks()
+
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \
+ "10.9.66.0/24 not found on any host in the cluster"
+
+ def test_bad_mtu_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000'
+
+ def test_bad_mtu_multiple(self, mgr):
+
+ for n in [1, 5]:
+ bad_node = mgr.cache.facts[f'node-{n}.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2
+
+ def test_bad_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 100
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000'
+
+ def test_super_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 10000
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert not mgr.health_checks
+
+ def test_release_mismatch_single(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \
+ 'osd.1 is running pacific (majority of cluster is using quincy)'
+
+ def test_release_mismatch_multi(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ "osd.5": "octopus",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2
+
+ def test_kernel_mismatch(self, mgr):
+
+ bad_host = mgr.cache.facts['node-1.ceph.com']
+ bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \
+ "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18"
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1
+
+ def test_inconsistent_subscription(self, mgr):
+
+ bad_host = mgr.cache.facts['node-5.ceph.com']
+ bad_host['subscribed'] = "no"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \
+ "node-5.ceph.com does not have an active subscription"
+
+ def test_kernel_security_inconsistent(self, mgr):
+
+ bad_node = mgr.cache.facts['node-3.ceph.com']
+ bad_node['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \
+ "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster"
+
+ def test_release_and_bad_mtu(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 2
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks
+
+ def test_release_mtu_LSM(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 3
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks
+
+ def test_release_mtu_LSM_subscription(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node1['subscribed'] = "no"
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 4
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \
+ "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+
+ def test_skip_release_during_upgrade(self, mgr):
+ mgr.upgrade.upgrade_state = UpgradeState.from_json({
+ 'target_name': 'wah',
+ 'progress_id': str(uuid.uuid4()),
+ 'target_id': 'wah',
+ 'error': '',
+ 'paused': False,
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}")
+ assert checker.skipped_checks_count == 1
+ assert 'ceph_release' in checker.skipped_checks
+
+ def test_skip_when_disabled(self, mgr):
+ mgr.module_option.update({
+ "config_checks_enabled": "false"
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert checker.active_checks_count == 0
+
+ def test_skip_mtu_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 7
+
+ def test_skip_mtu_lsm_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks and \
+ 'kernel_security' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 6
+ assert not mgr.health_checks
diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py
new file mode 100644
index 000000000..7838ee5d4
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_facts.py
@@ -0,0 +1,31 @@
+from .. import CephadmOrchestrator
+
+from .fixtures import wait
+
+from tests import mock
+
+
+def test_facts(cephadm_module: CephadmOrchestrator):
+ facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}}
+ cephadm_module.cache.facts = facts
+ ret_facts = cephadm_module.get_facts('node-1.ceph.com')
+ assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}]
+
+
+@mock.patch("cephadm.inventory.Inventory.update_known_hostnames")
+def test_known_hostnames(_update_known_hostnames, cephadm_module: CephadmOrchestrator):
+ host_facts = {'hostname': 'host1.domain',
+ 'shortname': 'host1',
+ 'fqdn': 'host1.domain',
+ 'memory_free_kb': 37383384,
+ 'memory_total_kb': 40980612,
+ 'nic_count': 2}
+ cephadm_module.cache.update_host_facts('host1', host_facts)
+ _update_known_hostnames.assert_called_with('host1.domain', 'host1', 'host1.domain')
+
+ host_facts = {'hostname': 'host1.domain',
+ 'memory_free_kb': 37383384,
+ 'memory_total_kb': 40980612,
+ 'nic_count': 2}
+ cephadm_module.cache.update_host_facts('host1', host_facts)
+ _update_known_hostnames.assert_called_with('host1.domain', '', '')
diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py
new file mode 100644
index 000000000..1f1d32e8b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_migration.py
@@ -0,0 +1,340 @@
+import json
+import pytest
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm import CephadmOrchestrator
+from cephadm.inventory import SPEC_STORE_PREFIX
+from cephadm.migrations import LAST_MIGRATION
+from cephadm.tests.fixtures import _run_cephadm, wait, with_host, receive_agent_metadata_all_hosts
+from cephadm.serve import CephadmServe
+from tests import mock
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_scheduler(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1', refresh_hosts=False):
+ with with_host(cephadm_module, 'host2', refresh_hosts=False):
+
+ # emulate the old scheduler:
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+ # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."):
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+ # assert we need all daemons.
+ assert cephadm_module.migration_current == 0
+
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ receive_agent_metadata_all_hosts(cephadm_module)
+ cephadm_module.migration.migrate()
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())}
+ assert out == {'host1', 'host2'}
+
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+            # Sorry for this hack, but I need to make sure Migration thinks
+            # we have already updated all daemons.
+ cephadm_module.cache.last_daemon_update['host1'] = datetime_now()
+ cephadm_module.cache.last_daemon_update['host2'] = datetime_now()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ out = [o.spec.placement for o in wait(
+ cephadm_module, cephadm_module.describe_service())]
+ assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec(
+ hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])]
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(hosts=['host1'])
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'placement': {
+ 'count': 5,
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 2
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+ assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(count=5)
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mds_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({
+ 'spec': {
+ 'service_type': 'mds',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ # there is nothing to migrate, as the spec is gone now.
+ assert len(cephadm_module.spec_store.all_specs) == 0
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'ganesha-foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['ganesha-foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator):
+ assert 'client.admin' not in cephadm_module.keys.keys
+
+ cephadm_module.migration_current = 3
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator):
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == 0
+
+ cephadm_module.migration_current = LAST_MIGRATION
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = None
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = LAST_MIGRATION + 1
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == 0
+
+ cephadm_module.migration_current = None
+ ongoing = cephadm_module.migration.is_migration_ongoing()
+ assert not ongoing
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = LAST_MIGRATION + 1
+ ongoing = cephadm_module.migration.is_migration_ongoing()
+ assert ongoing
+ assert cephadm_module.migration_current == 0
+
+
+@pytest.mark.parametrize(
+ "rgw_spec_store_entry, should_migrate",
+ [
+ ({
+ 'spec': {
+ 'service_type': 'rgw',
+ 'service_name': 'rgw.foo',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512',
+ 'rgw_frontend_port': '5000',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, True),
+ ({
+ 'spec': {
+ 'service_type': 'rgw',
+ 'service_name': 'rgw.foo',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, False),
+ ]
+)
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_entry, should_migrate):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'rgw',
+ json.dumps(rgw_spec_store_entry, sort_keys=True),
+ )
+
+ # make sure rgw_migration_queue is populated accordingly
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+ ls = json.loads(cephadm_module.get_store('rgw_migration_queue'))
+ assert 'rgw' == ls[0]['spec']['service_type']
+
+ # shortcut rgw_migration_queue loading by directly assigning
+ # ls output to rgw_migration_queue list
+ cephadm_module.migration.rgw_migration_queue = ls
+
+ # skip other migrations and go directly to 5_6 migration (RGW spec)
+ cephadm_module.migration_current = 5
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ if should_migrate:
+            # make sure the spec has been migrated and the param=value entries
+ # that were part of the rgw_frontend_type are now in the new
+ # 'rgw_frontend_extra_args' list
+ assert 'rgw.foo' in cephadm_module.spec_store.all_specs
+ rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo']
+ assert dict(rgw_spec.to_json()) == {'service_type': 'rgw',
+ 'service_id': 'foo',
+ 'service_name': 'rgw.foo',
+ 'placement': {'hosts': ['host1']},
+ 'spec': {
+ 'rgw_frontend_extra_args': ['tcp_nodelay=1',
+ 'request_timeout_ms=65000',
+ 'rgw_thread_pool_size=512'],
+ 'rgw_frontend_port': '5000',
+ 'rgw_frontend_type': 'beast',
+ }}
+ else:
+ # in a real environment, we still expect the spec to be there,
+ # just untouched by the migration. For this test specifically
+ # though, the spec will only have ended up in the spec store
+ # if it was migrated, so we can use this to test the spec
+ # was untouched
+ assert 'rgw.foo' not in cephadm_module.spec_store.all_specs
diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
new file mode 100644
index 000000000..6685fcb2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
@@ -0,0 +1,298 @@
+import json
+
+from cephadm.services.osd import OSDRemovalQueue, OSD
+import pytest
+from tests import mock
+from .fixtures import with_cephadm_module
+from datetime import datetime
+
+
+class MockOSD:
+
+ def __init__(self, osd_id):
+ self.osd_id = osd_id
+
+
+class TestOSDRemoval:
+
+ @pytest.mark.parametrize(
+ "osd_id, osd_df, expected",
+ [
+ # missing 'nodes' key
+ (1, dict(nodes=[]), -1),
+ # missing 'pgs' key
+ (1, dict(nodes=[dict(id=1)]), -1),
+ # id != osd_id
+ (1, dict(nodes=[dict(id=999, pgs=1)]), -1),
+ # valid
+ (1, dict(nodes=[dict(id=1, pgs=1)]), 1),
+ ]
+ )
+ def test_get_pg_count(self, rm_util, osd_id, osd_df, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df):
+ assert rm_util.get_pg_count(osd_id) == expected
+
+ @pytest.mark.parametrize(
+ "osds, ok_to_stop, expected",
+ [
+ # no osd_ids provided
+ ([], [False], []),
+ # all osds are ok_to_stop
+ ([1, 2], [True], [1, 2]),
+ # osds are ok_to_stop after the second iteration
+ ([1, 2], [False, True], [2]),
+            # osds are never ok_to_stop (taking the sample size `len(osd_ids)` into account),
+            # so the expected result is an empty list
+ ([1, 2], [False, False], []),
+ ]
+ )
+ def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ assert rm_util.find_osd_stop_threshold(osds) == expected
+
+ def test_process_removal_queue(self, rm_util):
+ # TODO: !
+ # rm_util.process_removal_queue()
+ pass
+
+ @pytest.mark.parametrize(
+ "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected",
+ [
+ # drain one at a time, one already draining
+ (1, [1], [1], [True], 0),
+ # drain one at a time, none draining yet
+ (1, [], [1, 2, 3], [True, True, True], 1),
+ # drain one at a time, one already draining, none ok-to-stop
+ (1, [1], [1], [False], 0),
+ # drain one at a time, none draining, one ok-to-stop
+ (1, [], [1, 2, 3], [False, False, True], 1),
+ # drain three at a time, one already draining, all ok-to-stop
+ (3, [1], [1, 2, 3], [True, True, True], 2),
+ # drain two at a time, none already draining, none ok-to-stop
+ (2, [], [1, 2, 3], [False, False, False], 0),
+ # drain two at a time, none already draining, none idling
+ (2, [], [], [], 0),
+ ]
+ )
+ def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected):
+ with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m:
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds):
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ removal_queue = OSDRemovalQueue(m)
+ assert len(removal_queue._ready_to_drain_osds()) == expected
+
+ def test_ok_to_stop(self, rm_util):
+ rm_util.ok_to_stop([MockOSD(1)])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']},
+ error_ok=True)
+
+ def test_safe_to_destroy(self, rm_util):
+ rm_util.safe_to_destroy([1])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy',
+ 'ids': ['1']}, error_ok=True)
+
+ def test_destroy_osd(self, rm_util):
+ rm_util.destroy_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_purge_osd(self, rm_util):
+ rm_util.purge_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_load(self, cephadm_module, rm_util):
+ data = json.dumps([
+ {
+ "osd_id": 35,
+ "started": True,
+ "draining": True,
+ "stopped": False,
+ "replace": False,
+ "force": False,
+ "zap": False,
+ "nodename": "node2",
+ "drain_started_at": "2020-09-14T11:41:53.960463",
+ "drain_stopped_at": None,
+ "drain_done_at": None,
+ "process_started_at": "2020-09-14T11:41:52.245832"
+ }
+ ])
+ cephadm_module.set_store('osd_remove_queue', data)
+ cephadm_module.to_remove_osds.load_from_store()
+
+ expected = OSDRemovalQueue(cephadm_module)
+ expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True))
+ assert cephadm_module.to_remove_osds == expected
+
+
+class TestOSD:
+
+ def test_start(self, osd_obj):
+ assert osd_obj.started is False
+ osd_obj.start()
+ assert osd_obj.started is True
+ assert osd_obj.stopped is False
+
+ def test_start_draining_purge(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0)
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is False
+ assert ret is True
+
+ def test_start_draining_replace(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ osd_obj.replace = True
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out')
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is True
+ assert ret is True
+
+ def test_start_draining_stopped(self, osd_obj):
+ osd_obj.stopped = True
+ ret = osd_obj.start_draining()
+ assert osd_obj.drain_started_at is None
+ assert ret is False
+ assert osd_obj.draining is False
+
+ def test_stop_draining_replace(self, osd_obj):
+ osd_obj.replace = True
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in')
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ def test_stop_draining_purge(self, osd_obj):
+ osd_obj.original_weight = 1.0
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0)
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ @mock.patch('cephadm.services.osd.OSD.stop_draining')
+ def test_stop(self, stop_draining_mock, osd_obj):
+ osd_obj.stop()
+ assert osd_obj.started is False
+ assert osd_obj.stopped is True
+ stop_draining_mock.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "draining, empty, expected",
+ [
+ # must be !draining! and !not empty! to yield True
+ (True, not True, True),
+ # not draining and not empty
+ (False, not True, False),
+ # not draining and empty
+ (False, True, False),
+ # draining and empty
+ (True, True, False),
+ ]
+ )
+ def test_is_draining(self, osd_obj, draining, empty, expected):
+ with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)):
+ osd_obj.draining = draining
+ assert osd_obj.is_draining is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop")
+ def test_is_ok_to_stop(self, _, osd_obj):
+ osd_obj.is_ok_to_stop
+ osd_obj.rm_util.ok_to_stop.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "pg_count, expected",
+ [
+ (0, True),
+ (1, False),
+ (9999, False),
+ (-1, False),
+ ]
+ )
+ def test_is_empty(self, osd_obj, pg_count, expected):
+ with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count):
+ assert osd_obj.is_empty is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy")
+ def test_safe_to_destroy(self, _, osd_obj):
+ osd_obj.safe_to_destroy()
+ osd_obj.rm_util.safe_to_destroy.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag")
+ def test_down(self, _, osd_obj):
+ osd_obj.down()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down')
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd")
+ def test_destroy_osd(self, _, osd_obj):
+ osd_obj.destroy()
+ osd_obj.rm_util.destroy_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd")
+ def test_purge(self, _, osd_obj):
+ osd_obj.purge()
+ osd_obj.rm_util.purge_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count")
+ def test_pg_count(self, _, osd_obj):
+ osd_obj.get_pg_count()
+ osd_obj.rm_util.get_pg_count.assert_called_once()
+
+ def test_drain_status_human_not_started(self, osd_obj):
+ assert osd_obj.drain_status_human() == 'not started'
+
+ def test_drain_status_human_started(self, osd_obj):
+ osd_obj.started = True
+ assert osd_obj.drain_status_human() == 'started'
+
+ def test_drain_status_human_draining(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = True
+ assert osd_obj.drain_status_human() == 'draining'
+
+ def test_drain_status_human_done(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = False
+ osd_obj.drain_done_at = datetime.utcnow()
+ assert osd_obj.drain_status_human() == 'done, waiting for purge'
+
+
+class TestOSDRemovalQueue:
+
+ def test_queue_size(self, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ assert q.queue_size() == 0
+ q.osds.add(osd_obj)
+ assert q.queue_size() == 1
+
+ @mock.patch("cephadm.services.osd.OSD.start")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_enqueue(self, exist, start, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.enqueue(osd_obj)
+ osd_obj.start.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm_raise(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ with pytest.raises(KeyError):
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.osds.add(osd_obj)
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
new file mode 100644
index 000000000..067cd5028
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -0,0 +1,1699 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+from typing import NamedTuple, List, Dict, Optional
+import pytest
+
+from ceph.deployment.hostspec import HostSpec
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec
+from ceph.deployment.hostspec import SpecValidationError
+
+from cephadm.module import HostAssignment
+from cephadm.schedule import DaemonPlacement
+from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError
+
+
+def wrapper(func):
+    # reverse the order of arguments: each checker(*hosts) returns a function
+    # that is later applied to the actual placement result
+ def inner(*args):
+ def inner2(expected):
+ func(expected, *args)
+ return inner2
+ return inner
+
+
+@wrapper
+def none(expected):
+ assert expected == []
+
+
+@wrapper
+def one_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+ assert len(expected) == 1, f'one_of failed len({expected}) != 1'
+ assert expected[0] in hosts
+
+
+@wrapper
+def two_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+    assert len(expected) == 2, f'two_of failed len({expected}) != 2'
+ matches = 0
+ for h in hosts:
+ matches += int(h in expected)
+ if matches != 2:
+ assert False, f'two of {hosts} not in {expected}'
+
+
+@wrapper
+def exactly(expected, *hosts):
+ assert expected == list(hosts)
+
+
+@wrapper
+def error(expected, kind, match):
+ assert isinstance(expected, kind), (str(expected), match)
+ assert str(expected) == match, (str(expected), match)
+
+
+@wrapper
+def _or(expected, *inners):
+ def catch(inner):
+ try:
+ inner(expected)
+ except AssertionError as e:
+ return e
+ result = [catch(i) for i in inners]
+ if None not in result:
+ assert False, f"_or failed: {expected}"
+
+
+def _always_true(_):
+ pass
+
+
+def k(s):
+ return [e for e in s.split(' ') if e]
+
+
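+# Return the expectation for `key`: the first entry in `results` whose key
+# matches element-wise, with '*' in the stored key matching any value.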
+def get_result(key, results):
+ def match(one):
+ for o, k in zip(one, key):
+ if o != k and o != '*':
+ return False
+ return True
+ return [v for k, v in results if match(k)][0]
+
+
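+# Build the ServiceSpec factory and HostSpec list for one parameter combination;
+# hosts named in `explicit` also carry the 'mylabel' label so the label-based
+# placement section has matching hosts.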
+def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count):
+
+ if spec_section == 'hosts':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ hosts=explicit,
+ count=count,
+ ))
+ elif spec_section == 'label':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ label='mylabel',
+ count=count,
+ ))
+ elif spec_section == 'host_pattern':
+ pattern = {
+ 'e': 'notfound',
+ '1': '1',
+ '12': '[1-2]',
+ '123': '*',
+ }[explicit_key]
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ host_pattern=pattern,
+ count=count,
+ ))
+ else:
+ assert False
+
+ hosts = [
+ HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h)
+ for h in hosts
+ ]
+
+ return mk_spec, hosts
+
+
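+# Look up the expected outcome for the given key (failing with a copy/paste hint
+# when no expectation exists yet), then run HostAssignment.place() repeatedly
+# (the scheduler has a random component) and apply the expectation to either
+# the resulting host list or the raised exception.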
+def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
+ key = ' '.join('N' if e is None else str(e) for e in key_elems)
+ try:
+ assert_res = get_result(k(key), results)
+ except IndexError:
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ if isinstance(host_res, list):
+ e = ', '.join(repr(h.hostname) for h in host_res)
+ assert False, f'`(k("{key}"), exactly({e})),` not found'
+ assert False, f'`(k("{key}"), ...),` not found'
+ except OrchestratorError as e:
+ assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found'
+
+ for _ in range(10): # scheduler has a random component
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons
+ ).place()
+
+ assert_res(sorted([h.hostname for h in host_res]))
+ except Exception as e:
+ assert_res(e)
+
+
+@pytest.mark.parametrize("dp,n,result",
+ [ # noqa: E128
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 0,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]),
+ ),
+ ])
+def test_daemon_placement_renumber(dp, n, result):
+ assert dp.renumber_ports(n) == result
+
+
+@pytest.mark.parametrize(
+ 'dp,dd,result',
+ [
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mon', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ False
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'b', 'host1'),
+ False
+ ),
+ ])
+def test_daemon_placement_match(dp, dd, result):
+ assert dp.matches_daemon(dd) == result
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# | + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + section (host, label, pattern)
+# | | | | + expected result
+# | | | | |
+test_explicit_scheduler_results = [
+ (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')),
+ (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
+ (k("* e N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
+ (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
+ (k("* e * *"), none),
+ (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
+ (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
+ (k("1 * * *"), exactly('1')),
+ (k("12 1 * *"), exactly('1')),
+ (k("12 12 1 *"), one_of('1', '2')),
+ (k("12 12 * *"), exactly('1', '2')),
+ (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
+ (k("12 123 1 *"), one_of('1', '2', '3')),
+ (k("12 123 * *"), two_of('1', '2', '3')),
+ (k("123 1 * *"), exactly('1')),
+ (k("123 12 1 *"), one_of('1', '2')),
+ (k("123 12 * *"), exactly('1', '2')),
+ (k("123 123 1 *"), one_of('1', '2', '3')),
+ (k("123 123 2 *"), two_of('1', '2', '3')),
+ (k("123 123 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 0,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_explicit_scheduler(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ spec_section_key, spec_section):
+
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ run_scheduler_test(
+ results=test_explicit_scheduler_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=[],
+ key_elems=(host_key, explicit_key, count, spec_section_key)
+ )
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# | + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + existing daemons
+# | | | | + section (host, label, pattern)
+# | | | | | + expected result
+# | | | | | |
+test_scheduler_daemons_results = [
+ (k("* 1 * * *"), exactly('1')),
+ (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')),
+ (k("1 123 * * *"), exactly('1')),
+ (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')),
+ (k("12 123 N * *"), exactly('1', '2')),
+ (k("12 123 1 * *"), one_of('1', '2')),
+ (k("12 123 2 * *"), exactly('1', '2')),
+ (k("12 123 3 * *"), exactly('1', '2')),
+ (k("123 123 N * *"), exactly('1', '2', '3')),
+ (k("123 123 1 e *"), one_of('1', '2', '3')),
+ (k("123 123 1 1 *"), exactly('1')),
+ (k("123 123 1 3 *"), exactly('3')),
+ (k("123 123 1 12 *"), one_of('1', '2')),
+ (k("123 123 1 112 *"), one_of('1', '2')),
+ (k("123 123 1 23 *"), one_of('2', '3')),
+ (k("123 123 1 123 *"), one_of('1', '2', '3')),
+ (k("123 123 2 e *"), two_of('1', '2', '3')),
+ (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))),
+ (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))),
+ (k("123 123 2 12 *"), exactly('1', '2')),
+ (k("123 123 2 112 *"), exactly('1', '2')),
+ (k("123 123 2 23 *"), exactly('2', '3')),
+ (k("123 123 2 123 *"), two_of('1', '2', '3')),
+ (k("123 123 3 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("daemons_key, daemons",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('3', ['3']),
+ ('12', ['1', '2']),
+ ('112', ['1', '1', '2']), # deal with existing co-located daemons
+ ('23', ['2', '3']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_scheduler_daemons(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ daemons_key, daemons,
+ spec_section_key, spec_section):
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ dds = [
+ DaemonDescription('mgr', d, d)
+ for d in daemons
+ ]
+ run_scheduler_test(
+ results=test_scheduler_daemons_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=dds,
+ key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key)
+ )
+
+
+# =========================
+
+
+class NodeAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ # just hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(hosts=['smithi060']),
+ ['smithi060'],
+ [],
+ None, None,
+ ['mgr:smithi060'], ['mgr:smithi060'], []
+ ),
+ # all_hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='*'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host3'],
+ []
+ ),
+ # all_hosts + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='*', count_per_host=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+        # count that is bigger than the number of hosts. Truncate to len(hosts).
+        # mgrs should not be co-located with each other.
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+        # count that is bigger than the number of hosts; wrap around.
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # count + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds.a', 'mds.b']
+ ),
+ # count 1 + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list + existing
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a']
+ ),
+        # count + partial host list + existing daemon already on the listed host (deterministic: no change)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ [],
+ []
+ ),
+        # count + partial host list + existing daemon on an unlisted host (deterministic: replaced)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ ['mgr:host1'],
+ ['mgr.a']
+ ),
+ # label only
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (truncate to host list)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+        # label only + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(label='foo', count_per_host=3),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # host_pattern
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='mgr*'),
+ 'mgrhost1 mgrhost2 datahost'.split(),
+ [],
+ None, None,
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ []
+ ),
+ # host_pattern + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='mds*', count_per_host=3),
+ 'mdshost1 mdshost2 datahost'.split(),
+ [],
+ None, None,
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ []
+ ),
+        # label + count (with colo) + ports
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ []
+ ),
+        # label + count (with colo) + ports (+ existing)
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('rgw', 'a', 'host1', ports=[81]),
+ DaemonDescription('rgw', 'b', 'host2', ports=[80]),
+ DaemonDescription('rgw', 'c', 'host1', ports=[82]),
+ ],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw.c']
+ ),
+ # cephadm.py teuth case
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host1=y', 'host2=x']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'y', 'host1'),
+ DaemonDescription('mgr', 'x', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1(name=y)', 'mgr:host2(name=x)'],
+ [], []
+ ),
+
+        # note: the host -> rank mapping is pseudo-random, seeded by the service name,
+        # so these host/rank pairs may look arbitrary but they match the nfs.mynfs seed
+        # used by the test.
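+        # (rank_map maps rank -> {generation: daemon_id or None}; post_rank_map is the
+        # expected content of that map after HostAssignment.place() has run.)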
+
+ # ranked, fresh
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [],
+ {},
+ {0: {0: None}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ []
+ ),
+ # 21: ranked, exist
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ ],
+ {0: {1: '0.1'}},
+ {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks (2)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, extra ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ ['nfs.4.5']
+ ),
+ # 25: ranked, exist, extra ranks (scale down: kill off high rank)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host3 host2 host1'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'],
+ [],
+ ['nfs.2.5']
+ ),
+ # ranked, exist, extra ranks (scale down hosts)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'],
+ ['nfs:host3(rank=1.6)'],
+ ['nfs.2.5', 'nfs.1.5']
+ ),
+ # ranked, exist, duplicate rank
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2),
+ ],
+ {0: {0: '0.0'}, 1: {2: '1.2'}},
+ {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ ['nfs.1.1']
+ ),
+ # 28: ranked, all gens stale (failure during update cycle)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}},
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'],
+ ['nfs:host3(rank=1.4)'],
+ ['nfs.1.2']
+ ),
+ # ranked, not enough hosts
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}},
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, scale down
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(hosts=['host2']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ ['nfs:host2(rank=0.3)'],
+ ['nfs:host2(rank=0.3)'],
+ ['nfs.0.2', 'nfs.1.2', 'nfs.2.2']
+ ),
+
+ ])
+def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map,
+ expected, expected_add, expected_remove):
+ spec = None
+ service_id = None
+ allow_colo = False
+ if service_type == 'rgw':
+ service_id = 'realm.zone'
+ allow_colo = True
+ elif service_type == 'mds':
+ service_id = 'myfs'
+ allow_colo = True
+ elif service_type == 'nfs':
+ service_id = 'mynfs'
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ if not spec:
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ allow_colo=allow_colo,
+ rank_map=rank_map,
+ ).place()
+
+ assert rank_map == post_rank_map
+
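+    # '*' entries in the expected lists act as wildcards: after removing exact matches,
+    # the number of leftover slots must equal the number of wildcards.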
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+class NodeAssignmentTest5(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ available_hosts: List[str]
+    expected_candidates: List[str]
+
+
+@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates",
+ [ # noqa: E128
+ NodeAssignmentTest5(
+ 'alertmanager',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host1 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'prometheus',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host2 host4 host1'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'grafana',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host2 host4 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mgr',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host2 host1 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mon',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'rgw',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host2 host4'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'cephfs-mirror',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host3 host1 host2'.split(),
+ ),
+ ])
+def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates):
+    service_id = None
+    allow_colo = False
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=service_id,
+                       placement=placement)
+
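+    # the candidate list is shuffled deterministically per service name, so each
+    # service type has a stable expected order across runs.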
+ candidates = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in available_hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=[],
+ allow_colo=allow_colo,
+ ).get_candidates()
+
+ candidates_hosts = [h.hostname for h in candidates]
+ assert candidates_hosts == expected_candidates
+
+
+class NodeAssignmentTest2(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_len: int
+ in_set: List[str]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set",
+ [ # noqa: E128
+ # just count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+
+ # hosts + (smaller) count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2'.split()),
+ 'host1 host2'.split(),
+ [],
+ 1,
+ ['host1', 'host2'],
+ ),
+ # hosts + (smaller) count, existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [DaemonDescription('mgr', 'mgr.a', 'host1')],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+ # hosts + (smaller) count, (more) existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ 1,
+ ['host1', 'host2']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ # label + count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ ])
+def test_node_assignment2(service_type, placement, hosts,
+ daemons, expected_len, in_set):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in [h.hostname for h in hosts]:
+ assert h in in_set
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have",
+ [ # noqa: E128
+        # count larger than the explicit (partial) host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=3, hosts='host3'.split()),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ ])
+def test_node_assignment3(service_type, placement, hosts,
+ daemons, expected_len, must_have):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in must_have:
+ assert h in [h.hostname for h in hosts]
+
+
+class NodeAssignmentTest4(NamedTuple):
+ spec: ServiceSpec
+ networks: Dict[str, Dict[str, Dict[str, List[str]]]]
+ daemons: List[DaemonDescription]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ NodeAssignmentTest4(
+ ServiceSpec(
+ service_type='rgw',
+ service_id='foo',
+ placement=PlacementSpec(count=6, label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}},
+ },
+ [],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [
+ DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1',
+ ports=[443, 8888]),
+ DaemonDescription('keepalived', 'b', 'host2'),
+ DaemonDescription('keepalived', 'c', 'host3'),
+ ],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1'],
+ ['keepalived.c']
+ ),
+ ])
+def test_node_assignment4(spec, networks, daemons,
+ expected, expected_add, expected_remove):
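+    # for ingress specs, haproxy is the primary daemon type and keepalived is placed once
+    # per host; hosts without an address in the spec's network (host3 here) get no daemons.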
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ allow_colo=True,
+ networks=networks,
+ primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type,
+ per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None,
+ ).place()
+
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+@pytest.mark.parametrize("placement",
+ [ # noqa: E128
+ ('1 *'),
+ ('* label:foo'),
+ ('* host1 host2'),
+ ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'), # > 63 chars
+ ])
+def test_bad_placements(placement):
+ try:
+ PlacementSpec.from_string(placement.split(' '))
+ assert False
+ except SpecValidationError:
+ pass
+
+
+class NodeAssignmentTestBadSpec(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: str
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected",
+ [ # noqa: E128
+ # unknown host
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(hosts=['unknownhost']),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts"
+ ),
+ # unknown host pattern
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(host_pattern='unknownhost'),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts"
+ ),
+ # unknown label
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(label='unknownlabel'),
+ [],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel"
+ ),
+ ])
+def test_bad_specs(service_type, placement, hosts, daemons, expected):
+ with pytest.raises(OrchestratorValidationError) as e:
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert str(e.value) == expected
+
+
+class ActiveAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: List[List[str]]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove",
+ [
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1', 'host2'], ['host1', 'host3']],
+ [[]],
+ [['mgr.b'], ['mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3'], ['host2', 'host3']],
+ [[]],
+ [['mgr.a'], ['mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host2']],
+ [[]],
+ [['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host2', 'host3']],
+ [[]],
+ [['mgr.a']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host2'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3']],
+ [[]],
+ [['mgr.a2', 'mgr.b']]
+ ),
+ # Explicit placement should override preference for active daemon
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host1']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.b', 'mgr.c']]
+ ),
+
+ ])
+def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in hosts]) in expected
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+    unreachable_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+ [
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3', 'host4']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host4'.split(),
+ [],
+ [['host2', 'host3']],
+ [[]],
+ ),
+
+ ])
+def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class RescheduleFromOfflineTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove",
+ [
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [['host3']],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'mon',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('mon', 'a', 'host1'),
+ DaemonDescription('mon', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'ingress',
+ PlacementSpec(count=1),
+ 'host1 host2'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('haproxy', 'b', 'host2'),
+ DaemonDescription('keepalived', 'b', 'host2'),
+ ],
+ [['host1']],
+ [[]],
+ ),
+ ])
+def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove):
+
+ if service_type == 'ingress':
+ spec = \
+ IngressSpec(
+ service_type='ingress',
+ service_id='nfs-ha.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='nfs-ha.foo',
+ placement=placement,
+ )
+ else:
+ spec = \
+ ServiceSpec(
+ service_type=service_type,
+ service_id='test',
+ placement=placement,
+ )
+
+ host_specs = [HostSpec(h) for h in hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
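+    # hosts flagged 'offline' or 'maintenance' above are passed in as unreachable; the
+    # expected_add/expected_remove lists encode which daemons get rescheduled off them.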
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class DrainExplicitPlacementTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ draining_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,draining_hosts,daemons,expected_add,expected_remove",
+ [
+ DrainExplicitPlacementTest(
+ 'crash',
+ PlacementSpec(hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3 host4'.split(),
+ [],
+ [],
+ ['host3'],
+ [
+ DaemonDescription('crash', 'host1', 'host1'),
+ DaemonDescription('crash', 'host2', 'host2'),
+ DaemonDescription('crash', 'host3', 'host3'),
+ ],
+ [[]],
+ [['crash.host3']],
+ ),
+ DrainExplicitPlacementTest(
+ 'crash',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ [],
+ [],
+ ['host1', 'host4'],
+ [
+ DaemonDescription('crash', 'host1', 'host1'),
+ DaemonDescription('crash', 'host3', 'host3'),
+ ],
+ [['host2']],
+ [['crash.host1']],
+ ),
+ ])
+def test_drain_from_explicit_placement(service_type, placement, hosts, maintenance_hosts, offline_hosts, draining_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id='test',
+ placement=placement)
+
+ host_specs = [HostSpec(h) for h in hosts]
+ draining_host_specs = [HostSpec(h) for h in draining_hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ draining_hosts=draining_host_specs,
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
new file mode 100644
index 000000000..ff98a1388
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
@@ -0,0 +1,178 @@
+from unittest.mock import MagicMock
+from cephadm.service_discovery import Root
+
+
+class FakeDaemonDescription:
+ def __init__(self, ip, ports, hostname, service_name='', daemon_type=''):
+ self.ip = ip
+ self.ports = ports
+ self.hostname = hostname
+ self._service_name = service_name
+ self.daemon_type = daemon_type
+
+ def service_name(self):
+ return self._service_name
+
+
+class FakeCache:
+ def get_daemons_by_service(self, service_type):
+ if service_type == 'ceph-exporter':
+ return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
+ FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
+
+ return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
+ FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
+
+ def get_daemons_by_type(self, daemon_type):
+ return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'),
+ FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')]
+
+
+class FakeInventory:
+ def get_addr(self, name: str):
+ return '1.2.3.4'
+
+
+class FakeServiceSpec:
+ def __init__(self, port):
+ self.monitor_port = port
+
+
+class FakeSpecDescription:
+ def __init__(self, port):
+ self.spec = FakeServiceSpec(port)
+
+
+class FakeSpecStore():
+ def __init__(self, mgr):
+ self.mgr = mgr
+ self._specs = {'ingress': FakeSpecDescription(9049)}
+
+ def __contains__(self, name):
+ return name in self._specs
+
+ def __getitem__(self, name):
+ return self._specs['ingress']
+
+
+class FakeMgr:
+ def __init__(self):
+ self.config = ''
+ self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.template = MagicMock()
+ self.log = MagicMock()
+ self.inventory = FakeInventory()
+ self.cache = FakeCache()
+ self.spec_store = FakeSpecStore(self)
+
+ def get_mgr_id(self):
+ return 'mgr-1'
+
+ def list_servers(self):
+
+ servers = [
+ {'hostname': 'node0',
+ 'ceph_version': '16.2',
+ 'services': [{'type': 'mgr', 'id': 'mgr-1'}, {'type': 'mon'}]},
+ {'hostname': 'node1',
+ 'ceph_version': '16.2',
+ 'services': [{'type': 'mgr', 'id': 'mgr-2'}, {'type': 'mon'}]}
+ ]
+
+ return servers
+
+ def _check_mon_command(self, cmd_dict, inbuf=None):
+ prefix = cmd_dict.get('prefix')
+ if prefix == 'get-cmd':
+ return 0, self.config, ''
+ if prefix == 'set-cmd':
+ self.config = cmd_dict.get('value')
+ return 0, 'value set', ''
+ return -1, '', 'error'
+
+ def get_module_option_ex(self, module, option, default_value):
+ return "9283"
+
+
+class TestServiceDiscovery:
+
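+    # Root.get_sd_config() returns Prometheus HTTP-SD style entries, i.e. a list of
+    # {'targets': [...], 'labels': {...}} dicts; each test checks the structure first
+    # and then the content for one service type.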
+ def test_get_sd_config_prometheus(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('mgr-prometheus')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['node0:9283']
+
+ def test_get_sd_config_node_exporter(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('node-exporter')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9100']
+ assert cfg[0]['labels'] == {'instance': 'node0'}
+ assert cfg[1]['targets'] == ['1.2.3.5:9200']
+ assert cfg[1]['labels'] == {'instance': 'node1'}
+
+ def test_get_sd_config_alertmgr(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('alertmanager')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9100', '1.2.3.5:9200']
+
+ def test_get_sd_config_haproxy(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('haproxy')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9049']
+ assert cfg[0]['labels'] == {'instance': 'ingress'}
+
+ def test_get_sd_config_ceph_exporter(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('ceph-exporter')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9926']
+
+ def test_get_sd_config_invalid_service(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('invalid-service')
+ assert cfg == []
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
new file mode 100644
index 000000000..2300b288d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -0,0 +1,2725 @@
+from textwrap import dedent
+import json
+import urllib.parse
+import yaml
+from mgr_util import build_url
+
+import pytest
+
+from unittest.mock import MagicMock, call, patch, ANY
+
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
+ RbdMirrorService, CrashService, CephadmDaemonDeploySpec
+from cephadm.services.iscsi import IscsiService
+from cephadm.services.nfs import NFSService
+from cephadm.services.nvmeof import NvmeofService
+from cephadm.services.osd import OSDService
+from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
+ NodeExporterService, LokiService, PromtailService
+from cephadm.module import CephadmOrchestrator
+from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
+ ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, TracingSpec, \
+ PrometheusSpec, CephExporterSpec, NFSServiceSpec, NvmeofServiceSpec
+from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect
+
+from ceph.utils import datetime_now
+
+from orchestrator import OrchestratorError
+from orchestrator._interface import DaemonDescription
+
+from typing import Dict, List
+
+grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n"""
+
+grafana_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n"""
+
+
+class FakeInventory:
+ def get_addr(self, name: str) -> str:
+ return '1.2.3.4'
+
+
+class FakeMgr:
+ def __init__(self):
+ self.config = ''
+ self.set_mon_crush_locations: Dict[str, List[str]] = {}
+ self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.template = MagicMock()
+ self.log = MagicMock()
+ self.inventory = FakeInventory()
+
+ def _check_mon_command(self, cmd_dict, inbuf=None):
+ prefix = cmd_dict.get('prefix')
+ if prefix == 'get-cmd':
+ return 0, self.config, ''
+ if prefix == 'set-cmd':
+ self.config = cmd_dict.get('value')
+ return 0, 'value set', ''
+ if prefix in ['auth get']:
+ return 0, '[foo]\nkeyring = asdf\n', ''
+ if prefix == 'quorum_status':
+ # actual quorum status output from testing
+ # note in this output all of the mons have blank crush locations
+ return 0, """{"election_epoch": 14, "quorum": [0, 1, 2], "quorum_names": ["vm-00", "vm-01", "vm-02"], "quorum_leader_name": "vm-00", "quorum_age": 101, "features": {"quorum_con": "4540138322906710015", "quorum_mon": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"]}, "monmap": {"epoch": 3, "fsid": "9863e1b8-6f24-11ed-8ad8-525400c13ad2", "modified": "2022-11-28T14:00:29.972488Z", "created": "2022-11-28T13:57:55.847497Z", "min_mon_release": 18, "min_mon_release_name": "reef", "election_strategy": 1, "disallowed_leaders: ": "", "stretch_mode": false, "tiebreaker_mon": "", "features": {"persistent": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"], "optional": []}, "mons": [{"rank": 0, "name": "vm-00", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.61:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.61:6789", "nonce": 0}]}, "addr": "192.168.122.61:6789/0", "public_addr": "192.168.122.61:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 1, "name": "vm-01", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.63:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.63:6789", "nonce": 0}]}, "addr": "192.168.122.63:6789/0", "public_addr": "192.168.122.63:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 2, "name": "vm-02", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.82:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.82:6789", "nonce": 0}]}, "addr": "192.168.122.82:6789/0", "public_addr": "192.168.122.82:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}]}}""", ''
+ if prefix == 'mon set_location':
+ self.set_mon_crush_locations[cmd_dict.get('name')] = cmd_dict.get('args')
+ return 0, '', ''
+ return -1, '', 'error'
+
+ def get_minimal_ceph_conf(self) -> str:
+ return ''
+
+ def get_mgr_ip(self) -> str:
+ return '1.2.3.4'
+
+
+class TestCephadmService:
+ def test_set_service_url_on_dashboard(self):
+ # pylint: disable=protected-access
+ mgr = FakeMgr()
+ service_url = 'http://svc:1000'
+ service = GrafanaService(mgr)
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ assert mgr.config == service_url
+
+ # set-cmd should not be called if value doesn't change
+ mgr.check_mon_command.reset_mock()
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})
+
+ def _get_services(self, mgr):
+ # services:
+ osd_service = OSDService(mgr)
+ nfs_service = NFSService(mgr)
+ mon_service = MonService(mgr)
+ mgr_service = MgrService(mgr)
+ mds_service = MdsService(mgr)
+ rgw_service = RgwService(mgr)
+ rbd_mirror_service = RbdMirrorService(mgr)
+ grafana_service = GrafanaService(mgr)
+ alertmanager_service = AlertmanagerService(mgr)
+ prometheus_service = PrometheusService(mgr)
+ node_exporter_service = NodeExporterService(mgr)
+ loki_service = LokiService(mgr)
+ promtail_service = PromtailService(mgr)
+ crash_service = CrashService(mgr)
+ iscsi_service = IscsiService(mgr)
+ nvmeof_service = NvmeofService(mgr)
+ cephadm_services = {
+ 'mon': mon_service,
+ 'mgr': mgr_service,
+ 'osd': osd_service,
+ 'mds': mds_service,
+ 'rgw': rgw_service,
+ 'rbd-mirror': rbd_mirror_service,
+ 'nfs': nfs_service,
+ 'grafana': grafana_service,
+ 'alertmanager': alertmanager_service,
+ 'prometheus': prometheus_service,
+ 'node-exporter': node_exporter_service,
+ 'loki': loki_service,
+ 'promtail': promtail_service,
+ 'crash': crash_service,
+ 'iscsi': iscsi_service,
+ 'nvmeof': nvmeof_service,
+ }
+ return cephadm_services
+
+ def test_get_auth_entity(self):
+ mgr = FakeMgr()
+ cephadm_services = self._get_services(mgr)
+
+ for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]:
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+ assert "client.crash.host" == \
+ cephadm_services["crash"].get_auth_entity("id1", "host")
+ with pytest.raises(OrchestratorError):
+ cephadm_services["crash"].get_auth_entity("id1", "")
+ cephadm_services["crash"].get_auth_entity("id1")
+
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1")
+
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1")
+
+ for daemon_type in ["osd", "mds"]:
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+        # services based on plain CephadmService shouldn't have get_auth_entity;
+        # check each daemon type separately so every one is actually exercised
+        for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']:
+            with pytest.raises(AttributeError):
+                cephadm_services[daemon_type].get_auth_entity("id1", "host")
+
+
+class TestISCSIService:
+
+ mgr = FakeMgr()
+ iscsi_service = IscsiService(mgr)
+
+ iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a")
+ iscsi_spec.daemon_type = "iscsi"
+ iscsi_spec.daemon_id = "a"
+ iscsi_spec.spec = MagicMock()
+ iscsi_spec.spec.daemon_type = "iscsi"
+ iscsi_spec.spec.ssl_cert = ''
+ iscsi_spec.api_user = "user"
+ iscsi_spec.api_password = "password"
+ iscsi_spec.api_port = 5000
+ iscsi_spec.api_secure = False
+ iscsi_spec.ssl_cert = "cert"
+ iscsi_spec.ssl_key = "key"
+
+ mgr.spec_store = MagicMock()
+ mgr.spec_store.all_specs.get.return_value = iscsi_spec
+
+ def test_iscsi_client_caps(self):
+
+ iscsi_daemon_spec = CephadmDaemonDeploySpec(
+ host='host', daemon_id='a', service_name=self.iscsi_spec.service_name())
+
+ self.iscsi_service.prepare_create(iscsi_daemon_spec)
+
+ expected_caps = ['mon',
+ 'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"',
+ 'mgr', 'allow command "service status"',
+ 'osd', 'allow rwx']
+
+ expected_call = call({'prefix': 'auth get-or-create',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+ expected_call2 = call({'prefix': 'auth caps',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+ expected_call3 = call({'prefix': 'auth get',
+ 'entity': 'client.iscsi.a'})
+
+ assert expected_call in self.mgr.mon_command.mock_calls
+ assert expected_call2 in self.mgr.mon_command.mock_calls
+ assert expected_call3 in self.mgr.mon_command.mock_calls
+
+ @patch('cephadm.utils.resolve_ip')
+ def test_iscsi_dashboard_config(self, mock_resolve_ip):
+
+ self.mgr.check_mon_command = MagicMock()
+ self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '')
+
+ # Case 1: use IPV4 address
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='192.168.1.1')
+ daemon_list = [id1]
+ mock_resolve_ip.return_value = '192.168.1.1'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@192.168.1.1:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+ # Case 2: use IPV6 address
+ self.mgr.check_mon_command.reset_mock()
+
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210')
+ mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+        # Case 3: IPv6 address with secure protocol (api_secure=True)
+ self.mgr.check_mon_command.reset_mock()
+
+ self.iscsi_spec.api_secure = True
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+ @patch("cephadm.services.iscsi.IscsiService.get_trusted_ips")
+ def test_iscsi_config(self, _get_trusted_ips, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+ iscsi_daemon_id = 'testpool.test.qwert'
+ trusted_ips = '1.1.1.1,2.2.2.2'
+ api_port = 3456
+ api_user = 'test-user'
+ api_password = 'test-password'
+ pool = 'testpool'
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ _get_name.return_value = iscsi_daemon_id
+ _get_trusted_ips.return_value = trusted_ips
+
+ iscsi_gateway_conf = f"""# This file is generated by cephadm.
+[config]
+cluster_client_name = client.iscsi.{iscsi_daemon_id}
+pool = {pool}
+trusted_ip_list = {trusted_ips}
+minimum_gateways = 1
+api_port = {api_port}
+api_user = {api_user}
+api_password = {api_password}
+api_secure = False
+log_to_stderr = True
+log_to_stderr_prefix = debug
+log_to_file = False"""
+
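+        # applying the spec should deploy the daemon via '_orch deploy', passing the
+        # generated iscsi-gateway.cfg to cephadm as a config blob on stdin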
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, IscsiServiceSpec(service_id=pool,
+ api_port=api_port,
+ api_user=api_user,
+ api_password=api_password,
+ pool=pool,
+ trusted_ip_list=trusted_ips)):
+ _run_cephadm.assert_called_with(
+ 'test',
+ f'iscsi.{iscsi_daemon_id}',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": f'iscsi.{iscsi_daemon_id}',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [api_port],
+ },
+ "meta": {
+ 'service_name': f'iscsi.{pool}',
+ 'ports': [api_port],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": f"[client.iscsi.{iscsi_daemon_id}]\nkey = None\n",
+ "files": {
+ "iscsi-gateway.cfg": iscsi_gateway_conf,
+ },
+ }
+ }),
+ )
+
+
+class TestNVMEOFService:
+
+ mgr = FakeMgr()
+ nvmeof_service = NvmeofService(mgr)
+
+ nvmeof_spec = NvmeofServiceSpec(service_type='nvmeof', service_id="a")
+ nvmeof_spec.daemon_type = 'nvmeof'
+ nvmeof_spec.daemon_id = "a"
+ nvmeof_spec.spec = MagicMock()
+ nvmeof_spec.spec.daemon_type = 'nvmeof'
+
+ mgr.spec_store = MagicMock()
+ mgr.spec_store.all_specs.get.return_value = nvmeof_spec
+
+ def test_nvmeof_client_caps(self):
+ pass
+
+ @patch('cephadm.utils.resolve_ip')
+ def test_nvmeof_dashboard_config(self, mock_resolve_ip):
+ pass
+
+ @patch("cephadm.inventory.Inventory.get_addr", lambda _, __: '192.168.100.100')
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+ nvmeof_daemon_id = 'testpool.test.qwert'
+ pool = 'testpool'
+ tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288'
+ default_port = 5500
+ group = 'mygroup'
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ _get_name.return_value = nvmeof_daemon_id
+
+ nvmeof_gateway_conf = f"""# This file is generated by cephadm.
+[gateway]
+name = client.nvmeof.{nvmeof_daemon_id}
+group = {group}
+addr = 192.168.100.100
+port = {default_port}
+enable_auth = False
+state_update_notify = True
+state_update_interval_sec = 5
+
+[ceph]
+pool = {pool}
+config_file = /etc/ceph/ceph.conf
+id = nvmeof.{nvmeof_daemon_id}
+
+[mtls]
+server_key = ./server.key
+client_key = ./client.key
+server_cert = ./server.crt
+client_cert = ./client.crt
+
+[spdk]
+tgt_path = /usr/local/bin/nvmf_tgt
+rpc_socket = /var/tmp/spdk.sock
+timeout = 60
+log_level = WARN
+conn_retries = 10
+transports = tcp
+transport_tcp_options = {{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}}
+tgt_cmd_extra_args = {tgt_cmd_extra_args}\n"""
+
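+        # as with iscsi, deployment goes through '_orch deploy'; the generated
+        # ceph-nvmeof.conf is passed as a config blob and the default ports are expected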
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool,
+ tgt_cmd_extra_args=tgt_cmd_extra_args,
+ group=group,
+ pool=pool)):
+ _run_cephadm.assert_called_with(
+ 'test',
+ f'nvmeof.{nvmeof_daemon_id}',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "nvmeof.testpool.test.qwert",
+ "image": "",
+ "deploy_arguments": [],
+ "params": {
+ "tcp_ports": [5500, 4420, 8009]
+ },
+ "meta": {
+ "service_name": "nvmeof.testpool",
+ "ports": [5500, 4420, 8009],
+ "ip": None,
+ "deployed_by": [],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "extra_entrypoint_args": None
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n",
+ "files": {
+ "ceph-nvmeof.conf": nvmeof_gateway_conf
+ }
+ }
+ }),
+ )
+
+
+class TestMonitoring:
+ def _get_config(self, url: str) -> str:
+
+ return f"""
+ # This file is generated by cephadm.
+ # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+ global:
+ resolve_timeout: 5m
+ http_config:
+ tls_config:
+ insecure_skip_verify: true
+
+ route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+
+ receivers:
+ - name: 'default'
+ webhook_configs:
+ - name: 'ceph-dashboard'
+ webhook_configs:
+ - url: '{url}/api/prometheus_receiver'
+ """
+
+ @pytest.mark.parametrize(
+ "dashboard_url,expected_yaml_url",
+ [
+ # loopback address
+ ("http://[::1]:8080", "http://localhost:8080"),
+ # IPv6
+ (
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ ),
+ # IPv6 to FQDN
+ (
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ "http://mgr.fqdn.test:8080",
+ ),
+ # IPv4
+ (
+ "http://192.168.0.123:8080",
+ "http://192.168.0.123:8080",
+ ),
+ # IPv4 to FQDN
+ (
+ "http://192.168.0.123:8080",
+ "http://mgr.fqdn.test:8080",
+ ),
+ ],
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ @patch("socket.getfqdn")
+ def test_alertmanager_config(
+ self,
+ mock_getfqdn,
+ mock_get,
+ _run_cephadm,
+ cephadm_module: CephadmOrchestrator,
+ dashboard_url,
+ expected_yaml_url,
+ ):
+ _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+ mock_get.return_value = {"services": {"dashboard": dashboard_url}}
+ purl = urllib.parse.urlparse(expected_yaml_url)
+ mock_getfqdn.return_value = purl.hostname
+
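+        # the alertmanager webhook URL is built from socket.getfqdn() (mocked above), so
+        # each dashboard_url maps to the expected_yaml_url form in the generated config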
+ with with_host(cephadm_module, "test"):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config(expected_yaml_url)).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9093, 9094],
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [9093, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "alertmanager.yml": y,
+ },
+ "peers": [],
+ }
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("socket.getfqdn")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash')
+ def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ fqdn = 'host1.test'
+ _get_fqdn.return_value = fqdn
+
+ def gen_cert(host, addr):
+ return ('mycert', 'mykey')
+
+ def get_root_cert():
+ return 'my_root_cert'
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.secure_monitoring_stack = True
+ cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
+ cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+ cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert)
+ cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert)
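+            # with the secure monitoring stack, the deployment is expected to include the TLS
+            # cert/key files, a web.yml with basic-auth users, and the root CA cert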
+ with with_service(cephadm_module, AlertManagerSpec()):
+
+ y = dedent(f"""
+ # This file is generated by cephadm.
+ # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+ global:
+ resolve_timeout: 5m
+ http_config:
+ tls_config:
+ ca_file: root_cert.pem
+
+ route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+
+ receivers:
+ - name: 'default'
+ webhook_configs:
+ - name: 'ceph-dashboard'
+ webhook_configs:
+ - url: 'http://{fqdn}:8080/api/prometheus_receiver'
+ """).lstrip()
+
+ web_config = dedent("""
+ tls_server_config:
+ cert_file: alertmanager.crt
+ key_file: alertmanager.key
+ basic_auth_users:
+ alertmanager_user: alertmanager_password_hash""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9093, 9094],
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [9093, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "alertmanager.yml": y,
+ 'alertmanager.crt': 'mycert',
+ 'alertmanager.key': 'mykey',
+ 'web.yml': web_config,
+ 'root_cert.pem': 'my_root_cert'
+ },
+ 'peers': [],
+ 'web_config': '/etc/alertmanager/web.yml',
+ }
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
+ with with_host(cephadm_module, 'test'):
+            # host "test" needs to have networks defined for the keepalived daemon to be placed
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
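+            # deploy node-exporter, ceph-exporter, rgw + ingress and alertmanager alongside
+            # prometheus so every scrape job shows up in the generated prometheus.yml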
+ with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+ with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \
+ with_service(cephadm_module, s) as _, \
+ with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+ with_service(cephadm_module, IngressSpec(service_id='ingress',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip="1.2.3.4/32",
+ backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+ rule_files:
+ - /etc/prometheus/alerting/*
+
+ alerting:
+ alertmanagers:
+ - scheme: http
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
+
+ scrape_configs:
+ - job_name: 'ceph'
+ honor_labels: true
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+
+ - job_name: 'node'
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+
+ - job_name: 'haproxy'
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+
+ - job_name: 'ceph-exporter'
+ honor_labels: true
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+ """).lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "prometheus.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'prometheus.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9095],
+ },
+ "meta": {
+ 'service_name': 'prometheus',
+ 'ports': [9095],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "prometheus.yml": y,
+ "/etc/prometheus/alerting/custom_alerts.yml": "",
+ },
+ 'retention_time': '15d',
+ 'retention_size': '0',
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash')
+ def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
+
+ def gen_cert(host, addr):
+ return ('mycert', 'mykey')
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.secure_monitoring_stack = True
+ cephadm_module.set_store(PrometheusService.USER_CFG_KEY, 'prometheus_user')
+ cephadm_module.set_store(PrometheusService.PASS_CFG_KEY, 'prometheus_plain_password')
+ cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
+ cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+ cephadm_module.http_server.service_discovery.username = 'sd_user'
+ cephadm_module.http_server.service_discovery.password = 'sd_password'
+ cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(
+ side_effect=gen_cert)
+            # host "test" needs to have networks defined for the keepalived daemon to be placed
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
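+            # with the secure monitoring stack, all scrape jobs and the alertmanager endpoint
+            # should switch to https and use the service-discovery basic-auth credentials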
+ with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+ with_service(cephadm_module, s) as _, \
+ with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+ with_service(cephadm_module, IngressSpec(service_id='ingress',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip="1.2.3.4/32",
+ backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
+
+ web_config = dedent("""
+ tls_server_config:
+ cert_file: prometheus.crt
+ key_file: prometheus.key
+ basic_auth_users:
+ prometheus_user: prometheus_password_hash""").lstrip()
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+ rule_files:
+ - /etc/prometheus/alerting/*
+
+ alerting:
+ alertmanagers:
+ - scheme: https
+ basic_auth:
+ username: alertmanager_user
+ password: alertmanager_plain_password
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ scrape_configs:
+ - job_name: 'ceph'
+ scheme: https
+ tls_config:
+ ca_file: mgr_prometheus_cert.pem
+ honor_labels: true
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'node'
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'haproxy'
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'ceph-exporter'
+ honor_labels: true
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+ """).lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "prometheus.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'prometheus.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9095],
+ },
+ "meta": {
+ 'service_name': 'prometheus',
+ 'ports': [9095],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ 'files': {
+ 'prometheus.yml': y,
+ 'root_cert.pem': '',
+ 'mgr_prometheus_cert.pem': '',
+ 'web.yml': web_config,
+ 'prometheus.crt': 'mycert',
+ 'prometheus.key': 'mykey',
+ "/etc/prometheus/alerting/custom_alerts.yml": "",
+ },
+ 'retention_time': '15d',
+ 'retention_size': '0',
+ 'web_config': '/etc/prometheus/web.yml',
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
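+        # deploying the loki service should produce the default loki.yml shown below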
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, MonitoringSpec('loki')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ auth_enabled: false
+
+ server:
+ http_listen_port: 3100
+ grpc_listen_port: 8080
+
+ common:
+ path_prefix: /tmp/loki
+ storage:
+ filesystem:
+ chunks_directory: /tmp/loki/chunks
+ rules_directory: /tmp/loki/rules
+ replication_factor: 1
+ ring:
+ instance_addr: 127.0.0.1
+ kvstore:
+ store: inmemory
+
+ schema_config:
+ configs:
+ - from: 2020-10-24
+ store: boltdb-shipper
+ object_store: filesystem
+ schema: v11
+ index:
+ prefix: index_
+ period: 24h""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "loki.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'loki.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [3100],
+ },
+ "meta": {
+ 'service_name': 'loki',
+ 'ports': [3100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "loki.yml": y
+ },
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
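+        # note the empty host in the clients url of the expected config, presumably because
+        # no loki daemon is deployed in this test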
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, MonitoringSpec('promtail')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ server:
+ http_listen_port: 9080
+ grpc_listen_port: 0
+
+ positions:
+ filename: /tmp/positions.yaml
+
+ clients:
+ - url: http://:3100/loki/api/v1/push
+
+ scrape_configs:
+ - job_name: system
+ static_configs:
+ - labels:
+ job: Cluster Logs
+ __path__: /var/log/ceph/**/*.log""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "promtail.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'promtail.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9080],
+ },
+ "meta": {
+ 'service_name': 'promtail',
+ 'ports': [9080],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "promtail.yml": y
+ },
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
+ def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+
+ with with_host(cephadm_module, "test"):
+ cephadm_module.set_store("test/grafana_crt", grafana_cert)
+ cephadm_module.set_store("test/grafana_key", grafana_key)
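+            # the per-host cert/key stored above should end up in certs/cert_file and
+            # certs/cert_key of the generated config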
+ with with_service(
+ cephadm_module, PrometheusSpec("prometheus")
+ ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service(
+ cephadm_module, GrafanaSpec("grafana")
+ ) as _:
+ files = {
+ 'grafana.ini': dedent("""
+ # This file is generated by cephadm.
+ [users]
+ default_theme = light
+ [auth.anonymous]
+ enabled = true
+ org_name = 'Main Org.'
+ org_role = 'Viewer'
+ [server]
+ domain = 'bootstrap.storage.lab'
+ protocol = https
+ cert_file = /etc/grafana/certs/cert_file
+ cert_key = /etc/grafana/certs/cert_key
+ http_port = 3000
+ http_addr =
+ [snapshots]
+ external_enabled = false
+ [security]
+ disable_initial_admin_creation = true
+ cookie_secure = true
+ cookie_samesite = none
+ allow_embedding = true""").lstrip(), # noqa: W291
+ 'provisioning/datasources/ceph-dashboard.yml': dedent("""
+ # This file is generated by cephadm.
+ apiVersion: 1
+
+ deleteDatasources:
+ - name: 'Dashboard1'
+ orgId: 1
+
+ datasources:
+ - name: 'Dashboard1'
+ type: 'prometheus'
+ access: 'proxy'
+ orgId: 1
+ url: 'http://[1::4]:9095'
+ basicAuth: false
+ isDefault: true
+ editable: false
+
+ - name: 'Loki'
+ type: 'loki'
+ access: 'proxy'
+ url: ''
+ basicAuth: false
+ isDefault: false
+ editable: false""").lstrip(),
+ 'certs/cert_file': dedent(f"""
+ # generated by cephadm\n{grafana_cert}""").lstrip(),
+ 'certs/cert_key': dedent(f"""
+ # generated by cephadm\n{grafana_key}""").lstrip(),
+ }
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "grafana.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'grafana.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [3000],
+ },
+ "meta": {
+ 'service_name': 'grafana',
+ 'ports': [3000],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": files,
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
+ out = cephadm_module.cephadm_services['grafana'].generate_config(
+ CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
+ assert out == (
+ {
+ 'files':
+ {
+ 'grafana.ini':
+ '# This file is generated by cephadm.\n'
+ '[users]\n'
+ ' default_theme = light\n'
+ '[auth.anonymous]\n'
+ ' enabled = true\n'
+ " org_name = 'Main Org.'\n"
+ " org_role = 'Viewer'\n"
+ '[server]\n'
+ " domain = 'bootstrap.storage.lab'\n"
+ ' protocol = https\n'
+ ' cert_file = /etc/grafana/certs/cert_file\n'
+ ' cert_key = /etc/grafana/certs/cert_key\n'
+ ' http_port = 3000\n'
+ ' http_addr = \n'
+ '[snapshots]\n'
+ ' external_enabled = false\n'
+ '[security]\n'
+ ' admin_user = admin\n'
+ ' admin_password = secure\n'
+ ' cookie_secure = true\n'
+ ' cookie_samesite = none\n'
+ ' allow_embedding = true',
+ 'provisioning/datasources/ceph-dashboard.yml':
+ "# This file is generated by cephadm.\n"
+ "apiVersion: 1\n\n"
+ 'deleteDatasources:\n\n'
+ 'datasources:\n\n'
+ " - name: 'Loki'\n"
+ " type: 'loki'\n"
+ " access: 'proxy'\n"
+ " url: ''\n"
+ ' basicAuth: false\n'
+ ' isDefault: false\n'
+ ' editable: false',
+ 'certs/cert_file': ANY,
+ 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False'])
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_grafana_no_anon_access(self, cephadm_module: CephadmOrchestrator):
+        # with anonymous_access set to False, the [auth.anonymous] section should not be
+        # present in the generated grafana config. Note that an initial_admin_password must
+        # be provided when anonymous_access is False
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, GrafanaSpec(anonymous_access=False, initial_admin_password='secure')):
+ out = cephadm_module.cephadm_services['grafana'].generate_config(
+ CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
+ assert out == (
+ {
+ 'files':
+ {
+ 'grafana.ini':
+ '# This file is generated by cephadm.\n'
+ '[users]\n'
+ ' default_theme = light\n'
+ '[server]\n'
+ " domain = 'bootstrap.storage.lab'\n"
+ ' protocol = https\n'
+ ' cert_file = /etc/grafana/certs/cert_file\n'
+ ' cert_key = /etc/grafana/certs/cert_key\n'
+ ' http_port = 3000\n'
+ ' http_addr = \n'
+ '[snapshots]\n'
+ ' external_enabled = false\n'
+ '[security]\n'
+ ' admin_user = admin\n'
+ ' admin_password = secure\n'
+ ' cookie_secure = true\n'
+ ' cookie_samesite = none\n'
+ ' allow_embedding = true',
+ 'provisioning/datasources/ceph-dashboard.yml':
+ "# This file is generated by cephadm.\n"
+ "apiVersion: 1\n\n"
+ 'deleteDatasources:\n\n'
+ 'datasources:\n\n'
+ " - name: 'Loki'\n"
+ " type: 'loki'\n"
+ " access: 'proxy'\n"
+ " url: ''\n"
+ ' basicAuth: false\n'
+ ' isDefault: false\n'
+ ' editable: false',
+ 'certs/cert_file': ANY,
+ 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False'])
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+
+ yaml_str = """service_type: alertmanager
+service_name: alertmanager
+placement:
+ count: 1
+spec:
+ port: 4200
+"""
+ yaml_file = yaml.safe_load(yaml_str)
+ spec = ServiceSpec.from_json(yaml_file)
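+            # the custom port 4200 should replace the default alertmanager port in both
+            # tcp_ports and the meta ports, while the peer port 9094 is kept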
+
+ with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
+ with with_service(cephadm_module, spec):
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [4200, 9094],
+ 'reconfig': True,
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [4200, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {},
+ }),
+ )
+
+
+class TestRGWService:
+
+ @pytest.mark.parametrize(
+ "frontend, ssl, extra_args, expected",
+ [
+ ('beast', False, ['tcp_nodelay=1'],
+ 'beast endpoint=[fd00:fd00:fd00:3000::1]:80 tcp_nodelay=1'),
+ ('beast', True, ['tcp_nodelay=0', 'max_header_size=65536'],
+ 'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo tcp_nodelay=0 max_header_size=65536'),
+ ('civetweb', False, [], 'civetweb port=[fd00:fd00:fd00:3000::1]:80'),
+ ('civetweb', True, None,
+ 'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'),
+ ]
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, frontend, ssl, extra_args, expected, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.cache.update_host_networks('host1', {
+ 'fd00:fd00:fd00:3000::/64': {
+ 'if0': ['fd00:fd00:fd00:3000::1']
+ }
+ })
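+            # the spec below restricts rgw to the IPv6 network above; the resulting
+            # rgw_frontends config value is checked against `expected`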
+ s = RGWSpec(service_id="foo",
+ networks=['fd00:fd00:fd00:3000::/64'],
+ ssl=ssl,
+ rgw_frontend_type=frontend,
+ rgw_frontend_extra_args=extra_args)
+ with with_service(cephadm_module, s) as dds:
+ _, f, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': f'client.{dds[0]}',
+ 'key': 'rgw_frontends',
+ })
+ assert f == expected
+
+
+class TestMonService:
+
+ def test_set_crush_locations(self, cephadm_module: CephadmOrchestrator):
+ mgr = FakeMgr()
+ mon_service = MonService(mgr)
+ mon_spec = ServiceSpec(service_type='mon', crush_locations={'vm-00': ['datacenter=a', 'rack=1'], 'vm-01': ['datacenter=a'], 'vm-02': ['datacenter=b', 'rack=3']})
+
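+        # one mon daemon per host; set_crush_locations should map each mon host to the
+        # crush location list given in the spec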
+ mon_daemons = [
+ DaemonDescription(daemon_type='mon', daemon_id='vm-00', hostname='vm-00'),
+ DaemonDescription(daemon_type='mon', daemon_id='vm-01', hostname='vm-01'),
+ DaemonDescription(daemon_type='mon', daemon_id='vm-02', hostname='vm-02')
+ ]
+ mon_service.set_crush_locations(mon_daemons, mon_spec)
+ assert 'vm-00' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-00'] == ['datacenter=a', 'rack=1']
+ assert 'vm-01' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-01'] == ['datacenter=a']
+ assert 'vm-02' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-02'] == ['datacenter=b', 'rack=3']
+
+
+class TestSNMPGateway:
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ })
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ },
+ port=9465)
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9465],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9465],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword'
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_engine_id': '8000C53F00000000'
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ auth_protocol='MD5',
+ privacy_protocol='AES',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_priv_password': 'mysecret',
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'MD5',
+ 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'),
+ 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'),
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ 'snmp_v3_priv_protocol': spec.privacy_protocol,
+ 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'),
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+
+class TestIngressService:
+
+ @pytest.mark.parametrize(
+ "enable_haproxy_protocol",
+ [False, True],
+ )
+ @patch("cephadm.inventory.Inventory.get_addr")
+ @patch("cephadm.utils.resolve_ip")
+ @patch("cephadm.inventory.HostCache.get_daemons_by_service")
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_nfs_multiple_nfs_same_rank(
+ self,
+ _run_cephadm,
+ _get_daemons_by_service,
+ _resolve_ip, _get_addr,
+ cephadm_module: CephadmOrchestrator,
+ enable_haproxy_protocol: bool,
+ ):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ def fake_resolve_ip(hostname: str) -> str:
+ if hostname == 'host1':
+ return '192.168.122.111'
+ elif hostname == 'host2':
+ return '192.168.122.222'
+ else:
+ return 'xxx.xxx.xxx.xxx'
+ _resolve_ip.side_effect = fake_resolve_ip
+
+ def fake_get_addr(hostname: str) -> str:
+ return hostname
+ _get_addr.side_effect = fake_get_addr
+
+ nfs_service = NFSServiceSpec(
+ service_id="foo",
+ placement=PlacementSpec(
+ count=1,
+ hosts=['host1', 'host2']),
+ port=12049,
+ enable_haproxy_protocol=enable_haproxy_protocol,
+ )
+
+ ispec = IngressSpec(
+ service_type='ingress',
+ service_id='nfs.foo',
+ backend_service='nfs.foo',
+ frontend_port=2049,
+ monitor_port=9049,
+ virtual_ip='192.168.122.100/24',
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ enable_haproxy_protocol=enable_haproxy_protocol,
+ )
+
+ cephadm_module.spec_store._specs = {
+ 'nfs.foo': nfs_service,
+ 'ingress.nfs.foo': ispec
+ }
+ cephadm_module.spec_store.spec_created = {
+ 'nfs.foo': datetime_now(),
+ 'ingress.nfs.foo': datetime_now()
+ }
+
+        # in both test cases below we want only the ip of the host1 nfs daemon,
+        # since we give it a higher rank_generation than, but the same rank as,
+        # the nfs daemon on host2
+ haproxy_txt = (
+ '# This file is generated by cephadm.\n'
+ 'global\n'
+ ' log 127.0.0.1 local2\n'
+ ' chroot /var/lib/haproxy\n'
+ ' pidfile /var/lib/haproxy/haproxy.pid\n'
+ ' maxconn 8000\n'
+ ' daemon\n'
+ ' stats socket /var/lib/haproxy/stats\n\n'
+ 'defaults\n'
+ ' mode tcp\n'
+ ' log global\n'
+ ' timeout queue 1m\n'
+ ' timeout connect 10s\n'
+ ' timeout client 1m\n'
+ ' timeout server 1m\n'
+ ' timeout check 10s\n'
+ ' maxconn 8000\n\n'
+ 'frontend stats\n'
+ ' mode http\n'
+ ' bind 192.168.122.100:9049\n'
+ ' bind host1:9049\n'
+ ' stats enable\n'
+ ' stats uri /stats\n'
+ ' stats refresh 10s\n'
+ ' stats auth admin:12345\n'
+ ' http-request use-service prometheus-exporter if { path /metrics }\n'
+ ' monitor-uri /health\n\n'
+ 'frontend frontend\n'
+ ' bind 192.168.122.100:2049\n'
+ ' default_backend backend\n\n'
+ 'backend backend\n'
+ ' mode tcp\n'
+ ' balance source\n'
+ ' hash-type consistent\n'
+ )
+ if enable_haproxy_protocol:
+ haproxy_txt += ' default-server send-proxy-v2\n'
+ haproxy_txt += ' server nfs.foo.0 192.168.122.111:12049\n'
+ haproxy_expected_conf = {
+ 'files': {'haproxy.cfg': haproxy_txt}
+ }
+
+        # verify we get the same cfg regardless of the order in which the nfs daemons are returned.
+        # both nfs daemons have rank 0, so only the one with rank_generation 1, i.e. the one
+        # on host1, should be picked
+ nfs_daemons = [
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]),
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049])
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+        # swap the order now; it should still pick out the one with the higher rank_generation.
+        # both nfs daemons have rank 0, so only the one with rank_generation 1, i.e. the one
+        # on host1, should be picked
+ nfs_daemons = [
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]),
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049])
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4']
+ }
+ })
+
+ # the ingress backend
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ip="1.2.3.4/32")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.4\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind 1.2.3.4:8999\n '
+ 'bind 1.2.3.7:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind 1.2.3.4:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
+
+ # the ingress backend
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast', rgw_frontend_port=443, ssl=True)
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ip="1.2.3.4/32")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://[1::4]:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind 1.2.3.4:8999\n '
+ 'bind 1::4:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind 1.2.3.4:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'default-server ssl\n '
+ 'default-server verify none\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1::4:443 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
+
+ # Check the ingress with multiple VIPs
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ips_list=["1.2.3.4/32"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ # Test with only 1 IP on the list, as it will fail with more VIPS but only one host.
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind *:8999\n '
+ 'bind 1.2.3.7:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind *:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_haproxy_port_ips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4/32']
+ }
+ })
+
+            # the rgw backend for the ingress
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ip = '1.2.3.100'
+ frontend_port = 8089
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=frontend_port,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip=f"{ip}/24")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the haproxy conf based on the specified spec
+ haproxy_daemon_spec = cephadm_module.cephadm_services['ingress'].prepare_create(
+ CephadmDaemonDeploySpec(
+ host='test',
+ daemon_type='haproxy',
+ daemon_id='ingress',
+ service_name=ispec.service_name()))
+
+ assert haproxy_daemon_spec.port_ips == {str(frontend_port): ip}
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.1'):
+ with with_host(cephadm_module, 'test2', addr='1.2.3.2'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.1']
+ }
+ })
+ cephadm_module.cache.update_host_networks('test2', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.2']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.2']
+ }
+ })
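+                # with one VIP per subnet, the generated keepalived.conf should contain two
+                # vrrp_instance sections (VI_0 and VI_1), one per interface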
+
+ # Check the ingress with multiple VIPs
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ placement=PlacementSpec(hosts=['test', 'test2']),
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.1:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '1.2.3.2\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.100/24 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ 'vrrp_instance VI_1 {\n '
+ 'state BACKUP\n '
+ 'priority 90\n '
+ 'interface if1\n '
+ 'virtual_router_id 51\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 100.100.100.1\n '
+ 'unicast_peer {\n '
+ '100.100.100.2\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '100.100.100.100/24 dev if1\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_keepalive_interface_host_filtering(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        # we need to make sure keepalived daemons will have an interface
+        # on the hosts we deploy them on in order to set up their VIP
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.1'):
+ with with_host(cephadm_module, 'test2', addr='1.2.3.2'):
+ with with_host(cephadm_module, 'test3', addr='1.2.3.3'):
+ with with_host(cephadm_module, 'test4', addr='1.2.3.3'):
+                        # set up "test" and "test4" to have all the necessary interfaces,
+                        # "test2" to have only one of them (it should still be filtered out)
+                        # and "test3" to have none of them
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.1']
+ }
+ })
+ cephadm_module.cache.update_host_networks('test2', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.2']
+ },
+ })
+ cephadm_module.cache.update_host_networks('test4', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.4']
+ }
+ })
+
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ placement=PlacementSpec(hosts=['test', 'test2', 'test3', 'test4']),
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # since we're never actually going to refresh the host here,
+ # check the tmp daemons to see what was placed during the apply
+ daemons = cephadm_module.cache._get_tmp_daemons()
+ keepalive_daemons = [d for d in daemons if d.daemon_type == 'keepalived']
+ hosts_deployed_on = [d.hostname for d in keepalive_daemons]
+ assert 'test' in hosts_deployed_on
+ assert 'test2' not in hosts_deployed_on
+ assert 'test3' not in hosts_deployed_on
+ assert 'test4' in hosts_deployed_on
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+ def test_keepalive_only_nfs_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
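+            # keepalive_only: the ingress only manages the VIP here; the nfs daemon binds
+            # the virtual IP itself (Bind_addr) and keepalived falls back to the dummy
+            # /usr/bin/false check script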
+
+            # the nfs backend for this keepalive-only ingress
+ s = NFSServiceSpec(service_id="foo", placement=PlacementSpec(count=1),
+ virtual_ip='1.2.3.0/24')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='nfs.foo',
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip='1.2.3.0/24',
+ keepalive_only=True)
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ nfs_generated_conf, _ = cephadm_module.cephadm_services['nfs'].generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='foo.test.0.0', service_name=s.service_name()))
+ ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
+ assert "Bind_addr = 1.2.3.0/24" in ganesha_conf
+
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/false"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.0/24 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+ @patch("cephadm.inventory.Inventory.keys")
+ @patch("cephadm.inventory.Inventory.get_addr")
+ @patch("cephadm.utils.resolve_ip")
+ @patch("cephadm.inventory.HostCache.get_daemons_by_service")
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_nfs_proxy_protocol(
+ self,
+ _run_cephadm,
+ _get_daemons_by_service,
+ _resolve_ip,
+ _get_addr,
+ _inventory_keys,
+ cephadm_module: CephadmOrchestrator,
+ ):
+ """Verify that setting enable_haproxy_protocol for both ingress and
+ nfs services sets the desired configuration parameters in both
+ the haproxy config and nfs ganesha config.
+ """
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ def fake_resolve_ip(hostname: str) -> str:
+ if hostname in ('host1', "192.168.122.111"):
+ return '192.168.122.111'
+ elif hostname in ('host2', '192.168.122.222'):
+ return '192.168.122.222'
+ else:
+ raise KeyError(hostname)
+ _resolve_ip.side_effect = fake_resolve_ip
+ _get_addr.side_effect = fake_resolve_ip
+
+ def fake_keys():
+ return ['host1', 'host2']
+ _inventory_keys.side_effect = fake_keys
+
+ nfs_service = NFSServiceSpec(
+ service_id="foo",
+ placement=PlacementSpec(
+ count=1,
+ hosts=['host1', 'host2']),
+ port=12049,
+ enable_haproxy_protocol=True,
+ )
+
+ ispec = IngressSpec(
+ service_type='ingress',
+ service_id='nfs.foo',
+ backend_service='nfs.foo',
+ frontend_port=2049,
+ monitor_port=9049,
+ virtual_ip='192.168.122.100/24',
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ enable_haproxy_protocol=True,
+ )
+
+ cephadm_module.spec_store._specs = {
+ 'nfs.foo': nfs_service,
+ 'ingress.nfs.foo': ispec
+ }
+ cephadm_module.spec_store.spec_created = {
+ 'nfs.foo': datetime_now(),
+ 'ingress.nfs.foo': datetime_now()
+ }
+
+ haproxy_txt = (
+ '# This file is generated by cephadm.\n'
+ 'global\n'
+ ' log 127.0.0.1 local2\n'
+ ' chroot /var/lib/haproxy\n'
+ ' pidfile /var/lib/haproxy/haproxy.pid\n'
+ ' maxconn 8000\n'
+ ' daemon\n'
+ ' stats socket /var/lib/haproxy/stats\n\n'
+ 'defaults\n'
+ ' mode tcp\n'
+ ' log global\n'
+ ' timeout queue 1m\n'
+ ' timeout connect 10s\n'
+ ' timeout client 1m\n'
+ ' timeout server 1m\n'
+ ' timeout check 10s\n'
+ ' maxconn 8000\n\n'
+ 'frontend stats\n'
+ ' mode http\n'
+ ' bind 192.168.122.100:9049\n'
+ ' bind 192.168.122.111:9049\n'
+ ' stats enable\n'
+ ' stats uri /stats\n'
+ ' stats refresh 10s\n'
+ ' stats auth admin:12345\n'
+ ' http-request use-service prometheus-exporter if { path /metrics }\n'
+ ' monitor-uri /health\n\n'
+ 'frontend frontend\n'
+ ' bind 192.168.122.100:2049\n'
+ ' default_backend backend\n\n'
+ 'backend backend\n'
+ ' mode tcp\n'
+ ' balance source\n'
+ ' hash-type consistent\n'
+ ' default-server send-proxy-v2\n'
+ ' server nfs.foo.0 192.168.122.111:12049\n'
+ )
+ haproxy_expected_conf = {
+ 'files': {'haproxy.cfg': haproxy_txt}
+ }
+
+ nfs_ganesha_txt = (
+ "# This file is generated by cephadm.\n"
+ 'NFS_CORE_PARAM {\n'
+ ' Enable_NLM = false;\n'
+ ' Enable_RQUOTA = false;\n'
+ ' Protocols = 4;\n'
+ ' NFS_Port = 2049;\n'
+ ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n'
+ '}\n'
+ '\n'
+ 'NFSv4 {\n'
+ ' Delegations = false;\n'
+ " RecoveryBackend = 'rados_cluster';\n"
+ ' Minor_Versions = 1, 2;\n'
+ '}\n'
+ '\n'
+ 'RADOS_KV {\n'
+ ' UserId = "nfs.foo.test.0.0";\n'
+ ' nodeid = "nfs.foo.None";\n'
+ ' pool = ".nfs";\n'
+ ' namespace = "foo";\n'
+ '}\n'
+ '\n'
+ 'RADOS_URLS {\n'
+ ' UserId = "nfs.foo.test.0.0";\n'
+ ' watch_url = '
+ '"rados://.nfs/foo/conf-nfs.foo";\n'
+ '}\n'
+ '\n'
+ 'RGW {\n'
+ ' cluster = "ceph";\n'
+ ' name = "client.nfs.foo.test.0.0-rgw";\n'
+ '}\n'
+ '\n'
+ "%url rados://.nfs/foo/conf-nfs.foo"
+ )
+ nfs_expected_conf = {
+ 'files': {'ganesha.conf': nfs_ganesha_txt},
+ 'config': '',
+ 'extra_args': ['-N', 'NIV_EVENT'],
+ 'keyring': (
+ '[client.nfs.foo.test.0.0]\n'
+ 'key = None\n'
+ ),
+ 'namespace': 'foo',
+ 'pool': '.nfs',
+ 'rgw': {
+ 'cluster': 'ceph',
+ 'keyring': (
+ '[client.nfs.foo.test.0.0-rgw]\n'
+ 'key = None\n'
+ ),
+ 'user': 'nfs.foo.test.0.0-rgw',
+ },
+ 'userid': 'nfs.foo.test.0.0',
+ }
+
+ nfs_daemons = [
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id='foo.0.1.host1.qwerty',
+ hostname='host1',
+ rank=0,
+ rank_generation=1,
+ ports=[12049],
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id='foo.0.0.host2.abcdef',
+ hostname='host2',
+ rank=0,
+ rank_generation=0,
+ ports=[12049],
+ ),
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ ingress_svc = cephadm_module.cephadm_services['ingress']
+ nfs_svc = cephadm_module.cephadm_services['nfs']
+
+ # add host network info to one host to test the behavior of
+ # adding all known-good addresses of the host to the list.
+ cephadm_module.cache.update_host_networks('host1', {
+ # this one is additional
+ '10.10.2.0/24': {
+ 'eth1': ['10.10.2.20']
+ },
+ # this is redundant and will be skipped
+ '192.168.122.0/24': {
+ 'eth0': ['192.168.122.111']
+ },
+ # this is a link-local address and will be ignored
+ "fe80::/64": {
+ "veth0": [
+ "fe80::8cf5:25ff:fe1c:d963"
+ ],
+ "eth0": [
+ "fe80::c7b:cbff:fef6:7370"
+ ],
+ "eth1": [
+ "fe80::7201:25a7:390b:d9a7"
+ ]
+ },
+ })
+
+ haproxy_generated_conf, _ = ingress_svc.haproxy_generate_config(
+ CephadmDaemonDeploySpec(
+ host='host1',
+ daemon_id='ingress',
+ service_name=ispec.service_name(),
+ ),
+ )
+ assert haproxy_generated_conf == haproxy_expected_conf
+
+ nfs_generated_conf, _ = nfs_svc.generate_config(
+ CephadmDaemonDeploySpec(
+ host='test',
+ daemon_id='foo.test.0.0',
+ service_name=nfs_service.service_name(),
+ ),
+ )
+ assert nfs_generated_conf == nfs_expected_conf
+
+
+class TestCephFsMirror:
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('cephfs-mirror')):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'mgr module enable',
+ 'module': 'mirroring'
+ })
+
+
+class TestJaeger:
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_query(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = TracingSpec(es_nodes="192.168.0.1:9200",
+ service_type="jaeger-query")
+
+ config = {"elasticsearch_nodes": "http://192.168.0.1:9200"}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "jaeger-query.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-query.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [16686],
+ },
+ "meta": {
+ 'service_name': 'jaeger-query',
+ 'ports': [16686],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_collector_es_deploy(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ collector_spec = TracingSpec(service_type="jaeger-collector")
+ es_spec = TracingSpec(service_type="elasticsearch")
+ es_config = {}
+
+ with with_host(cephadm_module, 'test'):
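+ # note: build_url with no explicit scheme presumably yields a "//<addr>:9200"
+ # form, hence the lstrip("/") before prefixing "http://" below.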
+ collector_config = {
+ "elasticsearch_nodes": f'http://{build_url(host=cephadm_module.inventory.get_addr("test"), port=9200).lstrip("/")}'}
+ with with_service(cephadm_module, es_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "elasticsearch.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'elasticsearch.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9200],
+ },
+ "meta": {
+ 'service_name': 'elasticsearch',
+ 'ports': [9200],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": es_config,
+ }),
+ )
+ with with_service(cephadm_module, collector_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-collector.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-collector.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [14250],
+ },
+ "meta": {
+ 'service_name': 'jaeger-collector',
+ 'ports': [14250],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": collector_config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_agent(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ collector_spec = TracingSpec(service_type="jaeger-collector", es_nodes="192.168.0.1:9200")
+ collector_config = {"elasticsearch_nodes": "http://192.168.0.1:9200"}
+
+ agent_spec = TracingSpec(service_type="jaeger-agent")
+ agent_config = {"collector_nodes": "test:14250"}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, collector_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-collector.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-collector.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [14250],
+ },
+ "meta": {
+ 'service_name': 'jaeger-collector',
+ 'ports': [14250],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": collector_config,
+ }),
+ )
+ with with_service(cephadm_module, agent_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-agent.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-agent.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [6799],
+ },
+ "meta": {
+ 'service_name': 'jaeger-agent',
+ 'ports': [6799],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": agent_config,
+ }),
+ )
diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py
new file mode 100644
index 000000000..78a2d7311
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_spec.py
@@ -0,0 +1,590 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import json
+
+import pytest
+
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \
+ IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec
+from orchestrator import DaemonDescription, OrchestratorError
+
+
+@pytest.mark.parametrize(
+ "spec_json",
+ json.loads("""[
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "alertmanager"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "crash"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "grafana",
+ "protocol": "https"
+},
+{
+ "placement": {
+ "count": 2
+ },
+ "service_type": "mgr"
+},
+{
+ "placement": {
+ "count": 5
+ },
+ "service_type": "mon"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "node-exporter"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "prometheus"
+},
+{
+ "placement": {
+ "hosts": [
+ {
+ "hostname": "ceph-001",
+ "network": "",
+ "name": ""
+ }
+ ]
+ },
+ "service_type": "rgw",
+ "service_id": "default-rgw-realm.eu-central-1.1",
+ "rgw_realm": "default-rgw-realm",
+ "rgw_zone": "eu-central-1"
+},
+{
+ "service_type": "osd",
+ "service_id": "osd_spec_default",
+ "placement": {
+ "host_pattern": "*"
+ },
+ "data_devices": {
+ "model": "MC-55-44-XZ"
+ },
+ "db_devices": {
+ "model": "SSD-123-foo"
+ },
+ "wal_devices": {
+ "model": "NVME-QQQQ-987"
+ }
+}
+]
+""")
+)
+def test_spec_octopus(spec_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ spec = ServiceSpec.from_json(spec_json)
+
+ # just some verification that we can still read old octopus specs
+ def convert_to_old_style_json(j):
+ j_c = dict(j.copy())
+ j_c.pop('service_name', None)
+ if 'spec' in j_c:
+ spec = j_c.pop('spec')
+ j_c.update(spec)
+ if 'placement' in j_c:
+ if 'hosts' in j_c['placement']:
+ j_c['placement']['hosts'] = [
+ {
+ 'hostname': HostPlacementSpec.parse(h).hostname,
+ 'network': HostPlacementSpec.parse(h).network,
+ 'name': HostPlacementSpec.parse(h).name
+ }
+ for h in j_c['placement']['hosts']
+ ]
+ j_c.pop('objectstore', None)
+ j_c.pop('filter_logic', None)
+ j_c.pop('anonymous_access', None)
+ return j_c
+
+ assert spec_json == convert_to_old_style_json(spec.to_json())
+
+
+@pytest.mark.parametrize(
+ "dd_json",
+ json.loads("""[
+ {
+ "hostname": "ceph-001",
+ "container_id": "d94d7969094d",
+ "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0",
+ "container_image_name": "docker.io/prom/alertmanager:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "alertmanager",
+ "version": "0.20.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725856",
+ "created": "2020-04-02T19:23:08.829543",
+ "started": "2020-04-03T07:29:16.932838",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "c4b036202241",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "crash",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725903",
+ "created": "2020-04-02T19:23:11.390694",
+ "started": "2020-04-03T07:29:16.910897",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "5b7b94b48f31",
+ "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4",
+ "container_image_name": "docker.io/ceph/ceph-grafana:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "grafana",
+ "version": "6.6.2",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725950",
+ "created": "2020-04-02T19:23:52.025088",
+ "started": "2020-04-03T07:29:16.847972",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "9ca007280456",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001.gkjwqp",
+ "daemon_type": "mgr",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725807",
+ "created": "2020-04-02T19:22:18.648584",
+ "started": "2020-04-03T07:29:16.856153",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "3d1ba9a2b697",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "mon",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725715",
+ "created": "2020-04-02T19:22:13.863300",
+ "started": "2020-04-03T07:29:17.206024",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "36d026c68ba1",
+ "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87",
+ "container_image_name": "docker.io/prom/node-exporter:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "node-exporter",
+ "version": "0.18.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725996",
+ "created": "2020-04-02T19:23:53.880197",
+ "started": "2020-04-03T07:29:16.880044",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "faf76193cbfe",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "0",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726088",
+ "created": "2020-04-02T20:35:02.991435",
+ "started": "2020-04-03T07:29:19.373956",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "f82505bae0f1",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "1",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726134",
+ "created": "2020-04-02T20:35:17.142272",
+ "started": "2020-04-03T07:29:19.374002",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "2708d84cd484",
+ "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653",
+ "container_image_name": "docker.io/prom/prometheus:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "prometheus",
+ "version": "2.17.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726042",
+ "created": "2020-04-02T19:24:10.281163",
+ "started": "2020-04-03T07:29:16.926292",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo",
+ "daemon_type": "rgw",
+ "status": 1,
+ "status_desc": "starting",
+ "is_active": false
+ }
+]""")
+)
+def test_dd_octopus(dd_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ # Convert datetime properties to old style.
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292
+ def convert_to_old_style_json(j):
+ for k in ['last_refresh', 'created', 'started', 'last_deployed',
+ 'last_configured']:
+ if k in j:
+ j[k] = j[k].rstrip('Z')
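+ # daemon_name is a newer, derived field that is not present in the
+ # octopus-era JSON, so drop it before comparing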
+ del j['daemon_name']
+ return j
+
+ assert dd_json == convert_to_old_style_json(
+ DaemonDescription.from_json(dd_json).to_json())
+
+
+@pytest.mark.parametrize("spec,dd,valid",
+[ # noqa: E128
+ # https://tracker.ceph.com/issues/44934
+ (
+ RGWSpec(
+ service_id="foo",
+ rgw_realm="default-rgw-realm",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm
+ RGWSpec(
+ service_id="foo.bar",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.bar.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm or zone
+ RGWSpec(
+ service_id="bar",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="bar.host.domain.tld.ytywjo",
+ hostname="host.domain.tld",
+ ),
+ True
+ ),
+ (
+ # explicit naming
+ RGWSpec(
+ service_id="realm.zone",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="realm.zone.a",
+ hostname="smithi028",
+ ),
+ True
+ ),
+ (
+ # without host
+ RGWSpec(
+ service_type='rgw',
+ service_id="foo",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # without host (2)
+ RGWSpec(
+ service_type='rgw',
+ service_id="default-rgw-realm.eu-central-1.1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # service_id contains hostname
+ # (sort of) https://tracker.ceph.com/issues/45294
+ RGWSpec(
+ service_id="default.rgw.realm.ceph.001",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo",
+ hostname="ceph.001",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ ServiceSpec(
+ service_type='mds',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45617
+ (
+ # daemon_id does not contain hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # daemon_id only contains hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="host1",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="host1",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45399
+ (
+ # hostname contains a domain ("host1.site") that is not part of the daemon_id
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1.site",
+ ),
+ True
+ ),
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1.site",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # service_id contains a '.' char
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # trailing chars after hostname
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # chars after hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+ (
+ # chars before hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="ahost1.abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # fixed daemon id for teuthology.
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id='iscsi',
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="iscsi.a",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ (
+ CustomContainerSpec(
+ service_type='container',
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ ),
+ DaemonDescription(
+ daemon_type='container',
+ daemon_id='hello-world.mgr0',
+ hostname='mgr0',
+ ),
+ True
+ ),
+
+])
+def test_daemon_description_service_name(spec: ServiceSpec,
+ dd: DaemonDescription,
+ valid: bool):
+ if valid:
+ assert spec.service_name() == dd.service_name()
+ else:
+ with pytest.raises(OrchestratorError):
+ dd.service_name()
diff --git a/src/pybind/mgr/cephadm/tests/test_ssh.py b/src/pybind/mgr/cephadm/tests/test_ssh.py
new file mode 100644
index 000000000..29f01b6c7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_ssh.py
@@ -0,0 +1,105 @@
+import asyncssh
+from asyncssh.process import SSHCompletedProcess
+from unittest import mock
+try:
+ # AsyncMock was not added until python 3.8
+ from unittest.mock import AsyncMock
+except ImportError:
+ # fall back to the asyncmock backport; if that is also unavailable,
+ # leave AsyncMock as None so the tests below can skip themselves
+ try:
+ from asyncmock import AsyncMock
+ except ImportError:
+ AsyncMock = None
+import pytest
+
+
+try:
+ from asyncssh.misc import ConnectionLost
+except ImportError:
+ ConnectionLost = None
+
+from ceph.deployment.hostspec import HostSpec
+
+from cephadm import CephadmOrchestrator
+from cephadm.serve import CephadmServe
+from cephadm.tests.fixtures import with_host, wait, async_side_effect
+from orchestrator import OrchestratorError
+
+
+@pytest.mark.skipif(ConnectionLost is None, reason='no asyncssh')
+class TestWithSSH:
+ @mock.patch("cephadm.ssh.SSHManager._execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._check_execute_command")
+ def test_offline(self, check_execute_command, execute_command, cephadm_module):
+ check_execute_command.side_effect = async_side_effect('')
+ execute_command.side_effect = async_side_effect(('', '', 0))
+
+ if not AsyncMock:
+ # can't run this test if we could not import AsyncMock
+ return
+ mock_connect = AsyncMock(return_value='')
+ with mock.patch("asyncssh.connect", new=mock_connect) as asyncssh_connect:
+ with with_host(cephadm_module, 'test'):
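+ # with_host registers the host using the fixtures' default test address
+ # (1::4), which is what the offline error message and HostSpec below reflect.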
+ asyncssh_connect.side_effect = ConnectionLost('reason')
+ code, out, err = cephadm_module.check_host('test')
+ assert out == ''
+ assert "Failed to connect to test at address (1::4)" in err
+
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4', status='Offline').to_json()
+
+ asyncssh_connect.return_value = mock.MagicMock()
+ asyncssh_connect.side_effect = None
+ assert CephadmServe(cephadm_module)._check_host('test') is None
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4').to_json()
+
+ def test_ssh_remote_cmds_execution(self, cephadm_module):
+
+ if not AsyncMock:
+ # can't run this test if we could not import AsyncMock
+ return
+
+ class FakeConn:
+ def __init__(self, exception=None, returncode=0):
+ self.exception = exception
+ self.returncode = returncode
+
+ async def run(self, *args, **kwargs):
+ if self.exception:
+ raise self.exception
+ else:
+ return SSHCompletedProcess(returncode=self.returncode, stdout="", stderr="")
+
+ async def close(self):
+ pass
+
+ def run_test(host, conn, expected_error):
+ mock_connect = AsyncMock(return_value=conn)
+ with pytest.raises(OrchestratorError, match=expected_error):
+ with mock.patch("asyncssh.connect", new=mock_connect):
+ with with_host(cephadm_module, host):
+ CephadmServe(cephadm_module)._check_host(host)
+
+ # Test case 1: command failure
+ run_test('test1', FakeConn(returncode=1), "Command .+ failed")
+
+ # Test case 2: connection error
+ run_test('test2', FakeConn(exception=asyncssh.ChannelOpenError(1, "", "")), "Unable to reach remote host test2.")
+
+ # Test case 3: asyncssh ProcessError
+ stderr = "my-process-stderr"
+ run_test('test3', FakeConn(exception=asyncssh.ProcessError(returncode=3,
+ env="",
+ command="",
+ subsystem="",
+ exit_status="",
+ exit_signal="",
+ stderr=stderr,
+ stdout="")), f"Cannot execute the command.+{stderr}")
+ # Test case 4: generic error
+ run_test('test4', FakeConn(exception=Exception), "Generic error while executing command.+")
+
+
+@pytest.mark.skipif(ConnectionLost is not None, reason='asyncssh')
+class TestWithoutSSH:
+ def test_can_run(self, cephadm_module: CephadmOrchestrator):
+ assert cephadm_module.can_run() == (False, "loading asyncssh library:No module named 'asyncssh'")
diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py
new file mode 100644
index 000000000..f67304348
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_template.py
@@ -0,0 +1,33 @@
+import pathlib
+
+import pytest
+
+from cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError
+
+
+def test_render(cephadm_module, fs):
+ template_base = (pathlib.Path(__file__).parent / '../templates').resolve()
+ fake_template = template_base / 'foo/bar'
+ fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}')
+
+ template_mgr = TemplateMgr(cephadm_module)
+ value = 'test'
+
+ # with base context
+ expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value)
+ assert template_mgr.render('foo/bar', {'var': value}) == expected_text
+
+ # without base context
+ with pytest.raises(UndefinedError):
+ template_mgr.render('foo/bar', {'var': value}, managed_context=False)
+
+ # override the base context
+ context = {
+ 'cephadm_managed': 'abc',
+ 'var': value
+ }
+ assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value)
+
+ # template not found
+ with pytest.raises(TemplateNotFoundError):
+ template_mgr.render('foo/bar/2', {})
diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
new file mode 100644
index 000000000..66feaee31
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
@@ -0,0 +1,256 @@
+import pytest
+import json
+from tests import mock
+from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR
+from cephadm.inventory import TunedProfileStore
+from ceph.utils import datetime_now
+from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec
+from cephadm.ssh import SSHManager
+from orchestrator import HostSpec
+
+from typing import List, Dict
+
+
+class SaveError(Exception):
+ pass
+
+
+class FakeCache:
+ def __init__(self,
+ hosts,
+ schedulable_hosts,
+ unreachable_hosts):
+ self.hosts = hosts
+ self.unreachable_hosts = [HostSpec(h) for h in unreachable_hosts]
+ self.schedulable_hosts = [HostSpec(h) for h in schedulable_hosts]
+ self.last_tuned_profile_update = {}
+
+ def get_hosts(self):
+ return self.hosts
+
+ def get_schedulable_hosts(self):
+ return self.schedulable_hosts
+
+ def get_unreachable_hosts(self):
+ return self.unreachable_hosts
+
+ def get_draining_hosts(self):
+ return []
+
+ def is_host_unreachable(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_unreachable_hosts()]
+
+ def is_host_schedulable(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_schedulable_hosts()]
+
+ def is_host_draining(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_draining_hosts()]
+
+ @property
+ def networks(self):
+ return {h: {'a': {'b': ['c']}} for h in self.hosts}
+
+ def host_needs_tuned_profile_update(self, host, profile_name):
+ return profile_name == 'p2'
+
+
+class FakeMgr:
+ def __init__(self,
+ hosts: List[str],
+ schedulable_hosts: List[str],
+ unreachable_hosts: List[str],
+ profiles: Dict[str, TunedProfileSpec]):
+ self.cache = FakeCache(hosts, schedulable_hosts, unreachable_hosts)
+ self.tuned_profiles = TunedProfileStore(self)
+ self.tuned_profiles.profiles = profiles
+ self.ssh = SSHManager(self)
+ self.offline_hosts = []
+ self.log_refresh_metadata = False
+
+ def set_store(self, what: str, value: str):
+ raise SaveError(f'{what}: {value}')
+
+ def get_store(self, what: str):
+ if what == 'tuned_profiles':
+ return json.dumps({'x': TunedProfileSpec('x',
+ PlacementSpec(hosts=['x']),
+ {'x': 'x'}).to_json(),
+ 'y': TunedProfileSpec('y',
+ PlacementSpec(hosts=['y']),
+ {'y': 'y'}).to_json()})
+ return ''
+
+
+class TestTunedProfiles:
+ tspec1 = TunedProfileSpec('p1',
+ PlacementSpec(hosts=['a', 'b', 'c']),
+ {'setting1': 'value1',
+ 'setting2': 'value2',
+ 'setting with space': 'value with space'})
+ tspec2 = TunedProfileSpec('p2',
+ PlacementSpec(hosts=['a', 'c']),
+ {'something': 'something_else',
+ 'high': '5'})
+ tspec3 = TunedProfileSpec('p3',
+ PlacementSpec(hosts=['c']),
+ {'wow': 'wow2',
+ 'setting with space': 'value with space',
+ 'down': 'low'})
+
+ def profiles_to_calls(self, tp: TunedProfileUtils, profiles: List[TunedProfileSpec]) -> List[Dict[str, str]]:
+ # this function takes a list of tuned profiles and returns, for each profile,
+ # a single-entry dict mapping the profile name to the string that will be
+ # written to the actual config file on the host.
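+ # e.g. profiles_to_calls(tp, [tspec1, tspec2]) would give
+ # [{'p1': <p1 conf string>}, {'p2': <p2 conf string>}], which is the shape
+ # _write_tuned_profiles is expected to be called with below.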
+ res = []
+ for p in profiles:
+ p_str = tp._profile_to_str(p)
+ res.append({p.profile_name: p_str})
+ return res
+
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles")
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._write_tuned_profiles")
+ def test_write_all_tuned_profiles(self, _write_profiles, _rm_profiles):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._write_all_tuned_profiles()
+ # need to check that _write_tuned_profiles is correctly called with the
+ # profiles that match the tuned profile placements and with the correct
+ # strings that should be generated from the settings the profiles have.
+ # the profiles_to_calls helper allows us to generate the input we
+ # should check against
+ calls = [
+ mock.call('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])),
+ mock.call('b', self.profiles_to_calls(tp, [self.tspec1])),
+ mock.call('c', self.profiles_to_calls(tp, [self.tspec1, self.tspec2, self.tspec3]))
+ ]
+ _write_profiles.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ def test_rm_stray_tuned_profiles(self, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test, host "a" starts with 4 cephadm-generated profile files on disk
+ # ("p1", "p2", "p3" and "who"), only two of which ("p1", "p2") are expected to
+ # be there, as well as a file not generated by cephadm. Only the "p3" and "who"
+ # profiles should be removed from the host. This should total 4 calls to
+ # check_execute_command: 1 "ls", 2 "rm", and 1 "sysctl --system"
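+ # (cephadm-owned profile files are identified by the
+ # "<profile>-cephadm-tuned-profile.conf" naming convention under SYSCTL_DIR)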
+ _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf',
+ 'p2-cephadm-tuned-profile.conf',
+ 'p3-cephadm-tuned-profile.conf',
+ 'who-cephadm-tuned-profile.conf',
+ 'dont-touch-me'])
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
+ calls = [
+ mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
+ mock.call('a', ['sysctl', '--system'])
+ ]
+ _check_execute_command.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ @mock.patch('cephadm.ssh.SSHManager.write_remote_file')
+ def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test we will use host "a" and have it so host_needs_tuned_profile_update
+ # returns True for p2 and False for p1 (see FakeCache class). So we should see
+ # 2 ssh calls, one to write p2, one to run sysctl --system
+ _check_execute_command.return_value = 'success'
+ _write_remote_file.return_value = 'success'
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
+ _check_execute_command.assert_called_with('a', ['sysctl', '--system'])
+ _write_remote_file.assert_called_with(
+ 'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8'))
+
+ def test_dont_write_to_unreachable_hosts(self):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+
+ # list host "a" and "b" as hosts that exist, "a" will be
+ # a normal, schedulable host and "b" is considered unreachable
+ mgr = FakeMgr(['a', 'b'],
+ ['a'],
+ ['b'],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+
+ assert 'a' not in tp.mgr.cache.last_tuned_profile_update
+ assert 'b' not in tp.mgr.cache.last_tuned_profile_update
+
+ # with an online host, should proceed as normal. Providing
+ # no actual profiles here though so the only actual action taken
+ # is updating the entry in the last_tuned_profile_update dict
+ tp._write_tuned_profiles('a', {})
+ assert 'a' in tp.mgr.cache.last_tuned_profile_update
+
+ # trying to write to an unreachable host should be a no-op
+ # and return immediately. No entry for 'b' should be added
+ # to the last_tuned_profile_update dict
+ tp._write_tuned_profiles('b', {})
+ assert 'b' not in tp.mgr.cache.last_tuned_profile_update
+
+ def test_store(self):
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ {})
+ tps = TunedProfileStore(mgr)
+ save_str_p1 = 'tuned_profiles: ' + json.dumps({'p1': self.tspec1.to_json()})
+ tspec1_updated = self.tspec1.copy()
+ tspec1_updated.settings.update({'new-setting': 'new-value'})
+ save_str_p1_updated = 'tuned_profiles: ' + json.dumps({'p1': tspec1_updated.to_json()})
+ save_str_p1_updated_p2 = 'tuned_profiles: ' + \
+ json.dumps({'p1': tspec1_updated.to_json(), 'p2': self.tspec2.to_json()})
+ tspec2_updated = self.tspec2.copy()
+ tspec2_updated.settings.pop('something')
+ save_str_p1_updated_p2_updated = 'tuned_profiles: ' + \
+ json.dumps({'p1': tspec1_updated.to_json(), 'p2': tspec2_updated.to_json()})
+ save_str_p2_updated = 'tuned_profiles: ' + json.dumps({'p2': tspec2_updated.to_json()})
+ with pytest.raises(SaveError) as e:
+ tps.add_profile(self.tspec1)
+ assert str(e.value) == save_str_p1
+ assert 'p1' in tps
+ with pytest.raises(SaveError) as e:
+ tps.add_setting('p1', 'new-setting', 'new-value')
+ assert str(e.value) == save_str_p1_updated
+ assert 'new-setting' in tps.list_profiles()[0].settings
+ with pytest.raises(SaveError) as e:
+ tps.add_profile(self.tspec2)
+ assert str(e.value) == save_str_p1_updated_p2
+ assert 'p2' in tps
+ assert 'something' in tps.list_profiles()[1].settings
+ with pytest.raises(SaveError) as e:
+ tps.rm_setting('p2', 'something')
+ assert 'something' not in tps.list_profiles()[1].settings
+ assert str(e.value) == save_str_p1_updated_p2_updated
+ with pytest.raises(SaveError) as e:
+ tps.rm_profile('p1')
+ assert str(e.value) == save_str_p2_updated
+ assert 'p1' not in tps
+ assert 'p2' in tps
+ assert len(tps.list_profiles()) == 1
+ assert tps.list_profiles()[0].profile_name == 'p2'
+
+ cur_last_updated = tps.last_updated('p2')
+ new_last_updated = datetime_now()
+ assert cur_last_updated != new_last_updated
+ tps.set_last_updated('p2', new_last_updated)
+ assert tps.last_updated('p2') == new_last_updated
+
+ # check FakeMgr get_store func to see what is expected to be found in Key Store here
+ tps.load()
+ assert 'x' in tps
+ assert 'y' in tps
+ assert [p for p in tps.list_profiles() if p.profile_name == 'x'][0].settings == {'x': 'x'}
+ assert [p for p in tps.list_profiles() if p.profile_name == 'y'][0].settings == {'y': 'y'}
diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py
new file mode 100644
index 000000000..3b5c305b5
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py
@@ -0,0 +1,481 @@
+import json
+from unittest import mock
+
+import pytest
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from cephadm import CephadmOrchestrator
+from cephadm.upgrade import CephadmUpgrade, UpgradeState
+from cephadm.ssh import HostConnectionError
+from cephadm.utils import ContainerInspectInfo
+from orchestrator import OrchestratorError, DaemonDescription
+from .fixtures import _run_cephadm, wait, with_host, with_service, \
+ receive_agent_metadata, async_side_effect
+
+from typing import List, Tuple, Optional
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_start(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'image_id', None)) == 'Initiating upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_pause()
+ ) == 'Paused upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_resume()
+ ) == 'Resumed upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_stop()
+ ) == 'Stopped upgrade to image_id'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.offline_hosts = set(['test2'])
+ with pytest.raises(OrchestratorError, match=r"Upgrade aborted - Some host\(s\) are currently offline: {'test2'}"):
+ cephadm_module.upgrade_start('image_id', None)
+ cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_daemons_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", side_effect=HostConnectionError('connection failure reason', 'test2', '192.168.122.1')):
+ _to_upgrade = [(DaemonDescription(daemon_type='crash', daemon_id='test2', hostname='test2'), True)]
+ with pytest.raises(HostConnectionError, match=r"connection failure reason"):
+ cephadm_module.upgrade._upgrade_daemons(_to_upgrade, 'target_image', ['digest1'])
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_do_upgrade_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
+ cephadm_module.offline_hosts = set(['test2'])
+ with pytest.raises(HostConnectionError, match=r"Host\(s\) were marked offline: {'test2'}"):
+ cephadm_module.upgrade._do_upgrade()
+ cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.module.CephadmOrchestrator.remove_health_warning")
+def test_upgrade_resume_clear_health_warnings(_rm_health_warning, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0, paused=True)
+ _rm_health_warning.return_value = None
+ assert wait(cephadm_module, cephadm_module.upgrade_resume()
+ ) == 'Resumed upgrade to target_image'
+ calls_list = [mock.call(alert_id) for alert_id in cephadm_module.upgrade.UPGRADE_ERRORS]
+ _rm_health_warning.assert_has_calls(calls_list, any_order=True)
+
+
+@mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', lambda _: (17, 2, 6))
+@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
+def test_upgrade_check_with_ceph_version(_get_img_info, cephadm_module: CephadmOrchestrator):
+ # This test was added to catch a regression where appending the version to
+ # the image base produced an incorrect image name. The issue caused the
+ # image to come out as
+ # quay.io/ceph/ceph:v18:v18.2.0
+ # see https://tracker.ceph.com/issues/63150
+ _img = ''
+
+ def _fake_get_img_info(img_name):
+ nonlocal _img
+ _img = img_name
+ return ContainerInspectInfo(
+ 'image_id',
+ '18.2.0',
+ 'digest'
+ )
+
+ _get_img_info.side_effect = _fake_get_img_info
+ cephadm_module.upgrade_check('', '18.2.0')
+ assert _img == 'quay.io/ceph/ceph:v18.2.0'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ cephadm_module.set_container_image('global', 'from_image')
+ cephadm_module.use_repo_digest = use_repo_digest
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True), \
+ mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
+ return_value='foo'), \
+ mock.patch("cephadm.module.CephadmOrchestrator.version",
+ new_callable=mock.PropertyMock) as version_mock, \
+ mock.patch("cephadm.module.CephadmOrchestrator.get",
+ return_value={
+ # capture fields in both mon and osd maps
+ "require_osd_release": "pacific",
+ "min_mon_release": 16,
+ }):
+ version_mock.return_value = 'ceph version 18.2.1 (somehash)'
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'to_image', None)) == 'Initiating upgrade to to_image'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'to_image'
+
+ def _versions_mock(cmd):
+ return json.dumps({
+ 'mgr': {
+ 'ceph version 1.2.3 (asdf) blah': 1
+ }
+ })
+
+ cephadm_module._mon_command_mock_versions = _versions_mock
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+
+ cephadm_module.upgrade._do_upgrade()
+
+ assert cephadm_module.upgrade_status is not None
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name=list(cephadm_module.cache.daemons['host1'].keys())[0],
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ container_image_name='to_image',
+ container_image_id='image_id',
+ container_image_digests=['to_image@repo_digest'],
+ deployed_by=['to_image@repo_digest'],
+ version='version',
+ state='running',
+ )
+ ])
+ )):
+ receive_agent_metadata(cephadm_module, 'host1', ['ls'])
+ receive_agent_metadata(cephadm_module, 'host2', ['ls'])
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+ cephadm_module.upgrade._do_upgrade()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'to_image@repo_digest'
+ else:
+ assert image == 'to_image'
+
+
+def test_upgrade_state_null(cephadm_module: CephadmOrchestrator):
+ # This test validates https://tracker.ceph.com/issues/47580
+ cephadm_module.set_store('upgrade_state', 'null')
+ CephadmUpgrade(cephadm_module)
+ assert CephadmUpgrade(cephadm_module).upgrade_state is None
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''):
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.upgrade_start('image_id', None))
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
+def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator):
+ # only 2 monitors, not enough for ok-to-stop to ever pass
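+ # (stopping one of only two mons would cost the cluster its monitor quorum,
+ # which needs a majority of mons up)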
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '')
+ assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+ # 3 monitors, ok-to-stop should work fine
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
+ assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
+@mock.patch("cephadm.CephadmOrchestrator.get")
+def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+
+@pytest.mark.parametrize("current_version, use_tags, show_all_versions, tags, result",
+ [
+ # several candidate versions (from different major versions)
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v16.2.5',
+ 'v16.1.4',
+ 'v16.1.3',
+ 'v15.2.0',
+ ],
+ ['17.1.0', '16.2.7', '16.2.6', '16.2.5', '16.1.4', '16.1.3']
+ ),
+ # candidate minor versions are available
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v16.2.2',
+ 'v16.2.1',
+ 'v16.1.6',
+ ],
+ ['16.2.2', '16.2.1', '16.1.6']
+ ),
+ # all versions are less than the current version
+ (
+ (17, 2, '17.2.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ ],
+ []
+ ),
+ # show all versions (regardless of the current version)
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ True, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v15.1.0',
+ 'v14.2.0',
+ ],
+ ['17.1.0', '16.2.7', '16.2.6', '15.1.0', '14.2.0']
+ ),
+ # show all tags (regardless of the current version and show_all_versions flag)
+ (
+ (16, 1, '16.1.0'),
+ True, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v16.2.5',
+ 'v16.1.4',
+ 'v16.1.3',
+ 'v15.2.0',
+ ],
+ ['v15.2.0', 'v16.1.3', 'v16.1.4', 'v16.2.5',
+ 'v16.2.6', 'v16.2.7', 'v17.1.0']
+ ),
+ ])
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_ls(current_version, use_tags, show_all_versions, tags, result, cephadm_module: CephadmOrchestrator):
+ with mock.patch('cephadm.upgrade.Registry.get_tags', return_value=tags):
+ with mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', return_value=current_version):
+ out = cephadm_module.upgrade.upgrade_ls(None, use_tags, show_all_versions)
+ if use_tags:
+ assert out['tags'] == result
+ else:
+ assert out['versions'] == result
+
+
+@pytest.mark.parametrize(
+ "upgraded, not_upgraded, daemon_types, hosts, services, should_block",
+ # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... ]
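+ # the rule being validated: a staggered upgrade filter is rejected whenever it
+ # would leave a not-yet-upgraded mgr daemon outside the filter while upgrading
+ # other daemons, since mgr daemons must always be upgraded first.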
+ [
+ ( # valid, upgrade mgr daemons
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mgr'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mons until mgr is upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ True
+ ),
+ ( # invalid, can't upgrade mon service until all mgr daemons are upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mon'],
+ True
+ ),
+ ( # valid, upgrade mgr service
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mgr'],
+ False
+ ),
+ ( # valid, mgr is already upgraded so can upgrade mons
+ [('mgr', 'a', 'a.x')],
+ [('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a
+ [],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only daemon on b is a mgr
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only upgrading the mgr on a
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr'],
+ ['a'],
+ None,
+ False
+ ),
+ ( # valid, the mgr daemon not on b is already upgraded
+ [('mgr', 'a', 'a.x')],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # valid, all the necessary hosts are covered, mgr on c is already upgraded
+ [('mgr', 'c', 'c.z')],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')],
+ None,
+ ['a', 'b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr', 'mon'],
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, the only mon not on "b" is already upgraded. Case hit while writing a teuthology test
+ [('mon', 'a', 'a')],
+ [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')],
+ ['mon', 'osd'],
+ ['b'],
+ None,
+ False
+ ),
+ ]
+)
+@mock.patch("cephadm.module.HostCache.get_daemons")
+@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
+@mock.patch('cephadm.module.SpecStore.__getitem__')
+def test_staggered_upgrade_validation(
+ get_spec,
+ get_image_info,
+ get_daemons,
+ upgraded: List[Tuple[str, str, str]],
+ not_upgraded: List[Tuple[str, str, str]],
+ daemon_types: Optional[List[str]],
+ hosts: Optional[List[str]],
+ services: Optional[List[str]],
+ should_block: bool,
+ cephadm_module: CephadmOrchestrator,
+):
+ def to_dds(ts: List[Tuple[str, str, str]], upgraded: bool) -> List[DaemonDescription]:
+ dds = []
+ digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest'
+ for t in ts:
+ dds.append(DaemonDescription(daemon_type=t[0],
+ hostname=t[1],
+ daemon_id=t[2],
+ container_image_digests=[digest],
+ deployed_by=[digest],))
+ return dds
+ get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False)
+ get_image_info.side_effect = async_side_effect(
+ ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest']))
+
+ class FakeSpecDesc():
+ def __init__(self, spec):
+ self.spec = spec
+
+ def _get_spec(s):
+ return FakeSpecDesc(ServiceSpec(s))
+
+ get_spec.side_effect = _get_spec
+ if should_block:
+ with pytest.raises(OrchestratorError):
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)
+ else:
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)