Diffstat (limited to 'src/pybind/mgr/cephadm/tests')
-rw-r--r--  src/pybind/mgr/cephadm/tests/__init__.py                  |    0
-rw-r--r--  src/pybind/mgr/cephadm/tests/conftest.py                  |   27
-rw-r--r--  src/pybind/mgr/cephadm/tests/fixtures.py                  |  200
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_autotune.py             |   69
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_cephadm.py              | 2709
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_completion.py           |   40
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_configchecks.py         |  668
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_facts.py                |   31
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_migration.py            |  340
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_osd_removal.py          |  298
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_scheduling.py           | 1699
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_service_discovery.py    |  178
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_services.py             | 2725
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_spec.py                 |  590
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_ssh.py                  |  105
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_template.py             |   33
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_tuned_profiles.py       |  256
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_upgrade.py              |  481
18 files changed, 10449 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/__init__.py
diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py
new file mode 100644
index 000000000..e8add2c7b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/conftest.py
@@ -0,0 +1,27 @@
+import pytest
+
+from cephadm.services.osd import RemoveUtil, OSD
+from tests import mock
+
+from .fixtures import with_cephadm_module
+
+
+@pytest.fixture()
+def cephadm_module():
+ with with_cephadm_module({}) as m:
+ yield m
+
+
+@pytest.fixture()
+def rm_util():
+ with with_cephadm_module({}) as m:
+ r = RemoveUtil.__new__(RemoveUtil)
+ r.__init__(m)
+ yield r
+
+
+@pytest.fixture()
+def osd_obj():
+ with mock.patch("cephadm.services.osd.RemoveUtil"):
+ o = OSD(0, mock.MagicMock())
+ yield o
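A minimal sketch of how pytest wires these fixtures into a test by parameter name (hypothetical test, not part of this changeset):

    def test_fixture_wiring(cephadm_module, rm_util, osd_obj):
        # pytest resolves the arguments against the fixtures defined above:
        # cephadm_module is a CephadmOrchestrator with its I/O mocked out,
        # rm_util wraps that module, and osd_obj is an OSD built against a MagicMock.
        assert rm_util is not None
        assert osd_obj is not None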
diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py
new file mode 100644
index 000000000..6281283d7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/fixtures.py
@@ -0,0 +1,200 @@
+import fnmatch
+import asyncio
+import sys
+from tempfile import NamedTemporaryFile
+from contextlib import contextmanager
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm.serve import CephadmServe, cephadmNoImage
+
+try:
+ from typing import Any, Iterator, List, Callable, Dict
+except ImportError:
+ pass
+
+from cephadm import CephadmOrchestrator
+from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus
+from tests import mock
+
+
+def async_side_effect(result):
+ async def side_effect(*args, **kwargs):
+ return result
+ return side_effect
+
+
+def get_ceph_option(_, key):
+ return __file__
+
+
+def get_module_option_ex(_, module, key, default=None):
+ if module == 'prometheus':
+ if key == 'server_port':
+ return 9283
+ return None
+
+
+def _run_cephadm(ret):
+ async def foo(s, host, entity, cmd, e, **kwargs):
+ if cmd == 'gather-facts':
+ return '{}', '', 0
+ return [ret], '', 0
+ return foo
+
+
+def match_glob(val, pat):
+ ok = fnmatch.fnmatchcase(val, pat)
+ if not ok:
+ assert pat in val
+
+
+class MockEventLoopThread:
+ def get_result(self, coro, timeout):
+ if sys.version_info >= (3, 7):
+ return asyncio.run(coro)
+
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ try:
+ return loop.run_until_complete(coro)
+ finally:
+ loop.close()
+ asyncio.set_event_loop(None)
+
+
+def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None:
+ to_update: Dict[str, Callable[[str, Any], None]] = {
+ 'ls': m._process_ls_output,
+ 'gather-facts': m.cache.update_host_facts,
+ 'list-networks': m.cache.update_host_networks,
+ }
+ if ops:
+ for op in ops:
+ out = m.wait_async(CephadmServe(m)._run_cephadm_json(host, cephadmNoImage, op, []))
+ to_update[op](host, out)
+ m.cache.last_daemon_update[host] = datetime_now()
+ m.cache.last_facts_update[host] = datetime_now()
+ m.cache.last_network_update[host] = datetime_now()
+ m.cache.metadata_up_to_date[host] = True
+
+
+def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None:
+ for host in m.cache.get_hosts():
+ receive_agent_metadata(m, host)
+
+
+@contextmanager
+def with_cephadm_module(module_options=None, store=None):
+ """
+ :param module_options: Set opts as if they were set before module.__init__ is called
+ :param store: Set the store before module.__init__ is called
+ """
+ with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option), \
+ mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \
+ mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \
+ mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
+ mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \
+ mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \
+ mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \
+ mock.patch('cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles'), \
+ mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \
+ mock.patch('cephadm.http_server.CephadmHttpServer.run'):
+
+ m = CephadmOrchestrator.__new__(CephadmOrchestrator)
+ if module_options is not None:
+ for k, v in module_options.items():
+ m._ceph_set_module_option('cephadm', k, v)
+ if store is None:
+ store = {}
+ if '_ceph_get/mon_map' not in store:
+ m.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ if '_ceph_get/mgr_map' not in store:
+ m.mock_store_set('_ceph_get', 'mgr_map', {
+ 'services': {
+ 'dashboard': 'http://[::1]:8080',
+ 'prometheus': 'http://[::1]:8081'
+ },
+ 'modules': ['dashboard', 'prometheus'],
+ })
+ for k, v in store.items():
+ m._ceph_set_store(k, v)
+
+ m.__init__('cephadm', 0, 0)
+ m._cluster_fsid = "fsid"
+
+ m.event_loop = MockEventLoopThread()
+ m.tkey = NamedTemporaryFile(prefix='test-cephadm-identity-')
+
+ yield m
+
+
+def wait(m: CephadmOrchestrator, c: OrchResult) -> Any:
+ return raise_if_exception(c)
+
+
+@contextmanager
+def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True):
+ with mock.patch("cephadm.utils.resolve_ip", return_value=addr):
+ wait(m, m.add_host(HostSpec(hostname=name)))
+ if refresh_hosts:
+ CephadmServe(m)._refresh_hosts_and_daemons()
+ receive_agent_metadata(m, name)
+ yield
+ wait(m, m.remove_host(name, force=rm_with_force))
+
+
+def assert_rm_service(cephadm: CephadmOrchestrator, srv_name):
+ mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr')
+ if mon_or_mgr:
+ assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name))
+ return
+ assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}'
+ assert cephadm.spec_store[srv_name].deleted is not None
+ CephadmServe(cephadm)._check_daemons()
+ CephadmServe(cephadm)._apply_all_services()
+ assert cephadm.spec_store[srv_name].deleted
+ unmanaged = cephadm.spec_store[srv_name].spec.unmanaged
+ CephadmServe(cephadm)._purge_deleted_services()
+ if not unmanaged: # because then we're not deleting daemons
+ assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}'
+
+
+@contextmanager
+def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]:
+ if spec.placement.is_empty() and host:
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+ if meth is not None:
+ c = meth(cephadm_module, spec)
+ assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
+ else:
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']
+
+ specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
+ assert spec in specs
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ if status_running:
+ make_daemons_running(cephadm_module, spec.service_name())
+
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
+ own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
+ if host and spec.service_type != 'osd':
+ assert own_dds
+
+ yield [dd.name() for dd in own_dds]
+
+ assert_rm_service(cephadm_module, spec.service_name())
+
+
+def make_daemons_running(cephadm_module, service_name):
+ own_dds = cephadm_module.cache.get_daemons_by_service(service_name)
+ for dd in own_dds:
+ dd.status = DaemonDescriptionStatus.running # We're changing the reference
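A sketch of how the helpers above compose in practice, mirroring the pattern used throughout test_cephadm.py below (the host name and spec are illustrative):

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_composition_sketch(cephadm_module):
        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, ServiceSpec('crash'),
                              CephadmOrchestrator.apply_crash, 'test') as daemon_names:
                # the service is scheduled, applied and its daemons listed here;
                # on exit the service and then the host are removed again.
                assert daemon_names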
diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py
new file mode 100644
index 000000000..524da9c00
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_autotune.py
@@ -0,0 +1,69 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import pytest
+
+from cephadm.autotune import MemoryAutotuner
+from orchestrator import DaemonDescription
+
+
+@pytest.mark.parametrize("total,daemons,config,result",
+ [ # noqa: E128
+ (
+ 128 * 1024 * 1024 * 1024,
+ [],
+ {},
+ None,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 64 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ DaemonDescription('osd', '3', 'host1'),
+ ],
+ {
+ 'osd.3': 16 * 1024 * 1024 * 1024,
+ },
+ 56 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 62 * 1024 * 1024 * 1024,
+ )
+ ])
+def test_autotune(total, daemons, config, result):
+ def fake_getter(who, opt):
+ if opt == 'osd_memory_target_autotune':
+ if who in config:
+ return False
+ else:
+ return True
+ if opt == 'osd_memory_target':
+ return config.get(who, 4 * 1024 * 1024 * 1024)
+ if opt == 'mds_cache_memory_limit':
+ return 16 * 1024 * 1024 * 1024
+
+ a = MemoryAutotuner(
+ total_mem=total,
+ daemons=daemons,
+ config_get=fake_getter,
+ )
+ val, osds = a.tune()
+ assert val == result
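The expected results above can be read directly from the inputs: with two autotuned OSDs on a 128 GiB host each gets 128 / 2 = 64 GiB; when osd.3 opts out of autotuning with a fixed 16 GiB target, the remaining (128 - 16) / 2 = 56 GiB go to each autotuned OSD; and with a mgr colocated, the result of (128 - 4) / 2 = 62 GiB suggests roughly 4 GiB is set aside for the mgr (an inference from the expected value, not something stated in this patch).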
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
new file mode 100644
index 000000000..24fcb0280
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -0,0 +1,2709 @@
+import asyncio
+import json
+import logging
+
+from contextlib import contextmanager
+
+import pytest
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
+from cephadm.serve import CephadmServe
+from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
+from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+from cephadm.utils import SpecialHostLabels
+
+try:
+ from typing import List
+except ImportError:
+ pass
+
+from ceph.deployment.service_spec import (
+ CustomConfig,
+ CustomContainerSpec,
+ HostPlacementSpec,
+ IscsiServiceSpec,
+ MDSSpec,
+ NFSServiceSpec,
+ PlacementSpec,
+ RGWSpec,
+ ServiceSpec,
+)
+from ceph.deployment.drive_selection.selector import DriveSelection
+from ceph.deployment.inventory import Devices, Device
+from ceph.utils import datetime_to_str, datetime_now, str_to_datetime
+from orchestrator import DaemonDescription, InventoryHost, \
+ HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent
+from tests import mock
+from .fixtures import wait, _run_cephadm, match_glob, with_host, \
+ with_cephadm_module, with_service, make_daemons_running, async_side_effect
+from cephadm.module import CephadmOrchestrator
+
+"""
+TODOs:
+ There is real room for improvement here. I just quickly assembled these tests.
+ In general, everything should be tested in Teuthology as well. The reason for
+ also testing here is the shorter development roundtrip time.
+"""
+
+
+def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host):
+ dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host))
+ d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)]
+ assert d_names
+ # there should only be one daemon (if not, match_glob will throw a mismatch)
+ assert len(d_names) == 1
+
+ c = cephadm.remove_daemons(d_names)
+ [out] = wait(cephadm, c)
+ # picking the 1st element is needed, rather than passing the whole list, when the daemon
+ # name contains a '-' char. Otherwise the '-' is treated as a range, i.e. cephadm-exporter
+ # is treated like an m-e range, which is invalid. rbd-mirror (d-m) and node-exporter (e-e)
+ # are valid, so they pass without incident! Also, match_glob acts on strings anyway!
+ match_glob(out, f"Removed {d_names[0]}* from host '{host}'")
+
+
+@contextmanager
+def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str):
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")
+
+ dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
+ for dd in dds:
+ if dd.hostname == host:
+ yield dd.daemon_id
+ assert_rm_daemon(cephadm_module, spec.service_name(), host)
+ return
+
+ assert False, 'Daemon not found'
+
+
+@contextmanager
+def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'up': True,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ _run_cephadm.reset_mock(return_value=True, side_effect=True)
+ if ceph_volume_lvm_list:
+ _run_cephadm.side_effect = ceph_volume_lvm_list
+ else:
+ async def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": cephadm_module._cluster_fsid,
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ str(osd_id): [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ _run_cephadm.side_effect = _ceph_volume_list
+
+ assert cephadm_module._osd_activate(
+ [host]).stdout == f"Created osd(s) 1 on host '{host}'"
+ assert _run_cephadm.mock_calls == [
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True),
+ mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY),
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True),
+ ]
+ dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
+ assert dd.name() == f'osd.{osd_id}'
+ yield dd
+ cephadm_module._remove_daemons([(f'osd.{osd_id}', host)])
+
+
+class TestCephadm(object):
+
+ def test_get_unique_name(self, cephadm_module):
+ # type: (CephadmOrchestrator) -> None
+ existing = [
+ DaemonDescription(daemon_type='mon', daemon_id='a')
+ ]
+ new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing)
+ match_glob(new_mon, 'myhost')
+ new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing)
+ match_glob(new_mgr, 'myhost.*')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host(self, cephadm_module):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ with with_host(cephadm_module, 'test'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+
+ # Be careful with backward compatibility when changing things here:
+ assert json.loads(cephadm_module.get_store('inventory')) == \
+ {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}}
+
+ with with_host(cephadm_module, 'second', '1.2.3.5'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '1::4'),
+ HostSpec('second', '1.2.3.5')
+ ]
+
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ @mock.patch("cephadm.utils.resolve_ip")
+ def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module):
+ resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1']
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ cephadm_module._add_host(HostSpec('test', '192.168.122.1'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '192.168.122.1')]
+ cephadm_module._add_host(HostSpec('test'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '192.168.122.1')]
+ with pytest.raises(OrchestratorError):
+ cephadm_module._add_host(HostSpec('test2'))
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.list_daemons(refresh=True)
+ assert wait(cephadm_module, c) == []
+ with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _:
+
+ c = cephadm_module.list_daemons()
+
+ def remove_id_events(dd):
+ out = dd.to_json()
+ del out['daemon_id']
+ del out['events']
+ del out['daemon_name']
+ return out
+
+ assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [
+ {
+ 'service_name': 'mds.name',
+ 'daemon_type': 'mds',
+ 'hostname': 'test',
+ 'status': 2,
+ 'status_desc': 'starting',
+ 'is_active': False,
+ 'ports': [],
+ }
+ ]
+
+ with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'),
+ CephadmOrchestrator.apply_rgw, 'test', status_running=True):
+ make_daemons_running(cephadm_module, 'mds.name')
+
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'name',
+ 'service_name': 'mds.name',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 2},
+ 'unmanaged': True
+ },
+ {
+ 'placement': {
+ 'count': 1,
+ 'hosts': ["test"]
+ },
+ 'service_id': 'r.z',
+ 'service_name': 'rgw.r.z',
+ 'service_type': 'rgw',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 1,
+ 'ports': [80]},
+ }
+ ]
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls_service_type_flag(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True):
+ with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mds, '', status_running=True):
+
+ # with no service-type. Should provide info for both services
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_name': 'mgr',
+ 'service_type': 'mgr',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ # with service-type. Should provide info for only mds
+ c = cephadm_module.describe_service(service_type='mds')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ # service-type should not match with service names
+ c = cephadm_module.describe_service(service_type='mds.test-id')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_device_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.get_inventory()
+ assert wait(cephadm_module, c) == [InventoryHost('test')]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.foobar',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ ),
+ dict(
+ name='something.foo.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+ dict(
+ name='haproxy.test.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+
+ ])
+ ))
+ def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
+ assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+
+ d_name = 'rgw.' + daemon_id
+
+ c = cephadm_module.daemon_action('redeploy', d_name)
+ assert wait(cephadm_module,
+ c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'"
+
+ for what in ('start', 'stop', 'restart'):
+ c = cephadm_module.daemon_action(what, d_name)
+ assert wait(cephadm_module,
+ c) == F"Scheduled to {what} {d_name} on host 'test'"
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module._store['_ceph_get/mon_map'] = {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ }
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert cephadm_module.events.get_for_daemon(d_name) == [
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"Deployed {d_name} on host \'test\'"),
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"stop {d_name} from host \'test\'"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+ with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command:
+
+ _ceph_send_command.side_effect = Exception("myerror")
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ evs = [e.message for e in cephadm_module.events.get_for_daemon(
+ f'rgw.{daemon_id}')]
+
+ assert 'myerror' in ''.join(evs)
+
+ @pytest.mark.parametrize(
+ "action",
+ [
+ 'start',
+ 'stop',
+ 'restart',
+ 'reconfig',
+ 'redeploy'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.HostCache.save_host")
+ def test_daemon_check(self, _save_host, cephadm_module: CephadmOrchestrator, action):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._schedule_daemon_action(daemon_name, action)
+
+ assert cephadm_module.cache.get_scheduled_daemon_action(
+ 'test', daemon_name) == action
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert _save_host.called_with('test')
+ assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+
+ # Also testing deploying mons without explicit network placement
+ cephadm_module.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mon',
+ 'name': 'public_network',
+ 'value': '127.0.0.0/8'
+ })
+
+ cephadm_module.cache.update_host_networks(
+ 'test',
+ {
+ "127.0.0.0/8": [
+ "127.0.0.1"
+ ],
+ }
+ )
+
+ with with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'mon.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "mon.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'reconfig': True,
+ },
+ "meta": {
+ 'service_name': 'mon',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "[mon]\nk=v\n[mon.test]\npublic network = 127.0.0.0/8\n",
+ "keyring": "",
+ "files": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n"
+ },
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_mon_crush_location_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mon',
+ 'name': 'public_network',
+ 'value': '127.0.0.0/8'
+ })
+
+ cephadm_module.cache.update_host_networks(
+ 'test',
+ {
+ "127.0.0.0/8": [
+ "127.0.0.1"
+ ],
+ }
+ )
+
+ with with_service(cephadm_module, ServiceSpec(service_type='mon', crush_locations={'test': ['datacenter=a', 'rack=2']}), CephadmOrchestrator.apply_mon, 'test'):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'mon.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "mon.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {},
+ "meta": {
+ 'service_name': 'mon',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n",
+ "keyring": "",
+ "files": {
+ "config": "[mon.test]\npublic network = 127.0.0.0/8\n",
+ },
+ "crush_location": "datacenter=a",
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'crash.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "crash.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ },
+ "meta": {
+ 'service_name': 'crash',
+ 'ports': [],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.crash.test]\nkey = None\n",
+ },
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_and_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg'],
+ extra_container_args=['--cpus=2', '--quiet']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus=2",
+ "--quiet",
+ ],
+ 'extra_entrypoint_args': [
+ "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
+ "--some-other-arg",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_entrypoint_and_container_args_with_spaces(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='node-exporter',
+ extra_entrypoint_args=['--entrypoint-arg-with-value value', '--some-other-arg 3'],
+ extra_container_args=['--cpus 2', '--container-arg-with-value value']),
+ CephadmOrchestrator.apply_node_exporter):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'node-exporter.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "node-exporter.test",
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9100],
+ 'extra_container_args': [
+ "--cpus",
+ "2",
+ "--container-arg-with-value",
+ "value",
+ ],
+ 'extra_entrypoint_args': [
+ "--entrypoint-arg-with-value",
+ "value",
+ "--some-other-arg",
+ "3",
+ ],
+ },
+ "meta": {
+ 'service_name': 'node-exporter',
+ 'ports': [9100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': [
+ "--cpus 2",
+ "--container-arg-with-value value",
+ ],
+ 'extra_entrypoint_args': [
+ "--entrypoint-arg-with-value value",
+ "--some-other-arg 3",
+ ],
+ },
+ "config_blobs": {},
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_custom_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ test_cert = ['-----BEGIN PRIVATE KEY-----',
+ 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+ 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+ '-----END PRIVATE KEY-----',
+ '-----BEGIN CERTIFICATE-----',
+ 'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+ 'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+ '-----END CERTIFICATE-----']
+ configs = [
+ CustomConfig(content='something something something',
+ mount_path='/etc/test.conf'),
+ CustomConfig(content='\n'.join(test_cert), mount_path='/usr/share/grafana/thing.crt')
+ ]
+ tc_joined = '\n'.join(test_cert)
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='crash', custom_configs=configs), CephadmOrchestrator.apply_crash):
+ _run_cephadm(
+ 'test',
+ 'crash.test',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "crash.test",
+ "image": "",
+ "deploy_arguments": [],
+ "params": {},
+ "meta": {
+ "service_name": "crash",
+ "ports": [],
+ "ip": None,
+ "deployed_by": [],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "extra_entrypoint_args": None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.crash.test]\nkey = None\n",
+ "custom_config_files": [
+ {
+ "content": "something something something",
+ "mount_path": "/etc/test.conf",
+ },
+ {
+ "content": tc_joined,
+ "mount_path": "/usr/share/grafana/thing.crt",
+ },
+ ]
+ }
+ }),
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'):
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+ CephadmServe(cephadm_module)._check_daemons()
+ _mon_cmd.assert_any_call(
+ {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'},
+ None)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
+ def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
+ # https://tracker.ceph.com/issues/52866
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):
+
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
+
+ # get daemons from the post-action list (ARRGH sets!!)
+ tempset = cephadm_module.requires_post_actions.copy()
+ tempdaemon1 = tempset.pop()
+ tempdaemon2 = tempset.pop()
+
+ # make sure post actions has 2 daemons in it
+ assert len(cephadm_module.requires_post_actions) == 2
+
+ # replicate a host cache that is not in sync when check_daemons is called
+ tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1)
+ tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2)
+ host = 'test1'
+ if 'test1' not in tempdaemon1:
+ host = 'test2'
+ cephadm_module.cache.rm_daemon(host, tempdaemon1)
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
+ CephadmServe(cephadm_module)._check_daemons()
+ _cfg_db.assert_called_once_with([tempdd2])
+
+ # post actions still has the other daemon in it and will run next _check_daemons
+ assert len(cephadm_module.requires_post_actions) == 1
+
+ # the post action was missed for a daemon
+ assert tempdaemon1 in cephadm_module.requires_post_actions
+
+ # put the daemon back in the cache
+ cephadm_module.cache.add_daemon(host, tempdd1)
+
+ _cfg_db.reset_mock()
+ # replicate serve loop running again
+ CephadmServe(cephadm_module)._check_daemons()
+
+ # post actions should have been called again
+ _cfg_db.assert_called()
+
+ # post actions is now empty
+ assert len(cephadm_module.requires_post_actions) == 0
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mon_add(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"]
+
+ with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ wait(cephadm_module, c)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mgr_update(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'mgr.a', 'test')
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds(self, _mon_cmd, cephadm_module):
+ dict_out = {
+ "nodes": [
+ {
+ "id": -1,
+ "name": "default",
+ "type": "root",
+ "type_id": 11,
+ "children": [
+ -3
+ ]
+ },
+ {
+ "id": -3,
+ "name": "host1",
+ "type": "host",
+ "type_id": 1,
+ "pool_weights": {},
+ "children": [
+ 0
+ ]
+ },
+ {
+ "id": 0,
+ "device_class": "hdd",
+ "name": "osd.0",
+ "type": "osd",
+ "type_id": 0,
+ "crush_weight": 0.0243988037109375,
+ "depth": 2,
+ "pool_weights": {},
+ "exists": 1,
+ "status": "destroyed",
+ "reweight": 1,
+ "primary_affinity": 1
+ }
+ ],
+ "stray": []
+ }
+ json_out = json.dumps(dict_out)
+ _mon_cmd.return_value = (0, json_out, '')
+ osd_claims = OsdIdClaims(cephadm_module)
+ assert osd_claims.get() == {'host1': ['0']}
+ assert osd_claims.filtered_by_host('host1') == ['0']
+ assert osd_claims.filtered_by_host('host1.domain.com') == ['0']
+
+ @pytest.mark.parametrize(
+ "ceph_services, cephadm_daemons, strays_expected, metadata",
+ # [ ([(daemon_type, daemon_id), ... ], [cephadm daemons], [expected strays], {metadata}), ... ]
+ [
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0')],
+ [('mgr', 'x')],
+ {},
+ ),
+ # https://tracker.ceph.com/issues/49573
+ (
+ [('rgw-nfs', '14649')],
+ [],
+ [('nfs', 'foo-rgw.host1')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')],
+ [],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1')],
+ [('nfs', 'foo2.host2')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ ]
+ )
+ def test_check_for_stray_daemons(
+ self,
+ cephadm_module,
+ ceph_services,
+ cephadm_daemons,
+ strays_expected,
+ metadata
+ ):
+ # mock ceph service-map
+ services = []
+ for service in ceph_services:
+ s = {'type': service[0], 'id': service[1]}
+ services.append(s)
+ ls = [{'hostname': 'host1', 'services': services}]
+
+ with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers:
+ list_servers.return_value = ls
+ list_servers.__iter__.side_effect = ls.__iter__
+
+ # populate cephadm daemon cache
+ dm = {}
+ for daemon_type, daemon_id in cephadm_daemons:
+ dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id)
+ dm[dd.name()] = dd
+ cephadm_module.cache.update_host_daemons('host1', dm)
+
+ def get_metadata_mock(svc_type, svc_id, default):
+ return metadata[svc_id]
+
+ with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock):
+
+ # test
+ CephadmServe(cephadm_module)._check_for_strays()
+
+ # verify
+ strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON')
+ if not strays:
+ assert len(strays_expected) == 0
+ else:
+ for dt, di in strays_expected:
+ name = '%s.%s' % (dt, di)
+ for detail in strays['detail']:
+ if name in detail:
+ strays['detail'].remove(detail)
+ break
+ assert name in detail
+ assert len(strays['detail']) == 0
+ assert strays['count'] == len(strays_expected)
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module):
+ _mon_cmd.return_value = (1, "", "fail_msg")
+ with pytest.raises(OrchestratorError):
+ OsdIdClaims(cephadm_module)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='foo',
+ placement=PlacementSpec(
+ host_pattern='*',
+ ),
+ data_devices=DeviceSelection(
+ all=True
+ )
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+
+ inventory = Devices([
+ Device(
+ '/dev/sdb',
+ available=True
+ ),
+ ])
+
+ cephadm_module.cache.update_host_devices('test', inventory.devices)
+
+ _run_cephadm.side_effect = async_side_effect((['{}'], '', 0))
+
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True,
+ stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='noncollocated',
+ placement=PlacementSpec(
+ hosts=['test']
+ ),
+ data_devices=DeviceSelection(paths=['/dev/sdb']),
+ db_devices=DeviceSelection(paths=['/dev/sdc']),
+ wal_devices=DeviceSelection(paths=['/dev/sdd'])
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...']
+
+ inventory = Devices([
+ Device('/dev/sdb', available=True),
+ Device('/dev/sdc', available=True),
+ Device('/dev/sdd', available=True)
+ ])
+
+ cephadm_module.cache.update_host_devices('test', inventory.devices)
+
+ _run_cephadm.side_effect = async_side_effect((['{}'], '', 0))
+
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc',
+ '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
+ error_ok=True, stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.SpecStore.save")
+ def test_apply_osd_save_placement(self, _save_spec, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'},
+ 'service_id': 'foo', 'data_devices': {'all': True}}
+ spec = ServiceSpec.from_json(json_spec)
+ assert isinstance(spec, DriveGroupSpec)
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+ _save_spec.assert_called_with(spec)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+ bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_host'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(bad_dg)
+ out = wait(cephadm_module, c)
+ assert "Invalid 'host:device' spec: host not found in cluster" in out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_noncollocated_osd(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command')
+ @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
+ @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
+ @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
+ def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(limit=5, rotational=1),
+ service_id='not_enough')
+
+ disks_found = [
+ '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
+ d_to_cv.return_value = 'foo'
+ _run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+ preview = cephadm_module.osd_service.generate_previews([dg], 'test')
+
+ for osd in preview:
+ assert 'notes' in osd
+ assert osd['notes'] == [
+ 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_prepare_drivegroup(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ out = cephadm_module.osd_service.prepare_drivegroup(dg)
+ assert len(out) == 1
+ f1 = out[0]
+ assert f1[0] == 'test'
+ assert isinstance(f1[1], DriveSelection)
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+ # no preview and only one disk, prepare is used due to the hack that is in place.
+ (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]),
+ # no preview and multiple disks, uses batch
+ (['/dev/sda', '/dev/sdb'], False,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]),
+ # preview and only one disk needs to use batch again to generate the preview
+ (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]),
+ # preview and multiple disks work the same
+ (['/dev/sda', '/dev/sdb'], True,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+ assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+ for cmd in out), f'Expected cmds from f{out} in {exp_commands}'
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+ # one data device, no preview
+ (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]),
+ # multiple data devices, no preview
+ (['/dev/sda', '/dev/sdb'], False,
+ ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]),
+ # one data device, preview
+ (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]),
+ # multiple data devices, preview
+ (['/dev/sda', '/dev/sdb'], True,
+ ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+ assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+ for cmd in out), f'Expected cmds from f{out} in {exp_commands}'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='osd.0',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ @mock.patch("cephadm.services.osd.OSD.exists", True)
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
+ def test_remove_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+
+ c = cephadm_module.remove_daemons(['osd.0'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed osd.0 from host 'test'"]
+
+ cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0,
+ replace=False,
+ force=False,
+ hostname='test',
+ process_started_at=datetime_now(),
+ remove_util=cephadm_module.to_remove_osds.rm_util
+ ))
+ cephadm_module.to_remove_osds.process_removal_queue()
+ assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module)
+
+ c = cephadm_module.remove_osds_status()
+ out = wait(cephadm_module, c)
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)):
+ ps = PlacementSpec(hosts=['host1'], count=1)
+ c = cephadm_module.add_daemon(
+ RGWSpec(service_id="foo", placement=ps))
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed rgw.foo.* on host 'host1'")
+
+ ps = PlacementSpec(hosts=['host1', 'host2'], count=2)
+ r = CephadmServe(cephadm_module)._apply_service(
+ RGWSpec(service_id="foo", placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1')
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.myhost.myid',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ def test_remove_daemon(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+ c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ # both are in status "starting"
+ assert len(cephadm_module.cache.get_daemons()) == 2
+
+ dd1.status = DaemonDescriptionStatus.running
+ dd2.status = DaemonDescriptionStatus.error
+ cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1})
+ cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2})
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ assert len(cephadm_module.cache.get_daemons()) == 1
+
+ assert cephadm_module.events.get_for_daemon('osd.1') == [
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host1'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host2'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Removed duplicated daemon on host 'host2'"),
+ ]
+
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @pytest.mark.parametrize(
+ "spec",
+ [
+ ServiceSpec('crash'),
+ ServiceSpec('prometheus'),
+ ServiceSpec('grafana'),
+ ServiceSpec('node-exporter'),
+ ServiceSpec('alertmanager'),
+ ServiceSpec('rbd-mirror'),
+ ServiceSpec('cephfs-mirror'),
+ ServiceSpec('mds', service_id='fsname'),
+ RGWSpec(rgw_realm='realm', rgw_zone='zone'),
+ RGWSpec(service_id="foo"),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_add(self, spec: ServiceSpec, cephadm_module):
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, unmanaged_spec):
+ with with_daemon(cephadm_module, spec, 'test'):
+ pass
+
+ @pytest.mark.parametrize(
+ "entity,success,spec",
+ [
+ ('mgr.x', True, ServiceSpec(
+ service_type='mgr',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.rgw.x', True, ServiceSpec(
+ service_type='rgw',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.nfs.x', True, ServiceSpec(
+ service_type='nfs',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('mon.', False, ServiceSpec(
+ service_type='mon',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.side_effect = OrchestratorError('fail')
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.add_daemon(spec))
+ if success:
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ else:
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ assert cephadm_module.events.get_for_service(spec.service_name()) == [
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO',
+ "service was created"),
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR',
+ "fail"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.side_effect = OrchestratorError('fail')
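+ # 'test:0.0.0.0=a' uses the '<host>:<network>=<name>' placement form, pinning the
+ # daemon id to 'a' (hence the 'mgr.a' expected in the health check detail below)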
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1
+ assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks[
+ 'CEPHADM_DAEMON_PLACE_FAIL']['summary']
+ assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks[
+ 'CEPHADM_DAEMON_PLACE_FAIL']['detail']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._apply_all_services()
+ ps = PlacementSpec(hosts=['fail'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.apply_spec_fails
+ assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1
+ assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks[
+ 'CEPHADM_APPLY_SPEC_FAIL']['summary']
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option")
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.save_host_devices")
+ def test_invalid_config_option_health_warning(self, _save_devs, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator):
+ _save_devs.return_value = None
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ get_foreign_ceph_option.side_effect = KeyError
+ CephadmServe(cephadm_module)._apply_service_config(
+ ServiceSpec('mgr', placement=ps, config={'test': 'foo'}))
+ assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None
+ assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1
+ assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['summary']
+ assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['detail']
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option")
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.CephadmOrchestrator.set_store")
+ def test_save_devices(self, _set_store, _run_cephadm, _get_foreign_ceph_option, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ entry_size = 65536 # default 64k size
+ _get_foreign_ceph_option.return_value = entry_size
+
+ class FakeDev():
+ def __init__(self, c: str = 'a'):
+ # using 1015 here makes the serialized string exactly 1024 bytes if c is one char
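+ # i.e. len(json.dumps({'a': 'a' * 1015})) == 7 + 1015 + 2 == 1024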
+ self.content = {c: c * 1015}
+ self.path = 'dev/vdc'
+
+ def to_json(self):
+ return self.content
+
+ def from_json(self, stuff):
+ return json.loads(stuff)
+
+ def byte_len(s):
+ return len(s.encode('utf-8'))
+
+ with with_host(cephadm_module, 'test'):
+ fake_devices = [FakeDev()] * 100 # should be ~100k
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
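+ # the ~100k serialized device list exceeds the 64k per-entry limit, so it is
+ # expected to be split across three store keys, with the first key recording
+ # the total entry count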
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 34], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 34]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 32]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ fake_devices = [FakeDev()] * 300 # should be ~300k
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size * 4
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 5
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50], 'entries': 6})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.3', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.4', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ mock.call('host.test.devices.5', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 50]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ fake_devices = [FakeDev()] * 62 # should be ~62k, just under cache size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 62], 'entries': 1})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ # should be ~64k but just over so it requires more entries
+ fake_devices = [FakeDev()] * 64
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 22], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 22]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev()] * 20]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ # test for actual content being correct using differing devices
+ entry_size = 3072
+ _get_foreign_ceph_option.return_value = entry_size
+ fake_devices = [FakeDev('a'), FakeDev('b'), FakeDev('c'), FakeDev('d'), FakeDev('e')]
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size
+ assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2
+ cephadm_module.cache.update_host_devices('test', fake_devices)
+ cephadm_module.cache.save_host_devices('test')
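+ # each distinct FakeDev still serializes to ~1024 bytes, so with the 3072 byte
+ # limit the five devices are expected to land two per entry, with 'e' alone in
+ # the final one (see expected_calls below)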
+ expected_calls = [
+ mock.call('host.test.devices.0', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('a'), FakeDev('b')]], 'entries': 3})),
+ mock.call('host.test.devices.1', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('c'), FakeDev('d')]]})),
+ mock.call('host.test.devices.2', json.dumps(
+ {'devices': [d.to_json() for d in [FakeDev('e')]]})),
+ ]
+ _set_store.assert_has_calls(expected_calls)
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_store")
+ def test_load_devices(self, _get_store, cephadm_module: CephadmOrchestrator):
+ def _fake_store(key):
+ if key == 'host.test.devices.0':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 9], 'entries': 3})
+ elif key == 'host.test.devices.1':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 7]})
+ elif key == 'host.test.devices.2':
+ return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 4]})
+ else:
+ raise Exception(f'Get store with unexpected value {key}')
+
+ _get_store.side_effect = _fake_store
+ devs = cephadm_module.cache.load_host_devices('test')
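+ # the 9 + 7 + 4 devices spread across the three store entries should be
+ # reassembled into a single list of 20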
+ assert devs == [Device('/path')] * 20
+
+ @mock.patch("cephadm.module.Inventory.__contains__")
+ def test_check_stray_host_cache_entry(self, _contains, cephadm_module: CephadmOrchestrator):
+ def _fake_inv(key):
+ if key in ['host1', 'node02', 'host.something.com']:
+ return True
+ return False
+
+ _contains.side_effect = _fake_inv
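+ # keys that exactly match an inventory host are host entries,
+ # '<known host>.devices.<N>' keys are device entries, and anything
+ # else is treated as stray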
+ assert cephadm_module.cache._get_host_cache_entry_status('host1') == HostCacheStatus.host
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.something.com') == HostCacheStatus.host
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'node02.devices.37') == HostCacheStatus.devices
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.something.com.devices.0') == HostCacheStatus.devices
+ assert cephadm_module.cache._get_host_cache_entry_status('hostXXX') == HostCacheStatus.stray
+ assert cephadm_module.cache._get_host_cache_entry_status(
+ 'host.nothing.com') == HostCacheStatus.stray
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_nfs(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = NFSServiceSpec(
+ service_id='name',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed nfs.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock())
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ def test_iscsi(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed iscsi.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test')
+
+ @pytest.mark.parametrize(
+ "on_bool",
+ [
+ True,
+ False
+ ]
+ )
+ @pytest.mark.parametrize(
+ "fault_ident",
+ [
+ 'fault',
+ 'ident'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')])
+ on_off = 'on' if on_bool else 'off'
+ assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_store('blink_device_light_cmd', 'echo hello')
+ c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')])
+ assert wait(cephadm_module, c) == ['Set ident light for test: on']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'echo', 'hello'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'mgr0'):
+ cephadm_module.set_store('mgr0/blink_device_light_cmd',
+ 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'')
+ c = cephadm_module.blink_device_light(
+ 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')])
+ assert wait(cephadm_module, c) == [
+ 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on']
+ _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [
+ '--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784'
+ ], error_ok=True)
+
+ @pytest.mark.parametrize(
+ "spec, meth",
+ [
+ (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr),
+ (ServiceSpec('crash'), CephadmOrchestrator.apply_crash),
+ (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus),
+ (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana),
+ (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter),
+ (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager),
+ (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds),
+ (ServiceSpec(
+ 'mds', service_id='fsname',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='fsname',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_mds),
+ (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw),
+ (RGWSpec(
+ service_id='bar',
+ rgw_realm='realm', rgw_zone='zone',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='bar',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_rgw),
+ (NFSServiceSpec(
+ service_id='name',
+ ), CephadmOrchestrator.apply_nfs),
+ (IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password'
+ ), CephadmOrchestrator.apply_iscsi),
+ (CustomContainerSpec(
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ uid=65534,
+ gid=65534,
+ dirs=['foo/bar'],
+ files={
+ 'foo/bar/xyz.conf': 'aaa\nbbb'
+ },
+ bind_mounts=[[
+ 'type=bind',
+ 'source=lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true'
+ ]],
+ volume_mounts={
+ 'foo/bar': '/foo/bar:Z'
+ },
+ args=['--no-healthcheck'],
+ envs=['SECRET=password'],
+ ports=[8080, 8443]
+ ), CephadmOrchestrator.apply_container),
+ ]
+ )
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("subprocess.run", mock.MagicMock())
+ def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, meth, 'test'):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'})
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, host='test'):
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert out == 'fsname'
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert not out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop")
+ def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec(
+ 'mds',
+ service_id='fsname',
+ placement=PlacementSpec(hosts=['host1', 'host2']),
+ config={'test': 'foo'}
+ )
+ with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'):
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ [daemon] = cephadm_module.cache.daemons['host1'].keys()
+
+ spec.placement.set_hosts(['host2'])
+
+ ok_to_stop.side_effect = False
+
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
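+ # daemon is a name like 'mds.fsname.<host>.<random>'; [4:] strips the
+ # 'mds.' prefix so ok_to_stop receives just the daemon id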
+ ok_to_stop.assert_called_with([daemon[4:]], force=True)
+
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host2')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module):
+ # test daemons on offline/maint hosts not removed when applying specs
+ # test daemons not added to hosts in maint/offline state
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_host(cephadm_module, 'test3'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))):
+ # should get a mgr on all 3 hosts
+ # CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+
+ # put one host in offline state and one host in maintenance state
+ cephadm_module.offline_hosts = {'test2'}
+ cephadm_module.inventory._inventory['test3']['status'] = 'maintenance'
+ cephadm_module.inventory.save()
+
+ # hosts in offline/maint mode stay in the schedulable list (so their existing
+ # daemons are left in place) but are reported as unreachable, which keeps the
+ # scheduler from placing anything new on them
+ assert cephadm_module.cache.is_host_schedulable('test2')
+ assert cephadm_module.cache.is_host_schedulable('test3')
+
+ assert cephadm_module.cache.is_host_unreachable('test2')
+ assert cephadm_module.cache.is_host_unreachable('test3')
+
+ with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
+ # re-apply services. No mgr should be removed from maint/offline hosts
+ # crash daemon should only be on host not in maint/offline mode
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+ assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1
+
+ cephadm_module.offline_hosts = {}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nsuccess - systemd target xxx disabled'], 0))
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+ # should not raise an error
+ retval = cephadm_module.enter_host_maintenance(hostname)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - disable the target'], 0))
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
+ with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'):
+ cephadm_module.enter_host_maintenance(hostname)
+
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_i_really_mean_it(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ err_str = 'some kind of error'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - disable the target'], 0))
+ _host_ok.return_value = 1, err_str
+ _get_daemon_types.return_value = ['mon']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname, force=True)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ retval = cephadm_module.enter_host_maintenance(hostname, force=True, yes_i_really_mean_it=True)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(([''], [
+ 'something\nsuccess - systemd target xxx enabled and started'], 0))
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+ # should not raise an error
+ retval = cephadm_module.exit_host_maintenance(hostname)
+ assert retval.result_str().startswith('Ceph cluster')
+ assert not retval.exception_str
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - unable to enable the target'], 0))
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+
+ with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'):
+ cephadm_module.exit_host_maintenance(hostname)
+
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.ssh.SSHManager._remote_connection")
+ @mock.patch("cephadm.ssh.SSHManager._execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._check_execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._write_remote_file")
+ def test_etc_ceph(self, _write_file, check_execute_command, execute_command, remote_connection, cephadm_module):
+ _write_file.side_effect = async_side_effect(None)
+ check_execute_command.side_effect = async_side_effect('')
+ execute_command.side_effect = async_side_effect(('{}', '', 0))
+ remote_connection.side_effect = async_side_effect(mock.Mock())
+
+ assert cephadm_module.manage_etc_ceph_ceph_conf is False
+
+ with with_host(cephadm_module, 'test'):
+ assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test')
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True)
+ cephadm_module.config_notify()
+ assert cephadm_module.manage_etc_ceph_ceph_conf is True
+
+ CephadmServe(cephadm_module)._write_all_client_files()
+ # Make sure both ceph conf locations (default and per-fsid) are written
+ _write_file.assert_has_calls([mock.call('test', '/etc/ceph/ceph.conf', b'',
+ 0o644, 0, 0, None),
+ mock.call('test', '/var/lib/ceph/fsid/config/ceph.conf', b'',
+ 0o644, 0, 0, None)]
+ )
+ ceph_conf_files = cephadm_module.cache.get_host_client_files('test')
+ assert len(ceph_conf_files) == 2
+ assert '/etc/ceph/ceph.conf' in ceph_conf_files
+ assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files
+
+ # set extra config and expect that we deploy another ceph.conf
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+ CephadmServe(cephadm_module)._write_all_client_files()
+ _write_file.assert_has_calls([mock.call('test',
+ '/etc/ceph/ceph.conf',
+ b'[mon]\nk=v\n', 0o644, 0, 0, None),
+ mock.call('test',
+ '/var/lib/ceph/fsid/config/ceph.conf',
+ b'[mon]\nk=v\n', 0o644, 0, 0, None)])
+ # reload
+ cephadm_module.cache.last_client_files = {}
+ cephadm_module.cache.load()
+
+ ceph_conf_files = cephadm_module.cache.get_host_client_files('test')
+ assert len(ceph_conf_files) == 2
+ assert '/etc/ceph/ceph.conf' in ceph_conf_files
+ assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files
+
+ # Make sure a change to the extra ceph conf results in new client file digests:
+ f1_before_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ f2_before_digest = cephadm_module.cache.get_host_client_files(
+ 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0]
+ cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2')
+ CephadmServe(cephadm_module)._write_all_client_files()
+ f1_after_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ f2_after_digest = cephadm_module.cache.get_host_client_files(
+ 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0]
+ assert f1_before_digest != f1_after_digest
+ assert f2_before_digest != f2_after_digest
+
+ @mock.patch("cephadm.inventory.HostCache.get_host_client_files")
+ def test_dont_write_client_files_to_unreachable_hosts(self, _get_client_files, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) # online
+ cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) # maintenance
+ cephadm_module.inventory.add_host(HostSpec('host3', '1.2.3.3')) # offline
+
+ # mark host2 as maintenance and host3 as offline
+ cephadm_module.inventory._inventory['host2']['status'] = 'maintenance'
+ cephadm_module.offline_hosts.add('host3')
+
+ # verify host2 and host3 are correctly marked as unreachable but host1 is not
+ assert not cephadm_module.cache.is_host_unreachable('host1')
+ assert cephadm_module.cache.is_host_unreachable('host2')
+ assert cephadm_module.cache.is_host_unreachable('host3')
+
+ _get_client_files.side_effect = Exception('Called _get_client_files')
+
+ # with the online host, should call _get_client_files which
+ # we have setup to raise an Exception
+ with pytest.raises(Exception, match='Called _get_client_files'):
+ CephadmServe(cephadm_module)._write_client_files({}, 'host1')
+
+ # for the maintenance and offline host, _get_client_files should
+ # not be called and it should just return immediately with nothing
+ # having been raised
+ CephadmServe(cephadm_module)._write_client_files({}, 'host2')
+ CephadmServe(cephadm_module)._write_client_files({}, 'host3')
+
+ def test_etc_ceph_init(self):
+ with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m:
+ assert m.manage_etc_ceph_ceph_conf is True
+
+ @mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
+ @mock.patch("cephadm.CephadmOrchestrator.extra_ceph_conf")
+ def test_extra_ceph_conf(self, _extra_ceph_conf, _check_mon_cmd, cephadm_module: CephadmOrchestrator):
+ # settings put into the [global] section of the extra conf
+ # need to be appended to the existing [global] section in the given
+ # minimal ceph conf, but anything in another section (e.g. [mon])
+ # needs to continue to be its own section
+
+ # this is the conf "ceph generate-minimal-conf" will return in this test
+ _check_mon_cmd.return_value = (0, """[global]
+global_k1 = global_v1
+global_k2 = global_v2
+[mon]
+mon_k1 = mon_v1
+[osd]
+osd_k1 = osd_v1
+osd_k2 = osd_v2
+""", '')
+
+ # test with extra ceph conf that has some of the sections from minimal conf
+ _extra_ceph_conf.return_value = CephadmOrchestrator.ExtraCephConf(conf="""[mon]
+mon_k2 = mon_v2
+[global]
+global_k3 = global_v3
+""", last_modified=datetime_now())
+
+ expected_combined_conf = """[global]
+global_k1 = global_v1
+global_k2 = global_v2
+global_k3 = global_v3
+
+[mon]
+mon_k1 = mon_v1
+mon_k2 = mon_v2
+
+[osd]
+osd_k1 = osd_v1
+osd_k2 = osd_v2
+"""
+
+ assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf
+
+ def test_client_keyrings_special_host_labels(self, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
+ cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS]))
+ cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING]))
+ # hosts need to be marked as having had a daemon refresh to be available for
+ # placement, so "refresh" them with an empty daemon list
+ cephadm_module.cache.update_host_daemons('host1', {})
+ cephadm_module.cache.update_host_daemons('host2', {})
+ cephadm_module.cache.update_host_daemons('host3', {})
+
+ assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+ assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+ assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+
+ assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+ assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+ assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+
+ cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1')))
+
+ with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+ _mon_cmd.return_value = (0, 'real-keyring', '')
+ client_files = CephadmServe(cephadm_module)._calc_client_files()
+ assert 'host1' in client_files.keys()
+ assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys()
+ assert 'host2' in client_files.keys()
+ assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys()
+ assert 'host3' not in client_files.keys()
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ def check_registry_credentials(url, username, password):
+ assert json.loads(cephadm_module.get_store('registry_credentials')) == {
+ 'url': url, 'username': username, 'password': password}
+
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ # test successful login with valid args
+ code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password')
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login attempt with invalid args
+ code, out, err = cephadm_module.registry_login('bad-args')
+ assert err == ("Invalid arguments. Please provide arguments <url> <username> <password> "
+ "or -i <login credentials json file>")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login using invalid json file
+ code, out, err = cephadm_module.registry_login(
+ None, None, None, '{"bad-json": "bad-json"}')
+ assert err == ("json provided for custom registry login did not include all necessary fields. "
+ "Please setup json file as\n"
+ "{\n"
+ " \"url\": \"REGISTRY_URL\",\n"
+ " \"username\": \"REGISTRY_USERNAME\",\n"
+ " \"password\": \"REGISTRY_PASSWORD\"\n"
+ "}\n")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test good login using valid json file
+ good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", "
+ " \"password\": \"" + "json-pass" + "\"}")
+ code, out, err = cephadm_module.registry_login(None, None, None, good_json)
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ # test bad login where args are valid but login command fails
+ _run_cephadm.side_effect = async_side_effect(('{}', 'error', 1))
+ code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password')
+ assert err == 'Host test failed to login to fail-url as fail-user with given password'
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['image@repo_digest'],
+ })))
+ @pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+ def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator):
+ cephadm_module.use_repo_digest = use_repo_digest
+
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.set_container_image('global', 'image')
+
+ if use_repo_digest:
+
+ CephadmServe(cephadm_module).convert_tags_to_repo_digest()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'image@repo_digest'
+ else:
+ assert image == 'image'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch
+Traceback (most recent call last):
+ File "<stdin>", line 6112, in <module>
+ File "<stdin>", line 1299, in _infer_fsid
+ File "<stdin>", line 1382, in _infer_image
+ File "<stdin>", line 3612, in command_ceph_volume
+ File "<stdin>", line 1061, in call_throws"""
+
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.reset_mock()
+ _run_cephadm.side_effect = OrchestratorError(error_message)
+
+ s = CephadmServe(cephadm_module)._refresh_host_devices('test')
+ assert s == 'host test `cephadm ceph-volume` failed: ' + error_message
+
+ assert _run_cephadm.mock_calls == [
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
+ no_fsid=False, error_ok=False, log_output=False),
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty'], image='',
+ no_fsid=False, error_ok=False, log_output=False),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ ceph_volume_lvm_list = {
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }
+ _run_cephadm.reset_mock(return_value=True, side_effect=True)
+
+ async def _r_c(*args, **kwargs):
+ if 'ceph-volume' in args:
+ return (json.dumps(ceph_volume_lvm_list), '', 0)
+ else:
+ assert ['_orch', 'deploy'] in args
+ raise OrchestratorError("let's fail somehow")
+ _run_cephadm.side_effect = _r_c
+ assert cephadm_module._osd_activate(
+ ['test']).stderr == "let's fail somehow"
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+
+ async def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8",
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }, {
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'db'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_service(cephadm_module, dg, host='test'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
+ with pytest.raises(OrchestratorError):
+ with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN)
+ pass
+ assert False
+ with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
+ with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN)
+
+ @pytest.mark.parametrize("facts, settings, expected_value",
+ [
+ # All options are available on all hosts
+ (
+ {
+ "host1":
+ {
+ "sysctl_options":
+ {
+ 'opt1': 'val1',
+ 'opt2': 'val2',
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ 'opt1': '',
+ 'opt2': '',
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': [], 'host2': []} # expected_value
+ ),
+ # opt1 is missing on host 1, opt2 is missing on host2
+ ({
+ "host1":
+ {
+ "sysctl_options":
+ {
+ 'opt2': '',
+ 'optX': '',
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ 'opt1': '',
+ 'opt3': '',
+ 'opt4': '',
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': ['opt1'], 'host2': ['opt2']} # expected_value
+ ),
+ # All options are missing on all hosts
+ ({
+ "host1":
+ {
+ "sysctl_options":
+ {
+ }
+ },
+ "host2":
+ {
+ "sysctl_options":
+ {
+ }
+ },
+ },
+ {'opt1', 'opt2'}, # settings
+ {'host1': ['opt1', 'opt2'], 'host2': [
+ 'opt1', 'opt2']} # expected_value
+ ),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_tuned_profiles_settings_validation(self, facts, settings, expected_value, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ spec = mock.Mock()
+ spec.settings = sorted(settings)
+ spec.placement.filter_matching_hostspecs = mock.Mock()
+ spec.placement.filter_matching_hostspecs.return_value = ['host1', 'host2']
+ cephadm_module.cache.facts = facts
+ assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_tuned_profiles_validation(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+
+ with pytest.raises(OrchestratorError, match="^Invalid placement specification.+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(hosts=[])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="Invalid spec: settings section cannot be empty."):
+ spec = mock.Mock()
+ spec.settings = {}
+ spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Placement 'count' field is no supported .+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(count=1)
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Placement 'count_per_host' field is no supported .+"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(count_per_host=1, label='foo')
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ with pytest.raises(OrchestratorError, match="^Found invalid host"):
+ spec = mock.Mock()
+ spec.settings = {'a': 'b'}
+ spec.placement = PlacementSpec(hosts=['host1', 'host2'])
+ cephadm_module.inventory = mock.Mock()
+ cephadm_module.inventory.all_specs = mock.Mock(
+ return_value=[mock.Mock().hostname, mock.Mock().hostname])
+ cephadm_module._validate_tuned_profile_spec(spec)
+
+ def test_set_unmanaged(self, cephadm_module):
+ cephadm_module.spec_store._specs['crash'] = ServiceSpec('crash', unmanaged=False)
+ assert not cephadm_module.spec_store._specs['crash'].unmanaged
+ cephadm_module.spec_store.set_unmanaged('crash', True)
+ assert cephadm_module.spec_store._specs['crash'].unmanaged
+ cephadm_module.spec_store.set_unmanaged('crash', False)
+ assert not cephadm_module.spec_store._specs['crash'].unmanaged
+
+ def test_inventory_known_hostnames(self, cephadm_module):
+ cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1'))
+ cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2'))
+ cephadm_module.inventory.add_host(HostSpec('host3.domain', '1.2.3.3'))
+ cephadm_module.inventory.add_host(HostSpec('host4.domain', '1.2.3.4'))
+ cephadm_module.inventory.add_host(HostSpec('host5', '1.2.3.5'))
+
+ # update_known_hostnames expects args to be <hostname, shortname, fqdn>,
+ # as gathered from cephadm gather-facts. That said, passing the names in
+ # the wrong order should have no effect on functionality
+ cephadm_module.inventory.update_known_hostnames('host1', 'host1', 'host1.domain')
+ cephadm_module.inventory.update_known_hostnames('host2.domain', 'host2', 'host2.domain')
+ cephadm_module.inventory.update_known_hostnames('host3', 'host3', 'host3.domain')
+ cephadm_module.inventory.update_known_hostnames('host4.domain', 'host4', 'host4.domain')
+ cephadm_module.inventory.update_known_hostnames('host5', 'host5', 'host5')
+
+ assert 'host1' in cephadm_module.inventory
+ assert 'host1.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host1') == '1.2.3.1'
+ assert cephadm_module.inventory.get_addr('host1.domain') == '1.2.3.1'
+
+ assert 'host2' in cephadm_module.inventory
+ assert 'host2.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host2') == '1.2.3.2'
+ assert cephadm_module.inventory.get_addr('host2.domain') == '1.2.3.2'
+
+ assert 'host3' in cephadm_module.inventory
+ assert 'host3.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host3') == '1.2.3.3'
+ assert cephadm_module.inventory.get_addr('host3.domain') == '1.2.3.3'
+
+ assert 'host4' in cephadm_module.inventory
+ assert 'host4.domain' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host4') == '1.2.3.4'
+ assert cephadm_module.inventory.get_addr('host4.domain') == '1.2.3.4'
+
+ assert 'host4.otherdomain' not in cephadm_module.inventory
+ with pytest.raises(OrchestratorError):
+ cephadm_module.inventory.get_addr('host4.otherdomain')
+
+ assert 'host5' in cephadm_module.inventory
+ assert cephadm_module.inventory.get_addr('host5') == '1.2.3.5'
+ with pytest.raises(OrchestratorError):
+ cephadm_module.inventory.get_addr('host5.domain')
+
+ def test_async_timeout_handler(self, cephadm_module):
+ cephadm_module.default_cephadm_command_timeout = 900
+
+ async def _timeout():
+ raise asyncio.TimeoutError
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler():
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out on host hostA \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostA'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "testing" timed out \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler(cmd='testing'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "testing" timed out on host hostB \(default 900 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostB', 'testing'):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command timed out \(non-default 111 second timeout\)'):
+ with cephadm_module.async_timeout_handler(timeout=111):
+ cephadm_module.wait_async(_timeout())
+
+ with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'):
+ with cephadm_module.async_timeout_handler('hostC', 'very slow', 999):
+ cephadm_module.wait_async(_timeout())
+
+ @mock.patch("cephadm.CephadmOrchestrator.remove_osds")
+ @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None)
+ @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: [])
+ def test_host_drain_zap(self, _rm_osds, cephadm_module):
+ # pass force=true in these tests to bypass _admin label check
+ cephadm_module.drain_host('host1', force=True, zap_osd_devices=False)
+ _rm_osds.assert_called_with([], zap=False)
+
+ cephadm_module.drain_host('host1', force=True, zap_osd_devices=True)
+ _rm_osds.assert_called_with([], zap=True)
+
+ def test_process_ls_output(self, cephadm_module):
+ sample_ls_output = """[
+ {
+ "style": "cephadm:v1",
+ "name": "mon.vm-00",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mon.vm-00",
+ "enabled": true,
+ "state": "running",
+ "service_name": "mon",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "b170b964a6e2918955362eb36195627c6086d3f859d4ebce2ee13f3ee4738733",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 56214159,
+ "cpu_percentage": "2.32%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:31:11.752300Z",
+ "created": "2023-09-22T22:15:24.121387Z",
+ "deployed": "2023-09-22T22:31:10.383431Z",
+ "configured": "2023-09-22T22:31:11.859440Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "mgr.vm-00.mpexeg",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mgr.vm-00.mpexeg",
+ "enabled": true,
+ "state": "running",
+ "service_name": "mgr",
+ "ports": [
+ 8443,
+ 9283,
+ 8765
+ ],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "6e7756cef553a25a2a84227e8755d3d25046b9cd8758b23c698d34b3af895242",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 529740595,
+ "cpu_percentage": "8.35%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:30:18.587021Z",
+ "created": "2023-09-22T22:15:29.101409Z",
+ "deployed": "2023-09-22T22:30:17.339114Z",
+ "configured": "2023-09-22T22:30:18.758122Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "agent.vm-00",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@agent.vm-00",
+ "enabled": true,
+ "state": "running",
+ "service_name": "agent",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "container_id": null,
+ "container_image_name": null,
+ "container_image_id": null,
+ "container_image_digests": null,
+ "version": null,
+ "started": null,
+ "created": "2023-09-22T22:33:34.708289Z",
+ "deployed": null,
+ "configured": "2023-09-22T22:33:34.722289Z"
+ },
+ {
+ "style": "cephadm:v1",
+ "name": "osd.0",
+ "fsid": "588f83ba-5995-11ee-9e94-52540057a206",
+ "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@osd.0",
+ "enabled": true,
+ "state": "running",
+ "service_name": "osd.foo",
+ "ports": [],
+ "ip": null,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "rank": null,
+ "rank_generation": null,
+ "extra_container_args": null,
+ "extra_entrypoint_args": null,
+ "memory_request": null,
+ "memory_limit": null,
+ "container_id": "93f71c60820b86901a45b3b1fe3dba3e3e677b37fd22310b7e7da3f67bb8ccd6",
+ "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3",
+ "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55",
+ "container_image_digests": [
+ "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3"
+ ],
+ "memory_usage": 73410805,
+ "cpu_percentage": "6.54%",
+ "version": "18.0.0-5185-g7b3a4f2b",
+ "started": "2023-09-22T22:41:29.019587Z",
+ "created": "2023-09-22T22:41:03.615080Z",
+ "deployed": "2023-09-22T22:41:24.965222Z",
+ "configured": "2023-09-22T22:41:29.119250Z"
+ }
+]"""
+
+ now = str_to_datetime('2023-09-22T22:45:29.119250Z')
+ cephadm_module._cluster_fsid = '588f83ba-5995-11ee-9e94-52540057a206'
+ with mock.patch("cephadm.module.datetime_now", lambda: now):
+ cephadm_module._process_ls_output('vm-00', json.loads(sample_ls_output))
+ assert 'vm-00' in cephadm_module.cache.daemons
+ assert 'mon.vm-00' in cephadm_module.cache.daemons['vm-00']
+ assert 'mgr.vm-00.mpexeg' in cephadm_module.cache.daemons['vm-00']
+ assert 'agent.vm-00' in cephadm_module.cache.daemons['vm-00']
+ assert 'osd.0' in cephadm_module.cache.daemons['vm-00']
+
+ daemons = cephadm_module.cache.get_daemons_by_host('vm-00')
+ c_img_ids = [dd.container_image_id for dd in daemons if dd.daemon_type != 'agent']
+ assert all(c_img_id == '674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55' for c_img_id in c_img_ids)
+ last_refreshes = [dd.last_refresh for dd in daemons]
+ assert all(lrf == now for lrf in last_refreshes)
+ versions = [dd.version for dd in daemons if dd.daemon_type != 'agent']
+ assert all(version == '18.0.0-5185-g7b3a4f2b' for version in versions)
+
+ osd = cephadm_module.cache.get_daemons_by_type('osd', 'vm-00')[0]
+ assert osd.cpu_percentage == '6.54%'
+ assert osd.memory_usage == 73410805
+ assert osd.created == str_to_datetime('2023-09-22T22:41:03.615080Z')
diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py b/src/pybind/mgr/cephadm/tests/test_completion.py
new file mode 100644
index 000000000..327c12d2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_completion.py
@@ -0,0 +1,40 @@
+import pytest
+
+from ..module import forall_hosts
+
+
+class TestCompletion(object):
+
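+ # forall_hosts maps the wrapped function over the input sequence; each element
+ # supplies the positional args (tuples are unpacked) and the results are
+ # gathered into a list, hence the stringified arg tuples expected below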
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map(self, input, expected, cephadm_module):
+ @forall_hosts
+ def run_forall(*args):
+ return str(args)
+ assert run_forall(input) == expected
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map_self(self, input, expected, cephadm_module):
+ class Run(object):
+ def __init__(self):
+ self.attr = 1
+
+ @forall_hosts
+ def run_forall(self, *args):
+ assert self.attr == 1
+ return str(args)
+
+ assert Run().run_forall(input) == expected
diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py b/src/pybind/mgr/cephadm/tests/test_configchecks.py
new file mode 100644
index 000000000..3cae0a27d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py
@@ -0,0 +1,668 @@
+import copy
+import json
+import logging
+import ipaddress
+import pytest
+import uuid
+
+from time import time as now
+
+from ..configchecks import CephadmConfigChecks
+from ..inventory import HostCache
+from ..upgrade import CephadmUpgrade, UpgradeState
+from orchestrator import DaemonDescription
+
+from typing import List, Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+host_sample = {
+ "arch": "x86_64",
+ "bios_date": "04/01/2014",
+ "bios_version": "F2",
+ "cpu_cores": 16,
+ "cpu_count": 2,
+ "cpu_load": {
+ "15min": 0.0,
+ "1min": 0.01,
+ "5min": 0.01
+ },
+ "cpu_model": "Intel® Xeon® Processor E5-2698 v3",
+ "cpu_threads": 64,
+ "flash_capacity": "4.0TB",
+ "flash_capacity_bytes": 4000797868032,
+ "flash_count": 2,
+ "flash_list": [
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sda",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sdb",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ ],
+ "hdd_capacity": "16.0TB",
+ "hdd_capacity_bytes": 16003148120064,
+ "hdd_count": 4,
+ "hdd_list": [
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdc",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdd",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sde",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdf",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ ],
+ "hostname": "dummy",
+ "interfaces": {
+ "eth0": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.17.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth1": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.18.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth2": {
+ "driver": "r8169",
+ "iftype": "physical",
+ "ipv4_address": "10.7.19.1/24",
+ "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64",
+ "lower_devs_list": [],
+ "mtu": 1500,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": []
+ },
+ },
+ "kernel": "4.18.0-240.10.1.el8_3.x86_64",
+ "kernel_parameters": {
+ "net.ipv4.ip_nonlocal_bind": "0",
+ },
+ "kernel_security": {
+ "SELINUX": "enforcing",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(enforcing, targeted)",
+ "type": "SELinux"
+ },
+ "memory_available_kb": 19489212,
+ "memory_free_kb": 245164,
+ "memory_total_kb": 32900916,
+ "model": "StorageHeavy",
+ "nic_count": 3,
+ "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)",
+ "subscribed": "Yes",
+ "system_uptime": 777600.0,
+ "timestamp": now(),
+ "vendor": "Ceph Servers Inc",
+}
+
+
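+# node-1 runs mon/mgr/osd, nodes 2 and 3 run mon/mds/osd, all other nodes are osd-only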
+def role_list(n: int) -> List[str]:
+ if n == 1:
+ return ['mon', 'mgr', 'osd']
+ if n in [2, 3]:
+ return ['mon', 'mds', 'osd']
+
+ return ['osd']
+
+
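+# Build `count` host fact dicts on the given networks, plus the matching
+# DaemonDescription objects and a daemon-name -> hostname lookup used by
+# FakeMgr.get_metadata().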
+def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'):
+ # public network = eth0, cluster_network = eth1
+ assert count > 3
+ assert public_network
+ num_disks = host_sample['hdd_count']
+ hosts = {}
+ daemons = {}
+ daemon_to_host = {}
+ osd_num = 0
+ public_netmask = public_network.split('/')[1]
+ cluster_ip_list = []
+ cluster_netmask = ''
+
+ public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())]
+ if cluster_network:
+ cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())]
+ cluster_netmask = cluster_network.split('/')[1]
+
+ for n in range(1, count + 1, 1):
+
+ new_host = copy.deepcopy(host_sample)
+ hostname = f"node-{n}.ceph.com"
+
+ new_host['hostname'] = hostname
+ new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}"
+ if cluster_ip_list:
+ new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}"
+ else:
+ new_host['interfaces']['eth1']['ipv4_address'] = ''
+
+ hosts[hostname] = new_host
+ daemons[hostname] = {}
+ for r in role_list(n):
+ name = ''
+ if r == 'osd':
+                for _n in range(num_disks):
+ osd = DaemonDescription(
+ hostname=hostname, daemon_type='osd', daemon_id=osd_num)
+ name = f"osd.{osd_num}"
+ daemons[hostname][name] = osd
+ daemon_to_host[name] = hostname
+ osd_num += 1
+ else:
+ name = f"{r}.{hostname}"
+ daemons[hostname][name] = DaemonDescription(
+ hostname=hostname, daemon_type=r, daemon_id=hostname)
+ daemon_to_host[name] = hostname
+
+ logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}")
+ return hosts, daemons, daemon_to_host
+
+
+@pytest.fixture()
+def mgr():
+ """Provide a fake ceph mgr object preloaded with a configuration"""
+ mgr = FakeMgr()
+ mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \
+ generate_testdata(public_network='10.9.64.0/24', cluster_network='')
+ mgr.module_option.update({
+ "config_checks_enabled": True,
+ })
+ yield mgr
+
+
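+# Minimal stand-in for the mgr module: keeps store and module-option state in
+# plain dicts and fakes get_metadata()/list_servers() so CephadmConfigChecks
+# can run without a real cluster.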
+class FakeMgr:
+
+ def __init__(self):
+ self.datastore = {}
+ self.module_option = {}
+ self.health_checks = {}
+ self.default_version = 'quincy'
+ self.version_overrides = {}
+ self.daemon_to_host = {}
+
+ self.cache = HostCache(self)
+ self.upgrade = CephadmUpgrade(self)
+
+ def set_health_checks(self, checks: dict):
+ return
+
+ def get_module_option(self, keyname: str) -> Optional[str]:
+ return self.module_option.get(keyname, None)
+
+ def set_module_option(self, keyname: str, value: str) -> None:
+ return None
+
+ def get_store(self, keyname: str, default=None) -> Optional[str]:
+ return self.datastore.get(keyname, None)
+
+ def set_store(self, keyname: str, value: str) -> None:
+ self.datastore[keyname] = value
+ return None
+
+ def _ceph_get_server(self) -> None:
+ pass
+
+ def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]:
+ key = f"{daemon_type}.{daemon_id}"
+ if key in self.version_overrides:
+ logger.debug(f"override applied for {key}")
+ version_str = self.version_overrides[key]
+ else:
+ version_str = self.default_version
+
+ return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]}
+
+ def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]:
+ num_disks = host_sample['hdd_count']
+ osd_num = 0
+ service_map = []
+
+ for hostname in self.cache.facts:
+
+ host_num = int(hostname.split('.')[0].split('-')[1])
+ svc_list = []
+ for r in role_list(host_num):
+ if r == 'osd':
+ for _n in range(num_disks):
+ svc_list.append({
+ "type": "osd",
+ "id": osd_num,
+ })
+ osd_num += 1
+ else:
+ svc_list.append({
+ "type": r,
+ "id": hostname,
+ })
+
+ service_map.append({"services": svc_list})
+ logger.debug(f"services map - {json.dumps(service_map)}")
+ return service_map
+
+ def use_repo_digest(self) -> None:
+ return None
+
+
+class TestConfigCheck:
+
+ def test_to_json(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ out = checker.to_json()
+ assert out
+ assert len(out) == len(checker.health_checks)
+
+ def test_lookup_check(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ check = checker.lookup_check('osd_mtu_size')
+ logger.debug(json.dumps(check.to_json()))
+ assert check
+ assert check.healthcheck_name == "CEPHADM_CHECK_MTU"
+
+ def test_old_checks_removed(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", '
+ '"kernel_security": "enabled", "public_network": "enabled", '
+ '"kernel_version": "enabled", "network_missing": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", '
+ '"os_subscription": "enabled", "ceph_release": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert "bogus_one" not in checks
+ assert "bogus_two" not in checks
+ assert len(checks) == len(checker.health_checks)
+
+ def test_new_checks(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert len(checks) == len(checker.health_checks)
+
+ def test_no_issues(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+
+ assert not mgr.health_checks
+
+ def test_no_public_network(self, mgr):
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24"
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+ logger.debug(mgr.health_checks)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \
+ 'node-1.ceph.com does not have an interface on any public network'
+
+ def test_missing_networks(self, mgr):
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.66.0/24']
+ checker.run_checks()
+
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \
+ "10.9.66.0/24 not found on any host in the cluster"
+
+ def test_bad_mtu_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000'
+
+ def test_bad_mtu_multiple(self, mgr):
+
+ for n in [1, 5]:
+ bad_node = mgr.cache.facts[f'node-{n}.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2
+
+ def test_bad_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 100
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000'
+
+ def test_super_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 10000
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert not mgr.health_checks
+
+ def test_release_mismatch_single(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \
+ 'osd.1 is running pacific (majority of cluster is using quincy)'
+
+ def test_release_mismatch_multi(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ "osd.5": "octopus",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2
+
+ def test_kernel_mismatch(self, mgr):
+
+ bad_host = mgr.cache.facts['node-1.ceph.com']
+ bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \
+ "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18"
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1
+
+ def test_inconsistent_subscription(self, mgr):
+
+ bad_host = mgr.cache.facts['node-5.ceph.com']
+ bad_host['subscribed'] = "no"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \
+ "node-5.ceph.com does not have an active subscription"
+
+ def test_kernel_security_inconsistent(self, mgr):
+
+ bad_node = mgr.cache.facts['node-3.ceph.com']
+ bad_node['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \
+ "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster"
+
+ def test_release_and_bad_mtu(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 2
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks
+
+ def test_release_mtu_LSM(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 3
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks
+
+ def test_release_mtu_LSM_subscription(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node1['subscribed'] = "no"
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 4
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \
+ "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+
+ def test_skip_release_during_upgrade(self, mgr):
+ mgr.upgrade.upgrade_state = UpgradeState.from_json({
+ 'target_name': 'wah',
+ 'progress_id': str(uuid.uuid4()),
+ 'target_id': 'wah',
+ 'error': '',
+ 'paused': False,
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}")
+ assert checker.skipped_checks_count == 1
+ assert 'ceph_release' in checker.skipped_checks
+
+ def test_skip_when_disabled(self, mgr):
+ mgr.module_option.update({
+ "config_checks_enabled": "false"
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert checker.active_checks_count == 0
+
+ def test_skip_mtu_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 7
+
+ def test_skip_mtu_lsm_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks and \
+ 'kernel_security' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 6
+ assert not mgr.health_checks
diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py
new file mode 100644
index 000000000..7838ee5d4
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_facts.py
@@ -0,0 +1,31 @@
+from .. import CephadmOrchestrator
+
+from .fixtures import wait
+
+from tests import mock
+
+
+def test_facts(cephadm_module: CephadmOrchestrator):
+ facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}}
+ cephadm_module.cache.facts = facts
+ ret_facts = cephadm_module.get_facts('node-1.ceph.com')
+ assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}]
+
+
+@mock.patch("cephadm.inventory.Inventory.update_known_hostnames")
+def test_known_hostnames(_update_known_hostnames, cephadm_module: CephadmOrchestrator):
+ host_facts = {'hostname': 'host1.domain',
+ 'shortname': 'host1',
+ 'fqdn': 'host1.domain',
+ 'memory_free_kb': 37383384,
+ 'memory_total_kb': 40980612,
+ 'nic_count': 2}
+ cephadm_module.cache.update_host_facts('host1', host_facts)
+ _update_known_hostnames.assert_called_with('host1.domain', 'host1', 'host1.domain')
+
+ host_facts = {'hostname': 'host1.domain',
+ 'memory_free_kb': 37383384,
+ 'memory_total_kb': 40980612,
+ 'nic_count': 2}
+ cephadm_module.cache.update_host_facts('host1', host_facts)
+ _update_known_hostnames.assert_called_with('host1.domain', '', '')
diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py
new file mode 100644
index 000000000..1f1d32e8b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_migration.py
@@ -0,0 +1,340 @@
+import json
+import pytest
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm import CephadmOrchestrator
+from cephadm.inventory import SPEC_STORE_PREFIX
+from cephadm.migrations import LAST_MIGRATION
+from cephadm.tests.fixtures import _run_cephadm, wait, with_host, receive_agent_metadata_all_hosts
+from cephadm.serve import CephadmServe
+from tests import mock
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_scheduler(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1', refresh_hosts=False):
+ with with_host(cephadm_module, 'host2', refresh_hosts=False):
+
+ # emulate the old scheduler:
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+ # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."):
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+ # assert we need all daemons.
+ assert cephadm_module.migration_current == 0
+
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ receive_agent_metadata_all_hosts(cephadm_module)
+ cephadm_module.migration.migrate()
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())}
+ assert out == {'host1', 'host2'}
+
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+            # Sorry for this hack, but I need to make sure Migration thinks
+            # we have already updated all daemons.
+ cephadm_module.cache.last_daemon_update['host1'] = datetime_now()
+ cephadm_module.cache.last_daemon_update['host2'] = datetime_now()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ out = [o.spec.placement for o in wait(
+ cephadm_module, cephadm_module.describe_service())]
+ assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec(
+ hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])]
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(hosts=['host1'])
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'placement': {
+ 'count': 5,
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 2
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+ assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(count=5)
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mds_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({
+ 'spec': {
+ 'service_type': 'mds',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ # there is nothing to migrate, as the spec is gone now.
+ assert len(cephadm_module.spec_store.all_specs) == 0
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'ganesha-foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['ganesha-foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator):
+ assert 'client.admin' not in cephadm_module.keys.keys
+
+ cephadm_module.migration_current = 3
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator):
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == 0
+
+ cephadm_module.migration_current = LAST_MIGRATION
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = None
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = LAST_MIGRATION + 1
+ cephadm_module.migration.set_sane_migration_current()
+ assert cephadm_module.migration_current == 0
+
+ cephadm_module.migration_current = None
+ ongoing = cephadm_module.migration.is_migration_ongoing()
+ assert not ongoing
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ cephadm_module.migration_current = LAST_MIGRATION + 1
+ ongoing = cephadm_module.migration.is_migration_ongoing()
+ assert ongoing
+ assert cephadm_module.migration_current == 0
+
+
+@pytest.mark.parametrize(
+ "rgw_spec_store_entry, should_migrate",
+ [
+ ({
+ 'spec': {
+ 'service_type': 'rgw',
+ 'service_name': 'rgw.foo',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512',
+ 'rgw_frontend_port': '5000',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, True),
+ ({
+ 'spec': {
+ 'service_type': 'rgw',
+ 'service_name': 'rgw.foo',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, False),
+ ]
+)
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_entry, should_migrate):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'rgw',
+ json.dumps(rgw_spec_store_entry, sort_keys=True),
+ )
+
+ # make sure rgw_migration_queue is populated accordingly
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+ ls = json.loads(cephadm_module.get_store('rgw_migration_queue'))
+ assert 'rgw' == ls[0]['spec']['service_type']
+
+ # shortcut rgw_migration_queue loading by directly assigning
+ # ls output to rgw_migration_queue list
+ cephadm_module.migration.rgw_migration_queue = ls
+
+ # skip other migrations and go directly to 5_6 migration (RGW spec)
+ cephadm_module.migration_current = 5
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ if should_migrate:
+            # make sure the spec has been migrated and the param=value entries
+ # that were part of the rgw_frontend_type are now in the new
+ # 'rgw_frontend_extra_args' list
+ assert 'rgw.foo' in cephadm_module.spec_store.all_specs
+ rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo']
+ assert dict(rgw_spec.to_json()) == {'service_type': 'rgw',
+ 'service_id': 'foo',
+ 'service_name': 'rgw.foo',
+ 'placement': {'hosts': ['host1']},
+ 'spec': {
+ 'rgw_frontend_extra_args': ['tcp_nodelay=1',
+ 'request_timeout_ms=65000',
+ 'rgw_thread_pool_size=512'],
+ 'rgw_frontend_port': '5000',
+ 'rgw_frontend_type': 'beast',
+ }}
+ else:
+ # in a real environment, we still expect the spec to be there,
+ # just untouched by the migration. For this test specifically
+ # though, the spec will only have ended up in the spec store
+ # if it was migrated, so we can use this to test the spec
+ # was untouched
+ assert 'rgw.foo' not in cephadm_module.spec_store.all_specs
diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
new file mode 100644
index 000000000..6685fcb2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
@@ -0,0 +1,298 @@
+import json
+
+from cephadm.services.osd import OSDRemovalQueue, OSD
+import pytest
+from tests import mock
+from .fixtures import with_cephadm_module
+from datetime import datetime
+
+
+class MockOSD:
+
+ def __init__(self, osd_id):
+ self.osd_id = osd_id
+
+
+class TestOSDRemoval:
+
+ @pytest.mark.parametrize(
+ "osd_id, osd_df, expected",
+ [
+ # missing 'nodes' key
+ (1, dict(nodes=[]), -1),
+ # missing 'pgs' key
+ (1, dict(nodes=[dict(id=1)]), -1),
+ # id != osd_id
+ (1, dict(nodes=[dict(id=999, pgs=1)]), -1),
+ # valid
+ (1, dict(nodes=[dict(id=1, pgs=1)]), 1),
+ ]
+ )
+ def test_get_pg_count(self, rm_util, osd_id, osd_df, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df):
+ assert rm_util.get_pg_count(osd_id) == expected
+
+ @pytest.mark.parametrize(
+ "osds, ok_to_stop, expected",
+ [
+ # no osd_ids provided
+ ([], [False], []),
+ # all osds are ok_to_stop
+ ([1, 2], [True], [1, 2]),
+ # osds are ok_to_stop after the second iteration
+ ([1, 2], [False, True], [2]),
+            # osds are never ok_to_stop (taking the sample size `len(osd_ids)` into account),
+            # so the expected result is an empty list
+ ([1, 2], [False, False], []),
+ ]
+ )
+ def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ assert rm_util.find_osd_stop_threshold(osds) == expected
+
+ def test_process_removal_queue(self, rm_util):
+ # TODO: !
+ # rm_util.process_removal_queue()
+ pass
+
+ @pytest.mark.parametrize(
+ "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected",
+ [
+ # drain one at a time, one already draining
+ (1, [1], [1], [True], 0),
+ # drain one at a time, none draining yet
+ (1, [], [1, 2, 3], [True, True, True], 1),
+ # drain one at a time, one already draining, none ok-to-stop
+ (1, [1], [1], [False], 0),
+ # drain one at a time, none draining, one ok-to-stop
+ (1, [], [1, 2, 3], [False, False, True], 1),
+ # drain three at a time, one already draining, all ok-to-stop
+ (3, [1], [1, 2, 3], [True, True, True], 2),
+ # drain two at a time, none already draining, none ok-to-stop
+ (2, [], [1, 2, 3], [False, False, False], 0),
+ # drain two at a time, none already draining, none idling
+ (2, [], [], [], 0),
+ ]
+ )
+ def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected):
+ with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m:
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds):
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ removal_queue = OSDRemovalQueue(m)
+ assert len(removal_queue._ready_to_drain_osds()) == expected
+
+ def test_ok_to_stop(self, rm_util):
+ rm_util.ok_to_stop([MockOSD(1)])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']},
+ error_ok=True)
+
+ def test_safe_to_destroy(self, rm_util):
+ rm_util.safe_to_destroy([1])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy',
+ 'ids': ['1']}, error_ok=True)
+
+ def test_destroy_osd(self, rm_util):
+ rm_util.destroy_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_purge_osd(self, rm_util):
+ rm_util.purge_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_load(self, cephadm_module, rm_util):
+ data = json.dumps([
+ {
+ "osd_id": 35,
+ "started": True,
+ "draining": True,
+ "stopped": False,
+ "replace": False,
+ "force": False,
+ "zap": False,
+ "nodename": "node2",
+ "drain_started_at": "2020-09-14T11:41:53.960463",
+ "drain_stopped_at": None,
+ "drain_done_at": None,
+ "process_started_at": "2020-09-14T11:41:52.245832"
+ }
+ ])
+ cephadm_module.set_store('osd_remove_queue', data)
+ cephadm_module.to_remove_osds.load_from_store()
+
+ expected = OSDRemovalQueue(cephadm_module)
+ expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True))
+ assert cephadm_module.to_remove_osds == expected
+
+
+class TestOSD:
+
+ def test_start(self, osd_obj):
+ assert osd_obj.started is False
+ osd_obj.start()
+ assert osd_obj.started is True
+ assert osd_obj.stopped is False
+
+ def test_start_draining_purge(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0)
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is False
+ assert ret is True
+
+ def test_start_draining_replace(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ osd_obj.replace = True
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out')
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is True
+ assert ret is True
+
+ def test_start_draining_stopped(self, osd_obj):
+ osd_obj.stopped = True
+ ret = osd_obj.start_draining()
+ assert osd_obj.drain_started_at is None
+ assert ret is False
+ assert osd_obj.draining is False
+
+ def test_stop_draining_replace(self, osd_obj):
+ osd_obj.replace = True
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in')
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ def test_stop_draining_purge(self, osd_obj):
+ osd_obj.original_weight = 1.0
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0)
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ @mock.patch('cephadm.services.osd.OSD.stop_draining')
+ def test_stop(self, stop_draining_mock, osd_obj):
+ osd_obj.stop()
+ assert osd_obj.started is False
+ assert osd_obj.stopped is True
+ stop_draining_mock.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "draining, empty, expected",
+ [
+ # must be !draining! and !not empty! to yield True
+ (True, not True, True),
+ # not draining and not empty
+ (False, not True, False),
+ # not draining and empty
+ (False, True, False),
+ # draining and empty
+ (True, True, False),
+ ]
+ )
+ def test_is_draining(self, osd_obj, draining, empty, expected):
+ with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)):
+ osd_obj.draining = draining
+ assert osd_obj.is_draining is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop")
+ def test_is_ok_to_stop(self, _, osd_obj):
+ osd_obj.is_ok_to_stop
+ osd_obj.rm_util.ok_to_stop.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "pg_count, expected",
+ [
+ (0, True),
+ (1, False),
+ (9999, False),
+ (-1, False),
+ ]
+ )
+ def test_is_empty(self, osd_obj, pg_count, expected):
+ with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count):
+ assert osd_obj.is_empty is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy")
+ def test_safe_to_destroy(self, _, osd_obj):
+ osd_obj.safe_to_destroy()
+ osd_obj.rm_util.safe_to_destroy.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag")
+ def test_down(self, _, osd_obj):
+ osd_obj.down()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down')
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd")
+ def test_destroy_osd(self, _, osd_obj):
+ osd_obj.destroy()
+ osd_obj.rm_util.destroy_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd")
+ def test_purge(self, _, osd_obj):
+ osd_obj.purge()
+ osd_obj.rm_util.purge_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count")
+ def test_pg_count(self, _, osd_obj):
+ osd_obj.get_pg_count()
+ osd_obj.rm_util.get_pg_count.assert_called_once()
+
+ def test_drain_status_human_not_started(self, osd_obj):
+ assert osd_obj.drain_status_human() == 'not started'
+
+ def test_drain_status_human_started(self, osd_obj):
+ osd_obj.started = True
+ assert osd_obj.drain_status_human() == 'started'
+
+ def test_drain_status_human_draining(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = True
+ assert osd_obj.drain_status_human() == 'draining'
+
+ def test_drain_status_human_done(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = False
+ osd_obj.drain_done_at = datetime.utcnow()
+ assert osd_obj.drain_status_human() == 'done, waiting for purge'
+
+
+class TestOSDRemovalQueue:
+
+ def test_queue_size(self, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ assert q.queue_size() == 0
+ q.osds.add(osd_obj)
+ assert q.queue_size() == 1
+
+ @mock.patch("cephadm.services.osd.OSD.start")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_enqueue(self, exist, start, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.enqueue(osd_obj)
+ osd_obj.start.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm_raise(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ with pytest.raises(KeyError):
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.osds.add(osd_obj)
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
new file mode 100644
index 000000000..067cd5028
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -0,0 +1,1699 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+from typing import NamedTuple, List, Dict, Optional
+import pytest
+
+from ceph.deployment.hostspec import HostSpec
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec
+from ceph.deployment.hostspec import SpecValidationError
+
+from cephadm.module import HostAssignment
+from cephadm.schedule import DaemonPlacement
+from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError
+
+
+def wrapper(func):
+    # reverse the order of arguments: each checker(*hosts) returns a function
+    # that is later applied to the actual placement result
+ def inner(*args):
+ def inner2(expected):
+ func(expected, *args)
+ return inner2
+ return inner
+
+
+@wrapper
+def none(expected):
+ assert expected == []
+
+
+@wrapper
+def one_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+ assert len(expected) == 1, f'one_of failed len({expected}) != 1'
+ assert expected[0] in hosts
+
+
+@wrapper
+def two_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+    assert len(expected) == 2, f'two_of failed len({expected}) != 2'
+ matches = 0
+ for h in hosts:
+ matches += int(h in expected)
+ if matches != 2:
+ assert False, f'two of {hosts} not in {expected}'
+
+
+@wrapper
+def exactly(expected, *hosts):
+ assert expected == list(hosts)
+
+
+@wrapper
+def error(expected, kind, match):
+ assert isinstance(expected, kind), (str(expected), match)
+ assert str(expected) == match, (str(expected), match)
+
+
+@wrapper
+def _or(expected, *inners):
+ def catch(inner):
+ try:
+ inner(expected)
+ except AssertionError as e:
+ return e
+ result = [catch(i) for i in inners]
+ if None not in result:
+ assert False, f"_or failed: {expected}"
+
+
+def _always_true(_):
+ pass
+
+
+def k(s):
+ return [e for e in s.split(' ') if e]
+
+
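+# Return the expectation for `key`: the first entry in `results` whose key
+# matches element-wise, with '*' in the stored key matching any value.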
+def get_result(key, results):
+ def match(one):
+ for o, k in zip(one, key):
+ if o != k and o != '*':
+ return False
+ return True
+ return [v for k, v in results if match(k)][0]
+
+
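+# Build the ServiceSpec factory and HostSpec list for one parameter combination;
+# hosts named in `explicit` also carry the 'mylabel' label so the label-based
+# placement section has matching hosts.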
+def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count):
+
+ if spec_section == 'hosts':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ hosts=explicit,
+ count=count,
+ ))
+ elif spec_section == 'label':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ label='mylabel',
+ count=count,
+ ))
+ elif spec_section == 'host_pattern':
+ pattern = {
+ 'e': 'notfound',
+ '1': '1',
+ '12': '[1-2]',
+ '123': '*',
+ }[explicit_key]
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ host_pattern=pattern,
+ count=count,
+ ))
+ else:
+ assert False
+
+ hosts = [
+ HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h)
+ for h in hosts
+ ]
+
+ return mk_spec, hosts
+
+
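+# Look up the expected outcome for the given key (failing with a copy/paste hint
+# when no expectation exists yet), then run HostAssignment.place() repeatedly
+# (the scheduler has a random component) and apply the expectation to either
+# the resulting host list or the raised exception.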
+def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
+ key = ' '.join('N' if e is None else str(e) for e in key_elems)
+ try:
+ assert_res = get_result(k(key), results)
+ except IndexError:
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ if isinstance(host_res, list):
+ e = ', '.join(repr(h.hostname) for h in host_res)
+ assert False, f'`(k("{key}"), exactly({e})),` not found'
+ assert False, f'`(k("{key}"), ...),` not found'
+ except OrchestratorError as e:
+ assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found'
+
+ for _ in range(10): # scheduler has a random component
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons
+ ).place()
+
+ assert_res(sorted([h.hostname for h in host_res]))
+ except Exception as e:
+ assert_res(e)
+
+
+@pytest.mark.parametrize("dp,n,result",
+ [ # noqa: E128
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 0,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]),
+ ),
+ ])
+def test_daemon_placement_renumber(dp, n, result):
+ assert dp.renumber_ports(n) == result
+
+
+@pytest.mark.parametrize(
+ 'dp,dd,result',
+ [
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mon', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ False
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'b', 'host1'),
+ False
+ ),
+ ])
+def test_daemon_placement_match(dp, dd, result):
+ assert dp.matches_daemon(dd) == result
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# | + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + section (host, label, pattern)
+# | | | | + expected result
+# | | | | |
+test_explicit_scheduler_results = [
+ (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')),
+ (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
+ (k("* e N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
+ (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
+ (k("* e * *"), none),
+ (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
+ (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
+ (k("1 * * *"), exactly('1')),
+ (k("12 1 * *"), exactly('1')),
+ (k("12 12 1 *"), one_of('1', '2')),
+ (k("12 12 * *"), exactly('1', '2')),
+ (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
+ (k("12 123 1 *"), one_of('1', '2', '3')),
+ (k("12 123 * *"), two_of('1', '2', '3')),
+ (k("123 1 * *"), exactly('1')),
+ (k("123 12 1 *"), one_of('1', '2')),
+ (k("123 12 * *"), exactly('1', '2')),
+ (k("123 123 1 *"), one_of('1', '2', '3')),
+ (k("123 123 2 *"), two_of('1', '2', '3')),
+ (k("123 123 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 0,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_explicit_scheduler(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ spec_section_key, spec_section):
+
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ run_scheduler_test(
+ results=test_explicit_scheduler_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=[],
+ key_elems=(host_key, explicit_key, count, spec_section_key)
+ )
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# | + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + existing daemons
+# | | | | + section (host, label, pattern)
+# | | | | | + expected result
+# | | | | | |
+test_scheduler_daemons_results = [
+ (k("* 1 * * *"), exactly('1')),
+ (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')),
+ (k("1 123 * * *"), exactly('1')),
+ (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')),
+ (k("12 123 N * *"), exactly('1', '2')),
+ (k("12 123 1 * *"), one_of('1', '2')),
+ (k("12 123 2 * *"), exactly('1', '2')),
+ (k("12 123 3 * *"), exactly('1', '2')),
+ (k("123 123 N * *"), exactly('1', '2', '3')),
+ (k("123 123 1 e *"), one_of('1', '2', '3')),
+ (k("123 123 1 1 *"), exactly('1')),
+ (k("123 123 1 3 *"), exactly('3')),
+ (k("123 123 1 12 *"), one_of('1', '2')),
+ (k("123 123 1 112 *"), one_of('1', '2')),
+ (k("123 123 1 23 *"), one_of('2', '3')),
+ (k("123 123 1 123 *"), one_of('1', '2', '3')),
+ (k("123 123 2 e *"), two_of('1', '2', '3')),
+ (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))),
+ (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))),
+ (k("123 123 2 12 *"), exactly('1', '2')),
+ (k("123 123 2 112 *"), exactly('1', '2')),
+ (k("123 123 2 23 *"), exactly('2', '3')),
+ (k("123 123 2 123 *"), two_of('1', '2', '3')),
+ (k("123 123 3 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("daemons_key, daemons",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('3', ['3']),
+ ('12', ['1', '2']),
+ ('112', ['1', '1', '2']), # deal with existing co-located daemons
+ ('23', ['2', '3']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_scheduler_daemons(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ daemons_key, daemons,
+ spec_section_key, spec_section):
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ dds = [
+ DaemonDescription('mgr', d, d)
+ for d in daemons
+ ]
+ run_scheduler_test(
+ results=test_scheduler_daemons_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=dds,
+ key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key)
+ )
+
+
+# =========================
+
+
+class NodeAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ # just hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(hosts=['smithi060']),
+ ['smithi060'],
+ [],
+ None, None,
+ ['mgr:smithi060'], ['mgr:smithi060'], []
+ ),
+ # all_hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='*'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host3'],
+ []
+ ),
+ # all_hosts + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='*', count_per_host=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+        # count that is bigger than the number of hosts. Truncate to len(hosts).
+        # mgrs should not be co-located with each other.
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+        # count that is bigger than the number of hosts; wrap around.
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # count + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds.a', 'mds.b']
+ ),
+ # count 1 + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list + existing
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a']
+ ),
+        # count + partial host list + existing daemon already on the listed host (deterministic: no change)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ [],
+ []
+ ),
+        # count + partial host list + existing daemon on an unlisted host (deterministic: replaced)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ ['mgr:host1'],
+ ['mgr.a']
+ ),
+ # label only
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (truncate to host list)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+        # label only + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(label='foo', count_per_host=3),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # host_pattern
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='mgr*'),
+ 'mgrhost1 mgrhost2 datahost'.split(),
+ [],
+ None, None,
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ []
+ ),
+ # host_pattern + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='mds*', count_per_host=3),
+ 'mdshost1 mdshost2 datahost'.split(),
+ [],
+ None, None,
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ []
+ ),
+        # label + count (with colo) + ports
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ []
+ ),
+        # label + count (with colo) + ports (+ existing)
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('rgw', 'a', 'host1', ports=[81]),
+ DaemonDescription('rgw', 'b', 'host2', ports=[80]),
+ DaemonDescription('rgw', 'c', 'host1', ports=[82]),
+ ],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw.c']
+ ),
+ # cephadm.py teuth case
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host1=y', 'host2=x']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'y', 'host1'),
+ DaemonDescription('mgr', 'x', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1(name=y)', 'mgr:host2(name=x)'],
+ [], []
+ ),
+
+        # note: the host -> rank mapping is pseudo-random, seeded by the service name,
+        # so these host/rank pairs may look arbitrary but they match the nfs.mynfs seed
+        # used by the test.
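+        # (rank_map maps rank -> {generation: daemon_id or None}; post_rank_map is the
+        # expected content of that map after HostAssignment.place() has run.)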
+
+ # ranked, fresh
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [],
+ {},
+ {0: {0: None}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ []
+ ),
+ # 21: ranked, exist
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ ],
+ {0: {1: '0.1'}},
+ {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks (2)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, extra ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ ['nfs.4.5']
+ ),
+ # 25: ranked, exist, extra ranks (scale down: kill off high rank)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host3 host2 host1'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'],
+ [],
+ ['nfs.2.5']
+ ),
+ # ranked, exist, extra ranks (scale down hosts)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'],
+ ['nfs:host3(rank=1.6)'],
+ ['nfs.2.5', 'nfs.1.5']
+ ),
+ # ranked, exist, duplicate rank
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2),
+ ],
+ {0: {0: '0.0'}, 1: {2: '1.2'}},
+ {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ ['nfs.1.1']
+ ),
+ # 28: ranked, all gens stale (failure during update cycle)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}},
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'],
+ ['nfs:host3(rank=1.4)'],
+ ['nfs.1.2']
+ ),
+ # ranked, not enough hosts
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}},
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, scale down
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(hosts=['host2']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ ['nfs:host2(rank=0.3)'],
+ ['nfs:host2(rank=0.3)'],
+ ['nfs.0.2', 'nfs.1.2', 'nfs.2.2']
+ ),
+
+ ])
+def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map,
+ expected, expected_add, expected_remove):
+ spec = None
+ service_id = None
+ allow_colo = False
+ if service_type == 'rgw':
+ service_id = 'realm.zone'
+ allow_colo = True
+ elif service_type == 'mds':
+ service_id = 'myfs'
+ allow_colo = True
+ elif service_type == 'nfs':
+ service_id = 'mynfs'
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ if not spec:
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ allow_colo=allow_colo,
+ rank_map=rank_map,
+ ).place()
+
+ assert rank_map == post_rank_map
+
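+    # '*' entries in the expected lists act as wildcards: after removing exact matches,
+    # the number of leftover slots must equal the number of wildcards.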
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+class NodeAssignmentTest5(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ available_hosts: List[str]
+    expected_candidates: List[str]
+
+
+@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates",
+ [ # noqa: E128
+ NodeAssignmentTest5(
+ 'alertmanager',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host1 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'prometheus',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host2 host4 host1'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'grafana',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host2 host4 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mgr',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host2 host1 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mon',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'rgw',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host2 host4'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'cephfs-mirror',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host3 host1 host2'.split(),
+ ),
+ ])
+def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates):
+    service_id = None
+    allow_colo = False
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=service_id,
+                       placement=placement)
+
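+    # the candidate list is shuffled deterministically per service name, so each
+    # service type has a stable expected order across runs.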
+ candidates = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in available_hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=[],
+ allow_colo=allow_colo,
+ ).get_candidates()
+
+ candidates_hosts = [h.hostname for h in candidates]
+ assert candidates_hosts == expected_candidates
+
+
+class NodeAssignmentTest2(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_len: int
+ in_set: List[str]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set",
+ [ # noqa: E128
+ # just count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+
+ # hosts + (smaller) count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2'.split()),
+ 'host1 host2'.split(),
+ [],
+ 1,
+ ['host1', 'host2'],
+ ),
+ # hosts + (smaller) count, existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [DaemonDescription('mgr', 'mgr.a', 'host1')],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+ # hosts + (smaller) count, (more) existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ 1,
+ ['host1', 'host2']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ # label + count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ ])
+def test_node_assignment2(service_type, placement, hosts,
+ daemons, expected_len, in_set):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in [h.hostname for h in hosts]:
+ assert h in in_set
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have",
+ [ # noqa: E128
+        # count larger than the explicit (partial) host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=3, hosts='host3'.split()),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ ])
+def test_node_assignment3(service_type, placement, hosts,
+ daemons, expected_len, must_have):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in must_have:
+ assert h in [h.hostname for h in hosts]
+
+
+class NodeAssignmentTest4(NamedTuple):
+ spec: ServiceSpec
+ networks: Dict[str, Dict[str, Dict[str, List[str]]]]
+ daemons: List[DaemonDescription]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ NodeAssignmentTest4(
+ ServiceSpec(
+ service_type='rgw',
+ service_id='foo',
+ placement=PlacementSpec(count=6, label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}},
+ },
+ [],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [
+ DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1',
+ ports=[443, 8888]),
+ DaemonDescription('keepalived', 'b', 'host2'),
+ DaemonDescription('keepalived', 'c', 'host3'),
+ ],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1'],
+ ['keepalived.c']
+ ),
+ ])
+def test_node_assignment4(spec, networks, daemons,
+ expected, expected_add, expected_remove):
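+    # for ingress specs, haproxy is the primary daemon type and keepalived is placed once
+    # per host; hosts without an address in the spec's network (host3 here) get no daemons.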
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ allow_colo=True,
+ networks=networks,
+ primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type,
+ per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None,
+ ).place()
+
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+@pytest.mark.parametrize("placement",
+ [ # noqa: E128
+ ('1 *'),
+ ('* label:foo'),
+ ('* host1 host2'),
+ ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'), # > 63 chars
+ ])
+def test_bad_placements(placement):
+ try:
+ PlacementSpec.from_string(placement.split(' '))
+ assert False
+ except SpecValidationError:
+ pass
+
+
+class NodeAssignmentTestBadSpec(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: str
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected",
+ [ # noqa: E128
+ # unknown host
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(hosts=['unknownhost']),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts"
+ ),
+ # unknown host pattern
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(host_pattern='unknownhost'),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts"
+ ),
+ # unknown label
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(label='unknownlabel'),
+ [],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel"
+ ),
+ ])
+def test_bad_specs(service_type, placement, hosts, daemons, expected):
+ with pytest.raises(OrchestratorValidationError) as e:
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert str(e.value) == expected
+
+
+class ActiveAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: List[List[str]]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove",
+ [
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1', 'host2'], ['host1', 'host3']],
+ [[]],
+ [['mgr.b'], ['mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3'], ['host2', 'host3']],
+ [[]],
+ [['mgr.a'], ['mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host2']],
+ [[]],
+ [['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host2', 'host3']],
+ [[]],
+ [['mgr.a']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host2'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3']],
+ [[]],
+ [['mgr.a2', 'mgr.b']]
+ ),
+ # Explicit placement should override preference for active daemon
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host1']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.b', 'mgr.c']]
+ ),
+
+ ])
+def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in hosts]) in expected
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+    unreachable_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+ [
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3', 'host4']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host4'.split(),
+ [],
+ [['host2', 'host3']],
+ [[]],
+ ),
+
+ ])
+def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class RescheduleFromOfflineTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove",
+ [
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [['host3']],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'mon',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('mon', 'a', 'host1'),
+ DaemonDescription('mon', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'ingress',
+ PlacementSpec(count=1),
+ 'host1 host2'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('haproxy', 'b', 'host2'),
+ DaemonDescription('keepalived', 'b', 'host2'),
+ ],
+ [['host1']],
+ [[]],
+ ),
+ ])
+def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove):
+
+ if service_type == 'ingress':
+ spec = \
+ IngressSpec(
+ service_type='ingress',
+ service_id='nfs-ha.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='nfs-ha.foo',
+ placement=placement,
+ )
+ else:
+ spec = \
+ ServiceSpec(
+ service_type=service_type,
+ service_id='test',
+ placement=placement,
+ )
+
+ host_specs = [HostSpec(h) for h in hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
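+    # hosts flagged 'offline' or 'maintenance' above are passed in as unreachable; the
+    # expected_add/expected_remove lists encode which daemons get rescheduled off them.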
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ draining_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class DrainExplicitPlacementTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ draining_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,draining_hosts,daemons,expected_add,expected_remove",
+ [
+ DrainExplicitPlacementTest(
+ 'crash',
+ PlacementSpec(hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3 host4'.split(),
+ [],
+ [],
+ ['host3'],
+ [
+ DaemonDescription('crash', 'host1', 'host1'),
+ DaemonDescription('crash', 'host2', 'host2'),
+ DaemonDescription('crash', 'host3', 'host3'),
+ ],
+ [[]],
+ [['crash.host3']],
+ ),
+ DrainExplicitPlacementTest(
+ 'crash',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ [],
+ [],
+ ['host1', 'host4'],
+ [
+ DaemonDescription('crash', 'host1', 'host1'),
+ DaemonDescription('crash', 'host3', 'host3'),
+ ],
+ [['host2']],
+ [['crash.host1']],
+ ),
+ ])
+def test_drain_from_explicit_placement(service_type, placement, hosts, maintenance_hosts, offline_hosts, draining_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id='test',
+ placement=placement)
+
+ host_specs = [HostSpec(h) for h in hosts]
+ draining_host_specs = [HostSpec(h) for h in draining_hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ draining_hosts=draining_host_specs,
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
new file mode 100644
index 000000000..ff98a1388
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
@@ -0,0 +1,178 @@
+from unittest.mock import MagicMock
+from cephadm.service_discovery import Root
+
+
+class FakeDaemonDescription:
+ def __init__(self, ip, ports, hostname, service_name='', daemon_type=''):
+ self.ip = ip
+ self.ports = ports
+ self.hostname = hostname
+ self._service_name = service_name
+ self.daemon_type = daemon_type
+
+ def service_name(self):
+ return self._service_name
+
+
+class FakeCache:
+ def get_daemons_by_service(self, service_type):
+ if service_type == 'ceph-exporter':
+ return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
+ FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
+
+ return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
+ FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
+
+ def get_daemons_by_type(self, daemon_type):
+ return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'),
+ FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')]
+
+
+class FakeInventory:
+ def get_addr(self, name: str):
+ return '1.2.3.4'
+
+
+class FakeServiceSpec:
+ def __init__(self, port):
+ self.monitor_port = port
+
+
+class FakeSpecDescription:
+ def __init__(self, port):
+ self.spec = FakeServiceSpec(port)
+
+
+class FakeSpecStore():
+ def __init__(self, mgr):
+ self.mgr = mgr
+ self._specs = {'ingress': FakeSpecDescription(9049)}
+
+ def __contains__(self, name):
+ return name in self._specs
+
+ def __getitem__(self, name):
+ return self._specs['ingress']
+
+
+class FakeMgr:
+ def __init__(self):
+ self.config = ''
+ self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.template = MagicMock()
+ self.log = MagicMock()
+ self.inventory = FakeInventory()
+ self.cache = FakeCache()
+ self.spec_store = FakeSpecStore(self)
+
+ def get_mgr_id(self):
+ return 'mgr-1'
+
+ def list_servers(self):
+
+ servers = [
+ {'hostname': 'node0',
+ 'ceph_version': '16.2',
+ 'services': [{'type': 'mgr', 'id': 'mgr-1'}, {'type': 'mon'}]},
+ {'hostname': 'node1',
+ 'ceph_version': '16.2',
+ 'services': [{'type': 'mgr', 'id': 'mgr-2'}, {'type': 'mon'}]}
+ ]
+
+ return servers
+
+ def _check_mon_command(self, cmd_dict, inbuf=None):
+ prefix = cmd_dict.get('prefix')
+ if prefix == 'get-cmd':
+ return 0, self.config, ''
+ if prefix == 'set-cmd':
+ self.config = cmd_dict.get('value')
+ return 0, 'value set', ''
+ return -1, '', 'error'
+
+ def get_module_option_ex(self, module, option, default_value):
+ return "9283"
+
+
+class TestServiceDiscovery:
+
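+    # Root.get_sd_config() returns Prometheus HTTP-SD style entries, i.e. a list of
+    # {'targets': [...], 'labels': {...}} dicts; each test checks the structure first
+    # and then the content for one service type.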
+ def test_get_sd_config_prometheus(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('mgr-prometheus')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['node0:9283']
+
+ def test_get_sd_config_node_exporter(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('node-exporter')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9100']
+ assert cfg[0]['labels'] == {'instance': 'node0'}
+ assert cfg[1]['targets'] == ['1.2.3.5:9200']
+ assert cfg[1]['labels'] == {'instance': 'node1'}
+
+ def test_get_sd_config_alertmgr(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('alertmanager')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9100', '1.2.3.5:9200']
+
+ def test_get_sd_config_haproxy(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('haproxy')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9049']
+ assert cfg[0]['labels'] == {'instance': 'ingress'}
+
+ def test_get_sd_config_ceph_exporter(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('ceph-exporter')
+
+ # check response structure
+ assert cfg
+ for entry in cfg:
+ assert 'labels' in entry
+ assert 'targets' in entry
+
+ # check content
+ assert cfg[0]['targets'] == ['1.2.3.4:9926']
+
+ def test_get_sd_config_invalid_service(self):
+ mgr = FakeMgr()
+ root = Root(mgr, 5000, '0.0.0.0')
+ cfg = root.get_sd_config('invalid-service')
+ assert cfg == []
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
new file mode 100644
index 000000000..2300b288d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -0,0 +1,2725 @@
+from textwrap import dedent
+import json
+import urllib.parse
+import yaml
+from mgr_util import build_url
+
+import pytest
+
+from unittest.mock import MagicMock, call, patch, ANY
+
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
+ RbdMirrorService, CrashService, CephadmDaemonDeploySpec
+from cephadm.services.iscsi import IscsiService
+from cephadm.services.nfs import NFSService
+from cephadm.services.nvmeof import NvmeofService
+from cephadm.services.osd import OSDService
+from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
+ NodeExporterService, LokiService, PromtailService
+from cephadm.module import CephadmOrchestrator
+from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
+ ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, TracingSpec, \
+ PrometheusSpec, CephExporterSpec, NFSServiceSpec, NvmeofServiceSpec
+from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect
+
+from ceph.utils import datetime_now
+
+from orchestrator import OrchestratorError
+from orchestrator._interface import DaemonDescription
+
+from typing import Dict, List
+
+grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n"""
+
+grafana_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n"""
+
+
+class FakeInventory:
+ def get_addr(self, name: str) -> str:
+ return '1.2.3.4'
+
+
+class FakeMgr:
+ def __init__(self):
+ self.config = ''
+ self.set_mon_crush_locations: Dict[str, List[str]] = {}
+ self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.template = MagicMock()
+ self.log = MagicMock()
+ self.inventory = FakeInventory()
+
+ def _check_mon_command(self, cmd_dict, inbuf=None):
+ prefix = cmd_dict.get('prefix')
+ if prefix == 'get-cmd':
+ return 0, self.config, ''
+ if prefix == 'set-cmd':
+ self.config = cmd_dict.get('value')
+ return 0, 'value set', ''
+ if prefix in ['auth get']:
+ return 0, '[foo]\nkeyring = asdf\n', ''
+ if prefix == 'quorum_status':
+ # actual quorum status output from testing
+ # note in this output all of the mons have blank crush locations
+ return 0, """{"election_epoch": 14, "quorum": [0, 1, 2], "quorum_names": ["vm-00", "vm-01", "vm-02"], "quorum_leader_name": "vm-00", "quorum_age": 101, "features": {"quorum_con": "4540138322906710015", "quorum_mon": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"]}, "monmap": {"epoch": 3, "fsid": "9863e1b8-6f24-11ed-8ad8-525400c13ad2", "modified": "2022-11-28T14:00:29.972488Z", "created": "2022-11-28T13:57:55.847497Z", "min_mon_release": 18, "min_mon_release_name": "reef", "election_strategy": 1, "disallowed_leaders: ": "", "stretch_mode": false, "tiebreaker_mon": "", "features": {"persistent": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"], "optional": []}, "mons": [{"rank": 0, "name": "vm-00", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.61:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.61:6789", "nonce": 0}]}, "addr": "192.168.122.61:6789/0", "public_addr": "192.168.122.61:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 1, "name": "vm-01", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.63:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.63:6789", "nonce": 0}]}, "addr": "192.168.122.63:6789/0", "public_addr": "192.168.122.63:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 2, "name": "vm-02", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.82:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.82:6789", "nonce": 0}]}, "addr": "192.168.122.82:6789/0", "public_addr": "192.168.122.82:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}]}}""", ''
+ if prefix == 'mon set_location':
+ self.set_mon_crush_locations[cmd_dict.get('name')] = cmd_dict.get('args')
+ return 0, '', ''
+ return -1, '', 'error'
+
+ def get_minimal_ceph_conf(self) -> str:
+ return ''
+
+ def get_mgr_ip(self) -> str:
+ return '1.2.3.4'
+
+
+class TestCephadmService:
+ def test_set_service_url_on_dashboard(self):
+ # pylint: disable=protected-access
+ mgr = FakeMgr()
+ service_url = 'http://svc:1000'
+ service = GrafanaService(mgr)
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ assert mgr.config == service_url
+
+ # set-cmd should not be called if value doesn't change
+ mgr.check_mon_command.reset_mock()
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})
+
+ def _get_services(self, mgr):
+ # services:
+ osd_service = OSDService(mgr)
+ nfs_service = NFSService(mgr)
+ mon_service = MonService(mgr)
+ mgr_service = MgrService(mgr)
+ mds_service = MdsService(mgr)
+ rgw_service = RgwService(mgr)
+ rbd_mirror_service = RbdMirrorService(mgr)
+ grafana_service = GrafanaService(mgr)
+ alertmanager_service = AlertmanagerService(mgr)
+ prometheus_service = PrometheusService(mgr)
+ node_exporter_service = NodeExporterService(mgr)
+ loki_service = LokiService(mgr)
+ promtail_service = PromtailService(mgr)
+ crash_service = CrashService(mgr)
+ iscsi_service = IscsiService(mgr)
+ nvmeof_service = NvmeofService(mgr)
+ cephadm_services = {
+ 'mon': mon_service,
+ 'mgr': mgr_service,
+ 'osd': osd_service,
+ 'mds': mds_service,
+ 'rgw': rgw_service,
+ 'rbd-mirror': rbd_mirror_service,
+ 'nfs': nfs_service,
+ 'grafana': grafana_service,
+ 'alertmanager': alertmanager_service,
+ 'prometheus': prometheus_service,
+ 'node-exporter': node_exporter_service,
+ 'loki': loki_service,
+ 'promtail': promtail_service,
+ 'crash': crash_service,
+ 'iscsi': iscsi_service,
+ 'nvmeof': nvmeof_service,
+ }
+ return cephadm_services
+
+ def test_get_auth_entity(self):
+ mgr = FakeMgr()
+ cephadm_services = self._get_services(mgr)
+
+ for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]:
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+ assert "client.crash.host" == \
+ cephadm_services["crash"].get_auth_entity("id1", "host")
+ with pytest.raises(OrchestratorError):
+ cephadm_services["crash"].get_auth_entity("id1", "")
+ cephadm_services["crash"].get_auth_entity("id1")
+
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1")
+
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1")
+
+ for daemon_type in ["osd", "mds"]:
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+        # services based on plain CephadmService shouldn't have get_auth_entity;
+        # check each daemon type separately so every one is actually exercised
+        for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']:
+            with pytest.raises(AttributeError):
+                cephadm_services[daemon_type].get_auth_entity("id1", "host")
+
+
+class TestISCSIService:
+
+ mgr = FakeMgr()
+ iscsi_service = IscsiService(mgr)
+
+ iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a")
+ iscsi_spec.daemon_type = "iscsi"
+ iscsi_spec.daemon_id = "a"
+ iscsi_spec.spec = MagicMock()
+ iscsi_spec.spec.daemon_type = "iscsi"
+ iscsi_spec.spec.ssl_cert = ''
+ iscsi_spec.api_user = "user"
+ iscsi_spec.api_password = "password"
+ iscsi_spec.api_port = 5000
+ iscsi_spec.api_secure = False
+ iscsi_spec.ssl_cert = "cert"
+ iscsi_spec.ssl_key = "key"
+
+ mgr.spec_store = MagicMock()
+ mgr.spec_store.all_specs.get.return_value = iscsi_spec
+
+ def test_iscsi_client_caps(self):
+
+ iscsi_daemon_spec = CephadmDaemonDeploySpec(
+ host='host', daemon_id='a', service_name=self.iscsi_spec.service_name())
+
+ self.iscsi_service.prepare_create(iscsi_daemon_spec)
+
+ expected_caps = ['mon',
+ 'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"',
+ 'mgr', 'allow command "service status"',
+ 'osd', 'allow rwx']
+
+ expected_call = call({'prefix': 'auth get-or-create',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+ expected_call2 = call({'prefix': 'auth caps',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+ expected_call3 = call({'prefix': 'auth get',
+ 'entity': 'client.iscsi.a'})
+
+ assert expected_call in self.mgr.mon_command.mock_calls
+ assert expected_call2 in self.mgr.mon_command.mock_calls
+ assert expected_call3 in self.mgr.mon_command.mock_calls
+
+ @patch('cephadm.utils.resolve_ip')
+ def test_iscsi_dashboard_config(self, mock_resolve_ip):
+
+ self.mgr.check_mon_command = MagicMock()
+ self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '')
+
+ # Case 1: use IPV4 address
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='192.168.1.1')
+ daemon_list = [id1]
+ mock_resolve_ip.return_value = '192.168.1.1'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@192.168.1.1:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+ # Case 2: use IPV6 address
+ self.mgr.check_mon_command.reset_mock()
+
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210')
+ mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+        # Case 3: IPv6 address with secure protocol (api_secure=True)
+ self.mgr.check_mon_command.reset_mock()
+
+ self.iscsi_spec.api_secure = True
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+ @patch("cephadm.services.iscsi.IscsiService.get_trusted_ips")
+ def test_iscsi_config(self, _get_trusted_ips, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+ iscsi_daemon_id = 'testpool.test.qwert'
+ trusted_ips = '1.1.1.1,2.2.2.2'
+ api_port = 3456
+ api_user = 'test-user'
+ api_password = 'test-password'
+ pool = 'testpool'
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ _get_name.return_value = iscsi_daemon_id
+ _get_trusted_ips.return_value = trusted_ips
+
+ iscsi_gateway_conf = f"""# This file is generated by cephadm.
+[config]
+cluster_client_name = client.iscsi.{iscsi_daemon_id}
+pool = {pool}
+trusted_ip_list = {trusted_ips}
+minimum_gateways = 1
+api_port = {api_port}
+api_user = {api_user}
+api_password = {api_password}
+api_secure = False
+log_to_stderr = True
+log_to_stderr_prefix = debug
+log_to_file = False"""
+
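+        # applying the spec should deploy the daemon via '_orch deploy', passing the
+        # generated iscsi-gateway.cfg to cephadm as a config blob on stdin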
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, IscsiServiceSpec(service_id=pool,
+ api_port=api_port,
+ api_user=api_user,
+ api_password=api_password,
+ pool=pool,
+ trusted_ip_list=trusted_ips)):
+ _run_cephadm.assert_called_with(
+ 'test',
+ f'iscsi.{iscsi_daemon_id}',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": f'iscsi.{iscsi_daemon_id}',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [api_port],
+ },
+ "meta": {
+ 'service_name': f'iscsi.{pool}',
+ 'ports': [api_port],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": f"[client.iscsi.{iscsi_daemon_id}]\nkey = None\n",
+ "files": {
+ "iscsi-gateway.cfg": iscsi_gateway_conf,
+ },
+ }
+ }),
+ )
+
+
+class TestNVMEOFService:
+
+ mgr = FakeMgr()
+ nvmeof_service = NvmeofService(mgr)
+
+ nvmeof_spec = NvmeofServiceSpec(service_type='nvmeof', service_id="a")
+ nvmeof_spec.daemon_type = 'nvmeof'
+ nvmeof_spec.daemon_id = "a"
+ nvmeof_spec.spec = MagicMock()
+ nvmeof_spec.spec.daemon_type = 'nvmeof'
+
+ mgr.spec_store = MagicMock()
+ mgr.spec_store.all_specs.get.return_value = nvmeof_spec
+
+ def test_nvmeof_client_caps(self):
+ pass
+
+ @patch('cephadm.utils.resolve_ip')
+ def test_nvmeof_dashboard_config(self, mock_resolve_ip):
+ pass
+
+ @patch("cephadm.inventory.Inventory.get_addr", lambda _, __: '192.168.100.100')
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+ nvmeof_daemon_id = 'testpool.test.qwert'
+ pool = 'testpool'
+ tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288'
+ default_port = 5500
+ group = 'mygroup'
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ _get_name.return_value = nvmeof_daemon_id
+
+ nvmeof_gateway_conf = f"""# This file is generated by cephadm.
+[gateway]
+name = client.nvmeof.{nvmeof_daemon_id}
+group = {group}
+addr = 192.168.100.100
+port = {default_port}
+enable_auth = False
+state_update_notify = True
+state_update_interval_sec = 5
+
+[ceph]
+pool = {pool}
+config_file = /etc/ceph/ceph.conf
+id = nvmeof.{nvmeof_daemon_id}
+
+[mtls]
+server_key = ./server.key
+client_key = ./client.key
+server_cert = ./server.crt
+client_cert = ./client.crt
+
+[spdk]
+tgt_path = /usr/local/bin/nvmf_tgt
+rpc_socket = /var/tmp/spdk.sock
+timeout = 60
+log_level = WARN
+conn_retries = 10
+transports = tcp
+transport_tcp_options = {{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}}
+tgt_cmd_extra_args = {tgt_cmd_extra_args}\n"""
+
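+        # as with iscsi, deployment goes through '_orch deploy'; the generated
+        # ceph-nvmeof.conf is passed as a config blob and the default ports are expected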
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool,
+ tgt_cmd_extra_args=tgt_cmd_extra_args,
+ group=group,
+ pool=pool)):
+ _run_cephadm.assert_called_with(
+ 'test',
+ f'nvmeof.{nvmeof_daemon_id}',
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": "nvmeof.testpool.test.qwert",
+ "image": "",
+ "deploy_arguments": [],
+ "params": {
+ "tcp_ports": [5500, 4420, 8009]
+ },
+ "meta": {
+ "service_name": "nvmeof.testpool",
+ "ports": [5500, 4420, 8009],
+ "ip": None,
+ "deployed_by": [],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "extra_entrypoint_args": None
+ },
+ "config_blobs": {
+ "config": "",
+ "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n",
+ "files": {
+ "ceph-nvmeof.conf": nvmeof_gateway_conf
+ }
+ }
+ }),
+ )
+
+
+class TestMonitoring:
+ def _get_config(self, url: str) -> str:
+
+ return f"""
+ # This file is generated by cephadm.
+ # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+ global:
+ resolve_timeout: 5m
+ http_config:
+ tls_config:
+ insecure_skip_verify: true
+
+ route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+
+ receivers:
+ - name: 'default'
+ webhook_configs:
+ - name: 'ceph-dashboard'
+ webhook_configs:
+ - url: '{url}/api/prometheus_receiver'
+ """
+
+ @pytest.mark.parametrize(
+ "dashboard_url,expected_yaml_url",
+ [
+ # loopback address
+ ("http://[::1]:8080", "http://localhost:8080"),
+ # IPv6
+ (
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ ),
+ # IPv6 to FQDN
+ (
+ "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080",
+ "http://mgr.fqdn.test:8080",
+ ),
+ # IPv4
+ (
+ "http://192.168.0.123:8080",
+ "http://192.168.0.123:8080",
+ ),
+ # IPv4 to FQDN
+ (
+ "http://192.168.0.123:8080",
+ "http://mgr.fqdn.test:8080",
+ ),
+ ],
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ @patch("socket.getfqdn")
+ def test_alertmanager_config(
+ self,
+ mock_getfqdn,
+ mock_get,
+ _run_cephadm,
+ cephadm_module: CephadmOrchestrator,
+ dashboard_url,
+ expected_yaml_url,
+ ):
+ _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+ mock_get.return_value = {"services": {"dashboard": dashboard_url}}
+ purl = urllib.parse.urlparse(expected_yaml_url)
+ mock_getfqdn.return_value = purl.hostname
+
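+        # the alertmanager webhook URL is built from socket.getfqdn() (mocked above), so
+        # each dashboard_url maps to the expected_yaml_url form in the generated config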
+ with with_host(cephadm_module, "test"):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config(expected_yaml_url)).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9093, 9094],
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [9093, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "alertmanager.yml": y,
+ },
+ "peers": [],
+ }
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("socket.getfqdn")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash')
+ def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ fqdn = 'host1.test'
+ _get_fqdn.return_value = fqdn
+
+ def gen_cert(host, addr):
+ return ('mycert', 'mykey')
+
+ def get_root_cert():
+ return 'my_root_cert'
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.secure_monitoring_stack = True
+ cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
+ cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+ cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert)
+ cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert)
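+            # with the secure monitoring stack, the deployment is expected to include the TLS
+            # cert/key files, a web.yml with basic-auth users, and the root CA cert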
+ with with_service(cephadm_module, AlertManagerSpec()):
+
+ y = dedent(f"""
+ # This file is generated by cephadm.
+ # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+ global:
+ resolve_timeout: 5m
+ http_config:
+ tls_config:
+ ca_file: root_cert.pem
+
+ route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+
+ receivers:
+ - name: 'default'
+ webhook_configs:
+ - name: 'ceph-dashboard'
+ webhook_configs:
+ - url: 'http://{fqdn}:8080/api/prometheus_receiver'
+ """).lstrip()
+
+ web_config = dedent("""
+ tls_server_config:
+ cert_file: alertmanager.crt
+ key_file: alertmanager.key
+ basic_auth_users:
+ alertmanager_user: alertmanager_password_hash""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9093, 9094],
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [9093, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "alertmanager.yml": y,
+ 'alertmanager.crt': 'mycert',
+ 'alertmanager.key': 'mykey',
+ 'web.yml': web_config,
+ 'root_cert.pem': 'my_root_cert'
+ },
+ 'peers': [],
+ 'web_config': '/etc/alertmanager/web.yml',
+ }
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
+ with with_host(cephadm_module, 'test'):
+            # host "test" needs to have networks defined for the keepalived daemon to be placed
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
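+            # deploy node-exporter, ceph-exporter, rgw + ingress and alertmanager alongside
+            # prometheus so every scrape job shows up in the generated prometheus.yml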
+ with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+ with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \
+ with_service(cephadm_module, s) as _, \
+ with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+ with_service(cephadm_module, IngressSpec(service_id='ingress',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip="1.2.3.4/32",
+ backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+ rule_files:
+ - /etc/prometheus/alerting/*
+
+ alerting:
+ alertmanagers:
+ - scheme: http
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
+
+ scrape_configs:
+ - job_name: 'ceph'
+ honor_labels: true
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+
+ - job_name: 'node'
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+
+ - job_name: 'haproxy'
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+
+ - job_name: 'ceph-exporter'
+ honor_labels: true
+ http_sd_configs:
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+ """).lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "prometheus.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'prometheus.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9095],
+ },
+ "meta": {
+ 'service_name': 'prometheus',
+ 'ports': [9095],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "prometheus.yml": y,
+ "/etc/prometheus/alerting/custom_alerts.yml": "",
+ },
+ 'retention_time': '15d',
+ 'retention_size': '0',
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+ @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash')
+ def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
+
+ def gen_cert(host, addr):
+ return ('mycert', 'mykey')
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.secure_monitoring_stack = True
+ cephadm_module.set_store(PrometheusService.USER_CFG_KEY, 'prometheus_user')
+ cephadm_module.set_store(PrometheusService.PASS_CFG_KEY, 'prometheus_plain_password')
+ cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
+ cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+ cephadm_module.http_server.service_discovery.username = 'sd_user'
+ cephadm_module.http_server.service_discovery.password = 'sd_password'
+ cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(
+ side_effect=gen_cert)
+            # host "test" needs to have networks defined for the keepalived daemon to be placed
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
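+            # with the secure monitoring stack, all scrape jobs and the alertmanager endpoint
+            # should switch to https and use the service-discovery basic-auth credentials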
+ with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+ with_service(cephadm_module, s) as _, \
+ with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+ with_service(cephadm_module, IngressSpec(service_id='ingress',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip="1.2.3.4/32",
+ backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
+
+ web_config = dedent("""
+ tls_server_config:
+ cert_file: prometheus.crt
+ key_file: prometheus.key
+ basic_auth_users:
+ prometheus_user: prometheus_password_hash""").lstrip()
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+ rule_files:
+ - /etc/prometheus/alerting/*
+
+ alerting:
+ alertmanagers:
+ - scheme: https
+ basic_auth:
+ username: alertmanager_user
+ password: alertmanager_plain_password
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ scrape_configs:
+ - job_name: 'ceph'
+ scheme: https
+ tls_config:
+ ca_file: mgr_prometheus_cert.pem
+ honor_labels: true
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'node'
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'haproxy'
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+
+ - job_name: 'ceph-exporter'
+ honor_labels: true
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+ """).lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "prometheus.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'prometheus.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9095],
+ },
+ "meta": {
+ 'service_name': 'prometheus',
+ 'ports': [9095],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ 'files': {
+ 'prometheus.yml': y,
+ 'root_cert.pem': '',
+ 'mgr_prometheus_cert.pem': '',
+ 'web.yml': web_config,
+ 'prometheus.crt': 'mycert',
+ 'prometheus.key': 'mykey',
+ "/etc/prometheus/alerting/custom_alerts.yml": "",
+ },
+ 'retention_time': '15d',
+ 'retention_size': '0',
+ 'web_config': '/etc/prometheus/web.yml',
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
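+        # deploying the loki service should produce the default loki.yml shown below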
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, MonitoringSpec('loki')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ auth_enabled: false
+
+ server:
+ http_listen_port: 3100
+ grpc_listen_port: 8080
+
+ common:
+ path_prefix: /tmp/loki
+ storage:
+ filesystem:
+ chunks_directory: /tmp/loki/chunks
+ rules_directory: /tmp/loki/rules
+ replication_factor: 1
+ ring:
+ instance_addr: 127.0.0.1
+ kvstore:
+ store: inmemory
+
+ schema_config:
+ configs:
+ - from: 2020-10-24
+ store: boltdb-shipper
+ object_store: filesystem
+ schema: v11
+ index:
+ prefix: index_
+ period: 24h""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "loki.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'loki.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [3100],
+ },
+ "meta": {
+ 'service_name': 'loki',
+ 'ports': [3100],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "loki.yml": y
+ },
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
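+        # note the empty host in the clients url of the expected config, presumably because
+        # no loki daemon is deployed in this test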
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, MonitoringSpec('promtail')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ server:
+ http_listen_port: 9080
+ grpc_listen_port: 0
+
+ positions:
+ filename: /tmp/positions.yaml
+
+ clients:
+ - url: http://:3100/loki/api/v1/push
+
+ scrape_configs:
+ - job_name: system
+ static_configs:
+ - labels:
+ job: Cluster Logs
+ __path__: /var/log/ceph/**/*.log""").lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "promtail.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'promtail.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9080],
+ },
+ "meta": {
+ 'service_name': 'promtail',
+ 'ports': [9080],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": {
+ "promtail.yml": y
+ },
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
+ def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+
+ with with_host(cephadm_module, "test"):
+ cephadm_module.set_store("test/grafana_crt", grafana_cert)
+ cephadm_module.set_store("test/grafana_key", grafana_key)
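+            # the per-host cert/key stored above should end up in certs/cert_file and
+            # certs/cert_key of the generated config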
+ with with_service(
+ cephadm_module, PrometheusSpec("prometheus")
+ ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service(
+ cephadm_module, GrafanaSpec("grafana")
+ ) as _:
+ files = {
+ 'grafana.ini': dedent("""
+ # This file is generated by cephadm.
+ [users]
+ default_theme = light
+ [auth.anonymous]
+ enabled = true
+ org_name = 'Main Org.'
+ org_role = 'Viewer'
+ [server]
+ domain = 'bootstrap.storage.lab'
+ protocol = https
+ cert_file = /etc/grafana/certs/cert_file
+ cert_key = /etc/grafana/certs/cert_key
+ http_port = 3000
+ http_addr =
+ [snapshots]
+ external_enabled = false
+ [security]
+ disable_initial_admin_creation = true
+ cookie_secure = true
+ cookie_samesite = none
+ allow_embedding = true""").lstrip(), # noqa: W291
+ 'provisioning/datasources/ceph-dashboard.yml': dedent("""
+ # This file is generated by cephadm.
+ apiVersion: 1
+
+ deleteDatasources:
+ - name: 'Dashboard1'
+ orgId: 1
+
+ datasources:
+ - name: 'Dashboard1'
+ type: 'prometheus'
+ access: 'proxy'
+ orgId: 1
+ url: 'http://[1::4]:9095'
+ basicAuth: false
+ isDefault: true
+ editable: false
+
+ - name: 'Loki'
+ type: 'loki'
+ access: 'proxy'
+ url: ''
+ basicAuth: false
+ isDefault: false
+ editable: false""").lstrip(),
+ 'certs/cert_file': dedent(f"""
+ # generated by cephadm\n{grafana_cert}""").lstrip(),
+ 'certs/cert_key': dedent(f"""
+ # generated by cephadm\n{grafana_key}""").lstrip(),
+ }
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "grafana.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'grafana.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [3000],
+ },
+ "meta": {
+ 'service_name': 'grafana',
+ 'ports': [3000],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {
+ "files": files,
+ },
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
+ out = cephadm_module.cephadm_services['grafana'].generate_config(
+ CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
+ assert out == (
+ {
+ 'files':
+ {
+ 'grafana.ini':
+ '# This file is generated by cephadm.\n'
+ '[users]\n'
+ ' default_theme = light\n'
+ '[auth.anonymous]\n'
+ ' enabled = true\n'
+ " org_name = 'Main Org.'\n"
+ " org_role = 'Viewer'\n"
+ '[server]\n'
+ " domain = 'bootstrap.storage.lab'\n"
+ ' protocol = https\n'
+ ' cert_file = /etc/grafana/certs/cert_file\n'
+ ' cert_key = /etc/grafana/certs/cert_key\n'
+ ' http_port = 3000\n'
+ ' http_addr = \n'
+ '[snapshots]\n'
+ ' external_enabled = false\n'
+ '[security]\n'
+ ' admin_user = admin\n'
+ ' admin_password = secure\n'
+ ' cookie_secure = true\n'
+ ' cookie_samesite = none\n'
+ ' allow_embedding = true',
+ 'provisioning/datasources/ceph-dashboard.yml':
+ "# This file is generated by cephadm.\n"
+ "apiVersion: 1\n\n"
+ 'deleteDatasources:\n\n'
+ 'datasources:\n\n'
+ " - name: 'Loki'\n"
+ " type: 'loki'\n"
+ " access: 'proxy'\n"
+ " url: ''\n"
+ ' basicAuth: false\n'
+ ' isDefault: false\n'
+ ' editable: false',
+ 'certs/cert_file': ANY,
+ 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False'])
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_grafana_no_anon_access(self, cephadm_module: CephadmOrchestrator):
+        # with anonymous_access set to False, the [auth.anonymous] section should not be
+        # present in the generated grafana config. Note that an initial_admin_password must
+        # be provided when anonymous_access is False
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+ with_service(cephadm_module, GrafanaSpec(anonymous_access=False, initial_admin_password='secure')):
+ out = cephadm_module.cephadm_services['grafana'].generate_config(
+ CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
+ assert out == (
+ {
+ 'files':
+ {
+ 'grafana.ini':
+ '# This file is generated by cephadm.\n'
+ '[users]\n'
+ ' default_theme = light\n'
+ '[server]\n'
+ " domain = 'bootstrap.storage.lab'\n"
+ ' protocol = https\n'
+ ' cert_file = /etc/grafana/certs/cert_file\n'
+ ' cert_key = /etc/grafana/certs/cert_key\n'
+ ' http_port = 3000\n'
+ ' http_addr = \n'
+ '[snapshots]\n'
+ ' external_enabled = false\n'
+ '[security]\n'
+ ' admin_user = admin\n'
+ ' admin_password = secure\n'
+ ' cookie_secure = true\n'
+ ' cookie_samesite = none\n'
+ ' allow_embedding = true',
+ 'provisioning/datasources/ceph-dashboard.yml':
+ "# This file is generated by cephadm.\n"
+ "apiVersion: 1\n\n"
+ 'deleteDatasources:\n\n'
+ 'datasources:\n\n'
+ " - name: 'Loki'\n"
+ " type: 'loki'\n"
+ " access: 'proxy'\n"
+ " url: ''\n"
+ ' basicAuth: false\n'
+ ' isDefault: false\n'
+ ' editable: false',
+ 'certs/cert_file': ANY,
+ 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False'])
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+
+ yaml_str = """service_type: alertmanager
+service_name: alertmanager
+placement:
+ count: 1
+spec:
+ port: 4200
+"""
+ yaml_file = yaml.safe_load(yaml_str)
+ spec = ServiceSpec.from_json(yaml_file)
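+            # the custom port 4200 should replace the default alertmanager port in both
+            # tcp_ports and the meta ports, while the peer port 9094 is kept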
+
+ with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
+ with with_service(cephadm_module, spec):
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ "alertmanager.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'alertmanager.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [4200, 9094],
+ 'reconfig': True,
+ },
+ "meta": {
+ 'service_name': 'alertmanager',
+ 'ports': [4200, 9094],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": {},
+ }),
+ )
+
+
+class TestRGWService:
+
+ @pytest.mark.parametrize(
+ "frontend, ssl, extra_args, expected",
+ [
+ ('beast', False, ['tcp_nodelay=1'],
+ 'beast endpoint=[fd00:fd00:fd00:3000::1]:80 tcp_nodelay=1'),
+ ('beast', True, ['tcp_nodelay=0', 'max_header_size=65536'],
+ 'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo tcp_nodelay=0 max_header_size=65536'),
+ ('civetweb', False, [], 'civetweb port=[fd00:fd00:fd00:3000::1]:80'),
+ ('civetweb', True, None,
+ 'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'),
+ ]
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, frontend, ssl, extra_args, expected, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.cache.update_host_networks('host1', {
+ 'fd00:fd00:fd00:3000::/64': {
+ 'if0': ['fd00:fd00:fd00:3000::1']
+ }
+ })
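+            # the spec below restricts rgw to the IPv6 network above; the resulting
+            # rgw_frontends config value is checked against `expected`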
+ s = RGWSpec(service_id="foo",
+ networks=['fd00:fd00:fd00:3000::/64'],
+ ssl=ssl,
+ rgw_frontend_type=frontend,
+ rgw_frontend_extra_args=extra_args)
+ with with_service(cephadm_module, s) as dds:
+ _, f, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': f'client.{dds[0]}',
+ 'key': 'rgw_frontends',
+ })
+ assert f == expected
+
+
+class TestMonService:
+
+ def test_set_crush_locations(self, cephadm_module: CephadmOrchestrator):
+ mgr = FakeMgr()
+ mon_service = MonService(mgr)
+ mon_spec = ServiceSpec(service_type='mon', crush_locations={'vm-00': ['datacenter=a', 'rack=1'], 'vm-01': ['datacenter=a'], 'vm-02': ['datacenter=b', 'rack=3']})
+
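+        # one mon daemon per host; set_crush_locations should map each mon host to the
+        # crush location list given in the spec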
+ mon_daemons = [
+ DaemonDescription(daemon_type='mon', daemon_id='vm-00', hostname='vm-00'),
+ DaemonDescription(daemon_type='mon', daemon_id='vm-01', hostname='vm-01'),
+ DaemonDescription(daemon_type='mon', daemon_id='vm-02', hostname='vm-02')
+ ]
+ mon_service.set_crush_locations(mon_daemons, mon_spec)
+ assert 'vm-00' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-00'] == ['datacenter=a', 'rack=1']
+ assert 'vm-01' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-01'] == ['datacenter=a']
+ assert 'vm-02' in mgr.set_mon_crush_locations
+ assert mgr.set_mon_crush_locations['vm-02'] == ['datacenter=b', 'rack=3']
+
+
+class TestSNMPGateway:
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ })
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ },
+ port=9465)
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9465],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9465],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword'
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_engine_id': '8000C53F00000000'
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ auth_protocol='MD5',
+ privacy_protocol='AES',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_priv_password': 'mysecret',
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'MD5',
+ 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'),
+ 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'),
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ 'snmp_v3_priv_protocol': spec.privacy_protocol,
+ 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'),
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "snmp-gateway.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'snmp-gateway.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9464],
+ },
+ "meta": {
+ 'service_name': 'snmp-gateway',
+ 'ports': [9464],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+
+class TestIngressService:
+
+ @pytest.mark.parametrize(
+ "enable_haproxy_protocol",
+ [False, True],
+ )
+ @patch("cephadm.inventory.Inventory.get_addr")
+ @patch("cephadm.utils.resolve_ip")
+ @patch("cephadm.inventory.HostCache.get_daemons_by_service")
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_nfs_multiple_nfs_same_rank(
+ self,
+ _run_cephadm,
+ _get_daemons_by_service,
+ _resolve_ip, _get_addr,
+ cephadm_module: CephadmOrchestrator,
+ enable_haproxy_protocol: bool,
+ ):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ def fake_resolve_ip(hostname: str) -> str:
+ if hostname == 'host1':
+ return '192.168.122.111'
+ elif hostname == 'host2':
+ return '192.168.122.222'
+ else:
+ return 'xxx.xxx.xxx.xxx'
+ _resolve_ip.side_effect = fake_resolve_ip
+
+ def fake_get_addr(hostname: str) -> str:
+ return hostname
+ _get_addr.side_effect = fake_get_addr
+
+ nfs_service = NFSServiceSpec(
+ service_id="foo",
+ placement=PlacementSpec(
+ count=1,
+ hosts=['host1', 'host2']),
+ port=12049,
+ enable_haproxy_protocol=enable_haproxy_protocol,
+ )
+
+ ispec = IngressSpec(
+ service_type='ingress',
+ service_id='nfs.foo',
+ backend_service='nfs.foo',
+ frontend_port=2049,
+ monitor_port=9049,
+ virtual_ip='192.168.122.100/24',
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ enable_haproxy_protocol=enable_haproxy_protocol,
+ )
+
+ cephadm_module.spec_store._specs = {
+ 'nfs.foo': nfs_service,
+ 'ingress.nfs.foo': ispec
+ }
+ cephadm_module.spec_store.spec_created = {
+ 'nfs.foo': datetime_now(),
+ 'ingress.nfs.foo': datetime_now()
+ }
+
+        # in both test cases below we want only the ip of the host1 nfs daemon,
+        # since we give it a higher rank_generation than, but the same rank as,
+        # the nfs daemon on host2
+ haproxy_txt = (
+ '# This file is generated by cephadm.\n'
+ 'global\n'
+ ' log 127.0.0.1 local2\n'
+ ' chroot /var/lib/haproxy\n'
+ ' pidfile /var/lib/haproxy/haproxy.pid\n'
+ ' maxconn 8000\n'
+ ' daemon\n'
+ ' stats socket /var/lib/haproxy/stats\n\n'
+ 'defaults\n'
+ ' mode tcp\n'
+ ' log global\n'
+ ' timeout queue 1m\n'
+ ' timeout connect 10s\n'
+ ' timeout client 1m\n'
+ ' timeout server 1m\n'
+ ' timeout check 10s\n'
+ ' maxconn 8000\n\n'
+ 'frontend stats\n'
+ ' mode http\n'
+ ' bind 192.168.122.100:9049\n'
+ ' bind host1:9049\n'
+ ' stats enable\n'
+ ' stats uri /stats\n'
+ ' stats refresh 10s\n'
+ ' stats auth admin:12345\n'
+ ' http-request use-service prometheus-exporter if { path /metrics }\n'
+ ' monitor-uri /health\n\n'
+ 'frontend frontend\n'
+ ' bind 192.168.122.100:2049\n'
+ ' default_backend backend\n\n'
+ 'backend backend\n'
+ ' mode tcp\n'
+ ' balance source\n'
+ ' hash-type consistent\n'
+ )
+ if enable_haproxy_protocol:
+ haproxy_txt += ' default-server send-proxy-v2\n'
+ haproxy_txt += ' server nfs.foo.0 192.168.122.111:12049\n'
+ haproxy_expected_conf = {
+ 'files': {'haproxy.cfg': haproxy_txt}
+ }
+
+        # verify we get the same cfg regardless of the order in which the nfs daemons are returned.
+        # both nfs daemons have rank 0, so only the one with rank_generation 1, i.e. the one
+        # on host1, should be picked
+ nfs_daemons = [
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]),
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049])
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+        # swap the order now; it should still pick out the one with the higher rank_generation.
+        # both nfs daemons have rank 0, so only the one with rank_generation 1, i.e. the one
+        # on host1, should be picked
+ nfs_daemons = [
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]),
+ DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049])
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4']
+ }
+ })
+
+ # the ingress backend
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ip="1.2.3.4/32")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.4\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind 1.2.3.4:8999\n '
+ 'bind 1.2.3.7:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind 1.2.3.4:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
+
+ # the ingress backend
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast', rgw_frontend_port=443, ssl=True)
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ip="1.2.3.4/32")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://[1::4]:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind 1.2.3.4:8999\n '
+ 'bind 1::4:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind 1.2.3.4:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'default-server ssl\n '
+ 'default-server verify none\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1::4:443 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
+
+ # Check the ingress with multiple VIPs
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ips_list=["1.2.3.4/32"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ # Test with only 1 IP on the list, as it will fail with more VIPS but only one host.
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 30s\n '
+ 'timeout server 30s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind *:8999\n '
+ 'bind 1.2.3.7:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind *:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_haproxy_port_ips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4/32']
+ }
+ })
+
+            # the rgw backend for the ingress
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ip = '1.2.3.100'
+ frontend_port = 8089
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=frontend_port,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip=f"{ip}/24")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the haproxy conf based on the specified spec
+ haproxy_daemon_spec = cephadm_module.cephadm_services['ingress'].prepare_create(
+ CephadmDaemonDeploySpec(
+ host='test',
+ daemon_type='haproxy',
+ daemon_id='ingress',
+ service_name=ispec.service_name()))
+
+ assert haproxy_daemon_spec.port_ips == {str(frontend_port): ip}
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.1'):
+ with with_host(cephadm_module, 'test2', addr='1.2.3.2'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.1']
+ }
+ })
+ cephadm_module.cache.update_host_networks('test2', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.2']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.2']
+ }
+ })
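+                # with one VIP per subnet, the generated keepalived.conf should contain two
+                # vrrp_instance sections (VI_0 and VI_1), one per interface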
+
+ # Check the ingress with multiple VIPs
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ placement=PlacementSpec(hosts=['test', 'test2']),
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://1.2.3.1:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '1.2.3.2\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.100/24 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ 'vrrp_instance VI_1 {\n '
+ 'state BACKUP\n '
+ 'priority 90\n '
+ 'interface if1\n '
+ 'virtual_router_id 51\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 100.100.100.1\n '
+ 'unicast_peer {\n '
+ '100.100.100.2\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '100.100.100.100/24 dev if1\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_keepalive_interface_host_filtering(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        # we need to make sure keepalived daemons will have an interface
+        # on the hosts we deploy them on in order to set up their VIP
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.1'):
+ with with_host(cephadm_module, 'test2', addr='1.2.3.2'):
+ with with_host(cephadm_module, 'test3', addr='1.2.3.3'):
+ with with_host(cephadm_module, 'test4', addr='1.2.3.3'):
+                        # set up "test" and "test4" to have all the necessary interfaces,
+                        # "test2" to have only one of them (it should still be filtered out)
+                        # and "test3" to have none of them
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.1']
+ }
+ })
+ cephadm_module.cache.update_host_networks('test2', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.2']
+ },
+ })
+ cephadm_module.cache.update_host_networks('test4', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4']
+ },
+ '100.100.100.0/24': {
+ 'if1': ['100.100.100.4']
+ }
+ })
+
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ placement=PlacementSpec(hosts=['test', 'test2', 'test3', 'test4']),
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # since we're never actually going to refresh the host here,
+ # check the tmp daemons to see what was placed during the apply
+ daemons = cephadm_module.cache._get_tmp_daemons()
+ keepalive_daemons = [d for d in daemons if d.daemon_type == 'keepalived']
+ hosts_deployed_on = [d.hostname for d in keepalive_daemons]
+ assert 'test' in hosts_deployed_on
+ assert 'test2' not in hosts_deployed_on
+ assert 'test3' not in hosts_deployed_on
+ assert 'test4' in hosts_deployed_on
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+ def test_keepalive_only_nfs_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ with with_host(cephadm_module, 'test', addr='1.2.3.7'):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ }
+ })
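+            # keepalive_only: the ingress only manages the VIP here; the nfs daemon binds
+            # the virtual IP itself (Bind_addr) and keepalived falls back to the dummy
+            # /usr/bin/false check script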
+
+            # the nfs backend for this keepalive-only ingress
+ s = NFSServiceSpec(service_id="foo", placement=PlacementSpec(count=1),
+ virtual_ip='1.2.3.0/24')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='nfs.foo',
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_ip='1.2.3.0/24',
+ keepalive_only=True)
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ nfs_generated_conf, _ = cephadm_module.cephadm_services['nfs'].generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='foo.test.0.0', service_name=s.service_name()))
+ ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
+ assert "Bind_addr = 1.2.3.0/24" in ganesha_conf
+
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/false"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1.2.3.1\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.0/24 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+ @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+ @patch("cephadm.inventory.Inventory.keys")
+ @patch("cephadm.inventory.Inventory.get_addr")
+ @patch("cephadm.utils.resolve_ip")
+ @patch("cephadm.inventory.HostCache.get_daemons_by_service")
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_nfs_proxy_protocol(
+ self,
+ _run_cephadm,
+ _get_daemons_by_service,
+ _resolve_ip,
+ _get_addr,
+ _inventory_keys,
+ cephadm_module: CephadmOrchestrator,
+ ):
+ """Verify that setting enable_haproxy_protocol for both ingress and
+ nfs services sets the desired configuration parameters in both
+ the haproxy config and nfs ganesha config.
+ """
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ def fake_resolve_ip(hostname: str) -> str:
+ if hostname in ('host1', "192.168.122.111"):
+ return '192.168.122.111'
+ elif hostname in ('host2', '192.168.122.222'):
+ return '192.168.122.222'
+ else:
+ raise KeyError(hostname)
+ _resolve_ip.side_effect = fake_resolve_ip
+ _get_addr.side_effect = fake_resolve_ip
+
+ def fake_keys():
+ return ['host1', 'host2']
+ _inventory_keys.side_effect = fake_keys
+
+ nfs_service = NFSServiceSpec(
+ service_id="foo",
+ placement=PlacementSpec(
+ count=1,
+ hosts=['host1', 'host2']),
+ port=12049,
+ enable_haproxy_protocol=True,
+ )
+
+ ispec = IngressSpec(
+ service_type='ingress',
+ service_id='nfs.foo',
+ backend_service='nfs.foo',
+ frontend_port=2049,
+ monitor_port=9049,
+ virtual_ip='192.168.122.100/24',
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ enable_haproxy_protocol=True,
+ )
+
+ cephadm_module.spec_store._specs = {
+ 'nfs.foo': nfs_service,
+ 'ingress.nfs.foo': ispec
+ }
+ cephadm_module.spec_store.spec_created = {
+ 'nfs.foo': datetime_now(),
+ 'ingress.nfs.foo': datetime_now()
+ }
+
+ haproxy_txt = (
+ '# This file is generated by cephadm.\n'
+ 'global\n'
+ ' log 127.0.0.1 local2\n'
+ ' chroot /var/lib/haproxy\n'
+ ' pidfile /var/lib/haproxy/haproxy.pid\n'
+ ' maxconn 8000\n'
+ ' daemon\n'
+ ' stats socket /var/lib/haproxy/stats\n\n'
+ 'defaults\n'
+ ' mode tcp\n'
+ ' log global\n'
+ ' timeout queue 1m\n'
+ ' timeout connect 10s\n'
+ ' timeout client 1m\n'
+ ' timeout server 1m\n'
+ ' timeout check 10s\n'
+ ' maxconn 8000\n\n'
+ 'frontend stats\n'
+ ' mode http\n'
+ ' bind 192.168.122.100:9049\n'
+ ' bind 192.168.122.111:9049\n'
+ ' stats enable\n'
+ ' stats uri /stats\n'
+ ' stats refresh 10s\n'
+ ' stats auth admin:12345\n'
+ ' http-request use-service prometheus-exporter if { path /metrics }\n'
+ ' monitor-uri /health\n\n'
+ 'frontend frontend\n'
+ ' bind 192.168.122.100:2049\n'
+ ' default_backend backend\n\n'
+ 'backend backend\n'
+ ' mode tcp\n'
+ ' balance source\n'
+ ' hash-type consistent\n'
+ ' default-server send-proxy-v2\n'
+ ' server nfs.foo.0 192.168.122.111:12049\n'
+ )
+ haproxy_expected_conf = {
+ 'files': {'haproxy.cfg': haproxy_txt}
+ }
+
+ nfs_ganesha_txt = (
+ "# This file is generated by cephadm.\n"
+ 'NFS_CORE_PARAM {\n'
+ ' Enable_NLM = false;\n'
+ ' Enable_RQUOTA = false;\n'
+ ' Protocols = 4;\n'
+ ' NFS_Port = 2049;\n'
+ ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n'
+ '}\n'
+ '\n'
+ 'NFSv4 {\n'
+ ' Delegations = false;\n'
+ " RecoveryBackend = 'rados_cluster';\n"
+ ' Minor_Versions = 1, 2;\n'
+ '}\n'
+ '\n'
+ 'RADOS_KV {\n'
+ ' UserId = "nfs.foo.test.0.0";\n'
+ ' nodeid = "nfs.foo.None";\n'
+ ' pool = ".nfs";\n'
+ ' namespace = "foo";\n'
+ '}\n'
+ '\n'
+ 'RADOS_URLS {\n'
+ ' UserId = "nfs.foo.test.0.0";\n'
+ ' watch_url = '
+ '"rados://.nfs/foo/conf-nfs.foo";\n'
+ '}\n'
+ '\n'
+ 'RGW {\n'
+ ' cluster = "ceph";\n'
+ ' name = "client.nfs.foo.test.0.0-rgw";\n'
+ '}\n'
+ '\n'
+ "%url rados://.nfs/foo/conf-nfs.foo"
+ )
+ nfs_expected_conf = {
+ 'files': {'ganesha.conf': nfs_ganesha_txt},
+ 'config': '',
+ 'extra_args': ['-N', 'NIV_EVENT'],
+ 'keyring': (
+ '[client.nfs.foo.test.0.0]\n'
+ 'key = None\n'
+ ),
+ 'namespace': 'foo',
+ 'pool': '.nfs',
+ 'rgw': {
+ 'cluster': 'ceph',
+ 'keyring': (
+ '[client.nfs.foo.test.0.0-rgw]\n'
+ 'key = None\n'
+ ),
+ 'user': 'nfs.foo.test.0.0-rgw',
+ },
+ 'userid': 'nfs.foo.test.0.0',
+ }
+
+ nfs_daemons = [
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id='foo.0.1.host1.qwerty',
+ hostname='host1',
+ rank=0,
+ rank_generation=1,
+ ports=[12049],
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id='foo.0.0.host2.abcdef',
+ hostname='host2',
+ rank=0,
+ rank_generation=0,
+ ports=[12049],
+ ),
+ ]
+ _get_daemons_by_service.return_value = nfs_daemons
+
+ ingress_svc = cephadm_module.cephadm_services['ingress']
+ nfs_svc = cephadm_module.cephadm_services['nfs']
+
+ # add host network info to one host to test the behavior of
+ # adding all known-good addresses of the host to the list.
+ cephadm_module.cache.update_host_networks('host1', {
+ # this one is additional
+ '10.10.2.0/24': {
+ 'eth1': ['10.10.2.20']
+ },
+ # this is redundant and will be skipped
+ '192.168.122.0/24': {
+ 'eth0': ['192.168.122.111']
+ },
+ # this is a link-local address and will be ignored
+ "fe80::/64": {
+ "veth0": [
+ "fe80::8cf5:25ff:fe1c:d963"
+ ],
+ "eth0": [
+ "fe80::c7b:cbff:fef6:7370"
+ ],
+ "eth1": [
+ "fe80::7201:25a7:390b:d9a7"
+ ]
+ },
+ })
+
+ haproxy_generated_conf, _ = ingress_svc.haproxy_generate_config(
+ CephadmDaemonDeploySpec(
+ host='host1',
+ daemon_id='ingress',
+ service_name=ispec.service_name(),
+ ),
+ )
+ assert haproxy_generated_conf == haproxy_expected_conf
+
+ nfs_generated_conf, _ = nfs_svc.generate_config(
+ CephadmDaemonDeploySpec(
+ host='test',
+ daemon_id='foo.test.0.0',
+ service_name=nfs_service.service_name(),
+ ),
+ )
+ assert nfs_generated_conf == nfs_expected_conf
+
+
+class TestCephFsMirror:
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('cephfs-mirror')):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'mgr module enable',
+ 'module': 'mirroring'
+ })
+
+
+class TestJaeger:
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_query(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ spec = TracingSpec(es_nodes="192.168.0.1:9200",
+ service_type="jaeger-query")
+
+ config = {"elasticsearch_nodes": "http://192.168.0.1:9200"}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ "jaeger-query.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-query.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [16686],
+ },
+ "meta": {
+ 'service_name': 'jaeger-query',
+ 'ports': [16686],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_collector_es_deploy(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ collector_spec = TracingSpec(service_type="jaeger-collector")
+ es_spec = TracingSpec(service_type="elasticsearch")
+ es_config = {}
+
+ with with_host(cephadm_module, 'test'):
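+ # note: build_url with no explicit scheme presumably yields a "//<addr>:9200"
+ # form, hence the lstrip("/") before prefixing "http://" below.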
+ collector_config = {
+ "elasticsearch_nodes": f'http://{build_url(host=cephadm_module.inventory.get_addr("test"), port=9200).lstrip("/")}'}
+ with with_service(cephadm_module, es_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "elasticsearch.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'elasticsearch.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [9200],
+ },
+ "meta": {
+ 'service_name': 'elasticsearch',
+ 'ports': [9200],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": es_config,
+ }),
+ )
+ with with_service(cephadm_module, collector_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-collector.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-collector.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [14250],
+ },
+ "meta": {
+ 'service_name': 'jaeger-collector',
+ 'ports': [14250],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": collector_config,
+ }),
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_jaeger_agent(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+ collector_spec = TracingSpec(service_type="jaeger-collector", es_nodes="192.168.0.1:9200")
+ collector_config = {"elasticsearch_nodes": "http://192.168.0.1:9200"}
+
+ agent_spec = TracingSpec(service_type="jaeger-agent")
+ agent_config = {"collector_nodes": "test:14250"}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, collector_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-collector.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-collector.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [14250],
+ },
+ "meta": {
+ 'service_name': 'jaeger-collector',
+ 'ports': [14250],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": collector_config,
+ }),
+ )
+ with with_service(cephadm_module, agent_spec):
+ _run_cephadm.assert_called_with(
+ "test",
+ "jaeger-agent.test",
+ ['_orch', 'deploy'],
+ [],
+ stdin=json.dumps({
+ "fsid": "fsid",
+ "name": 'jaeger-agent.test',
+ "image": '',
+ "deploy_arguments": [],
+ "params": {
+ 'tcp_ports': [6799],
+ },
+ "meta": {
+ 'service_name': 'jaeger-agent',
+ 'ports': [6799],
+ 'ip': None,
+ 'deployed_by': [],
+ 'rank': None,
+ 'rank_generation': None,
+ 'extra_container_args': None,
+ 'extra_entrypoint_args': None,
+ },
+ "config_blobs": agent_config,
+ }),
+ )
diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py
new file mode 100644
index 000000000..78a2d7311
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_spec.py
@@ -0,0 +1,590 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import json
+
+import pytest
+
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \
+ IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec
+from orchestrator import DaemonDescription, OrchestratorError
+
+
+@pytest.mark.parametrize(
+ "spec_json",
+ json.loads("""[
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "alertmanager"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "crash"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "grafana",
+ "protocol": "https"
+},
+{
+ "placement": {
+ "count": 2
+ },
+ "service_type": "mgr"
+},
+{
+ "placement": {
+ "count": 5
+ },
+ "service_type": "mon"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "node-exporter"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "prometheus"
+},
+{
+ "placement": {
+ "hosts": [
+ {
+ "hostname": "ceph-001",
+ "network": "",
+ "name": ""
+ }
+ ]
+ },
+ "service_type": "rgw",
+ "service_id": "default-rgw-realm.eu-central-1.1",
+ "rgw_realm": "default-rgw-realm",
+ "rgw_zone": "eu-central-1"
+},
+{
+ "service_type": "osd",
+ "service_id": "osd_spec_default",
+ "placement": {
+ "host_pattern": "*"
+ },
+ "data_devices": {
+ "model": "MC-55-44-XZ"
+ },
+ "db_devices": {
+ "model": "SSD-123-foo"
+ },
+ "wal_devices": {
+ "model": "NVME-QQQQ-987"
+ }
+}
+]
+""")
+)
+def test_spec_octopus(spec_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ spec = ServiceSpec.from_json(spec_json)
+
+ # just some verification that we can still read old octopus specs
+ def convert_to_old_style_json(j):
+ j_c = dict(j.copy())
+ j_c.pop('service_name', None)
+ if 'spec' in j_c:
+ spec = j_c.pop('spec')
+ j_c.update(spec)
+ if 'placement' in j_c:
+ if 'hosts' in j_c['placement']:
+ j_c['placement']['hosts'] = [
+ {
+ 'hostname': HostPlacementSpec.parse(h).hostname,
+ 'network': HostPlacementSpec.parse(h).network,
+ 'name': HostPlacementSpec.parse(h).name
+ }
+ for h in j_c['placement']['hosts']
+ ]
+ j_c.pop('objectstore', None)
+ j_c.pop('filter_logic', None)
+ j_c.pop('anonymous_access', None)
+ return j_c
+
+ assert spec_json == convert_to_old_style_json(spec.to_json())
+
+
+@pytest.mark.parametrize(
+ "dd_json",
+ json.loads("""[
+ {
+ "hostname": "ceph-001",
+ "container_id": "d94d7969094d",
+ "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0",
+ "container_image_name": "docker.io/prom/alertmanager:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "alertmanager",
+ "version": "0.20.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725856",
+ "created": "2020-04-02T19:23:08.829543",
+ "started": "2020-04-03T07:29:16.932838",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "c4b036202241",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "crash",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725903",
+ "created": "2020-04-02T19:23:11.390694",
+ "started": "2020-04-03T07:29:16.910897",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "5b7b94b48f31",
+ "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4",
+ "container_image_name": "docker.io/ceph/ceph-grafana:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "grafana",
+ "version": "6.6.2",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725950",
+ "created": "2020-04-02T19:23:52.025088",
+ "started": "2020-04-03T07:29:16.847972",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "9ca007280456",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001.gkjwqp",
+ "daemon_type": "mgr",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725807",
+ "created": "2020-04-02T19:22:18.648584",
+ "started": "2020-04-03T07:29:16.856153",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "3d1ba9a2b697",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "mon",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725715",
+ "created": "2020-04-02T19:22:13.863300",
+ "started": "2020-04-03T07:29:17.206024",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "36d026c68ba1",
+ "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87",
+ "container_image_name": "docker.io/prom/node-exporter:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "node-exporter",
+ "version": "0.18.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725996",
+ "created": "2020-04-02T19:23:53.880197",
+ "started": "2020-04-03T07:29:16.880044",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "faf76193cbfe",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "0",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726088",
+ "created": "2020-04-02T20:35:02.991435",
+ "started": "2020-04-03T07:29:19.373956",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "f82505bae0f1",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "1",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726134",
+ "created": "2020-04-02T20:35:17.142272",
+ "started": "2020-04-03T07:29:19.374002",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "2708d84cd484",
+ "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653",
+ "container_image_name": "docker.io/prom/prometheus:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "prometheus",
+ "version": "2.17.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726042",
+ "created": "2020-04-02T19:24:10.281163",
+ "started": "2020-04-03T07:29:16.926292",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo",
+ "daemon_type": "rgw",
+ "status": 1,
+ "status_desc": "starting",
+ "is_active": false
+ }
+]""")
+)
+def test_dd_octopus(dd_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ # Convert datetime properties to old style.
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292
+ def convert_to_old_style_json(j):
+ for k in ['last_refresh', 'created', 'started', 'last_deployed',
+ 'last_configured']:
+ if k in j:
+ j[k] = j[k].rstrip('Z')
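+ # daemon_name is a newer, derived field that is not present in the
+ # octopus-era JSON, so drop it before comparing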
+ del j['daemon_name']
+ return j
+
+ assert dd_json == convert_to_old_style_json(
+ DaemonDescription.from_json(dd_json).to_json())
+
+
+@pytest.mark.parametrize("spec,dd,valid",
+[ # noqa: E128
+ # https://tracker.ceph.com/issues/44934
+ (
+ RGWSpec(
+ service_id="foo",
+ rgw_realm="default-rgw-realm",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm
+ RGWSpec(
+ service_id="foo.bar",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.bar.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm or zone
+ RGWSpec(
+ service_id="bar",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="bar.host.domain.tld.ytywjo",
+ hostname="host.domain.tld",
+ ),
+ True
+ ),
+ (
+ # explicit naming
+ RGWSpec(
+ service_id="realm.zone",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="realm.zone.a",
+ hostname="smithi028",
+ ),
+ True
+ ),
+ (
+ # without host
+ RGWSpec(
+ service_type='rgw',
+ service_id="foo",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # without host (2)
+ RGWSpec(
+ service_type='rgw',
+ service_id="default-rgw-realm.eu-central-1.1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # service_id contains hostname
+ # (sort of) https://tracker.ceph.com/issues/45294
+ RGWSpec(
+ service_id="default.rgw.realm.ceph.001",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo",
+ hostname="ceph.001",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ ServiceSpec(
+ service_type='mds',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45617
+ (
+ # daemon_id does not contain hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # daemon_id only contains hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="host1",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="host1",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45399
+ (
+ # hostname contains a domain ("host1.site") that is not part of the daemon_id
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1.site",
+ ),
+ True
+ ),
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1.site",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # service_id contains a '.' char
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # trailing chars after hostname
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # chars after hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+ (
+ # chars before hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="ahost1.abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # fixed daemon id for teuthology.
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id='iscsi',
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="iscsi.a",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ (
+ CustomContainerSpec(
+ service_type='container',
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ ),
+ DaemonDescription(
+ daemon_type='container',
+ daemon_id='hello-world.mgr0',
+ hostname='mgr0',
+ ),
+ True
+ ),
+
+])
+def test_daemon_description_service_name(spec: ServiceSpec,
+ dd: DaemonDescription,
+ valid: bool):
+ if valid:
+ assert spec.service_name() == dd.service_name()
+ else:
+ with pytest.raises(OrchestratorError):
+ dd.service_name()
diff --git a/src/pybind/mgr/cephadm/tests/test_ssh.py b/src/pybind/mgr/cephadm/tests/test_ssh.py
new file mode 100644
index 000000000..29f01b6c7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_ssh.py
@@ -0,0 +1,105 @@
+import asyncssh
+from asyncssh.process import SSHCompletedProcess
+from unittest import mock
+try:
+ # AsyncMock was not added until python 3.8
+ from unittest.mock import AsyncMock
+except ImportError:
+ # fall back to the asyncmock backport; if that is also unavailable,
+ # leave AsyncMock as None so the tests below can skip themselves
+ try:
+ from asyncmock import AsyncMock
+ except ImportError:
+ AsyncMock = None
+import pytest
+
+
+try:
+ from asyncssh.misc import ConnectionLost
+except ImportError:
+ ConnectionLost = None
+
+from ceph.deployment.hostspec import HostSpec
+
+from cephadm import CephadmOrchestrator
+from cephadm.serve import CephadmServe
+from cephadm.tests.fixtures import with_host, wait, async_side_effect
+from orchestrator import OrchestratorError
+
+
+@pytest.mark.skipif(ConnectionLost is None, reason='no asyncssh')
+class TestWithSSH:
+ @mock.patch("cephadm.ssh.SSHManager._execute_command")
+ @mock.patch("cephadm.ssh.SSHManager._check_execute_command")
+ def test_offline(self, check_execute_command, execute_command, cephadm_module):
+ check_execute_command.side_effect = async_side_effect('')
+ execute_command.side_effect = async_side_effect(('', '', 0))
+
+ if not AsyncMock:
+ # can't run this test if we could not import AsyncMock
+ return
+ mock_connect = AsyncMock(return_value='')
+ with mock.patch("asyncssh.connect", new=mock_connect) as asyncssh_connect:
+ with with_host(cephadm_module, 'test'):
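+ # with_host registers the host using the fixtures' default test address
+ # (1::4), which is what the offline error message and HostSpec below reflect.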
+ asyncssh_connect.side_effect = ConnectionLost('reason')
+ code, out, err = cephadm_module.check_host('test')
+ assert out == ''
+ assert "Failed to connect to test at address (1::4)" in err
+
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4', status='Offline').to_json()
+
+ asyncssh_connect.return_value = mock.MagicMock()
+ asyncssh_connect.side_effect = None
+ assert CephadmServe(cephadm_module)._check_host('test') is None
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4').to_json()
+
+ def test_ssh_remote_cmds_execution(self, cephadm_module):
+
+ if not AsyncMock:
+ # can't run this test if we could not import AsyncMock
+ return
+
+ class FakeConn:
+ def __init__(self, exception=None, returncode=0):
+ self.exception = exception
+ self.returncode = returncode
+
+ async def run(self, *args, **kwargs):
+ if self.exception:
+ raise self.exception
+ else:
+ return SSHCompletedProcess(returncode=self.returncode, stdout="", stderr="")
+
+ async def close(self):
+ pass
+
+ def run_test(host, conn, expected_error):
+ mock_connect = AsyncMock(return_value=conn)
+ with pytest.raises(OrchestratorError, match=expected_error):
+ with mock.patch("asyncssh.connect", new=mock_connect):
+ with with_host(cephadm_module, host):
+ CephadmServe(cephadm_module)._check_host(host)
+
+ # Test case 1: command failure
+ run_test('test1', FakeConn(returncode=1), "Command .+ failed")
+
+ # Test case 2: connection error
+ run_test('test2', FakeConn(exception=asyncssh.ChannelOpenError(1, "", "")), "Unable to reach remote host test2.")
+
+ # Test case 3: asyncssh ProcessError
+ stderr = "my-process-stderr"
+ run_test('test3', FakeConn(exception=asyncssh.ProcessError(returncode=3,
+ env="",
+ command="",
+ subsystem="",
+ exit_status="",
+ exit_signal="",
+ stderr=stderr,
+ stdout="")), f"Cannot execute the command.+{stderr}")
+ # Test case 4: generic error
+ run_test('test4', FakeConn(exception=Exception), "Generic error while executing command.+")
+
+
+@pytest.mark.skipif(ConnectionLost is not None, reason='asyncssh')
+class TestWithoutSSH:
+ def test_can_run(self, cephadm_module: CephadmOrchestrator):
+ assert cephadm_module.can_run() == (False, "loading asyncssh library:No module named 'asyncssh'")
diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py
new file mode 100644
index 000000000..f67304348
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_template.py
@@ -0,0 +1,33 @@
+import pathlib
+
+import pytest
+
+from cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError
+
+
+def test_render(cephadm_module, fs):
+ template_base = (pathlib.Path(__file__).parent / '../templates').resolve()
+ fake_template = template_base / 'foo/bar'
+ fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}')
+
+ template_mgr = TemplateMgr(cephadm_module)
+ value = 'test'
+
+ # with base context
+ expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value)
+ assert template_mgr.render('foo/bar', {'var': value}) == expected_text
+
+ # without base context
+ with pytest.raises(UndefinedError):
+ template_mgr.render('foo/bar', {'var': value}, managed_context=False)
+
+ # override the base context
+ context = {
+ 'cephadm_managed': 'abc',
+ 'var': value
+ }
+ assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value)
+
+ # template not found
+ with pytest.raises(TemplateNotFoundError):
+ template_mgr.render('foo/bar/2', {})
diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
new file mode 100644
index 000000000..66feaee31
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
@@ -0,0 +1,256 @@
+import pytest
+import json
+from tests import mock
+from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR
+from cephadm.inventory import TunedProfileStore
+from ceph.utils import datetime_now
+from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec
+from cephadm.ssh import SSHManager
+from orchestrator import HostSpec
+
+from typing import List, Dict
+
+
+class SaveError(Exception):
+ pass
+
+
+class FakeCache:
+ def __init__(self,
+ hosts,
+ schedulable_hosts,
+ unreachable_hosts):
+ self.hosts = hosts
+ self.unreachable_hosts = [HostSpec(h) for h in unreachable_hosts]
+ self.schedulable_hosts = [HostSpec(h) for h in schedulable_hosts]
+ self.last_tuned_profile_update = {}
+
+ def get_hosts(self):
+ return self.hosts
+
+ def get_schedulable_hosts(self):
+ return self.schedulable_hosts
+
+ def get_unreachable_hosts(self):
+ return self.unreachable_hosts
+
+ def get_draining_hosts(self):
+ return []
+
+ def is_host_unreachable(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_unreachable_hosts()]
+
+ def is_host_schedulable(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_schedulable_hosts()]
+
+ def is_host_draining(self, hostname: str):
+ return hostname in [h.hostname for h in self.get_draining_hosts()]
+
+ @property
+ def networks(self):
+ return {h: {'a': {'b': ['c']}} for h in self.hosts}
+
+ def host_needs_tuned_profile_update(self, host, profile_name):
+ return profile_name == 'p2'
+
+
+class FakeMgr:
+ def __init__(self,
+ hosts: List[str],
+ schedulable_hosts: List[str],
+ unreachable_hosts: List[str],
+ profiles: Dict[str, TunedProfileSpec]):
+ self.cache = FakeCache(hosts, schedulable_hosts, unreachable_hosts)
+ self.tuned_profiles = TunedProfileStore(self)
+ self.tuned_profiles.profiles = profiles
+ self.ssh = SSHManager(self)
+ self.offline_hosts = []
+ self.log_refresh_metadata = False
+
+ def set_store(self, what: str, value: str):
+ raise SaveError(f'{what}: {value}')
+
+ def get_store(self, what: str):
+ if what == 'tuned_profiles':
+ return json.dumps({'x': TunedProfileSpec('x',
+ PlacementSpec(hosts=['x']),
+ {'x': 'x'}).to_json(),
+ 'y': TunedProfileSpec('y',
+ PlacementSpec(hosts=['y']),
+ {'y': 'y'}).to_json()})
+ return ''
+
+
+class TestTunedProfiles:
+ tspec1 = TunedProfileSpec('p1',
+ PlacementSpec(hosts=['a', 'b', 'c']),
+ {'setting1': 'value1',
+ 'setting2': 'value2',
+ 'setting with space': 'value with space'})
+ tspec2 = TunedProfileSpec('p2',
+ PlacementSpec(hosts=['a', 'c']),
+ {'something': 'something_else',
+ 'high': '5'})
+ tspec3 = TunedProfileSpec('p3',
+ PlacementSpec(hosts=['c']),
+ {'wow': 'wow2',
+ 'setting with space': 'value with space',
+ 'down': 'low'})
+
+ def profiles_to_calls(self, tp: TunedProfileUtils, profiles: List[TunedProfileSpec]) -> List[Dict[str, str]]:
+ # this function takes a list of tuned profiles and returns, for each profile,
+ # a single-entry dict mapping the profile name to the string that will be
+ # written to the actual config file on the host.
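+ # e.g. profiles_to_calls(tp, [tspec1, tspec2]) would give
+ # [{'p1': <p1 conf string>}, {'p2': <p2 conf string>}], which is the shape
+ # _write_tuned_profiles is expected to be called with below.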
+ res = []
+ for p in profiles:
+ p_str = tp._profile_to_str(p)
+ res.append({p.profile_name: p_str})
+ return res
+
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles")
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._write_tuned_profiles")
+ def test_write_all_tuned_profiles(self, _write_profiles, _rm_profiles):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._write_all_tuned_profiles()
+ # need to check that _write_tuned_profiles is correctly called with the
+ # profiles that match the tuned profile placements and with the correct
+ # strings that should be generated from the settings the profiles have.
+ # the profiles_to_calls helper allows us to generate the input we
+ # should check against
+ calls = [
+ mock.call('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])),
+ mock.call('b', self.profiles_to_calls(tp, [self.tspec1])),
+ mock.call('c', self.profiles_to_calls(tp, [self.tspec1, self.tspec2, self.tspec3]))
+ ]
+ _write_profiles.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ def test_rm_stray_tuned_profiles(self, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test, host "a" starts with 4 cephadm-generated profile files on disk
+ # ("p1", "p2", "p3" and "who"), only two of which ("p1", "p2") are expected to
+ # be there, as well as a file not generated by cephadm. Only the "p3" and "who"
+ # profiles should be removed from the host. This should total 4 calls to
+ # check_execute_command: 1 "ls", 2 "rm", and 1 "sysctl --system"
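+ # (cephadm-owned profile files are identified by the
+ # "<profile>-cephadm-tuned-profile.conf" naming convention under SYSCTL_DIR)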
+ _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf',
+ 'p2-cephadm-tuned-profile.conf',
+ 'p3-cephadm-tuned-profile.conf',
+ 'who-cephadm-tuned-profile.conf',
+ 'dont-touch-me'])
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
+ calls = [
+ mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
+ mock.call('a', ['sysctl', '--system'])
+ ]
+ _check_execute_command.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ @mock.patch('cephadm.ssh.SSHManager.write_remote_file')
+ def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test we will use host "a" and have it so host_needs_tuned_profile_update
+ # returns True for p2 and False for p1 (see FakeCache class). So we should see
+ # 2 ssh calls, one to write p2, one to run sysctl --system
+ _check_execute_command.return_value = 'success'
+ _write_remote_file.return_value = 'success'
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
+ _check_execute_command.assert_called_with('a', ['sysctl', '--system'])
+ _write_remote_file.assert_called_with(
+ 'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8'))
+
+ def test_dont_write_to_unreachable_hosts(self):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+
+ # list host "a" and "b" as hosts that exist, "a" will be
+ # a normal, schedulable host and "b" is considered unreachable
+ mgr = FakeMgr(['a', 'b'],
+ ['a'],
+ ['b'],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+
+ assert 'a' not in tp.mgr.cache.last_tuned_profile_update
+ assert 'b' not in tp.mgr.cache.last_tuned_profile_update
+
+ # with an online host, should proceed as normal. Providing
+ # no actual profiles here though so the only actual action taken
+ # is updating the entry in the last_tuned_profile_update dict
+ tp._write_tuned_profiles('a', {})
+ assert 'a' in tp.mgr.cache.last_tuned_profile_update
+
+ # trying to write to an unreachable host should be a no-op
+ # and return immediately. No entry for 'b' should be added
+ # to the last_tuned_profile_update dict
+ tp._write_tuned_profiles('b', {})
+ assert 'b' not in tp.mgr.cache.last_tuned_profile_update
+
+ def test_store(self):
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ {})
+ tps = TunedProfileStore(mgr)
+ save_str_p1 = 'tuned_profiles: ' + json.dumps({'p1': self.tspec1.to_json()})
+ tspec1_updated = self.tspec1.copy()
+ tspec1_updated.settings.update({'new-setting': 'new-value'})
+ save_str_p1_updated = 'tuned_profiles: ' + json.dumps({'p1': tspec1_updated.to_json()})
+ save_str_p1_updated_p2 = 'tuned_profiles: ' + \
+ json.dumps({'p1': tspec1_updated.to_json(), 'p2': self.tspec2.to_json()})
+ tspec2_updated = self.tspec2.copy()
+ tspec2_updated.settings.pop('something')
+ save_str_p1_updated_p2_updated = 'tuned_profiles: ' + \
+ json.dumps({'p1': tspec1_updated.to_json(), 'p2': tspec2_updated.to_json()})
+ save_str_p2_updated = 'tuned_profiles: ' + json.dumps({'p2': tspec2_updated.to_json()})
+ with pytest.raises(SaveError) as e:
+ tps.add_profile(self.tspec1)
+ assert str(e.value) == save_str_p1
+ assert 'p1' in tps
+ with pytest.raises(SaveError) as e:
+ tps.add_setting('p1', 'new-setting', 'new-value')
+ assert str(e.value) == save_str_p1_updated
+ assert 'new-setting' in tps.list_profiles()[0].settings
+ with pytest.raises(SaveError) as e:
+ tps.add_profile(self.tspec2)
+ assert str(e.value) == save_str_p1_updated_p2
+ assert 'p2' in tps
+ assert 'something' in tps.list_profiles()[1].settings
+ with pytest.raises(SaveError) as e:
+ tps.rm_setting('p2', 'something')
+ assert 'something' not in tps.list_profiles()[1].settings
+ assert str(e.value) == save_str_p1_updated_p2_updated
+ with pytest.raises(SaveError) as e:
+ tps.rm_profile('p1')
+ assert str(e.value) == save_str_p2_updated
+ assert 'p1' not in tps
+ assert 'p2' in tps
+ assert len(tps.list_profiles()) == 1
+ assert tps.list_profiles()[0].profile_name == 'p2'
+
+ cur_last_updated = tps.last_updated('p2')
+ new_last_updated = datetime_now()
+ assert cur_last_updated != new_last_updated
+ tps.set_last_updated('p2', new_last_updated)
+ assert tps.last_updated('p2') == new_last_updated
+
+ # check FakeMgr get_store func to see what is expected to be found in Key Store here
+ tps.load()
+ assert 'x' in tps
+ assert 'y' in tps
+ assert [p for p in tps.list_profiles() if p.profile_name == 'x'][0].settings == {'x': 'x'}
+ assert [p for p in tps.list_profiles() if p.profile_name == 'y'][0].settings == {'y': 'y'}
diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py
new file mode 100644
index 000000000..3b5c305b5
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py
@@ -0,0 +1,481 @@
+import json
+from unittest import mock
+
+import pytest
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from cephadm import CephadmOrchestrator
+from cephadm.upgrade import CephadmUpgrade, UpgradeState
+from cephadm.ssh import HostConnectionError
+from cephadm.utils import ContainerInspectInfo
+from orchestrator import OrchestratorError, DaemonDescription
+from .fixtures import _run_cephadm, wait, with_host, with_service, \
+ receive_agent_metadata, async_side_effect
+
+from typing import List, Tuple, Optional
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_start(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'image_id', None)) == 'Initiating upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_pause()
+ ) == 'Paused upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_resume()
+ ) == 'Resumed upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_stop()
+ ) == 'Stopped upgrade to image_id'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.offline_hosts = set(['test2'])
+ with pytest.raises(OrchestratorError, match=r"Upgrade aborted - Some host\(s\) are currently offline: {'test2'}"):
+ cephadm_module.upgrade_start('image_id', None)
+ cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_daemons_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", side_effect=HostConnectionError('connection failure reason', 'test2', '192.168.122.1')):
+ _to_upgrade = [(DaemonDescription(daemon_type='crash', daemon_id='test2', hostname='test2'), True)]
+ with pytest.raises(HostConnectionError, match=r"connection failure reason"):
+ cephadm_module.upgrade._upgrade_daemons(_to_upgrade, 'target_image', ['digest1'])
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_do_upgrade_offline_hosts(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
+ cephadm_module.offline_hosts = set(['test2'])
+ with pytest.raises(HostConnectionError, match=r"Host\(s\) were marked offline: {'test2'}"):
+ cephadm_module.upgrade._do_upgrade()
+ cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.module.CephadmOrchestrator.remove_health_warning")
+def test_upgrade_resume_clear_health_warnings(_rm_health_warning, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0, paused=True)
+ _rm_health_warning.return_value = None
+ assert wait(cephadm_module, cephadm_module.upgrade_resume()
+ ) == 'Resumed upgrade to target_image'
+ calls_list = [mock.call(alert_id) for alert_id in cephadm_module.upgrade.UPGRADE_ERRORS]
+ _rm_health_warning.assert_has_calls(calls_list, any_order=True)
+
+
+@mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', lambda _: (17, 2, 6))
+@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
+def test_upgrade_check_with_ceph_version(_get_img_info, cephadm_module: CephadmOrchestrator):
+ # This test was added to catch a regression where appending the version to
+ # the image base produced an incorrect image name. The issue caused the
+ # image to come out as
+ # quay.io/ceph/ceph:v18:v18.2.0
+ # see https://tracker.ceph.com/issues/63150
+ _img = ''
+
+ def _fake_get_img_info(img_name):
+ nonlocal _img
+ _img = img_name
+ return ContainerInspectInfo(
+ 'image_id',
+ '18.2.0',
+ 'digest'
+ )
+
+ _get_img_info.side_effect = _fake_get_img_info
+ cephadm_module.upgrade_check('', '18.2.0')
+ assert _img == 'quay.io/ceph/ceph:v18.2.0'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ cephadm_module.set_container_image('global', 'from_image')
+ cephadm_module.use_repo_digest = use_repo_digest
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True), \
+ mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
+ return_value='foo'), \
+ mock.patch("cephadm.module.CephadmOrchestrator.version",
+ new_callable=mock.PropertyMock) as version_mock, \
+ mock.patch("cephadm.module.CephadmOrchestrator.get",
+ return_value={
+ # capture fields in both mon and osd maps
+ "require_osd_release": "pacific",
+ "min_mon_release": 16,
+ }):
+ version_mock.return_value = 'ceph version 18.2.1 (somehash)'
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'to_image', None)) == 'Initiating upgrade to to_image'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'to_image'
+
+ def _versions_mock(cmd):
+ return json.dumps({
+ 'mgr': {
+ 'ceph version 1.2.3 (asdf) blah': 1
+ }
+ })
+
+ cephadm_module._mon_command_mock_versions = _versions_mock
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+
+ cephadm_module.upgrade._do_upgrade()
+
+ assert cephadm_module.upgrade_status is not None
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name=list(cephadm_module.cache.daemons['host1'].keys())[0],
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ container_image_name='to_image',
+ container_image_id='image_id',
+ container_image_digests=['to_image@repo_digest'],
+ deployed_by=['to_image@repo_digest'],
+ version='version',
+ state='running',
+ )
+ ])
+ )):
+ receive_agent_metadata(cephadm_module, 'host1', ['ls'])
+ receive_agent_metadata(cephadm_module, 'host2', ['ls'])
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+ cephadm_module.upgrade._do_upgrade()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'to_image@repo_digest'
+ else:
+ assert image == 'to_image'
+
+
+def test_upgrade_state_null(cephadm_module: CephadmOrchestrator):
+ # This test validates https://tracker.ceph.com/issues/47580
+ cephadm_module.set_store('upgrade_state', 'null')
+ CephadmUpgrade(cephadm_module)
+ assert CephadmUpgrade(cephadm_module).upgrade_state is None
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''):
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.upgrade_start('image_id', None))
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
+def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator):
+ # only 2 monitors, not enough for ok-to-stop to ever pass
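+ # (stopping one of only two mons would cost the cluster its monitor quorum,
+ # which needs a majority of mons up)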
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '')
+ assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+ # 3 monitors, ok-to-stop should work fine
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
+ assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
+@mock.patch("cephadm.CephadmOrchestrator.get")
+def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+
+@pytest.mark.parametrize("current_version, use_tags, show_all_versions, tags, result",
+ [
+ # several candidate versions (from different major versions)
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v16.2.5',
+ 'v16.1.4',
+ 'v16.1.3',
+ 'v15.2.0',
+ ],
+ ['17.1.0', '16.2.7', '16.2.6', '16.2.5', '16.1.4', '16.1.3']
+ ),
+ # candidate minor versions are available
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v16.2.2',
+ 'v16.2.1',
+ 'v16.1.6',
+ ],
+ ['16.2.2', '16.2.1', '16.1.6']
+ ),
+ # all versions are less than the current version
+ (
+ (17, 2, '17.2.0'),
+ False, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ ],
+ []
+ ),
+ # show all versions (regardless of the current version)
+ (
+ (16, 1, '16.1.0'),
+ False, # use_tags
+ True, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v15.1.0',
+ 'v14.2.0',
+ ],
+ ['17.1.0', '16.2.7', '16.2.6', '15.1.0', '14.2.0']
+ ),
+ # show all tags (regardless of the current version and show_all_versions flag)
+ (
+ (16, 1, '16.1.0'),
+ True, # use_tags
+ False, # show_all_versions
+ [
+ 'v17.1.0',
+ 'v16.2.7',
+ 'v16.2.6',
+ 'v16.2.5',
+ 'v16.1.4',
+ 'v16.1.3',
+ 'v15.2.0',
+ ],
+ ['v15.2.0', 'v16.1.3', 'v16.1.4', 'v16.2.5',
+ 'v16.2.6', 'v16.2.7', 'v17.1.0']
+ ),
+ ])
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_ls(current_version, use_tags, show_all_versions, tags, result, cephadm_module: CephadmOrchestrator):
+ with mock.patch('cephadm.upgrade.Registry.get_tags', return_value=tags):
+ with mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', return_value=current_version):
+ out = cephadm_module.upgrade.upgrade_ls(None, use_tags, show_all_versions)
+ if use_tags:
+ assert out['tags'] == result
+ else:
+ assert out['versions'] == result
+
+
+@pytest.mark.parametrize(
+ "upgraded, not_upgraded, daemon_types, hosts, services, should_block",
+ # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... ]
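+ # the rule being validated: a staggered upgrade filter is rejected whenever it
+ # would leave a not-yet-upgraded mgr daemon outside the filter while upgrading
+ # other daemons, since mgr daemons must always be upgraded first.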
+ [
+ ( # valid, upgrade mgr daemons
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mgr'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mons until mgr is upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ True
+ ),
+ ( # invalid, can't upgrade mon service until all mgr daemons are upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mon'],
+ True
+ ),
+ ( # valid, upgrade mgr service
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mgr'],
+ False
+ ),
+ ( # valid, mgr is already upgraded so can upgrade mons
+ [('mgr', 'a', 'a.x')],
+ [('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a
+ [],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only daemon on b is a mgr
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only upgrading the mgr on a
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr'],
+ ['a'],
+ None,
+ False
+ ),
+ ( # valid, the mgr daemon not on b is already upgraded
+ [('mgr', 'a', 'a.x')],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # valid, all the necessary hosts are covered, mgr on c is already upgraded
+ [('mgr', 'c', 'c.z')],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')],
+ None,
+ ['a', 'b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr', 'mon'],
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, the only mon not on "b" is already upgraded. Case hit while writing a teuthology test
+ [('mon', 'a', 'a')],
+ [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')],
+ ['mon', 'osd'],
+ ['b'],
+ None,
+ False
+ ),
+ ]
+)
+@mock.patch("cephadm.module.HostCache.get_daemons")
+@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
+@mock.patch('cephadm.module.SpecStore.__getitem__')
+def test_staggered_upgrade_validation(
+ get_spec,
+ get_image_info,
+ get_daemons,
+ upgraded: List[Tuple[str, str, str]],
+ not_upgraded: List[Tuple[str, str, str]],
+ daemon_types: Optional[List[str]],
+ hosts: Optional[List[str]],
+ services: Optional[List[str]],
+ should_block: bool,
+ cephadm_module: CephadmOrchestrator,
+):
+ def to_dds(ts: List[Tuple[str, str, str]], upgraded: bool) -> List[DaemonDescription]:
+ dds = []
+ digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest'
+ for t in ts:
+ dds.append(DaemonDescription(daemon_type=t[0],
+ hostname=t[1],
+ daemon_id=t[2],
+ container_image_digests=[digest],
+ deployed_by=[digest],))
+ return dds
+ get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False)
+ get_image_info.side_effect = async_side_effect(
+ ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest']))
+
+ class FakeSpecDesc():
+ def __init__(self, spec):
+ self.spec = spec
+
+ def _get_spec(s):
+ return FakeSpecDesc(ServiceSpec(s))
+
+ get_spec.side_effect = _get_spec
+ if should_block:
+ with pytest.raises(OrchestratorError):
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)
+ else:
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)