41 files changed, 18388 insertions, 0 deletions
diff --git a/src/cephadm/.gitignore b/src/cephadm/.gitignore
new file mode 100644
index 000000000..8d1529027
--- /dev/null
+++ b/src/cephadm/.gitignore
@@ -0,0 +1,6 @@
+# tox related
+.coverage*
+htmlcov
+.tox
+coverage.xml
+.mypy_cache
diff --git a/src/cephadm/CMakeLists.txt b/src/cephadm/CMakeLists.txt
new file mode 100644
index 000000000..8b969bc33
--- /dev/null
+++ b/src/cephadm/CMakeLists.txt
@@ -0,0 +1,28 @@
+if(WITH_TESTS)
+  include(AddCephTest)
+  add_tox_test(cephadm TOX_ENVS py3 mypy flake8)
+endif()
+
+set(bin_target_file ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cephadm)
+
+add_custom_command(
+  OUTPUT "${bin_target_file}"
+  DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/cephadm.py
+    ${CMAKE_CURRENT_SOURCE_DIR}/build.py
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  COMMAND ${Python3_EXECUTABLE} build.py
+    --set-version-var=CEPH_GIT_VER=${CEPH_GIT_VER}
+    --set-version-var=CEPH_GIT_NICE_VER=${CEPH_GIT_NICE_VER}
+    --set-version-var=CEPH_RELEASE=${CEPH_RELEASE}
+    --set-version-var=CEPH_RELEASE_NAME=${CEPH_RELEASE_NAME}
+    --set-version-var=CEPH_RELEASE_TYPE=${CEPH_RELEASE_TYPE}
+    ${bin_target_file}
+)
+
+add_custom_target(cephadm ALL
+  DEPENDS "${bin_target_file}")
+
+install(PROGRAMS
+  ${bin_target_file}
+  DESTINATION ${CMAKE_INSTALL_SBINDIR})
diff --git a/src/cephadm/box/DockerfileDocker b/src/cephadm/box/DockerfileDocker
new file mode 100644
index 000000000..f64b48e4c
--- /dev/null
+++ b/src/cephadm/box/DockerfileDocker
@@ -0,0 +1,33 @@
+# https://developers.redhat.com/blog/2014/05/05/running-systemd-within-docker-container/
+FROM centos:8 as centos-systemd
+ENV container docker
+ENV CEPHADM_PATH=/usr/local/sbin/cephadm
+
+# Centos met EOL and the content of the CentOS 8 repos has been moved to vault.centos.org
+RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-*
+RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=https://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*
+
+RUN dnf -y install chrony firewalld lvm2 \
+  openssh-server openssh-clients python3 \
+  yum-utils sudo which && dnf clean all
+
+RUN systemctl enable chronyd firewalld sshd
+
+
+FROM centos-systemd as centos-systemd-docker
+# To cache cephadm images
+RUN yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
+RUN dnf -y install docker-ce && \
+    dnf clean all && systemctl enable docker
+
+# ssh utilities
+RUN dnf install epel-release -y && dnf makecache && dnf install sshpass -y
+RUN touch /.box_container # empty file to check if inside a container
+
+EXPOSE 8443
+EXPOSE 22
+
+FROM centos-systemd-docker
+WORKDIR /root
+
+CMD [ "/usr/sbin/init" ]
diff --git a/src/cephadm/box/DockerfilePodman b/src/cephadm/box/DockerfilePodman
new file mode 100644
index 000000000..115c3c730
--- /dev/null
+++ b/src/cephadm/box/DockerfilePodman
@@ -0,0 +1,64 @@
+# stable/Dockerfile
+#
+# Build a Podman container image from the latest
+# stable version of Podman on the Fedoras Updates System.
+# https://bodhi.fedoraproject.org/updates/?search=podman
+# This image can be used to create a secured container
+# that runs safely with privileges within the container.
+#
+FROM fedora:34
+
+ENV CEPHADM_PATH=/usr/local/sbin/cephadm
+RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume
+
+# Don't include container-selinux and remove
+# directories used by yum that are just taking
+# up space.
+RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \
+yum -y install podman fuse-overlayfs --exclude container-selinux; \
+rm -rf /var/cache /var/log/dnf* /var/log/yum.*
+
+RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y
+
+ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
+ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/podman-containers.conf /root/.config/containers/containers.conf
+
+RUN mkdir -p /root/.local/share/containers; # chown podman:podman -R /home/podman
+
+# Note VOLUME options must always happen after the chown call above
+# RUN commands can not modify existing volumes
+VOLUME /var/lib/containers
+VOLUME /root/.local/share/containers
+
+# chmod containers.conf and adjust storage.conf to enable Fuse storage.
+RUN chmod 644 /etc/containers/containers.conf; sed -i -e 's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var/lib/shared",' -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' /etc/containers/storage.conf
+RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers /var/lib/shared/vfs-images /var/lib/shared/vfs-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock; touch /var/lib/shared/vfs-images/images.lock; touch /var/lib/shared/vfs-layers/layers.lock
+
+RUN echo 'root:root' | chpasswd
+
+RUN dnf install -y adjtimex # adjtimex syscall doesn't exist in fedora 35+ therefore we have to install it manually
+                    # so chronyd works
+RUN dnf install -y strace sysstat # debugging tools
+RUN dnf -y install hostname iproute udev
+ENV _CONTAINERS_USERNS_CONFIGURED=""
+
+RUN useradd podman; \
+echo podman:0:5000 > /etc/subuid; \
+echo podman:0:5000 > /etc/subgid; \
+echo root:0:65535 > /etc/subuid; \
+echo root:0:65535 > /etc/subgid;
+
+VOLUME /home/podman/.local/share/containers
+
+ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
+ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/podman-containers.conf /home/podman/.config/containers/containers.conf
+
+RUN chown podman:podman -R /home/podman
+
+RUN echo 'podman:podman' | chpasswd
+RUN touch /.box_container # empty file to check if inside a container
+
+EXPOSE 8443
+EXPOSE 22
+
+ENTRYPOINT ["/usr/sbin/init"]
diff --git a/src/cephadm/box/__init__.py b/src/cephadm/box/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/box/__init__.py
diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py
new file mode 100755
index 000000000..db2f24233
--- /dev/null
+++ b/src/cephadm/box/box.py
@@ -0,0 +1,414 @@
+#!/bin/python3
+import argparse
+import os
+import stat
+import json
+import sys
+import host
+import osd
+from multiprocessing import Process, Pool
+from util import (
+    BoxType,
+    Config,
+    Target,
+    ensure_inside_container,
+    ensure_outside_container,
+    get_boxes_container_info,
+    run_cephadm_shell_command,
+    run_dc_shell_command,
+    run_dc_shell_commands,
+    get_container_engine,
+    run_shell_command,
+    run_shell_commands,
+    ContainerEngine,
+    DockerEngine,
+    PodmanEngine,
+    colored,
+    engine,
+    engine_compose,
+    Colors,
+    get_seed_name
+)
+
+CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
+BOX_IMAGE = 'cephadm-box:latest'
+
+# NOTE: this image tar is a trickeroo so cephadm won't pull the image everytime
+# we deploy a cluster. Keep in mind that you'll be responsible for pulling the
+# image yourself with `./box.py -v cluster setup`
+CEPH_IMAGE_TAR = 'docker/ceph/image/quay.ceph.image.tar'
+CEPH_ROOT = '../../../'
+DASHBOARD_PATH = '../../../src/pybind/mgr/dashboard/frontend/'
+
+root_error_msg = """
+WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+sudo with this script can kill your computer, try again without sudo
+if you value your time.
+"""
+
+def remove_ceph_image_tar():
+    if os.path.exists(CEPH_IMAGE_TAR):
+        os.remove(CEPH_IMAGE_TAR)
+
+
+def cleanup_box() -> None:
+    osd.cleanup_osds()
+    remove_ceph_image_tar()
+
+
+def image_exists(image_name: str):
+    # extract_tag
+    assert image_name.find(':')
+    image_name, tag = image_name.split(':')
+    engine = get_container_engine()
+    images = engine.run('image ls').split('\n')
+    IMAGE_NAME = 0
+    TAG = 1
+    for image in images:
+        image = image.split()
+        print(image)
+        print(image_name, tag)
+        if image[IMAGE_NAME] == image_name and image[TAG] == tag:
+            return True
+    return False
+
+
+def get_ceph_image():
+    print('Getting ceph image')
+    engine = get_container_engine()
+    engine.run(f'pull {CEPH_IMAGE}')
+    # update
+    engine.run(f'build -t {CEPH_IMAGE} docker/ceph')
+    if not os.path.exists('docker/ceph/image'):
+        os.mkdir('docker/ceph/image')
+
+    remove_ceph_image_tar()
+
+    engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
+    run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
+    print('Ceph image added')
+
+
+def get_box_image():
+    print('Getting box image')
+    engine = get_container_engine()
+    engine.run(f'build -t cephadm-box -f {engine.dockerfile} .')
+    print('Box image added')
+
+def check_dashboard():
+    if not os.path.exists(os.path.join(CEPH_ROOT, 'dist')):
+        print(colored('Missing build in dashboard', Colors.WARNING))
+
+def check_cgroups():
+    if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
+        print(colored('cgroups v1 is not supported', Colors.FAIL))
+        print('Enable cgroups v2 please')
+        sys.exit(666)
+
+def check_selinux():
+    selinux = run_shell_command('getenforce')
+    if 'Disabled' not in selinux:
+        print(colored('selinux should be disabled, please disable it if you '
+                       'don\'t want unexpected behaviour.', Colors.WARNING))
+def dashboard_setup():
+    command = f'cd {DASHBOARD_PATH} && npm install'
+    run_shell_command(command)
+    command = f'cd {DASHBOARD_PATH} && npm run build'
+    run_shell_command(command)
+
+class Cluster(Target):
+    _help = 'Manage docker cephadm boxes'
+    actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup']
+
+    def set_args(self):
+        self.parser.add_argument(
+            'action', choices=Cluster.actions, help='Action to perform on the box'
+        )
+        self.parser.add_argument('--osds', type=int, default=3, help='Number of osds')
+
+        self.parser.add_argument('--hosts', type=int, default=1, help='Number of hosts')
+        self.parser.add_argument('--skip-deploy-osds', action='store_true', help='skip deploy osd')
+        self.parser.add_argument('--skip-create-loop', action='store_true', help='skip create loopback device')
+        self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack')
+        self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard')
+        self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds')
+        self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel')
+
+    @ensure_outside_container
+    def setup(self):
+        check_cgroups()
+        check_selinux()
+
+        targets = [
+                get_ceph_image,
+                get_box_image,
+                dashboard_setup
+        ]
+        results = []
+        jobs = Config.get('jobs')
+        if jobs:
+            jobs = int(jobs)
+        else:
+            jobs = None
+        pool = Pool(jobs)
+        for target in targets:
+            results.append(pool.apply_async(target))
+
+        for result in results:
+            result.wait()
+
+
+    @ensure_outside_container
+    def cleanup(self):
+        cleanup_box()
+
+    @ensure_inside_container
+    def bootstrap(self):
+        print('Running bootstrap on seed')
+        cephadm_path = str(os.environ.get('CEPHADM_PATH'))
+
+        engine = get_container_engine()
+        if isinstance(engine, DockerEngine):
+            engine.restart()
+        st = os.stat(cephadm_path)
+        os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)
+
+        engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
+        # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
+        # instead of main branch's tag
+        run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
+        run_shell_command('export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main')
+        run_shell_command(
+            'echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main" >> ~/.bashrc'
+        )
+
+        extra_args = []
+
+        extra_args.append('--skip-pull')
+
+        # cephadm prints in warning, let's redirect it to the output so shell_command doesn't
+        # complain
+        extra_args.append('2>&0')
+
+        extra_args = ' '.join(extra_args)
+        skip_monitoring_stack = (
+            '--skip-monitoring-stack' if Config.get('skip-monitoring-stack') else ''
+        )
+        skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
+
+        fsid = Config.get('fsid')
+        config_folder = str(Config.get('config_folder'))
+        config = str(Config.get('config'))
+        keyring = str(Config.get('keyring'))
+        if not os.path.exists(config_folder):
+            os.mkdir(config_folder)
+
+        cephadm_bootstrap_command = (
+            '$CEPHADM_PATH --verbose bootstrap '
+            '--mon-ip "$(hostname -i)" '
+            '--allow-fqdn-hostname '
+            '--initial-dashboard-password admin '
+            '--dashboard-password-noupdate '
+            '--shared_ceph_folder /ceph '
+            '--allow-overwrite '
+            f'--output-config {config} '
+            f'--output-keyring {keyring} '
+            f'--output-config {config} '
+            f'--fsid "{fsid}" '
+            '--log-to-file '
+            f'{skip_dashboard} '
+            f'{skip_monitoring_stack} '
+            f'{extra_args} '
+        )
+
+        print('Running cephadm bootstrap...')
+        run_shell_command(cephadm_bootstrap_command, expect_exit_code=120) 
+        print('Cephadm bootstrap complete')
+
+        run_shell_command('sudo vgchange --refresh')
+        run_shell_command('cephadm ls')
+        run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')
+
+        run_cephadm_shell_command('ceph -s')
+
+        print('Bootstrap completed!')
+
+    @ensure_outside_container
+    def start(self):
+        check_cgroups()
+        check_selinux()
+        osds = int(Config.get('osds'))
+        hosts = int(Config.get('hosts'))
+        engine = get_container_engine()
+
+        # ensure boxes don't exist
+        self.down()
+
+        # podman is ran without sudo
+        if isinstance(engine, PodmanEngine):
+            I_am = run_shell_command('whoami')
+            if 'root' in I_am:
+                print(root_error_msg)
+                sys.exit(1)
+
+        print('Checking docker images')
+        if not image_exists(CEPH_IMAGE):
+            get_ceph_image()
+        if not image_exists(BOX_IMAGE):
+            get_box_image()
+
+        used_loop = ""
+        if not Config.get('skip_create_loop'):
+            print('Creating OSD devices...')
+            used_loop = osd.create_loopback_devices(osds)
+            print(f'Added {osds} logical volumes in a loopback device')
+
+        print('Starting containers')
+
+        engine.up(hosts)
+
+        containers = engine.get_containers()
+        seed = engine.get_seed()
+        # Umounting somehow brings back the contents of the host /sys/dev/block. 
+        # On startup /sys/dev/block is empty. After umount, we can see symlinks again
+        # so that lsblk is able to run as expected
+        run_dc_shell_command('umount /sys/dev/block', seed)
+
+        run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
+        run_shell_command('sudo iptables -P FORWARD ACCEPT')
+
+        # don't update clock with chronyd / setup chronyd on all boxes
+        chronyd_setup = """
+        sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i
+        systemctl daemon-reload
+        systemctl start chronyd
+        systemctl status --no-pager chronyd
+        """
+        for container in containers:
+            print(colored('Got container:', Colors.OKCYAN), str(container))
+        for container in containers:
+            run_dc_shell_commands(chronyd_setup, container)
+
+        print('Seting up host ssh servers')
+        for container in containers:
+            print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container))
+            host._setup_ssh(container)
+
+        verbose = '-v' if Config.get('verbose') else ''
+        skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else ''
+        skip_monitoring_stack = (
+            '--skip-monitoring-stack' if Config.get('skip-monitoring-stack') else ''
+        )
+        skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
+        box_bootstrap_command = (
+            f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap '
+            f'--osds {osds} '
+            f'--hosts {hosts} '
+            f'{skip_deploy} '
+            f'{skip_dashboard} '
+            f'{skip_monitoring_stack} '
+        )
+        print(box_bootstrap_command)
+        run_dc_shell_command(box_bootstrap_command, seed)
+
+        expanded = Config.get('expanded')
+        if expanded:
+            info = get_boxes_container_info()
+            ips = info['ips']
+            hostnames = info['hostnames']
+            print(ips)
+            if hosts > 0:
+                host._copy_cluster_ssh_key(ips)
+                host._add_hosts(ips, hostnames)
+            if not Config.get('skip-deploy-osds'):
+                print('Deploying osds... This could take up to minutes')
+                osd.deploy_osds(osds)
+                print('Osds deployed')
+
+
+        dashboard_ip = 'localhost'
+        info = get_boxes_container_info(with_seed=True)
+        if isinstance(engine, DockerEngine):
+            for i in range(info['size']):
+                if get_seed_name() in info['container_names'][i]:
+                    dashboard_ip = info["ips"][i]
+        print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN))
+
+        print('Bootstrap finished successfully')
+
+    @ensure_outside_container
+    def down(self):
+        engine = get_container_engine()
+        if isinstance(engine, PodmanEngine):
+            containers = json.loads(engine.run('container ls --format json'))
+            for container in containers:
+                for name in container['Names']:
+                    if name.startswith('box_hosts_'):
+                        engine.run(f'container kill {name}')
+                        engine.run(f'container rm {name}')
+            pods = json.loads(engine.run('pod ls --format json'))
+            for pod in pods:
+                if 'Name' in pod and pod['Name'].startswith('box_pod_host'):
+                    name = pod['Name']
+                    engine.run(f'pod kill {name}')
+                    engine.run(f'pod rm {name}')
+        else:
+            run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down')
+        print('Successfully killed all boxes')
+
+    @ensure_outside_container
+    def list(self):
+        info = get_boxes_container_info(with_seed=True)
+        for i in range(info['size']):
+            ip = info['ips'][i]
+            name = info['container_names'][i]
+            hostname = info['hostnames'][i]
+            print(f'{name} \t{ip} \t{hostname}')
+
+    @ensure_outside_container
+    def bash(self):
+        # we need verbose to see the prompt after running shell command
+        Config.set('verbose', True)
+        print('Seed bash')
+        engine = get_container_engine()
+        engine.run(f'exec -it {engine.seed_name} bash')
+
+
+targets = {
+    'cluster': Cluster,
+    'osd': osd.Osd,
+    'host': host.Host,
+}
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-v', action='store_true', dest='verbose', help='be more verbose'
+    )
+    parser.add_argument(
+        '--engine', type=str, default='podman',
+        dest='engine', help='choose engine between "docker" and "podman"'
+    )
+
+    subparsers = parser.add_subparsers()
+    target_instances = {}
+    for name, target in targets.items():
+        target_instances[name] = target(None, subparsers)
+
+    for count, arg in enumerate(sys.argv, 1):
+        if arg in targets:
+            instance = target_instances[arg]
+            if hasattr(instance, 'main'):
+                instance.argv = sys.argv[count:]
+                instance.set_args()
+                args = parser.parse_args()
+                Config.add_args(vars(args))
+                instance.main()
+                sys.exit(0)
+
+    parser.print_help()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/cephadm/box/daemon.json b/src/cephadm/box/daemon.json
new file mode 100644
index 000000000..5cfcaa87f
--- /dev/null
+++ b/src/cephadm/box/daemon.json
@@ -0,0 +1,3 @@
+{
+    "storage-driver": "fuse-overlayfs"
+}
diff --git a/src/cephadm/box/docker-compose-docker.yml b/src/cephadm/box/docker-compose-docker.yml
new file mode 100644
index 000000000..fdecf6677
--- /dev/null
+++ b/src/cephadm/box/docker-compose-docker.yml
@@ -0,0 +1,39 @@
+version: "2.4"
+services:
+  cephadm-host-base:
+    build:
+      context: .
+    environment:
+      - CEPH_BRANCH=master
+    image: cephadm-box
+    privileged: true
+    stop_signal: RTMIN+3
+    volumes:
+      - ../../../:/ceph
+      - ..:/cephadm
+      - ./daemon.json:/etc/docker/daemon.json
+      # dangerous, maybe just map the loopback
+      # https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container
+      - /dev:/dev
+    networks:
+      - public
+    mem_limit: "20g"
+    scale: -1
+  seed:
+    extends:
+      service: cephadm-host-base
+    ports:
+      - "3000:3000"
+      - "8443:8443"
+      - "9095:9095"
+    scale: 1
+  hosts:
+    extends:
+      service: cephadm-host-base
+    scale: 3
+
+
+volumes:
+  var-lib-docker:
+networks:
+  public:
diff --git a/src/cephadm/box/docker-compose.cgroup1.yml b/src/cephadm/box/docker-compose.cgroup1.yml
new file mode 100644
index 000000000..ea23dec1e
--- /dev/null
+++ b/src/cephadm/box/docker-compose.cgroup1.yml
@@ -0,0 +1,10 @@
+version: "2.4"
+
+# If cgroups v2 is disabled then add cgroup fs
+services:
+        seed:
+            volumes:
+                - "/sys/fs/cgroup:/sys/fs/cgroup:ro"
+        hosts:
+            volumes:
+                - "/sys/fs/cgroup:/sys/fs/cgroup:ro"
diff --git a/src/cephadm/box/docker/ceph/.bashrc b/src/cephadm/box/docker/ceph/.bashrc
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/.bashrc
diff --git a/src/cephadm/box/docker/ceph/Dockerfile b/src/cephadm/box/docker/ceph/Dockerfile
new file mode 100644
index 000000000..b950750e9
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/Dockerfile
@@ -0,0 +1,3 @@
+FROM quay.ceph.io/ceph-ci/ceph:main
+RUN pip3 install packaging
+EXPOSE 8443
diff --git a/src/cephadm/box/docker/ceph/locale.conf b/src/cephadm/box/docker/ceph/locale.conf
new file mode 100644
index 000000000..00d76c8cd
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/locale.conf
@@ -0,0 +1,2 @@
+LANG="en_US.UTF-8"
+LC_ALL="en_US.UTF-8"
diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py
new file mode 100644
index 000000000..aae16d07f
--- /dev/null
+++ b/src/cephadm/box/host.py
@@ -0,0 +1,120 @@
+import os
+from typing import List, Union
+
+from util import (
+    Config,
+    HostContainer,
+    Target,
+    get_boxes_container_info,
+    get_container_engine,
+    inside_container,
+    run_cephadm_shell_command,
+    run_dc_shell_command,
+    run_shell_command,
+    engine,
+    BoxType
+)
+
+
+def _setup_ssh(container: HostContainer):
+    if inside_container():
+        if not os.path.exists('/root/.ssh/known_hosts'):
+            run_shell_command('echo "y" | ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""', 
+                              expect_error=True)
+
+        run_shell_command('echo "root:root" | chpasswd')
+        with open('/etc/ssh/sshd_config', 'a+') as f:
+            f.write('PermitRootLogin yes\n')
+            f.write('PasswordAuthentication yes\n')
+            f.flush()
+        run_shell_command('systemctl restart sshd')
+    else:
+        print('Redirecting to _setup_ssh to container')
+        verbose = '-v' if Config.get('verbose') else ''
+        run_dc_shell_command(
+            f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container.name}',
+            container
+        )
+
+
+def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]):
+    if inside_container():
+        assert len(ips) == len(hostnames)
+        for i in range(len(ips)):
+            run_cephadm_shell_command(f'ceph orch host add {hostnames[i]} {ips[i]}')
+    else:
+        print('Redirecting to _add_hosts to container')
+        verbose = '-v' if Config.get('verbose') else ''
+        print(ips)
+        ips = ' '.join(ips)
+        ips = f'{ips}'
+        hostnames = ' '.join(hostnames)
+        hostnames = f'{hostnames}'
+        seed = get_container_engine().get_seed()
+        run_dc_shell_command(
+                f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts {seed.name} --ips {ips} --hostnames {hostnames}',
+                seed
+                )
+
+
+def _copy_cluster_ssh_key(ips: Union[List[str], str]):
+    if inside_container():
+        local_ip = run_shell_command('hostname -i')
+        for ip in ips:
+            if ip != local_ip:
+                run_shell_command(
+                    (
+                        'sshpass -p "root" ssh-copy-id -f '
+                        f'-o StrictHostKeyChecking=no -i /etc/ceph/ceph.pub "root@{ip}"'
+                    )
+                )
+
+    else:
+        print('Redirecting to _copy_cluster_ssh to container')
+        verbose = '-v' if Config.get('verbose') else ''
+        print(ips)
+        ips = ' '.join(ips)
+        ips = f'{ips}'
+        # assume we only have one seed
+        seed = get_container_engine().get_seed()
+        run_dc_shell_command(
+            f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {seed.name} --ips {ips}',
+            seed
+        )
+
+
+class Host(Target):
+    _help = 'Run seed/host related commands'
+    actions = ['setup_ssh', 'copy_cluster_ssh_key', 'add_hosts']
+
+    def set_args(self):
+        self.parser.add_argument('action', choices=Host.actions)
+        self.parser.add_argument(
+            'container_name', 
+            type=str, 
+            help='box_{type}_{index}. In docker, type can be seed or hosts. In podman only hosts.'
+        )
+        self.parser.add_argument('--ips', nargs='*', help='List of host ips')
+        self.parser.add_argument(
+            '--hostnames', nargs='*', help='List of hostnames ips(relative to ip list)'
+        )
+
+    def setup_ssh(self):
+        container_name = Config.get('container_name')
+        engine = get_container_engine()
+        _setup_ssh(engine.get_container(container_name))
+
+    def add_hosts(self):
+        ips = Config.get('ips')
+        if not ips:
+            ips = get_boxes_container_info()['ips']
+        hostnames = Config.get('hostnames')
+        if not hostnames:
+            hostnames = get_boxes_container_info()['hostnames']
+        _add_hosts(ips, hostnames)
+
+    def copy_cluster_ssh_key(self):
+        ips = Config.get('ips')
+        if not ips:
+            ips = get_boxes_container_info()['ips']
+        _copy_cluster_ssh_key(ips)
diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py
new file mode 100644
index 000000000..827a4de36
--- /dev/null
+++ b/src/cephadm/box/osd.py
@@ -0,0 +1,157 @@
+import json
+import os
+import time
+import re
+from typing import Dict
+
+from util import (
+    BoxType,
+    Config,
+    Target,
+    ensure_inside_container,
+    ensure_outside_container,
+    get_orch_hosts,
+    run_cephadm_shell_command,
+    run_dc_shell_command,
+    get_container_engine,
+    run_shell_command,
+)
+
+DEVICES_FILE="./devices.json"
+
+def remove_loop_img() -> None:
+    loop_image = Config.get('loop_img')
+    if os.path.exists(loop_image):
+        os.remove(loop_image)
+
+def create_loopback_devices(osds: int) -> Dict[int, Dict[str, str]]:
+    assert osds
+    cleanup_osds()
+    osd_devs = dict()
+
+    for i in range(osds):
+        img_name = f'osd{i}'
+        loop_dev = create_loopback_device(img_name)
+        osd_devs[i] = dict(img_name=img_name, device=loop_dev)
+    with open(DEVICES_FILE, 'w') as dev_file:
+        dev_file.write(json.dumps(osd_devs))
+    return osd_devs
+
+def create_loopback_device(img_name, size_gb=5):
+    loop_img_dir = Config.get('loop_img_dir')
+    run_shell_command(f'mkdir -p {loop_img_dir}')
+    loop_img = os.path.join(loop_img_dir, img_name)
+    run_shell_command(f'rm -f {loop_img}')
+    run_shell_command(f'dd if=/dev/zero of={loop_img} bs=1 count=0 seek={size_gb}G')
+    loop_dev = run_shell_command(f'sudo losetup -f')
+    if not os.path.exists(loop_dev):
+        dev_minor = re.match(r'\/dev\/[^\d]+(\d+)', loop_dev).groups()[0]
+        run_shell_command(f'sudo mknod -m777 {loop_dev} b 7 {dev_minor}')
+        run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
+    if os.path.ismount(loop_dev):
+        os.umount(loop_dev)
+    run_shell_command(f'sudo losetup {loop_dev} {loop_img}')
+    run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
+    return loop_dev
+
+
+def get_lvm_osd_data(data: str) -> Dict[str, str]:
+    osd_lvm_info = run_cephadm_shell_command(f'ceph-volume lvm list {data}')
+    osd_data = {}
+    for line in osd_lvm_info.split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+        line = line.split()
+        if line[0].startswith('===') or line[0].startswith('[block]'):
+            continue
+        # "block device" key -> "block_device"
+        key = '_'.join(line[:-1])
+        osd_data[key] = line[-1]
+    return osd_data
+
+def load_osd_devices():
+    if not os.path.exists(DEVICES_FILE):
+        return dict()
+    with open(DEVICES_FILE) as dev_file:
+        devs = json.loads(dev_file.read())
+    return devs
+
+
+@ensure_inside_container
+def deploy_osd(data: str, hostname: str) -> bool:
+    out = run_cephadm_shell_command(f'ceph orch daemon add osd {hostname}:{data} raw')
+    return 'Created osd(s)' in out
+
+
+def cleanup_osds() -> None:
+    loop_img_dir = Config.get('loop_img_dir')
+    osd_devs = load_osd_devices()
+    for osd in osd_devs.values():
+        device = osd['device']
+        if 'loop' in device:
+            loop_img = os.path.join(loop_img_dir, osd['img_name'])
+            run_shell_command(f'sudo losetup -d {device}', expect_error=True)
+            if os.path.exists(loop_img):
+                os.remove(loop_img)
+    run_shell_command(f'rm -rf {loop_img_dir}')
+
+
+def deploy_osds(count: int):
+    osd_devs = load_osd_devices()
+    hosts = get_orch_hosts()
+    host_index = 0
+    seed = get_container_engine().get_seed()
+    v = '-v' if Config.get('verbose') else ''
+    for osd in osd_devs.values():
+        deployed = False
+        while not deployed:
+            print(hosts)
+            hostname = hosts[host_index]['hostname']
+            deployed = run_dc_shell_command(
+                f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}',
+                seed
+            )
+            deployed = 'created osd' in deployed.lower() or 'already created?' in deployed.lower()
+            print('Waiting 5 seconds to re-run deploy osd...')
+            time.sleep(5)
+        host_index = (host_index + 1) % len(hosts)
+
+
+class Osd(Target):
+    _help = """
+    Deploy osds and create needed block devices with loopback devices:
+    Actions:
+    - deploy: Deploy an osd given a block device
+    - create_loop: Create needed loopback devices and block devices in logical volumes
+    for a number of osds.
+    - destroy: Remove all osds and the underlying loopback devices.
+    """
+    actions = ['deploy', 'create_loop', 'destroy']
+
+    def set_args(self):
+        self.parser.add_argument('action', choices=Osd.actions)
+        self.parser.add_argument('--data', type=str, help='path to a block device')
+        self.parser.add_argument('--hostname', type=str, help='host to deploy osd')
+        self.parser.add_argument('--osds', type=int, default=0, help='number of osds')
+
+    def deploy(self):
+        data = Config.get('data')
+        hostname = Config.get('hostname')
+        if not hostname:
+            # assume this host
+            hostname = run_shell_command('hostname')
+        if not data:
+            deploy_osds(Config.get('osds'))
+        else:
+            deploy_osd(data, hostname)
+
+    @ensure_outside_container
+    def create_loop(self):
+        osds = Config.get('osds')
+        create_loopback_devices(int(osds))
+        print('Successfully created loopback devices')
+
+    @ensure_outside_container
+    def destroy(self):
+        cleanup_osds()
diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py
new file mode 100644
index 000000000..7dcf883f8
--- /dev/null
+++ b/src/cephadm/box/util.py
@@ -0,0 +1,421 @@
+import json
+import os
+import subprocess
+import sys
+import copy
+from abc import ABCMeta, abstractmethod
+from enum import Enum
+from typing import Any, Callable, Dict, List
+
+class Colors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKCYAN = '\033[96m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+class Config:
+    args = {
+        'fsid': '00000000-0000-0000-0000-0000deadbeef',
+        'config_folder': '/etc/ceph/',
+        'config': '/etc/ceph/ceph.conf',
+        'keyring': '/etc/ceph/ceph.keyring',
+        'loop_img': 'loop-images/loop.img',
+        'engine': 'podman',
+        'docker_yaml': 'docker-compose-docker.yml',
+        'docker_v1_yaml': 'docker-compose.cgroup1.yml',
+        'podman_yaml': 'docker-compose-podman.yml',
+        'loop_img_dir': 'loop-images',
+    }
+
+    @staticmethod
+    def set(key, value):
+        Config.args[key] = value
+
+    @staticmethod
+    def get(key):
+        if key in Config.args:
+            return Config.args[key]
+        return None
+
+    @staticmethod
+    def add_args(args: Dict[str, str]) -> None:
+        Config.args.update(args)
+
+class Target:
+    def __init__(self, argv, subparsers):
+        self.argv = argv
+        self.parser = subparsers.add_parser(
+            self.__class__.__name__.lower(), help=self.__class__._help
+        )
+
+    def set_args(self):
+        """
+        adding the required arguments of the target should go here, example:
+        self.parser.add_argument(..)
+        """
+        raise NotImplementedError()
+
+    def main(self):
+        """
+        A target will be setup by first calling this main function
+        where the parser is initialized.
+        """
+        args = self.parser.parse_args(self.argv)
+        Config.add_args(vars(args))
+        function = getattr(self, args.action)
+        function()
+
+
+def ensure_outside_container(func) -> Callable:
+    def wrapper(*args, **kwargs):
+        if not inside_container():
+            return func(*args, **kwargs)
+        else:
+            raise RuntimeError('This command should be ran outside a container')
+
+    return wrapper
+
+
+def ensure_inside_container(func) -> bool:
+    def wrapper(*args, **kwargs):
+        if inside_container():
+            return func(*args, **kwargs)
+        else:
+            raise RuntimeError('This command should be ran inside a container')
+
+    return wrapper
+
+
+def colored(msg, color: Colors):
+    return color + msg + Colors.ENDC
+
+class BoxType(str, Enum):
+  SEED = 'seed'
+  HOST = 'host'
+
+class HostContainer:
+    def __init__(self, _name, _type) -> None:
+        self._name: str = _name
+        self._type: BoxType = _type
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def type(self) -> BoxType:
+        return self._type
+    def __str__(self) -> str:
+        return f'{self.name} {self.type}'
+
+def run_shell_command(command: str, expect_error=False, verbose=True, expect_exit_code=0) -> str:
+    if Config.get('verbose'):
+        print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}')
+
+    process = subprocess.Popen(
+        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
+
+    out = ''
+    err = ''
+    # let's read when output comes so it is in real time
+    while True:
+        # TODO: improve performance of this part, I think this part is a problem
+        pout = process.stdout.read(1).decode('latin1')
+        if pout == '' and process.poll() is not None:
+            break
+        if pout:
+            if Config.get('verbose') and verbose:
+                sys.stdout.write(pout)
+                sys.stdout.flush()
+            out += pout
+
+    process.wait()
+
+    err += process.stderr.read().decode('latin1').strip()
+    out = out.strip()
+
+    if process.returncode != 0 and not expect_error and process.returncode != expect_exit_code:
+        err = colored(err, Colors.FAIL);
+        
+        raise RuntimeError(f'Failed command: {command}\n{err}\nexit code: {process.returncode}')
+        sys.exit(1)
+    return out
+
+
+def run_dc_shell_commands(commands: str, container: HostContainer, expect_error=False) -> str:
+    for command in commands.split('\n'):
+        command = command.strip()
+        if not command:
+            continue
+        run_dc_shell_command(command.strip(), container, expect_error=expect_error)
+
+def run_shell_commands(commands: str, expect_error=False) -> str:
+    for command in commands.split('\n'):
+        command = command.strip()
+        if not command:
+            continue
+        run_shell_command(command, expect_error=expect_error)
+
+@ensure_inside_container
+def run_cephadm_shell_command(command: str, expect_error=False) -> str:
+    config = Config.get('config')
+    keyring = Config.get('keyring')
+    fsid = Config.get('fsid')
+
+    with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main'
+    out = run_shell_command(
+        f'{with_cephadm_image} cephadm --verbose shell --fsid {fsid} --config {config} --keyring {keyring} -- {command}',
+        expect_error,
+    )
+    return out
+
+
+def run_dc_shell_command(
+        command: str, container: HostContainer, expect_error=False
+) -> str:
+    out = get_container_engine().run_exec(container, command, expect_error=expect_error)
+    return out
+
+def inside_container() -> bool:
+    return os.path.exists('/.box_container')
+
+def get_container_id(container_name: str):
+    return run_shell_command(f"{engine()} ps | \grep " + container_name + " | awk '{ print $1 }'")
+
+def engine():
+    return Config.get('engine')
+
+def engine_compose():
+    return f'{engine()}-compose'
+
+def get_seed_name():
+    if engine() == 'docker':
+        return 'seed'
+    elif engine() == 'podman':
+        return 'box_hosts_0'
+    else:
+        print(f'unkown engine {engine()}')
+        sys.exit(1)
+
+
+@ensure_outside_container
+def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]:
+    # NOTE: this could be cached
+    ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) --format json"
+    containers = json.loads(run_shell_command(ips_query, verbose=False))
+    # FIXME: if things get more complex a class representing a container info might be useful,
+    # for now representing data this way is faster.
+    info = {'size': 0, 'ips': [], 'container_names': [], 'hostnames': []}
+    for container in containers:
+        # Most commands use hosts only
+        name = container['Name']
+        if name.startswith('box_hosts'):
+            if not with_seed and name == get_seed_name():
+                continue
+            info['size'] += 1
+            print(container['NetworkSettings'])
+            if 'Networks' in container['NetworkSettings']:
+                info['ips'].append(container['NetworkSettings']['Networks']['box_network']['IPAddress'])
+            else:
+                info['ips'].append('n/a')
+            info['container_names'].append(name)
+            info['hostnames'].append(container['Config']['Hostname'])
+    return info
+
+
+def get_orch_hosts():
+    if inside_container():
+        orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json')
+    else:
+        orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 
+                                                get_container_engine().get_seed())
+        sp = orch_host_ls_out.split('\n')
+        orch_host_ls_out  = sp[len(sp) - 1]
+    hosts = json.loads(orch_host_ls_out)
+    return hosts
+
+
+class ContainerEngine(metaclass=ABCMeta):
+    @property
+    @abstractmethod
+    def command(self) -> str: pass
+
+    @property
+    @abstractmethod
+    def seed_name(self) -> str: pass
+
+    @property
+    @abstractmethod
+    def dockerfile(self) -> str: pass
+
+    @property
+    def host_name_prefix(self) -> str: 
+        return 'box_hosts_'
+
+    @abstractmethod
+    def up(self, hosts: int): pass
+
+    def run_exec(self, container: HostContainer, command: str, expect_error: bool = False):
+        return run_shell_command(' '.join([self.command, 'exec', container.name, command]), 
+                                 expect_error=expect_error) 
+
+    def run(self, engine_command: str, expect_error: bool = False):
+        return run_shell_command(' '.join([self.command, engine_command]), expect_error=expect_error) 
+
+    def get_containers(self) -> List[HostContainer]:
+        ps_out = json.loads(run_shell_command('podman ps --format json'))
+        containers = [] 
+        for container in ps_out:
+            if not container['Names']:
+                raise RuntimeError(f'Container {container} missing name')
+            name = container['Names'][0]
+            if name == self.seed_name:
+                containers.append(HostContainer(name, BoxType.SEED))
+            elif name.startswith(self.host_name_prefix):
+                containers.append(HostContainer(name, BoxType.HOST))
+        return containers
+
+    def get_seed(self) -> HostContainer:
+        for container in self.get_containers():
+            if container.type == BoxType.SEED:
+                return container
+        raise RuntimeError('Missing seed container')
+
+    def get_container(self, container_name: str):
+        containers = self.get_containers()
+        for container in containers:
+            if container.name == container_name:
+                return container
+        return None
+
+
+    def restart(self):
+        pass
+
+
+class DockerEngine(ContainerEngine):
+    command = 'docker'
+    seed_name = 'seed'
+    dockerfile = 'DockerfileDocker'
+
+    def restart(self):
+        run_shell_command('systemctl restart docker')
+
+    def up(self, hosts: int):
+        dcflags = f'-f {Config.get("docker_yaml")}'
+        if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
+            dcflags += f' -f {Config.get("docker_v1_yaml")}'
+        run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d')
+
+class PodmanEngine(ContainerEngine):
+    command = 'podman'
+    seed_name = 'box_hosts_0'
+    dockerfile = 'DockerfilePodman'
+
+    CAPS = [
+            "SYS_ADMIN",
+            "NET_ADMIN",
+            "SYS_TIME",
+            "SYS_RAWIO",
+            "MKNOD",
+            "NET_RAW",
+            "SETUID",
+            "SETGID",
+            "CHOWN",
+            "SYS_PTRACE",
+            "SYS_TTY_CONFIG",
+            "CAP_AUDIT_WRITE",
+            "CAP_AUDIT_CONTROL",
+            ]
+
+    VOLUMES = [
+                '../../../:/ceph:z',
+                '../:/cephadm:z',
+                '/run/udev:/run/udev',
+                '/sys/dev/block:/sys/dev/block',
+                '/sys/fs/cgroup:/sys/fs/cgroup:ro',
+                '/dev/fuse:/dev/fuse',
+                '/dev/disk:/dev/disk',
+                '/sys/devices/virtual/block:/sys/devices/virtual/block',
+                '/sys/block:/dev/block',
+                '/dev/mapper:/dev/mapper',
+                '/dev/mapper/control:/dev/mapper/control',
+            ]
+
+    TMPFS = ['/run', '/tmp']
+
+    # FIXME: right now we are assuming every service will be exposed through the seed, but this is far
+    # from the truth. Services can be deployed on different hosts so we need a system to manage this.
+    SEED_PORTS = [
+            8443, # dashboard
+            3000, # grafana
+            9093, # alertmanager
+            9095  # prometheus
+            ]
+
+
+    def setup_podman_env(self, hosts: int = 1, osd_devs={}):
+        network_name = 'box_network'
+        networks = run_shell_command('podman network ls')
+        if network_name not in networks:
+            run_shell_command(f'podman network create -d bridge {network_name}')
+
+        args = [
+                '--group-add', 'keep-groups', 
+                '--device', '/dev/fuse' ,
+                '-it' ,
+                '-d',
+                '-e', 'CEPH_BRANCH=main',
+                '--stop-signal', 'RTMIN+3'
+                ]
+
+        for cap in self.CAPS:
+            args.append('--cap-add')
+            args.append(cap)
+
+        for volume in self.VOLUMES:
+            args.append('-v')
+            args.append(volume)
+
+        for tmp in self.TMPFS:
+            args.append('--tmpfs')
+            args.append(tmp)
+
+
+        for osd_dev in osd_devs.values():
+            device = osd_dev["device"]
+            args.append('--device')
+            args.append(f'{device}:{device}')
+
+
+        for host in range(hosts+1): # 0 will be the seed
+            options = copy.copy(args)
+            options.append('--name')
+            options.append(f'box_hosts_{host}')
+            options.append('--network')
+            options.append(f'{network_name}')
+            if host == 0:
+                for port in self.SEED_PORTS:
+                    options.append('-p')
+                    options.append(f'{port}:{port}')
+
+            options.append('cephadm-box')
+            options = ' '.join(options)
+
+            run_shell_command(f'podman run {options}')
+
+    def up(self, hosts: int):
+        import osd
+        self.setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices())
+
+def get_container_engine() -> ContainerEngine:
+    if engine() == 'docker':
+        return DockerEngine()
+    else:
+        return PodmanEngine()
diff --git a/src/cephadm/build.py b/src/cephadm/build.py
new file mode 100755
index 000000000..4264b814f
--- /dev/null
+++ b/src/cephadm/build.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python3
+"""Build cephadm from one or more files into a standalone executable.
+"""
+# TODO: If cephadm is being built and packaged within a format such as RPM
+# do we have to do anything special wrt passing in the version
+# of python to build with? Even with the intermediate cmake layer?
+
+import argparse
+import compileall
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import tempfile
+import sys
+
+HAS_ZIPAPP = False
+try:
+    import zipapp
+
+    HAS_ZIPAPP = True
+except ImportError:
+    pass
+
+
+log = logging.getLogger(__name__)
+
+
+_VALID_VERS_VARS = [
+    "CEPH_GIT_VER",
+    "CEPH_GIT_NICE_VER",
+    "CEPH_RELEASE",
+    "CEPH_RELEASE_NAME",
+    "CEPH_RELEASE_TYPE",
+]
+
+
+def _reexec(python):
+    """Switch to the selected version of python by exec'ing into the desired
+    python path.
+    Sets the _BUILD_PYTHON_SET env variable as a sentinel to indicate exec has
+    been performed.
+    """
+    env = os.environ.copy()
+    env["_BUILD_PYTHON_SET"] = python
+    os.execvpe(python, [python, __file__] + sys.argv[1:], env)
+
+
+def _did_rexec():
+    """Returns true if the process has already exec'ed into the desired python
+    version.
+    """
+    return bool(os.environ.get("_BUILD_PYTHON_SET", ""))
+
+
+def _build(dest, src, versioning_vars=None):
+    """Build the binary."""
+    os.chdir(src)
+    tempdir = pathlib.Path(tempfile.mkdtemp(suffix=".cephadm.build"))
+    log.debug("working in %s", tempdir)
+    try:
+        if os.path.isfile("requirements.txt"):
+            _install_deps(tempdir)
+        log.info("Copying contents")
+        # TODO: currently the only file relevant to a compiled cephadm is the
+        # cephadm.py file. Once cephadm is broken up into multiple py files
+        # (and possibly other libs from python-common, etc) we'll want some
+        # sort organized structure to track what gets copied into the
+        # dir to be zipped. For now we just have a simple call to copy
+        # (and rename) the one file we care about.
+        shutil.copy("cephadm.py", tempdir / "__main__.py")
+        if versioning_vars:
+            generate_version_file(versioning_vars, tempdir / "_version.py")
+        _compile(dest, tempdir)
+    finally:
+        shutil.rmtree(tempdir)
+
+
+def _compile(dest, tempdir):
+    """Compile the zipapp."""
+    log.info("Byte-compiling py to pyc")
+    compileall.compile_dir(
+        tempdir,
+        maxlevels=16,
+        legacy=True,
+        quiet=1,
+        workers=0,
+    )
+    # TODO we could explicitly pass a python version here
+    log.info("Constructing the zipapp file")
+    try:
+        zipapp.create_archive(
+            source=tempdir,
+            target=dest,
+            interpreter=sys.executable,
+            compressed=True,
+        )
+        log.info("Zipapp created with compression")
+    except TypeError:
+        # automatically fall back to uncompressed
+        zipapp.create_archive(
+            source=tempdir,
+            target=dest,
+            interpreter=sys.executable,
+        )
+        log.info("Zipapp created without compression")
+
+
+def _install_deps(tempdir):
+    """Install dependencies with pip."""
+    # TODO we could explicitly pass a python version here
+    log.info("Installing dependencies")
+    # apparently pip doesn't have an API, just a cli.
+    subprocess.check_call(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "--requirement",
+            "requirements.txt",
+            "--target",
+            tempdir,
+        ]
+    )
+
+
+def generate_version_file(versioning_vars, dest):
+    log.info("Generating version file")
+    log.debug("versioning_vars=%r", versioning_vars)
+    with open(dest, "w") as fh:
+        print("# GENERATED FILE -- do not edit", file=fh)
+        for key, value in versioning_vars:
+            print(f"{key} = {value!r}", file=fh)
+
+
+def version_kv_pair(value):
+    if "=" not in value:
+        raise argparse.ArgumentTypeError(f"not a key=value pair: {value!r}")
+    key, value = value.split("=", 1)
+    if key not in _VALID_VERS_VARS:
+        raise argparse.ArgumentTypeError(f"Unexpected key: {key!r}")
+    return key, value
+
+
+def main():
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter("cephadm/build.py: %(message)s"))
+    log.addHandler(handler)
+    log.setLevel(logging.INFO)
+
+    log.debug("argv: %r", sys.argv)
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "dest", help="Destination path name for new cephadm binary"
+    )
+    parser.add_argument(
+        "--source", help="Directory containing cephadm sources"
+    )
+    parser.add_argument(
+        "--python", help="The path to the desired version of python"
+    )
+    parser.add_argument(
+        "--set-version-var",
+        "-S",
+        type=version_kv_pair,
+        dest="version_vars",
+        action="append",
+        help="Set a key=value pair in the generated version info file",
+    )
+    args = parser.parse_args()
+
+    if not _did_rexec() and args.python:
+        _reexec(args.python)
+
+    log.info(
+        "Python Version: {v.major}.{v.minor}.{v.micro}".format(
+            v=sys.version_info
+        )
+    )
+    log.info("Args: %s", vars(args))
+    if not HAS_ZIPAPP:
+        # Unconditionally display an error that the version of python
+        # lacks zipapp (probably too old).
+        print("error: zipapp module not found", file=sys.stderr)
+        print(
+            "(zipapp is available in Python 3.5 or later."
+            " are you using a new enough version?)",
+            file=sys.stderr,
+        )
+        sys.exit(2)
+    if args.source:
+        source = pathlib.Path(args.source).absolute()
+    else:
+        source = pathlib.Path(__file__).absolute().parent
+    dest = pathlib.Path(args.dest).absolute()
+    log.info("Source Dir: %s", source)
+    log.info("Destination Path: %s", dest)
+    _build(dest, source, versioning_vars=args.version_vars)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cephadm/build.sh b/src/cephadm/build.sh
new file mode 100755
index 000000000..84b58f14f
--- /dev/null
+++ b/src/cephadm/build.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+exec python3 $SCRIPT_DIR/build.py "$@"
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
new file mode 100755
index 000000000..bcb82c4c4
--- /dev/null
+++ b/src/cephadm/cephadm.py
@@ -0,0 +1,10700 @@
+#!/usr/bin/python3
+
+import asyncio
+import asyncio.subprocess
+import argparse
+import datetime
+import fcntl
+import ipaddress
+import io
+import json
+import logging
+from logging.config import dictConfig
+import os
+import platform
+import pwd
+import random
+import shlex
+import shutil
+import socket
+import string
+import subprocess
+import sys
+import tempfile
+import time
+import errno
+import struct
+import ssl
+from enum import Enum
+from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable, TextIO, Generator
+
+import re
+import uuid
+
+from configparser import ConfigParser
+from contextlib import redirect_stdout, contextmanager
+from functools import wraps
+from glob import glob
+from io import StringIO
+from threading import Thread, Event
+from urllib.error import HTTPError, URLError
+from urllib.request import urlopen, Request
+from pathlib import Path
+
+FuncT = TypeVar('FuncT', bound=Callable)
+
+# Default container images -----------------------------------------------------
+DEFAULT_IMAGE = 'quay.io/ceph/ceph:v18'
+DEFAULT_IMAGE_IS_MAIN = False
+DEFAULT_IMAGE_RELEASE = 'reef'
+DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
+DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
+DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
+DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
+DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
+DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7'
+DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
+DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1'
+DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
+DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
+DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
+DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
+DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
+DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
+# ------------------------------------------------------------------------------
+
+LATEST_STABLE_RELEASE = 'reef'
+DATA_DIR = '/var/lib/ceph'
+LOG_DIR = '/var/log/ceph'
+LOCK_DIR = '/run/cephadm'
+LOGROTATE_DIR = '/etc/logrotate.d'
+SYSCTL_DIR = '/etc/sysctl.d'
+UNIT_DIR = '/etc/systemd/system'
+CEPH_CONF_DIR = 'config'
+CEPH_CONF = 'ceph.conf'
+CEPH_PUBKEY = 'ceph.pub'
+CEPH_KEYRING = 'ceph.client.admin.keyring'
+CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}'
+CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}'
+CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}'
+LOG_DIR_MODE = 0o770
+DATA_DIR_MODE = 0o700
+DEFAULT_MODE = 0o600
+CONTAINER_INIT = True
+MIN_PODMAN_VERSION = (2, 0, 2)
+CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
+PIDS_LIMIT_UNLIMITED_PODMAN_VERSION = (3, 4, 1)
+CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
+DEFAULT_TIMEOUT = None  # in seconds
+DEFAULT_RETRY = 15
+DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
+QUIET_LOG_LEVEL = 9  # DEBUG is 10, so using 9 to be lower level than DEBUG
+NO_DEPRECATED = False
+
+logger: logging.Logger = None  # type: ignore
+
+"""
+You can invoke cephadm in two ways:
+
+1. The normal way, at the command line.
+
+2. By piping the script to the python3 binary.  In this latter case, you should
+   prepend one or more lines to the beginning of the script.
+
+   For arguments,
+
+       injected_argv = [...]
+
+   e.g.,
+
+       injected_argv = ['ls']
+
+   For reading stdin from the '--config-json -' argument,
+
+       injected_stdin = '...'
+"""
+cached_stdin = None
+
+
+##################################
+
+
+async def run_func(func: Callable, cmd: str) -> subprocess.CompletedProcess:
+    logger.debug(f'running function {func.__name__}, with parms: {cmd}')
+    response = func(cmd)
+    return response
+
+
+async def concurrent_tasks(func: Callable, cmd_list: List[str]) -> List[Any]:
+    tasks = []
+    for cmd in cmd_list:
+        tasks.append(run_func(func, cmd))
+
+    data = await asyncio.gather(*tasks)
+
+    return data
+
+
+class EndPoint:
+    """EndPoint representing an ip:port format"""
+
+    def __init__(self, ip: str, port: int) -> None:
+        self.ip = ip
+        self.port = port
+
+    def __str__(self) -> str:
+        return f'{self.ip}:{self.port}'
+
+    def __repr__(self) -> str:
+        return f'{self.ip}:{self.port}'
+
+
+class ContainerInfo:
+    def __init__(self, container_id: str,
+                 image_name: str,
+                 image_id: str,
+                 start: str,
+                 version: str) -> None:
+        self.container_id = container_id
+        self.image_name = image_name
+        self.image_id = image_id
+        self.start = start
+        self.version = version
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, ContainerInfo):
+            return NotImplemented
+        return (self.container_id == other.container_id
+                and self.image_name == other.image_name
+                and self.image_id == other.image_id
+                and self.start == other.start
+                and self.version == other.version)
+
+
+class DeploymentType(Enum):
+    # Fresh deployment of a daemon.
+    DEFAULT = 'Deploy'
+    # Redeploying a daemon. Works the same as fresh
+    # deployment minus port checking.
+    REDEPLOY = 'Redeploy'
+    # Reconfiguring a daemon. Rewrites config
+    # files and potentially restarts daemon.
+    RECONFIG = 'Reconfig'
+
+
+class BaseConfig:
+
+    def __init__(self) -> None:
+        self.image: str = ''
+        self.docker: bool = False
+        self.data_dir: str = DATA_DIR
+        self.log_dir: str = LOG_DIR
+        self.logrotate_dir: str = LOGROTATE_DIR
+        self.sysctl_dir: str = SYSCTL_DIR
+        self.unit_dir: str = UNIT_DIR
+        self.verbose: bool = False
+        self.timeout: Optional[int] = DEFAULT_TIMEOUT
+        self.retry: int = DEFAULT_RETRY
+        self.env: List[str] = []
+        self.memory_request: Optional[int] = None
+        self.memory_limit: Optional[int] = None
+        self.log_to_journald: Optional[bool] = None
+
+        self.container_init: bool = CONTAINER_INIT
+        self.container_engine: Optional[ContainerEngine] = None
+
+    def set_from_args(self, args: argparse.Namespace) -> None:
+        argdict: Dict[str, Any] = vars(args)
+        for k, v in argdict.items():
+            if hasattr(self, k):
+                setattr(self, k, v)
+
+
+class CephadmContext:
+
+    def __init__(self) -> None:
+        self.__dict__['_args'] = None
+        self.__dict__['_conf'] = BaseConfig()
+
+    def set_args(self, args: argparse.Namespace) -> None:
+        self._conf.set_from_args(args)
+        self._args = args
+
+    def has_function(self) -> bool:
+        return 'func' in self._args
+
+    def __contains__(self, name: str) -> bool:
+        return hasattr(self, name)
+
+    def __getattr__(self, name: str) -> Any:
+        if '_conf' in self.__dict__ and hasattr(self._conf, name):
+            return getattr(self._conf, name)
+        elif '_args' in self.__dict__ and hasattr(self._args, name):
+            return getattr(self._args, name)
+        else:
+            return super().__getattribute__(name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        if hasattr(self._conf, name):
+            setattr(self._conf, name, value)
+        elif hasattr(self._args, name):
+            setattr(self._args, name, value)
+        else:
+            super().__setattr__(name, value)
+
+
+class ContainerEngine:
+    def __init__(self) -> None:
+        self.path = find_program(self.EXE)
+
+    @property
+    def EXE(self) -> str:
+        raise NotImplementedError()
+
+    def __str__(self) -> str:
+        return f'{self.EXE} ({self.path})'
+
+
+class Podman(ContainerEngine):
+    EXE = 'podman'
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._version: Optional[Tuple[int, ...]] = None
+
+    @property
+    def version(self) -> Tuple[int, ...]:
+        if self._version is None:
+            raise RuntimeError('Please call `get_version` first')
+        return self._version
+
+    def get_version(self, ctx: CephadmContext) -> None:
+        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'], verbosity=CallVerbosity.QUIET)
+        self._version = _parse_podman_version(out)
+
+    def __str__(self) -> str:
+        version = '.'.join(map(str, self.version))
+        return f'{self.EXE} ({self.path}) version {version}'
+
+
+class Docker(ContainerEngine):
+    EXE = 'docker'
+
+
+CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
+
+
+# During normal cephadm operations (cephadm ls, gather-facts, etc ) we use:
+# stdout: for JSON output only
+# stderr: for error, debug, info, etc
+logging_config = {
+    'version': 1,
+    'disable_existing_loggers': True,
+    'formatters': {
+        'cephadm': {
+            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
+        },
+    },
+    'handlers': {
+        'console': {
+            'level': 'INFO',
+            'class': 'logging.StreamHandler',
+        },
+        'log_file': {
+            'level': 'DEBUG',
+            'class': 'logging.handlers.WatchedFileHandler',
+            'formatter': 'cephadm',
+            'filename': '%s/cephadm.log' % LOG_DIR,
+        }
+    },
+    'loggers': {
+        '': {
+            'level': 'DEBUG',
+            'handlers': ['console', 'log_file'],
+        }
+    }
+}
+
+
+class ExcludeErrorsFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Only lets through log messages with log level below WARNING ."""
+        return record.levelno < logging.WARNING
+
+
+# When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
+# stdout: for debug and info
+# stderr: for errors and warnings
+interactive_logging_config = {
+    'version': 1,
+    'filters': {
+        'exclude_errors': {
+            '()': ExcludeErrorsFilter
+        }
+    },
+    'disable_existing_loggers': True,
+    'formatters': {
+        'cephadm': {
+            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
+        },
+    },
+    'handlers': {
+        'console_stdout': {
+            'level': 'INFO',
+            'class': 'logging.StreamHandler',
+            'filters': ['exclude_errors'],
+            'stream': sys.stdout
+        },
+        'console_stderr': {
+            'level': 'WARNING',
+            'class': 'logging.StreamHandler',
+            'stream': sys.stderr
+        },
+        'log_file': {
+            'level': 'DEBUG',
+            'class': 'logging.handlers.WatchedFileHandler',
+            'formatter': 'cephadm',
+            'filename': '%s/cephadm.log' % LOG_DIR,
+        }
+    },
+    'loggers': {
+        '': {
+            'level': 'DEBUG',
+            'handlers': ['console_stdout', 'console_stderr', 'log_file'],
+        }
+    }
+}
+
+
+class termcolor:
+    yellow = '\033[93m'
+    red = '\033[31m'
+    end = '\033[0m'
+
+
+class Error(Exception):
+    pass
+
+
+class ClusterAlreadyExists(Exception):
+    pass
+
+
+class TimeoutExpired(Error):
+    pass
+
+
+class UnauthorizedRegistryError(Error):
+    pass
+
+##################################
+
+
+class Ceph(object):
+    daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
+               'crash', 'cephfs-mirror', 'ceph-exporter')
+    gateways = ('iscsi', 'nfs', 'nvmeof')
+
+##################################
+
+
+class OSD(object):
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# allow a large number of OSDs',
+            'fs.aio-max-nr = 1048576',
+            'kernel.pid_max = 4194304',
+        ]
+
+
+##################################
+
+
+class SNMPGateway:
+    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
+    daemon_type = 'snmp-gateway'
+    SUPPORTED_VERSIONS = ['V2c', 'V3']
+    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
+    DEFAULT_PORT = 9464
+    env_filename = 'snmp-gateway.conf'
+
+    def __init__(self,
+                 ctx: CephadmContext,
+                 fsid: str,
+                 daemon_id: Union[int, str],
+                 config_json: Dict[str, Any],
+                 image: Optional[str] = None) -> None:
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image or SNMPGateway.default_image
+
+        self.uid = config_json.get('uid', 0)
+        self.gid = config_json.get('gid', 0)
+
+        self.destination = config_json.get('destination', '')
+        self.snmp_version = config_json.get('snmp_version', 'V2c')
+        self.snmp_community = config_json.get('snmp_community', 'public')
+        self.log_level = config_json.get('log_level', 'info')
+        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
+        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
+        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
+        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
+        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
+        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')
+
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx: CephadmContext, fsid: str,
+             daemon_id: Union[int, str]) -> 'SNMPGateway':
+        cfgs = fetch_configs(ctx)
+        assert cfgs  # assert some config data was found
+        return cls(ctx, fsid, daemon_id, cfgs, ctx.image)
+
+    @staticmethod
+    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
+        """Return the version of the notifier from it's http endpoint"""
+        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
+        try:
+            with open(path, 'r') as env:
+                metadata = json.loads(env.read())
+        except (OSError, json.JSONDecodeError):
+            return None
+
+        ports = metadata.get('ports', [])
+        if not ports:
+            return None
+
+        try:
+            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
+                html = r.read().decode('utf-8').split('\n')
+        except (HTTPError, URLError):
+            return None
+
+        for h in html:
+            stripped = h.strip()
+            if stripped.startswith(('<pre>', '<PRE>')) and \
+               stripped.endswith(('</pre>', '</PRE>')):
+                # <pre>(version=1.2.1, branch=HEAD, revision=7...
+                return stripped.split(',')[0].split('version=')[1]
+
+        return None
+
+    @property
+    def port(self) -> int:
+        endpoints = fetch_tcp_ports(self.ctx)
+        if not endpoints:
+            return self.DEFAULT_PORT
+        return endpoints[0].port
+
+    def get_daemon_args(self) -> List[str]:
+        v3_args = []
+        base_args = [
+            f'--web.listen-address=:{self.port}',
+            f'--snmp.destination={self.destination}',
+            f'--snmp.version={self.snmp_version}',
+            f'--log.level={self.log_level}',
+            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
+        ]
+
+        if self.snmp_version == 'V3':
+            # common auth settings
+            v3_args.extend([
+                '--snmp.authentication-enabled',
+                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
+                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
+            ])
+            # authPriv setting is applied if we have a privacy protocol setting
+            if self.snmp_v3_priv_protocol:
+                v3_args.extend([
+                    '--snmp.private-enabled',
+                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
+                ])
+
+        return base_args + v3_args
+
+    @property
+    def data_dir(self) -> str:
+        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')
+
+    @property
+    def conf_file_path(self) -> str:
+        return os.path.join(self.data_dir, self.env_filename)
+
+    def create_daemon_conf(self) -> None:
+        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
+        with write_new(self.conf_file_path) as f:
+            if self.snmp_version == 'V2c':
+                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
+            else:
+                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
+                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
+                if self.snmp_v3_priv_password:
+                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
+
+    def validate(self) -> None:
+        """Validate the settings
+
+        Raises:
+            Error: if the fsid doesn't look like an fsid
+            Error: if the snmp version is not supported
+            Error: destination IP and port address missing
+        """
+        if not is_fsid(self.fsid):
+            raise Error(f'not a valid fsid: {self.fsid}')
+
+        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
+            raise Error(f'not a valid snmp version: {self.snmp_version}')
+
+        if not self.destination:
+            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
+
+
+##################################
+class Monitoring(object):
+    """Define the configs for the monitoring containers"""
+
+    port_map = {
+        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
+        'node-exporter': [9100],
+        'grafana': [3000],
+        'alertmanager': [9093, 9094],
+        'loki': [3100],
+        'promtail': [9080]
+    }
+
+    components = {
+        'prometheus': {
+            'image': DEFAULT_PROMETHEUS_IMAGE,
+            'cpus': '2',
+            'memory': '4GB',
+            'args': [
+                '--config.file=/etc/prometheus/prometheus.yml',
+                '--storage.tsdb.path=/prometheus',
+            ],
+            'config-json-files': [
+                'prometheus.yml',
+            ],
+        },
+        'loki': {
+            'image': DEFAULT_LOKI_IMAGE,
+            'cpus': '1',
+            'memory': '1GB',
+            'args': [
+                '--config.file=/etc/loki/loki.yml',
+            ],
+            'config-json-files': [
+                'loki.yml'
+            ],
+        },
+        'promtail': {
+            'image': DEFAULT_PROMTAIL_IMAGE,
+            'cpus': '1',
+            'memory': '1GB',
+            'args': [
+                '--config.file=/etc/promtail/promtail.yml',
+            ],
+            'config-json-files': [
+                'promtail.yml',
+            ],
+        },
+        'node-exporter': {
+            'image': DEFAULT_NODE_EXPORTER_IMAGE,
+            'cpus': '1',
+            'memory': '1GB',
+            'args': [
+                '--no-collector.timex'
+            ],
+        },
+        'grafana': {
+            'image': DEFAULT_GRAFANA_IMAGE,
+            'cpus': '2',
+            'memory': '4GB',
+            'args': [],
+            'config-json-files': [
+                'grafana.ini',
+                'provisioning/datasources/ceph-dashboard.yml',
+                'certs/cert_file',
+                'certs/cert_key',
+            ],
+        },
+        'alertmanager': {
+            'image': DEFAULT_ALERT_MANAGER_IMAGE,
+            'cpus': '2',
+            'memory': '2GB',
+            'args': [
+                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
+            ],
+            'config-json-files': [
+                'alertmanager.yml',
+            ],
+            'config-json-args': [
+                'peers',
+            ],
+        },
+    }  # type: ignore
+
+    @staticmethod
+    def get_version(ctx, container_id, daemon_type):
+        # type: (CephadmContext, str, str) -> str
+        """
+        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
+        """
+        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
+        cmd = daemon_type.replace('-', '_')
+        code = -1
+        err = ''
+        out = ''
+        version = ''
+        if daemon_type == 'alertmanager':
+            for cmd in ['alertmanager', 'prometheus-alertmanager']:
+                out, err, code = call(ctx, [
+                    ctx.container_engine.path, 'exec', container_id, cmd,
+                    '--version'
+                ], verbosity=CallVerbosity.QUIET)
+                if code == 0:
+                    break
+            cmd = 'alertmanager'  # reset cmd for version extraction
+        else:
+            out, err, code = call(ctx, [
+                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
+            ], verbosity=CallVerbosity.QUIET)
+        if code == 0:
+            if err.startswith('%s, version ' % cmd):
+                version = err.split(' ')[2]
+            elif out.startswith('%s, version ' % cmd):
+                version = out.split(' ')[2]
+        return version
+
+##################################
+
+
+@contextmanager
+def write_new(
+    destination: Union[str, Path],
+    *,
+    owner: Optional[Tuple[int, int]] = None,
+    perms: Optional[int] = DEFAULT_MODE,
+    encoding: Optional[str] = None,
+) -> Generator[IO, None, None]:
+    """Write a new file in a robust manner, optionally specifying the owner,
+    permissions, or encoding. This function takes care to never leave a file in
+    a partially-written state due to a crash or power outage by writing to
+    temporary file and then renaming that temp file over to the final
+    destination once all data is written.  Note that the temporary files can be
+    leaked but only for a "crash" or power outage - regular exceptions will
+    clean up the temporary file.
+    """
+    destination = os.path.abspath(destination)
+    tempname = f'{destination}.new'
+    open_kwargs: Dict[str, Any] = {}
+    if encoding:
+        open_kwargs['encoding'] = encoding
+    try:
+        with open(tempname, 'w', **open_kwargs) as fh:
+            yield fh
+            fh.flush()
+            os.fsync(fh.fileno())
+            if owner is not None:
+                os.fchown(fh.fileno(), *owner)
+            if perms is not None:
+                os.fchmod(fh.fileno(), perms)
+    except Exception:
+        os.unlink(tempname)
+        raise
+    os.rename(tempname, destination)
+
+
+def populate_files(config_dir, config_files, uid, gid):
+    # type: (str, Dict, int, int) -> None
+    """create config files for different services"""
+    for fname in config_files:
+        config_file = os.path.join(config_dir, fname)
+        config_content = dict_get_join(config_files, fname)
+        logger.info('Write file: %s' % (config_file))
+        with write_new(config_file, owner=(uid, gid), encoding='utf-8') as f:
+            f.write(config_content)
+
+
+class NFSGanesha(object):
+    """Defines a NFS-Ganesha container"""
+
+    daemon_type = 'nfs'
+    entrypoint = '/usr/bin/ganesha.nfsd'
+    daemon_args = ['-F', '-L', 'STDERR']
+
+    required_files = ['ganesha.conf']
+
+    port_map = {
+        'nfs': 2049,
+    }
+
+    def __init__(self,
+                 ctx,
+                 fsid,
+                 daemon_id,
+                 config_json,
+                 image=DEFAULT_IMAGE):
+        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.pool = dict_get(config_json, 'pool', require=True)
+        self.namespace = dict_get(config_json, 'namespace')
+        self.userid = dict_get(config_json, 'userid')
+        self.extra_args = dict_get(config_json, 'extra_args', [])
+        self.files = dict_get(config_json, 'files', {})
+        self.rgw = dict_get(config_json, 'rgw', {})
+
+        # validate the supplied args
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx, fsid, daemon_id):
+        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
+        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+    def get_container_mounts(self, data_dir):
+        # type: (str) -> Dict[str, str]
+        mounts = dict()
+        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
+        if self.rgw:
+            cluster = self.rgw.get('cluster', 'ceph')
+            rgw_user = self.rgw.get('user', 'admin')
+            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
+                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
+        return mounts
+
+    @staticmethod
+    def get_container_envs():
+        # type: () -> List[str]
+        envs = [
+            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
+        ]
+        return envs
+
+    @staticmethod
+    def get_version(ctx, container_id):
+        # type: (CephadmContext, str) -> Optional[str]
+        version = None
+        out, err, code = call(ctx,
+                              [ctx.container_engine.path, 'exec', container_id,
+                               NFSGanesha.entrypoint, '-v'],
+                              verbosity=CallVerbosity.QUIET)
+        if code == 0:
+            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
+            if match:
+                version = match.group(1)
+        return version
+
+    def validate(self):
+        # type: () -> None
+        if not is_fsid(self.fsid):
+            raise Error('not an fsid: %s' % self.fsid)
+        if not self.daemon_id:
+            raise Error('invalid daemon_id: %s' % self.daemon_id)
+        if not self.image:
+            raise Error('invalid image: %s' % self.image)
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error('required file missing from config-json: %s' % fname)
+
+        # check for an RGW config
+        if self.rgw:
+            if not self.rgw.get('keyring'):
+                raise Error('RGW keyring is missing')
+            if not self.rgw.get('user'):
+                raise Error('RGW user is missing')
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+    def get_daemon_args(self):
+        # type: () -> List[str]
+        return self.daemon_args + self.extra_args
+
+    def create_daemon_dirs(self, data_dir, uid, gid):
+        # type: (str, int, int) -> None
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+        logger.info('Creating ganesha config...')
+
+        # create the ganesha conf dir
+        config_dir = os.path.join(data_dir, 'etc/ganesha')
+        makedirs(config_dir, uid, gid, 0o755)
+
+        # populate files from the config-json
+        populate_files(config_dir, self.files, uid, gid)
+
+        # write the RGW keyring
+        if self.rgw:
+            keyring_path = os.path.join(data_dir, 'keyring.rgw')
+            with write_new(keyring_path, owner=(uid, gid)) as f:
+                f.write(self.rgw.get('keyring', ''))
+
+##################################
+
+
+class CephIscsi(object):
+    """Defines a Ceph-Iscsi container"""
+
+    daemon_type = 'iscsi'
+    entrypoint = '/usr/bin/rbd-target-api'
+
+    required_files = ['iscsi-gateway.cfg']
+
+    def __init__(self,
+                 ctx,
+                 fsid,
+                 daemon_id,
+                 config_json,
+                 image=DEFAULT_IMAGE):
+        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.files = dict_get(config_json, 'files', {})
+
+        # validate the supplied args
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx, fsid, daemon_id):
+        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
+        return cls(ctx, fsid, daemon_id,
+                   fetch_configs(ctx), ctx.image)
+
+    @staticmethod
+    def get_container_mounts(data_dir, log_dir):
+        # type: (str, str) -> Dict[str, str]
+        mounts = dict()
+        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
+        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+        mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+        mounts[log_dir] = '/var/log:z'
+        mounts['/dev'] = '/dev'
+        return mounts
+
+    @staticmethod
+    def get_container_binds():
+        # type: () -> List[List[str]]
+        binds = []
+        lib_modules = ['type=bind',
+                       'source=/lib/modules',
+                       'destination=/lib/modules',
+                       'ro=true']
+        binds.append(lib_modules)
+        return binds
+
+    @staticmethod
+    def get_version(ctx, container_id):
+        # type: (CephadmContext, str) -> Optional[str]
+        version = None
+        out, err, code = call(ctx,
+                              [ctx.container_engine.path, 'exec', container_id,
+                               '/usr/bin/python3', '-c',
+                               "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
+                              verbosity=CallVerbosity.QUIET)
+        if code == 0:
+            version = out.strip()
+        return version
+
+    def validate(self):
+        # type: () -> None
+        if not is_fsid(self.fsid):
+            raise Error('not an fsid: %s' % self.fsid)
+        if not self.daemon_id:
+            raise Error('invalid daemon_id: %s' % self.daemon_id)
+        if not self.image:
+            raise Error('invalid image: %s' % self.image)
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error('required file missing from config-json: %s' % fname)
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+    def create_daemon_dirs(self, data_dir, uid, gid):
+        # type: (str, int, int) -> None
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+        logger.info('Creating ceph-iscsi config...')
+        configfs_dir = os.path.join(data_dir, 'configfs')
+        makedirs(configfs_dir, uid, gid, 0o755)
+
+        # set up the tcmu-runner entrypoint script
+        # to be mounted into the container. For more info
+        # on why we need this script, see the
+        # tcmu_runner_entrypoint_script function
+        self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script()
+
+        # populate files from the config-json
+        populate_files(data_dir, self.files, uid, gid)
+
+        # we want the tcmu runner entrypoint script to be executable
+        # populate_files will give it 0o600 by default
+        os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
+
+    @staticmethod
+    def configfs_mount_umount(data_dir, mount=True):
+        # type: (str, bool) -> List[str]
+        mount_path = os.path.join(data_dir, 'configfs')
+        if mount:
+            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
+                  'mount -t configfs none {0}; fi'.format(mount_path)
+        else:
+            cmd = 'if grep -qs {0} /proc/mounts; then ' \
+                  'umount {0}; fi'.format(mount_path)
+        return cmd.split()
+
+    @staticmethod
+    def tcmu_runner_entrypoint_script() -> str:
+        # since we are having tcmu-runner be a background
+        # process in its systemd unit (rbd-target-api being
+        # the main process) systemd will not restart it when
+        # it fails. in order to try and get around that for now
+        # we can have a script mounted in the container that
+        # that attempts to do the restarting for us. This script
+        # can then become the entrypoint for the tcmu-runner
+        # container
+
+        # This is intended to be dropped for a better solution
+        # for at least the squid release onward
+        return """#!/bin/bash
+RUN_DIR=/var/run/tcmu-runner
+
+if [ ! -d "${RUN_DIR}" ] ; then
+    mkdir -p "${RUN_DIR}"
+fi
+
+rm -rf "${RUN_DIR}"/*
+
+while true
+do
+    touch "${RUN_DIR}"/start-up-$(date -Ins)
+    /usr/bin/tcmu-runner
+
+    # If we got around 3 kills/segfaults in the last minute,
+    # don't start anymore
+    if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
+        exit 0
+    fi
+
+    sleep 1
+done
+"""
+
+    def get_tcmu_runner_container(self):
+        # type: () -> CephContainer
+        # daemon_id, is used to generated the cid and pid files used by podman but as both tcmu-runner
+        # and rbd-target-api have the same daemon_id, it conflits and prevent the second container from
+        # starting. .tcmu runner is appended to the daemon_id to fix that.
+        tcmu_container = get_deployment_container(self.ctx, self.fsid, self.daemon_type, str(self.daemon_id) + '.tcmu')
+        # TODO: Eventually we don't want to run tcmu-runner through this script.
+        # This is intended to be a workaround backported to older releases
+        # and should eventually be removed in at least squid onward
+        tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+        tcmu_container.cname = self.get_container_name(desc='tcmu')
+        return tcmu_container
+
+
+##################################
+
+
+class CephNvmeof(object):
+    """Defines a Ceph-Nvmeof container"""
+
+    daemon_type = 'nvmeof'
+    required_files = ['ceph-nvmeof.conf']
+    default_image = DEFAULT_NVMEOF_IMAGE
+
+    def __init__(self,
+                 ctx,
+                 fsid,
+                 daemon_id,
+                 config_json,
+                 image=DEFAULT_NVMEOF_IMAGE):
+        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.files = dict_get(config_json, 'files', {})
+
+        # validate the supplied args
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx, fsid, daemon_id):
+        # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
+        return cls(ctx, fsid, daemon_id,
+                   fetch_configs(ctx), ctx.image)
+
+    @staticmethod
+    def get_container_mounts(data_dir: str) -> Dict[str, str]:
+        mounts = dict()
+        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+        mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
+        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+        mounts['/dev/hugepages'] = '/dev/hugepages'
+        mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
+        return mounts
+
+    @staticmethod
+    def get_container_binds():
+        # type: () -> List[List[str]]
+        binds = []
+        lib_modules = ['type=bind',
+                       'source=/lib/modules',
+                       'destination=/lib/modules',
+                       'ro=true']
+        binds.append(lib_modules)
+        return binds
+
+    @staticmethod
+    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
+        out, err, ret = call(ctx,
+                             [ctx.container_engine.path, 'inspect',
+                              '--format', '{{index .Config.Labels "io.ceph.version"}}',
+                              ctx.image])
+        version = None
+        if ret == 0:
+            version = out.strip()
+        return version
+
+    def validate(self):
+        # type: () -> None
+        if not is_fsid(self.fsid):
+            raise Error('not an fsid: %s' % self.fsid)
+        if not self.daemon_id:
+            raise Error('invalid daemon_id: %s' % self.daemon_id)
+        if not self.image:
+            raise Error('invalid image: %s' % self.image)
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error('required file missing from config-json: %s' % fname)
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = '%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+    def create_daemon_dirs(self, data_dir, uid, gid):
+        # type: (str, int, int) -> None
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+        logger.info('Creating ceph-nvmeof config...')
+        configfs_dir = os.path.join(data_dir, 'configfs')
+        makedirs(configfs_dir, uid, gid, 0o755)
+
+        # populate files from the config-json
+        populate_files(data_dir, self.files, uid, gid)
+
+    @staticmethod
+    def configfs_mount_umount(data_dir, mount=True):
+        # type: (str, bool) -> List[str]
+        mount_path = os.path.join(data_dir, 'configfs')
+        if mount:
+            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
+                  'mount -t configfs none {0}; fi'.format(mount_path)
+        else:
+            cmd = 'if grep -qs {0} /proc/mounts; then ' \
+                  'umount {0}; fi'.format(mount_path)
+        return cmd.split()
+
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            'vm.nr_hugepages = 4096',
+        ]
+
+
+##################################
+
+
+class CephExporter(object):
+    """Defines a Ceph exporter container"""
+
+    daemon_type = 'ceph-exporter'
+    entrypoint = '/usr/bin/ceph-exporter'
+    DEFAULT_PORT = 9926
+    port_map = {
+        'ceph-exporter': DEFAULT_PORT,
+    }
+
+    def __init__(self,
+                 ctx: CephadmContext,
+                 fsid: str, daemon_id: Union[int, str],
+                 config_json: Dict[str, Any],
+                 image: str = DEFAULT_IMAGE) -> None:
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
+        ipv4_addrs, _ = get_ip_addresses(get_hostname())
+        addrs = '0.0.0.0' if ipv4_addrs else '::'
+        self.addrs = config_json.get('addrs', addrs)
+        self.port = config_json.get('port', self.DEFAULT_PORT)
+        self.prio_limit = config_json.get('prio-limit', 5)
+        self.stats_period = config_json.get('stats-period', 5)
+
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx: CephadmContext, fsid: str,
+             daemon_id: Union[int, str]) -> 'CephExporter':
+        return cls(ctx, fsid, daemon_id,
+                   fetch_configs(ctx), ctx.image)
+
+    @staticmethod
+    def get_container_mounts() -> Dict[str, str]:
+        mounts = dict()
+        mounts['/var/run/ceph'] = '/var/run/ceph:z'
+        return mounts
+
+    def get_daemon_args(self) -> List[str]:
+        args = [
+            f'--sock-dir={self.sock_dir}',
+            f'--addrs={self.addrs}',
+            f'--port={self.port}',
+            f'--prio-limit={self.prio_limit}',
+            f'--stats-period={self.stats_period}',
+        ]
+        return args
+
+    def validate(self) -> None:
+        if not os.path.isdir(self.sock_dir):
+            raise Error(f'Directory does not exist. Got: {self.sock_dir}')
+
+
+##################################
+
+
+class HAproxy(object):
+    """Defines an HAproxy container"""
+    daemon_type = 'haproxy'
+    required_files = ['haproxy.cfg']
+    default_image = DEFAULT_HAPROXY_IMAGE
+
+    def __init__(self,
+                 ctx: CephadmContext,
+                 fsid: str, daemon_id: Union[int, str],
+                 config_json: Dict, image: str) -> None:
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.files = dict_get(config_json, 'files', {})
+
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx: CephadmContext,
+             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
+        return cls(ctx, fsid, daemon_id, fetch_configs(ctx),
+                   ctx.image)
+
+    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+        # create additional directories in data dir for HAproxy to use
+        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
+            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)
+
+        data_dir = os.path.join(data_dir, 'haproxy')
+        populate_files(data_dir, self.files, uid, gid)
+
+    def get_daemon_args(self) -> List[str]:
+        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
+
+    def validate(self):
+        # type: () -> None
+        if not is_fsid(self.fsid):
+            raise Error('not an fsid: %s' % self.fsid)
+        if not self.daemon_id:
+            raise Error('invalid daemon_id: %s' % self.daemon_id)
+        if not self.image:
+            raise Error('invalid image: %s' % self.image)
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error('required file missing from config-json: %s' % fname)
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
+        # better directory for this?
+        return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+    @staticmethod
+    def get_container_mounts(data_dir: str) -> Dict[str, str]:
+        mounts = dict()
+        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
+        return mounts
+
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# IP forwarding and non-local bind',
+            'net.ipv4.ip_forward = 1',
+            'net.ipv4.ip_nonlocal_bind = 1',
+        ]
+
+##################################
+
+
+class Keepalived(object):
+    """Defines an Keepalived container"""
+    daemon_type = 'keepalived'
+    required_files = ['keepalived.conf']
+    default_image = DEFAULT_KEEPALIVED_IMAGE
+
+    def __init__(self,
+                 ctx: CephadmContext,
+                 fsid: str, daemon_id: Union[int, str],
+                 config_json: Dict, image: str) -> None:
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.files = dict_get(config_json, 'files', {})
+
+        self.validate()
+
+    @classmethod
+    def init(cls, ctx: CephadmContext, fsid: str,
+             daemon_id: Union[int, str]) -> 'Keepalived':
+        return cls(ctx, fsid, daemon_id,
+                   fetch_configs(ctx), ctx.image)
+
+    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+        # create additional directories in data dir for keepalived to use
+        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
+            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)
+
+        # populate files from the config-json
+        populate_files(data_dir, self.files, uid, gid)
+
+    def validate(self):
+        # type: () -> None
+        if not is_fsid(self.fsid):
+            raise Error('not an fsid: %s' % self.fsid)
+        if not self.daemon_id:
+            raise Error('invalid daemon_id: %s' % self.daemon_id)
+        if not self.image:
+            raise Error('invalid image: %s' % self.image)
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error('required file missing from config-json: %s' % fname)
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+    @staticmethod
+    def get_container_envs():
+        # type: () -> List[str]
+        envs = [
+            'KEEPALIVED_AUTOCONF=false',
+            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
+            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
+            'KEEPALIVED_DEBUG=false'
+        ]
+        return envs
+
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# IP forwarding and non-local bind',
+            'net.ipv4.ip_forward = 1',
+            'net.ipv4.ip_nonlocal_bind = 1',
+        ]
+
+    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
+        # better directory for this?
+        return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+    @staticmethod
+    def get_container_mounts(data_dir: str) -> Dict[str, str]:
+        mounts = dict()
+        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
+        return mounts
+
+##################################
+
+
+class Tracing(object):
+    """Define the configs for the jaeger tracing containers"""
+
+    components: Dict[str, Dict[str, Any]] = {
+        'elasticsearch': {
+            'image': DEFAULT_ELASTICSEARCH_IMAGE,
+            'envs': ['discovery.type=single-node']
+        },
+        'jaeger-agent': {
+            'image': DEFAULT_JAEGER_AGENT_IMAGE,
+        },
+        'jaeger-collector': {
+            'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
+        },
+        'jaeger-query': {
+            'image': DEFAULT_JAEGER_QUERY_IMAGE,
+        },
+    }  # type: ignore
+
+    @staticmethod
+    def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
+        if daemon_type in ['jaeger-collector', 'jaeger-query']:
+            assert 'elasticsearch_nodes' in config
+            Tracing.components[daemon_type]['envs'] = [
+                'SPAN_STORAGE_TYPE=elasticsearch',
+                f'ES_SERVER_URLS={config["elasticsearch_nodes"]}']
+        if daemon_type == 'jaeger-agent':
+            assert 'collector_nodes' in config
+            Tracing.components[daemon_type]['daemon_args'] = [
+                f'--reporter.grpc.host-port={config["collector_nodes"]}',
+                '--processor.jaeger-compact.server-host-port=6799'
+            ]
+
+##################################
+
+
+class CustomContainer(object):
+    """Defines a custom container"""
+    daemon_type = 'container'
+
+    def __init__(self,
+                 fsid: str, daemon_id: Union[int, str],
+                 config_json: Dict, image: str) -> None:
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+        # config-json options
+        self.entrypoint = dict_get(config_json, 'entrypoint')
+        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
+        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
+        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
+        self.args = dict_get(config_json, 'args', [])
+        self.envs = dict_get(config_json, 'envs', [])
+        self.privileged = dict_get(config_json, 'privileged', False)
+        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
+        self.ports = dict_get(config_json, 'ports', [])
+        self.dirs = dict_get(config_json, 'dirs', [])
+        self.files = dict_get(config_json, 'files', {})
+
+    @classmethod
+    def init(cls, ctx: CephadmContext,
+             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
+        return cls(fsid, daemon_id,
+                   fetch_configs(ctx), ctx.image)
+
+    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+        """
+        Create dirs/files below the container data directory.
+        """
+        logger.info('Creating custom container configuration '
+                    'dirs/files in {} ...'.format(data_dir))
+
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % data_dir)
+
+        for dir_path in self.dirs:
+            logger.info('Creating directory: {}'.format(dir_path))
+            dir_path = os.path.join(data_dir, dir_path.strip('/'))
+            makedirs(dir_path, uid, gid, 0o755)
+
+        for file_path in self.files:
+            logger.info('Creating file: {}'.format(file_path))
+            content = dict_get_join(self.files, file_path)
+            file_path = os.path.join(data_dir, file_path.strip('/'))
+            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
+                f.write(content)
+
+    def get_daemon_args(self) -> List[str]:
+        return []
+
+    def get_container_args(self) -> List[str]:
+        return self.args
+
+    def get_container_envs(self) -> List[str]:
+        return self.envs
+
+    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+        """
+        Get the volume mounts. Relative source paths will be located below
+        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+        Example:
+        {
+            /foo/conf: /conf
+            foo/conf: /conf
+        }
+        becomes
+        {
+            /foo/conf: /conf
+            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
+        }
+        """
+        mounts = {}
+        for source, destination in self.volume_mounts.items():
+            source = os.path.join(data_dir, source)
+            mounts[source] = destination
+        return mounts
+
+    def get_container_binds(self, data_dir: str) -> List[List[str]]:
+        """
+        Get the bind mounts. Relative `source=...` paths will be located below
+        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+        Example:
+        [
+            'type=bind',
+            'source=lib/modules',
+            'destination=/lib/modules',
+            'ro=true'
+        ]
+        becomes
+        [
+            ...
+            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
+            ...
+        ]
+        """
+        binds = self.bind_mounts.copy()
+        for bind in binds:
+            for index, value in enumerate(bind):
+                match = re.match(r'^source=(.+)$', value)
+                if match:
+                    bind[index] = 'source={}'.format(os.path.join(
+                        data_dir, match.group(1)))
+        return binds
+
+##################################
+
+
+def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
+    Path(file_path).touch()
+    if uid and gid:
+        os.chown(file_path, uid, gid)
+
+
+##################################
+
+
+def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
+    """
+    Helper function to get a key from a dictionary.
+    :param d: The dictionary to process.
+    :param key: The name of the key to get.
+    :param default: The default value in case the key does not
+        exist. Default is `None`.
+    :param require: Set to `True` if the key is required. An
+        exception will be raised if the key does not exist in
+        the given dictionary.
+    :return: Returns the value of the given key.
+    :raises: :exc:`self.Error` if the given key does not exist
+        and `require` is set to `True`.
+    """
+    if require and key not in d.keys():
+        raise Error('{} missing from dict'.format(key))
+    return d.get(key, default)  # type: ignore
+
+##################################
+
+
+def dict_get_join(d: Dict[str, Any], key: str) -> Any:
+    """
+    Helper function to get the value of a given key from a dictionary.
+    `List` values will be converted to a string by joining them with a
+    line break.
+    :param d: The dictionary to process.
+    :param key: The name of the key to get.
+    :return: Returns the value of the given key. If it was a `list`, it
+        will be joining with a line break.
+    """
+    value = d.get(key)
+    if isinstance(value, list):
+        value = '\n'.join(map(str, value))
+    return value
+
+##################################
+
+
+def get_supported_daemons():
+    # type: () -> List[str]
+    supported_daemons = list(Ceph.daemons)
+    supported_daemons.extend(Monitoring.components)
+    supported_daemons.append(NFSGanesha.daemon_type)
+    supported_daemons.append(CephIscsi.daemon_type)
+    supported_daemons.append(CephNvmeof.daemon_type)
+    supported_daemons.append(CustomContainer.daemon_type)
+    supported_daemons.append(HAproxy.daemon_type)
+    supported_daemons.append(Keepalived.daemon_type)
+    supported_daemons.append(CephadmAgent.daemon_type)
+    supported_daemons.append(SNMPGateway.daemon_type)
+    supported_daemons.extend(Tracing.components)
+    assert len(supported_daemons) == len(set(supported_daemons))
+    return supported_daemons
+
+##################################
+
+
+class PortOccupiedError(Error):
+    pass
+
+
+def attempt_bind(ctx, s, address, port):
+    # type: (CephadmContext, socket.socket, str, int) -> None
+    try:
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        s.bind((address, port))
+    except OSError as e:
+        if e.errno == errno.EADDRINUSE:
+            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
+            logger.warning(msg)
+            raise PortOccupiedError(msg)
+        else:
+            raise e
+    except Exception as e:
+        raise Error(e)
+    finally:
+        s.close()
+
+
+def port_in_use(ctx: CephadmContext, endpoint: EndPoint) -> bool:
+    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
+    logger.info('Verifying port %s ...' % str(endpoint))
+
+    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
+        try:
+            s = socket.socket(af, socket.SOCK_STREAM)
+            attempt_bind(ctx, s, address, endpoint.port)
+        except PortOccupiedError:
+            return True
+        except OSError as e:
+            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
+                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
+                # being tested here and one might be intentionally be disabled.
+                # In that case no error should be raised.
+                return False
+            else:
+                raise e
+        return False
+
+    if endpoint.ip != '0.0.0.0' and endpoint.ip != '::':
+        if is_ipv6(endpoint.ip):
+            return _port_in_use(socket.AF_INET6, endpoint.ip)
+        else:
+            return _port_in_use(socket.AF_INET, endpoint.ip)
+
+    return any(_port_in_use(af, address) for af, address in (
+        (socket.AF_INET, '0.0.0.0'),
+        (socket.AF_INET6, '::')
+    ))
+
+
+def check_ip_port(ctx, ep):
+    # type: (CephadmContext, EndPoint) -> None
+    if not ctx.skip_ping_check:
+        logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
+        if is_ipv6(ep.ip):
+            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+            ip = unwrap_ipv6(ep.ip)
+        else:
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            ip = ep.ip
+        attempt_bind(ctx, s, ip, ep.port)
+
+##################################
+
+
+# this is an abbreviated version of
+# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
+# that drops all of the compatibility (this is Unix/Linux only).
+
+class Timeout(TimeoutError):
+    """
+    Raised when the lock could not be acquired in *timeout*
+    seconds.
+    """
+
+    def __init__(self, lock_file: str) -> None:
+        """
+        """
+        #: The path of the file lock.
+        self.lock_file = lock_file
+        return None
+
+    def __str__(self) -> str:
+        temp = "The file lock '{}' could not be acquired."\
+               .format(self.lock_file)
+        return temp
+
+
+class _Acquire_ReturnProxy(object):
+    def __init__(self, lock: 'FileLock') -> None:
+        self.lock = lock
+        return None
+
+    def __enter__(self) -> 'FileLock':
+        return self.lock
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.lock.release()
+        return None
+
+
+class FileLock(object):
+    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
+        if not os.path.exists(LOCK_DIR):
+            os.mkdir(LOCK_DIR, 0o700)
+        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
+        self.ctx = ctx
+
+        # The file descriptor for the *_lock_file* as it is returned by the
+        # os.open() function.
+        # This file lock is only NOT None, if the object currently holds the
+        # lock.
+        self._lock_file_fd: Optional[int] = None
+        self.timeout = timeout
+        # The lock counter is used for implementing the nested locking
+        # mechanism. Whenever the lock is acquired, the counter is increased and
+        # the lock is only released, when this value is 0 again.
+        self._lock_counter = 0
+        return None
+
+    @property
+    def is_locked(self) -> bool:
+        return self._lock_file_fd is not None
+
+    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
+        """
+        Acquires the file lock or fails with a :exc:`Timeout` error.
+        .. code-block:: python
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+        :arg float timeout:
+            The maximum time waited for the file lock.
+            If ``timeout < 0``, there is no timeout and this method will
+            block until the lock could be acquired.
+            If ``timeout`` is None, the default :attr:`~timeout` is used.
+        :arg float poll_intervall:
+            We check once in *poll_intervall* seconds if we can acquire the
+            file lock.
+        :raises Timeout:
+            if the lock could not be acquired in *timeout* seconds.
+        .. versionchanged:: 2.0.0
+            This method returns now a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+        """
+
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self.timeout
+
+        # Increment the number right at the beginning.
+        # We can still undo it, if something fails.
+        self._lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self._lock_file
+        start_time = time.time()
+        try:
+            while True:
+                if not self.is_locked:
+                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
+                               lock_filename)
+                    self._acquire()
+
+                if self.is_locked:
+                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
+                               lock_filename)
+                    break
+                elif timeout >= 0 and time.time() - start_time > timeout:
+                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
+                                   lock_filename)
+                    raise Timeout(self._lock_file)
+                else:
+                    logger.log(
+                        QUIET_LOG_LEVEL,
+                        'Lock %s not acquired on %s, waiting %s seconds ...',
+                        lock_id, lock_filename, poll_intervall
+                    )
+                    time.sleep(poll_intervall)
+        except Exception:
+            # Something did go wrong, so decrement the counter.
+            self._lock_counter = max(0, self._lock_counter - 1)
+
+            raise
+        return _Acquire_ReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:
+        """
+        Releases the file lock.
+        Please note, that the lock is only completely released, if the lock
+        counter is 0.
+        Also note, that the lock file itself is not automatically deleted.
+        :arg bool force:
+            If true, the lock counter is ignored and the lock is released in
+            every case.
+        """
+        if self.is_locked:
+            self._lock_counter -= 1
+
+            if self._lock_counter == 0 or force:
+                # lock_id = id(self)
+                # lock_filename = self._lock_file
+
+                # Can't log in shutdown:
+                #  File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
+                #    NameError: name 'open' is not defined
+                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
+                self._release()
+                self._lock_counter = 0
+                # logger.debug('Lock %s released on %s', lock_id, lock_filename)
+
+        return None
+
+    def __enter__(self) -> 'FileLock':
+        self.acquire()
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.release()
+        return None
+
+    def __del__(self) -> None:
+        self.release(force=True)
+        return None
+
+    def _acquire(self) -> None:
+        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+        fd = os.open(self._lock_file, open_mode)
+
+        try:
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (IOError, OSError):
+            os.close(fd)
+        else:
+            self._lock_file_fd = fd
+        return None
+
+    def _release(self) -> None:
+        # Do not remove the lockfile:
+        #
+        #   https://github.com/benediktschmitt/py-filelock/issues/31
+        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+        fd = self._lock_file_fd
+        self._lock_file_fd = None
+        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
+        os.close(fd)  # type: ignore
+        return None
+
+
+##################################
+# Popen wrappers, lifted from ceph-volume
+
+class CallVerbosity(Enum):
+    #####
+    # Format:
+    # Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
+    #
+    # NOTE: QUIET log level is custom level only used when --verbose is passed
+    #####
+
+    # Normal Operation: None, Errors: None
+    SILENT = 0
+    # Normal Operation: QUIET, Error: QUIET
+    QUIET = 1
+    # Normal Operation: DEBUG, Error: DEBUG
+    DEBUG = 2
+    # Normal Operation: QUIET, Error: INFO
+    QUIET_UNLESS_ERROR = 3
+    # Normal Operation: DEBUG, Error: INFO
+    VERBOSE_ON_FAILURE = 4
+    # Normal Operation: INFO, Error: INFO
+    VERBOSE = 5
+
+    def success_log_level(self) -> int:
+        _verbosity_level_to_log_level = {
+            self.SILENT: 0,
+            self.QUIET: QUIET_LOG_LEVEL,
+            self.DEBUG: logging.DEBUG,
+            self.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL,
+            self.VERBOSE_ON_FAILURE: logging.DEBUG,
+            self.VERBOSE: logging.INFO
+        }
+        return _verbosity_level_to_log_level[self]  # type: ignore
+
+    def error_log_level(self) -> int:
+        _verbosity_level_to_log_level = {
+            self.SILENT: 0,
+            self.QUIET: QUIET_LOG_LEVEL,
+            self.DEBUG: logging.DEBUG,
+            self.QUIET_UNLESS_ERROR: logging.INFO,
+            self.VERBOSE_ON_FAILURE: logging.INFO,
+            self.VERBOSE: logging.INFO
+        }
+        return _verbosity_level_to_log_level[self]  # type: ignore
+
+
+# disable coverage for the next block. this is copy-n-paste
+# from other code for compatibilty on older python versions
+if sys.version_info < (3, 8):  # pragma: no cover
+    import itertools
+    import threading
+    import warnings
+    from asyncio import events
+
+    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
+        """Threaded child watcher implementation.
+        The watcher uses a thread per process
+        for waiting for the process finish.
+        It doesn't require subscription on POSIX signal
+        but a thread creation is not free.
+        The watcher has O(1) complexity, its performance doesn't depend
+        on amount of spawn processes.
+        """
+
+        def __init__(self) -> None:
+            self._pid_counter = itertools.count(0)
+            self._threads: Dict[Any, Any] = {}
+
+        def is_active(self) -> bool:
+            return True
+
+        def close(self) -> None:
+            self._join_threads()
+
+        def _join_threads(self) -> None:
+            """Internal: Join all non-daemon threads"""
+            threads = [thread for thread in list(self._threads.values())
+                       if thread.is_alive() and not thread.daemon]
+            for thread in threads:
+                thread.join()
+
+        def __enter__(self) -> Any:
+            return self
+
+        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+            pass
+
+        def __del__(self, _warn: Any = warnings.warn) -> None:
+            threads = [thread for thread in list(self._threads.values())
+                       if thread.is_alive()]
+            if threads:
+                _warn(f'{self.__class__} has registered but not finished child processes',
+                      ResourceWarning,
+                      source=self)
+
+        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
+            loop = events.get_event_loop()
+            thread = threading.Thread(target=self._do_waitpid,
+                                      name=f'waitpid-{next(self._pid_counter)}',
+                                      args=(loop, pid, callback, args),
+                                      daemon=True)
+            self._threads[pid] = thread
+            thread.start()
+
+        def remove_child_handler(self, pid: Any) -> bool:
+            # asyncio never calls remove_child_handler() !!!
+            # The method is no-op but is implemented because
+            # abstract base classe requires it
+            return True
+
+        def attach_loop(self, loop: Any) -> None:
+            pass
+
+        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
+            assert expected_pid > 0
+
+            try:
+                pid, status = os.waitpid(expected_pid, 0)
+            except ChildProcessError:
+                # The child process is already reaped
+                # (may happen if waitpid() is called elsewhere).
+                pid = expected_pid
+                returncode = 255
+                logger.warning(
+                    'Unknown child process pid %d, will report returncode 255',
+                    pid)
+            else:
+                if os.WIFEXITED(status):
+                    returncode = os.WEXITSTATUS(status)
+                elif os.WIFSIGNALED(status):
+                    returncode = -os.WTERMSIG(status)
+                else:
+                    raise ValueError(f'unknown wait status {status}')
+                if loop.get_debug():
+                    logger.debug('process %s exited with returncode %s',
+                                 expected_pid, returncode)
+
+            if loop.is_closed():
+                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
+            else:
+                loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+            self._threads.pop(expected_pid)
+
+    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
+    # ThreadedChildWatcher runs in a separated thread, hence allows us to
+    # run create_subprocess_exec() in non-main thread, see
+    # https://bugs.python.org/issue35621
+    asyncio.set_child_watcher(ThreadedChildWatcher())
+
+
+try:
+    from asyncio import run as async_run   # type: ignore[attr-defined]
+except ImportError:  # pragma: no cover
+    # disable coverage for this block. it should be a copy-n-paste from
+    # from newer libs for compatibilty on older python versions
+    def async_run(coro):  # type: ignore
+        loop = asyncio.new_event_loop()
+        try:
+            asyncio.set_event_loop(loop)
+            return loop.run_until_complete(coro)
+        finally:
+            try:
+                loop.run_until_complete(loop.shutdown_asyncgens())
+            finally:
+                asyncio.set_event_loop(None)
+                loop.close()
+
+
+def call(ctx: CephadmContext,
+         command: List[str],
+         desc: Optional[str] = None,
+         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
+         timeout: Optional[int] = DEFAULT_TIMEOUT,
+         **kwargs: Any) -> Tuple[str, str, int]:
+    """
+    Wrap subprocess.Popen to
+
+    - log stdout/stderr to a logger,
+    - decode utf-8
+    - cleanly return out, err, returncode
+
+    :param timeout: timeout in seconds
+    """
+
+    prefix = command[0] if desc is None else desc
+    if prefix:
+        prefix += ': '
+    timeout = timeout or ctx.timeout
+
+    async def run_with_timeout() -> Tuple[str, str, int]:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+            env=os.environ.copy())
+        assert process.stdout
+        assert process.stderr
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                process.communicate(),
+                timeout,
+            )
+        except asyncio.TimeoutError:
+            # try to terminate the process assuming it is still running.  It's
+            # possible that even after killing the process it will not
+            # complete, particularly if it is D-state.  If that happens the
+            # process.wait call will block, but we're no worse off than before
+            # when the timeout did not work.  Additionally, there are other
+            # corner-cases we could try and handle here but we decided to start
+            # simple.
+            process.kill()
+            await process.wait()
+            logger.info(prefix + f'timeout after {timeout} seconds')
+            return '', '', 124
+        else:
+            assert process.returncode is not None
+            return (
+                stdout.decode('utf-8'),
+                stderr.decode('utf-8'),
+                process.returncode,
+            )
+
+    stdout, stderr, returncode = async_run(run_with_timeout())
+    log_level = verbosity.success_log_level()
+    if returncode != 0:
+        log_level = verbosity.error_log_level()
+        logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
+    for line in stdout.splitlines():
+        logger.log(log_level, prefix + 'stdout ' + line)
+    for line in stderr.splitlines():
+        logger.log(log_level, prefix + 'stderr ' + line)
+    return stdout, stderr, returncode
+
+
+def call_throws(
+        ctx: CephadmContext,
+        command: List[str],
+        desc: Optional[str] = None,
+        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
+        timeout: Optional[int] = DEFAULT_TIMEOUT,
+        **kwargs: Any) -> Tuple[str, str, int]:
+    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
+    if ret:
+        for s in (out, err):
+            if s.strip() and len(s.splitlines()) <= 2:  # readable message?
+                raise RuntimeError(f'Failed command: {" ".join(command)}: {s}')
+        raise RuntimeError('Failed command: %s' % ' '.join(command))
+    return out, err, ret
+
+
+def call_timeout(ctx, command, timeout):
+    # type: (CephadmContext, List[str], int) -> int
+    logger.debug('Running command (timeout=%s): %s'
+                 % (timeout, ' '.join(command)))
+
+    def raise_timeout(command, timeout):
+        # type: (List[str], int) -> NoReturn
+        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
+        logger.debug(msg)
+        raise TimeoutExpired(msg)
+
+    try:
+        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
+    except subprocess.TimeoutExpired:
+        raise_timeout(command, timeout)
+
+##################################
+
+
+def json_loads_retry(cli_func: Callable[[], str]) -> Any:
+    for sleep_secs in [1, 4, 4]:
+        try:
+            return json.loads(cli_func())
+        except json.JSONDecodeError:
+            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
+            time.sleep(sleep_secs)
+    return json.loads(cli_func())
+
+
+def is_available(ctx, what, func):
+    # type: (CephadmContext, str, Callable[[], bool]) -> None
+    """
+    Wait for a service to become available
+
+    :param what: the name of the service
+    :param func: the callable object that determines availability
+    """
+    retry = ctx.retry
+    logger.info('Waiting for %s...' % what)
+    num = 1
+    while True:
+        if func():
+            logger.info('%s is available'
+                        % what)
+            break
+        elif num > retry:
+            raise Error('%s not available after %s tries'
+                        % (what, retry))
+
+        logger.info('%s not available, waiting (%s/%s)...'
+                    % (what, num, retry))
+
+        num += 1
+        time.sleep(2)
+
+
+def read_config(fn):
+    # type: (Optional[str]) -> ConfigParser
+    cp = ConfigParser()
+    if fn:
+        cp.read(fn)
+    return cp
+
+
+def pathify(p):
+    # type: (str) -> str
+    p = os.path.expanduser(p)
+    return os.path.abspath(p)
+
+
+def get_file_timestamp(fn):
+    # type: (str) -> Optional[str]
+    try:
+        mt = os.path.getmtime(fn)
+        return datetime.datetime.fromtimestamp(
+            mt, tz=datetime.timezone.utc
+        ).strftime(DATEFMT)
+    except Exception:
+        return None
+
+
+def try_convert_datetime(s):
+    # type: (str) -> Optional[str]
+    # This is super irritating because
+    #  1) podman and docker use different formats
+    #  2) python's strptime can't parse either one
+    #
+    # I've seen:
+    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
+    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
+    #                   2020-03-03 15:52:30.136257504 -0600 CST
+    # (In the podman case, there is a different string format for
+    # 'inspect' and 'inspect --format {{.Created}}'!!)
+
+    # In *all* cases, the 9 digit second precision is too much for
+    # python's strptime.  Shorten it to 6 digits.
+    p = re.compile(r'(\.[\d]{6})[\d]*')
+    s = p.sub(r'\1', s)
+
+    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
+    if s and s[-1] == 'Z':
+        s = s[:-1] + '-0000'
+
+    # cut off the redundant 'CST' part that strptime can't parse, if
+    # present.
+    v = s.split(' ')
+    s = ' '.join(v[0:3])
+
+    # try parsing with several format strings
+    fmts = [
+        '%Y-%m-%dT%H:%M:%S.%f%z',
+        '%Y-%m-%d %H:%M:%S.%f %z',
+    ]
+    for f in fmts:
+        try:
+            # return timestamp normalized to UTC, rendered as DATEFMT.
+            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
+        except ValueError:
+            pass
+    return None
+
+
+def _parse_podman_version(version_str):
+    # type: (str) -> Tuple[int, ...]
+    def to_int(val: str, org_e: Optional[Exception] = None) -> int:
+        if not val and org_e:
+            raise org_e
+        try:
+            return int(val)
+        except ValueError as e:
+            return to_int(val[0:-1], org_e or e)
+
+    return tuple(map(to_int, version_str.split('.')))
+
+
+def get_hostname():
+    # type: () -> str
+    return socket.gethostname()
+
+
+def get_short_hostname():
+    # type: () -> str
+    return get_hostname().split('.', 1)[0]
+
+
+def get_fqdn():
+    # type: () -> str
+    return socket.getfqdn() or socket.gethostname()
+
+
+def get_ip_addresses(hostname: str) -> Tuple[List[str], List[str]]:
+    items = socket.getaddrinfo(hostname, None,
+                               flags=socket.AI_CANONNAME,
+                               type=socket.SOCK_STREAM)
+    ipv4_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET]
+    ipv6_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET6]
+    return ipv4_addresses, ipv6_addresses
+
+
+def get_arch():
+    # type: () -> str
+    return platform.uname().machine
+
+
+def generate_service_id():
+    # type: () -> str
+    return get_short_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
+                                                for _ in range(6))
+
+
+def generate_password():
+    # type: () -> str
+    return ''.join(random.choice(string.ascii_lowercase + string.digits)
+                   for i in range(10))
+
+
+def normalize_container_id(i):
+    # type: (str) -> str
+    # docker adds the sha256: prefix, but AFAICS both
+    # docker (18.09.7 in bionic at least) and podman
+    # both always use sha256, so leave off the prefix
+    # for consistency.
+    prefix = 'sha256:'
+    if i.startswith(prefix):
+        i = i[len(prefix):]
+    return i
+
+
+def make_fsid():
+    # type: () -> str
+    return str(uuid.uuid1())
+
+
+def is_fsid(s):
+    # type: (str) -> bool
+    try:
+        uuid.UUID(s)
+    except ValueError:
+        return False
+    return True
+
+
+def validate_fsid(func: FuncT) -> FuncT:
+    @wraps(func)
+    def _validate_fsid(ctx: CephadmContext) -> Any:
+        if 'fsid' in ctx and ctx.fsid:
+            if not is_fsid(ctx.fsid):
+                raise Error('not an fsid: %s' % ctx.fsid)
+        return func(ctx)
+    return cast(FuncT, _validate_fsid)
+
+
+def infer_fsid(func: FuncT) -> FuncT:
+    """
+    If we only find a single fsid in /var/lib/ceph/*, use that
+    """
+    @infer_config
+    @wraps(func)
+    def _infer_fsid(ctx: CephadmContext) -> Any:
+        if 'fsid' in ctx and ctx.fsid:
+            logger.debug('Using specified fsid: %s' % ctx.fsid)
+            return func(ctx)
+
+        fsids = set()
+
+        cp = read_config(ctx.config)
+        if cp.has_option('global', 'fsid'):
+            fsids.add(cp.get('global', 'fsid'))
+
+        daemon_list = list_daemons(ctx, detail=False)
+        for daemon in daemon_list:
+            if not is_fsid(daemon['fsid']):
+                # 'unknown' fsid
+                continue
+            elif 'name' not in ctx or not ctx.name:
+                # ctx.name not specified
+                fsids.add(daemon['fsid'])
+            elif daemon['name'] == ctx.name:
+                # ctx.name is a match
+                fsids.add(daemon['fsid'])
+        fsids = sorted(fsids)
+
+        if not fsids:
+            # some commands do not always require an fsid
+            pass
+        elif len(fsids) == 1:
+            logger.info('Inferring fsid %s' % fsids[0])
+            ctx.fsid = fsids[0]
+        else:
+            raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
+        return func(ctx)
+
+    return cast(FuncT, _infer_fsid)
+
+
+def infer_config(func: FuncT) -> FuncT:
+    """
+    Infer the cluster configuration using the following priority order:
+     1- if the user has provided custom conf file (-c option) use it
+     2- otherwise if daemon --name has been provided use daemon conf
+     3- otherwise find the mon daemon conf file and use it (if v1)
+     4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
+     5- finally: fallback to the default file /etc/ceph/ceph.conf
+    """
+    @wraps(func)
+    def _infer_config(ctx: CephadmContext) -> Any:
+
+        def config_path(daemon_type: str, daemon_name: str) -> str:
+            data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
+            return os.path.join(data_dir, 'config')
+
+        def get_mon_daemon_name(fsid: str) -> Optional[str]:
+            daemon_list = list_daemons(ctx, detail=False)
+            for daemon in daemon_list:
+                if (
+                    daemon.get('name', '').startswith('mon.')
+                    and daemon.get('fsid', '') == fsid
+                    and daemon.get('style', '') == 'cephadm:v1'
+                    and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
+                ):
+                    return daemon['name']
+            return None
+
+        ctx.config = ctx.config if 'config' in ctx else None
+        #  check if user has provided conf by using -c option
+        if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
+            logger.debug(f'Using specified config: {ctx.config}')
+            return func(ctx)
+
+        if 'fsid' in ctx and ctx.fsid:
+            name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
+            if name is not None:
+                # daemon name has been specified (or inferred from mon), let's use its conf
+                ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
+            else:
+                # no daemon, in case the cluster has a config dir then use it
+                ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
+                if os.path.exists(ceph_conf):
+                    ctx.config = ceph_conf
+
+        if ctx.config:
+            logger.info(f'Inferring config {ctx.config}')
+        elif os.path.exists(CEPH_DEFAULT_CONF):
+            logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
+            ctx.config = CEPH_DEFAULT_CONF
+        return func(ctx)
+
+    return cast(FuncT, _infer_config)
+
+
+def _get_default_image(ctx: CephadmContext) -> str:
+    if DEFAULT_IMAGE_IS_MAIN:
+        warn = """This is a development version of cephadm.
+For information regarding the latest stable release:
+    https://docs.ceph.com/docs/{}/cephadm/install
+""".format(LATEST_STABLE_RELEASE)
+        for line in warn.splitlines():
+            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
+    return DEFAULT_IMAGE
+
+
+def infer_image(func: FuncT) -> FuncT:
+    """
+    Use the most recent ceph image
+    """
+    @wraps(func)
+    def _infer_image(ctx: CephadmContext) -> Any:
+        if not ctx.image:
+            ctx.image = os.environ.get('CEPHADM_IMAGE')
+        if not ctx.image:
+            ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
+        if not ctx.image:
+            ctx.image = _get_default_image(ctx)
+        return func(ctx)
+
+    return cast(FuncT, _infer_image)
+
+
+def require_image(func: FuncT) -> FuncT:
+    """
+    Require the global --image flag to be set
+    """
+    @wraps(func)
+    def _require_image(ctx: CephadmContext) -> Any:
+        if not ctx.image:
+            raise Error('This command requires the global --image option to be set')
+        return func(ctx)
+
+    return cast(FuncT, _require_image)
+
+
+def default_image(func: FuncT) -> FuncT:
+    @wraps(func)
+    def _default_image(ctx: CephadmContext) -> Any:
+        update_default_image(ctx)
+        return func(ctx)
+
+    return cast(FuncT, _default_image)
+
+
+def update_default_image(ctx: CephadmContext) -> None:
+    if getattr(ctx, 'image', None):
+        return
+    ctx.image = None  # ensure ctx.image exists to avoid repeated `getattr`s
+    name = getattr(ctx, 'name', None)
+    if name:
+        type_ = name.split('.', 1)[0]
+        if type_ in Monitoring.components:
+            ctx.image = Monitoring.components[type_]['image']
+        if type_ == 'haproxy':
+            ctx.image = HAproxy.default_image
+        if type_ == 'keepalived':
+            ctx.image = Keepalived.default_image
+        if type_ == SNMPGateway.daemon_type:
+            ctx.image = SNMPGateway.default_image
+        if type_ == CephNvmeof.daemon_type:
+            ctx.image = CephNvmeof.default_image
+        if type_ in Tracing.components:
+            ctx.image = Tracing.components[type_]['image']
+    if not ctx.image:
+        ctx.image = os.environ.get('CEPHADM_IMAGE')
+    if not ctx.image:
+        ctx.image = _get_default_image(ctx)
+
+
+def executes_early(func: FuncT) -> FuncT:
+    """Decorator that indicates the command function is meant to have no
+    dependencies and no environmental requirements and can therefore be
+    executed as non-root and with no logging, etc. Commands that have this
+    decorator applied must be simple and self-contained.
+    """
+    cast(Any, func)._execute_early = True
+    return func
+
+
+def deprecated_command(func: FuncT) -> FuncT:
+    @wraps(func)
+    def _deprecated_command(ctx: CephadmContext) -> Any:
+        logger.warning(f'Deprecated command used: {func}')
+        if NO_DEPRECATED:
+            raise Error('running deprecated commands disabled')
+        return func(ctx)
+
+    return cast(FuncT, _deprecated_command)
+
+
+def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
+    """
+    :param ctx: Cephadm context
+    :param daemon_filter: daemon name or type
+    :param by_name: must be set to True if daemon name is provided
+    :return: Container information or None
+    """
+    def daemon_name_or_type(daemon: Dict[str, str]) -> str:
+        return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]
+
+    if by_name and '.' not in daemon_filter:
+        logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
+        return None
+    daemons = list_daemons(ctx, detail=False)
+    matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
+    if matching_daemons:
+        d_type, d_id = matching_daemons[0]['name'].split('.', 1)
+        out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
+        if not code:
+            (container_id, image_name, image_id, start, version) = out.strip().split(',')
+            return ContainerInfo(container_id, image_name, image_id, start, version)
+    return None
+
+
+def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
+    """
+     Infer the local ceph image based on the following priority criteria:
+       1- the image specified by --image arg (if provided).
+       2- the same image as the daemon container specified by --name arg (if provided).
+       3- image used by any ceph container running on the host. In this case we use daemon types.
+       4- if no container is found then we use the most ceph recent image on the host.
+
+     Note: any selected container must have the same fsid inferred previously.
+
+    :return: The most recent local ceph image (already pulled)
+    """
+    # '|' special character is used to separate the output fields into:
+    #  - Repository@digest
+    #  - Image Id
+    #  - Image Tag
+    #  - Image creation date
+    out, _, _ = call_throws(ctx,
+                            [container_path, 'images',
+                             '--filter', 'label=ceph=True',
+                             '--filter', 'dangling=false',
+                             '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])
+
+    container_info = None
+    daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
+    daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons  # daemon types: 'mon', 'mgr', etc
+    for daemon in daemons_ls:
+        container_info = get_container_info(ctx, daemon, daemon_name is not None)
+        if container_info is not None:
+            logger.debug(f"Using container info for daemon '{daemon}'")
+            break
+
+    for image in out.splitlines():
+        if image and not image.isspace():
+            (digest, image_id, tag, created_date) = image.lstrip().split('|')
+            if container_info is not None and image_id not in container_info.image_id:
+                continue
+            if digest and not digest.endswith('@'):
+                logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
+                return digest
+    return None
+
+
+def write_tmp(s, uid, gid):
+    # type: (str, int, int) -> IO[str]
+    tmp_f = tempfile.NamedTemporaryFile(mode='w',
+                                        prefix='ceph-tmp')
+    os.fchown(tmp_f.fileno(), uid, gid)
+    tmp_f.write(s)
+    tmp_f.flush()
+
+    return tmp_f
+
+
+def makedirs(dir, uid, gid, mode):
+    # type: (str, int, int, int) -> None
+    if not os.path.exists(dir):
+        os.makedirs(dir, mode=mode)
+    else:
+        os.chmod(dir, mode)
+    os.chown(dir, uid, gid)
+    os.chmod(dir, mode)   # the above is masked by umask...
+
+
+def get_data_dir(fsid, data_dir, t, n):
+    # type: (str, str, str, Union[int, str]) -> str
+    return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
+
+
+def get_log_dir(fsid, log_dir):
+    # type: (str, str) -> str
+    return os.path.join(log_dir, fsid)
+
+
+def make_data_dir_base(fsid, data_dir, uid, gid):
+    # type: (str, str, int, int) -> str
+    data_dir_base = os.path.join(data_dir, fsid)
+    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
+    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
+    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
+             DATA_DIR_MODE)
+    return data_dir_base
+
+
+def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
+    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
+    if uid is None or gid is None:
+        uid, gid = extract_uid_gid(ctx)
+    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
+    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
+    return data_dir
+
+
+def make_log_dir(ctx, fsid, uid=None, gid=None):
+    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
+    if uid is None or gid is None:
+        uid, gid = extract_uid_gid(ctx)
+    log_dir = get_log_dir(fsid, ctx.log_dir)
+    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
+    return log_dir
+
+
+def make_var_run(ctx, fsid, uid, gid):
+    # type: (CephadmContext, str, int, int) -> None
+    call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
+                      '/var/run/ceph/%s' % fsid])
+
+
+def copy_tree(ctx, src, dst, uid=None, gid=None):
+    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+    """
+    Copy a directory tree from src to dst
+    """
+    if uid is None or gid is None:
+        (uid, gid) = extract_uid_gid(ctx)
+
+    for src_dir in src:
+        dst_dir = dst
+        if os.path.isdir(dst):
+            dst_dir = os.path.join(dst, os.path.basename(src_dir))
+
+        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
+        shutil.rmtree(dst_dir, ignore_errors=True)
+        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8
+
+        for dirpath, dirnames, filenames in os.walk(dst_dir):
+            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
+            os.chown(dirpath, uid, gid)
+            for filename in filenames:
+                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
+                os.chown(os.path.join(dirpath, filename), uid, gid)
+
+
+def copy_files(ctx, src, dst, uid=None, gid=None):
+    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+    """
+    Copy a files from src to dst
+    """
+    if uid is None or gid is None:
+        (uid, gid) = extract_uid_gid(ctx)
+
+    for src_file in src:
+        dst_file = dst
+        if os.path.isdir(dst):
+            dst_file = os.path.join(dst, os.path.basename(src_file))
+
+        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
+        shutil.copyfile(src_file, dst_file)
+
+        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
+        os.chown(dst_file, uid, gid)
+
+
+def move_files(ctx, src, dst, uid=None, gid=None):
+    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+    """
+    Move files from src to dst
+    """
+    if uid is None or gid is None:
+        (uid, gid) = extract_uid_gid(ctx)
+
+    for src_file in src:
+        dst_file = dst
+        if os.path.isdir(dst):
+            dst_file = os.path.join(dst, os.path.basename(src_file))
+
+        if os.path.islink(src_file):
+            # shutil.move() in py2 does not handle symlinks correctly
+            src_rl = os.readlink(src_file)
+            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
+            os.symlink(src_rl, dst_file)
+            os.unlink(src_file)
+        else:
+            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
+            shutil.move(src_file, dst_file)
+            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
+            os.chown(dst_file, uid, gid)
+
+
+def recursive_chown(path: str, uid: int, gid: int) -> None:
+    for dirpath, dirnames, filenames in os.walk(path):
+        os.chown(dirpath, uid, gid)
+        for filename in filenames:
+            os.chown(os.path.join(dirpath, filename), uid, gid)
+
+
+# copied from distutils
+def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
+    """Tries to find 'executable' in the directories listed in 'path'.
+    A string listing directories separated by 'os.pathsep'; defaults to
+    os.environ['PATH'].  Returns the complete filename or None if not found.
+    """
+    _, ext = os.path.splitext(executable)
+    if (sys.platform == 'win32') and (ext != '.exe'):
+        executable = executable + '.exe'  # pragma: no cover
+
+    if os.path.isfile(executable):
+        return executable
+
+    if path is None:
+        path = os.environ.get('PATH', None)
+        if path is None:
+            try:
+                path = os.confstr('CS_PATH')
+            except (AttributeError, ValueError):
+                # os.confstr() or CS_PATH is not available
+                path = os.defpath
+        # bpo-35755: Don't use os.defpath if the PATH environment variable is
+        # set to an empty string
+
+    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+    if not path:
+        return None
+
+    paths = path.split(os.pathsep)
+    for p in paths:
+        f = os.path.join(p, executable)
+        if os.path.isfile(f):
+            # the file exists, we have a shot at spawn working
+            return f
+    return None
+
+
+def find_program(filename):
+    # type: (str) -> str
+    name = find_executable(filename)
+    if name is None:
+        raise ValueError('%s not found' % filename)
+    return name
+
+
+def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
+    if ctx.docker:
+        return Docker()
+    else:
+        for i in CONTAINER_PREFERENCE:
+            try:
+                return i()
+            except Exception:
+                pass
+    return None
+
+
+def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
+    engine = ctx.container_engine
+    if not isinstance(engine, CONTAINER_PREFERENCE):
+        # See https://github.com/python/mypy/issues/8993
+        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
+        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
+    elif isinstance(engine, Podman):
+        engine.get_version(ctx)
+        if engine.version < MIN_PODMAN_VERSION:
+            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
+    return engine
+
+
+def get_unit_name(fsid, daemon_type, daemon_id=None):
+    # type: (str, str, Optional[Union[int, str]]) -> str
+    # accept either name or type + id
+    if daemon_id is not None:
+        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+    else:
+        return 'ceph-%s@%s' % (fsid, daemon_type)
+
+
+def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
+    daemon = get_daemon_description(ctx, fsid, name)
+    try:
+        return daemon['systemd_unit']
+    except KeyError:
+        raise Error('Failed to get unit name for {}'.format(daemon))
+
+
+def check_unit(ctx, unit_name):
+    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
+    # NOTE: we ignore the exit code here because systemctl outputs
+    # various exit codes based on the state of the service, but the
+    # string result is more explicit (and sufficient).
+    enabled = False
+    installed = False
+    try:
+        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
+                              verbosity=CallVerbosity.QUIET)
+        if code == 0:
+            enabled = True
+            installed = True
+        elif 'disabled' in out:
+            installed = True
+    except Exception as e:
+        logger.warning('unable to run systemctl: %s' % e)
+        enabled = False
+        installed = False
+
+    state = 'unknown'
+    try:
+        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
+                              verbosity=CallVerbosity.QUIET)
+        out = out.strip()
+        if out in ['active']:
+            state = 'running'
+        elif out in ['inactive']:
+            state = 'stopped'
+        elif out in ['failed', 'auto-restart']:
+            state = 'error'
+        else:
+            state = 'unknown'
+    except Exception as e:
+        logger.warning('unable to run systemctl: %s' % e)
+        state = 'unknown'
+    return (enabled, state, installed)
+
+
+def check_units(ctx, units, enabler=None):
+    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
+    for u in units:
+        (enabled, state, installed) = check_unit(ctx, u)
+        if enabled and state == 'running':
+            logger.info('Unit %s is enabled and running' % u)
+            return True
+        if enabler is not None:
+            if installed:
+                logger.info('Enabling unit %s' % u)
+                enabler.enable_service(u)
+    return False
+
+
+def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
+    if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
+        # these are non-containerized daemon types
+        return False
+    return bool(get_running_container_name(ctx, c))
+
+
+def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
+    for name in [c.cname, c.old_cname]:
+        out, err, ret = call(ctx, [
+            ctx.container_engine.path, 'container', 'inspect',
+            '--format', '{{.State.Status}}', name
+        ])
+        if out.strip() == 'running':
+            return name
+    return None
+
+
+def get_legacy_config_fsid(cluster, legacy_dir=None):
+    # type: (str, Optional[str]) -> Optional[str]
+    config_file = '/etc/ceph/%s.conf' % cluster
+    if legacy_dir is not None:
+        config_file = os.path.abspath(legacy_dir + config_file)
+
+    if os.path.exists(config_file):
+        config = read_config(config_file)
+        if config.has_section('global') and config.has_option('global', 'fsid'):
+            return config.get('global', 'fsid')
+    return None
+
+
+def get_legacy_daemon_fsid(ctx, cluster,
+                           daemon_type, daemon_id, legacy_dir=None):
+    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
+    fsid = None
+    if daemon_type == 'osd':
+        try:
+            fsid_file = os.path.join(ctx.data_dir,
+                                     daemon_type,
+                                     'ceph-%s' % daemon_id,
+                                     'ceph_fsid')
+            if legacy_dir is not None:
+                fsid_file = os.path.abspath(legacy_dir + fsid_file)
+            with open(fsid_file, 'r') as f:
+                fsid = f.read().strip()
+        except IOError:
+            pass
+    if not fsid:
+        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
+    return fsid
+
+
+def should_log_to_journald(ctx: CephadmContext) -> bool:
+    if ctx.log_to_journald is not None:
+        return ctx.log_to_journald
+    return isinstance(ctx.container_engine, Podman) and \
+        ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
+
+
+def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
+    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
+    r = list()  # type: List[str]
+
+    if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
+        r += [
+            '--setuser', 'ceph',
+            '--setgroup', 'ceph',
+            '--default-log-to-file=false',
+        ]
+        log_to_journald = should_log_to_journald(ctx)
+        if log_to_journald:
+            r += [
+                '--default-log-to-journald=true',
+                '--default-log-to-stderr=false',
+            ]
+        else:
+            r += [
+                '--default-log-to-stderr=true',
+                '--default-log-stderr-prefix=debug ',
+            ]
+        if daemon_type == 'mon':
+            r += [
+                '--default-mon-cluster-log-to-file=false',
+            ]
+            if log_to_journald:
+                r += [
+                    '--default-mon-cluster-log-to-journald=true',
+                    '--default-mon-cluster-log-to-stderr=false',
+                ]
+            else:
+                r += ['--default-mon-cluster-log-to-stderr=true']
+    elif daemon_type in Monitoring.components:
+        metadata = Monitoring.components[daemon_type]
+        r += metadata.get('args', list())
+        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
+        if daemon_type not in ['grafana', 'loki', 'promtail']:
+            ip = ''
+            port = Monitoring.port_map[daemon_type][0]
+            meta = fetch_meta(ctx)
+            if meta:
+                if 'ip' in meta and meta['ip']:
+                    ip = meta['ip']
+                if 'ports' in meta and meta['ports']:
+                    port = meta['ports'][0]
+            r += [f'--web.listen-address={ip}:{port}']
+            if daemon_type == 'prometheus':
+                config = fetch_configs(ctx)
+                retention_time = config.get('retention_time', '15d')
+                retention_size = config.get('retention_size', '0')  # default to disabled
+                r += [f'--storage.tsdb.retention.time={retention_time}']
+                r += [f'--storage.tsdb.retention.size={retention_size}']
+                scheme = 'http'
+                host = get_fqdn()
+                # in case host is not an fqdn then we use the IP to
+                # avoid producing a broken web.external-url link
+                if '.' not in host:
+                    ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
+                    # use the first ipv4 (if any) otherwise use the first ipv6
+                    addr = next(iter(ipv4_addrs or ipv6_addrs), None)
+                    host = wrap_ipv6(addr) if addr else host
+                r += [f'--web.external-url={scheme}://{host}:{port}']
+        if daemon_type == 'alertmanager':
+            config = fetch_configs(ctx)
+            peers = config.get('peers', list())  # type: ignore
+            for peer in peers:
+                r += ['--cluster.peer={}'.format(peer)]
+            try:
+                r += [f'--web.config.file={config["web_config"]}']
+            except KeyError:
+                pass
+            # some alertmanager, by default, look elsewhere for a config
+            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+        if daemon_type == 'promtail':
+            r += ['--config.expand-env']
+        if daemon_type == 'prometheus':
+            config = fetch_configs(ctx)
+            try:
+                r += [f'--web.config.file={config["web_config"]}']
+            except KeyError:
+                pass
+        if daemon_type == 'node-exporter':
+            config = fetch_configs(ctx)
+            try:
+                r += [f'--web.config.file={config["web_config"]}']
+            except KeyError:
+                pass
+            r += ['--path.procfs=/host/proc',
+                  '--path.sysfs=/host/sys',
+                  '--path.rootfs=/rootfs']
+    elif daemon_type == 'jaeger-agent':
+        r.extend(Tracing.components[daemon_type]['daemon_args'])
+    elif daemon_type == NFSGanesha.daemon_type:
+        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+        r += nfs_ganesha.get_daemon_args()
+    elif daemon_type == CephExporter.daemon_type:
+        ceph_exporter = CephExporter.init(ctx, fsid, daemon_id)
+        r.extend(ceph_exporter.get_daemon_args())
+    elif daemon_type == HAproxy.daemon_type:
+        haproxy = HAproxy.init(ctx, fsid, daemon_id)
+        r += haproxy.get_daemon_args()
+    elif daemon_type == CustomContainer.daemon_type:
+        cc = CustomContainer.init(ctx, fsid, daemon_id)
+        r.extend(cc.get_daemon_args())
+    elif daemon_type == SNMPGateway.daemon_type:
+        sc = SNMPGateway.init(ctx, fsid, daemon_id)
+        r.extend(sc.get_daemon_args())
+
+    return r
+
+
+def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
+                       config=None, keyring=None):
+    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) ->  None
+    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
+
+    if daemon_type in Ceph.daemons:
+        make_log_dir(ctx, fsid, uid=uid, gid=gid)
+
+    if config:
+        config_path = os.path.join(data_dir, 'config')
+        with write_new(config_path, owner=(uid, gid)) as f:
+            f.write(config)
+
+    if keyring:
+        keyring_path = os.path.join(data_dir, 'keyring')
+        with write_new(keyring_path, owner=(uid, gid)) as f:
+            f.write(keyring)
+
+    if daemon_type in Monitoring.components.keys():
+        config_json = fetch_configs(ctx)
+
+        # Set up directories specific to the monitoring component
+        config_dir = ''
+        data_dir_root = ''
+        if daemon_type == 'prometheus':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/prometheus'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
+            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
+        elif daemon_type == 'grafana':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/grafana'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
+        elif daemon_type == 'alertmanager':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/alertmanager'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
+        elif daemon_type == 'promtail':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/promtail'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+        elif daemon_type == 'loki':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/loki'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+        elif daemon_type == 'node-exporter':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/node-exporter'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
+
+        # populate the config directory for the component from the config-json
+        if 'files' in config_json:
+            for fname in config_json['files']:
+                # work around mypy wierdness where it thinks `str`s aren't Anys
+                # when used for dictionary values! feels like possibly a mypy bug?!
+                cfg = cast(Dict[str, Any], config_json['files'])
+                content = dict_get_join(cfg, fname)
+                if os.path.isabs(fname):
+                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
+                else:
+                    fpath = os.path.join(data_dir_root, config_dir, fname)
+                with write_new(fpath, owner=(uid, gid), encoding='utf-8') as f:
+                    f.write(content)
+
+    elif daemon_type == NFSGanesha.daemon_type:
+        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == CephIscsi.daemon_type:
+        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == CephNvmeof.daemon_type:
+        ceph_nvmeof = CephNvmeof.init(ctx, fsid, daemon_id)
+        ceph_nvmeof.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == HAproxy.daemon_type:
+        haproxy = HAproxy.init(ctx, fsid, daemon_id)
+        haproxy.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == Keepalived.daemon_type:
+        keepalived = Keepalived.init(ctx, fsid, daemon_id)
+        keepalived.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == CustomContainer.daemon_type:
+        cc = CustomContainer.init(ctx, fsid, daemon_id)
+        cc.create_daemon_dirs(data_dir, uid, gid)
+
+    elif daemon_type == SNMPGateway.daemon_type:
+        sg = SNMPGateway.init(ctx, fsid, daemon_id)
+        sg.create_daemon_conf()
+
+    _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)
+
+
+def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
+    # mostly making this its own function to make unit testing easier
+    ccfiles = fetch_custom_config_files(ctx)
+    if not ccfiles:
+        return
+    custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
+    if not os.path.exists(custom_config_dir):
+        makedirs(custom_config_dir, uid, gid, 0o755)
+    mandatory_keys = ['mount_path', 'content']
+    for ccf in ccfiles:
+        if all(k in ccf for k in mandatory_keys):
+            file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
+            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
+                f.write(ccf['content'])
+            # temporary workaround to make custom config files work for tcmu-runner
+            # container we deploy with iscsi until iscsi is refactored
+            if daemon_type == 'iscsi':
+                tcmu_config_dir = custom_config_dir + '.tcmu'
+                if not os.path.exists(tcmu_config_dir):
+                    makedirs(tcmu_config_dir, uid, gid, 0o755)
+                tcmu_file_path = os.path.join(tcmu_config_dir, os.path.basename(ccf['mount_path']))
+                with write_new(tcmu_file_path, owner=(uid, gid), encoding='utf-8') as f:
+                    f.write(ccf['content'])
+
+
+def get_parm(option: str) -> Dict[str, str]:
+    js = _get_config_json(option)
+    # custom_config_files is a special field that may be in the config
+    # dict. It is used for mounting custom config files into daemon's containers
+    # and should be accessed through the "fetch_custom_config_files" function.
+    # For get_parm we need to discard it.
+    js.pop('custom_config_files', None)
+    return js
+
+
+def _get_config_json(option: str) -> Dict[str, Any]:
+    if not option:
+        return dict()
+
+    global cached_stdin
+    if option == '-':
+        if cached_stdin is not None:
+            j = cached_stdin
+        else:
+            j = sys.stdin.read()
+            cached_stdin = j
+    else:
+        # inline json string
+        if option[0] == '{' and option[-1] == '}':
+            j = option
+        # json file
+        elif os.path.exists(option):
+            with open(option, 'r') as f:
+                j = f.read()
+        else:
+            raise Error('Config file {} not found'.format(option))
+
+    try:
+        js = json.loads(j)
+    except ValueError as e:
+        raise Error('Invalid JSON in {}: {}'.format(option, e))
+    else:
+        return js
+
+
+def fetch_meta(ctx: CephadmContext) -> Dict[str, Any]:
+    """Return a dict containing metadata about a deployment.
+    """
+    meta = getattr(ctx, 'meta_properties', None)
+    if meta is not None:
+        return meta
+    mjson = getattr(ctx, 'meta_json', None)
+    if mjson is not None:
+        meta = json.loads(mjson) or {}
+        ctx.meta_properties = meta
+        return meta
+    return {}
+
+
+def fetch_configs(ctx: CephadmContext) -> Dict[str, str]:
+    """Return a dict containing arbitrary configuration parameters.
+    This function filters out the key 'custom_config_files' which
+    must not be part of a deployment's configuration key-value pairs.
+    To access custom configuration file data, use `fetch_custom_config_files`.
+    """
+    # ctx.config_blobs is *always* a dict. it is created once when
+    # a command is parsed/processed and stored "forever"
+    cfg_blobs = getattr(ctx, 'config_blobs', None)
+    if cfg_blobs:
+        cfg_blobs = dict(cfg_blobs)
+        cfg_blobs.pop('custom_config_files', None)
+        return cfg_blobs
+    # ctx.config_json is the legacy equivalent of config_blobs. it is a
+    # string that either contains json or refers to a file name where
+    # the file contains json.
+    cfg_json = getattr(ctx, 'config_json', None)
+    if cfg_json:
+        jdata = _get_config_json(cfg_json) or {}
+        jdata.pop('custom_config_files', None)
+        return jdata
+    return {}
+
+
+def fetch_custom_config_files(ctx: CephadmContext) -> List[Dict[str, Any]]:
+    """Return a list containing dicts that can be used to populate
+    custom configuration files for containers.
+    """
+    # NOTE: this function works like the opposite of fetch_configs.
+    # instead of filtering out custom_config_files, it returns only
+    # the content in that key.
+    cfg_blobs = getattr(ctx, 'config_blobs', None)
+    if cfg_blobs:
+        return cfg_blobs.get('custom_config_files', [])
+    cfg_json = getattr(ctx, 'config_json', None)
+    if cfg_json:
+        jdata = _get_config_json(cfg_json)
+        return jdata.get('custom_config_files', [])
+    return []
+
+
+def fetch_tcp_ports(ctx: CephadmContext) -> List[EndPoint]:
+    """Return a list of Endpoints, which have a port and ip attribute
+    """
+    ports = getattr(ctx, 'tcp_ports', None)
+    if ports is None:
+        ports = []
+    if isinstance(ports, str):
+        ports = list(map(int, ports.split()))
+    port_ips: Dict[str, str] = {}
+    port_ips_attr: Union[str, Dict[str, str], None] = getattr(ctx, 'port_ips', None)
+    if isinstance(port_ips_attr, str):
+        port_ips = json.loads(port_ips_attr)
+    elif port_ips_attr is not None:
+        # if it's not None or a str, assume it's already the dict we want
+        port_ips = port_ips_attr
+
+    endpoints: List[EndPoint] = []
+    for port in ports:
+        if str(port) in port_ips:
+            endpoints.append(EndPoint(port_ips[str(port)], port))
+        else:
+            endpoints.append(EndPoint('0.0.0.0', port))
+
+    return endpoints
+
+
+def get_config_and_keyring(ctx):
+    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
+    config = None
+    keyring = None
+
+    d = fetch_configs(ctx)
+    if d:
+        config = d.get('config')
+        keyring = d.get('keyring')
+        if config and keyring:
+            return config, keyring
+
+    if 'config' in ctx and ctx.config:
+        try:
+            with open(ctx.config, 'r') as f:
+                config = f.read()
+        except FileNotFoundError as e:
+            raise Error(e)
+
+    if 'key' in ctx and ctx.key:
+        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
+    elif 'keyring' in ctx and ctx.keyring:
+        try:
+            with open(ctx.keyring, 'r') as f:
+                keyring = f.read()
+        except FileNotFoundError as e:
+            raise Error(e)
+
+    return config, keyring
+
+
+def get_container_binds(ctx, fsid, daemon_type, daemon_id):
+    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
+    binds = list()
+
+    if daemon_type == CephIscsi.daemon_type:
+        binds.extend(CephIscsi.get_container_binds())
+    if daemon_type == CephNvmeof.daemon_type:
+        binds.extend(CephNvmeof.get_container_binds())
+    elif daemon_type == CustomContainer.daemon_type:
+        assert daemon_id
+        cc = CustomContainer.init(ctx, fsid, daemon_id)
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        binds.extend(cc.get_container_binds(data_dir))
+
+    return binds
+
+
+def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
+                         no_config=False):
+    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
+    mounts = dict()
+
+    if daemon_type in Ceph.daemons:
+        if fsid:
+            run_path = os.path.join('/var/run/ceph', fsid)
+            if os.path.exists(run_path):
+                mounts[run_path] = '/var/run/ceph:z'
+            log_dir = get_log_dir(fsid, ctx.log_dir)
+            mounts[log_dir] = '/var/log/ceph:z'
+            crash_dir = '/var/lib/ceph/%s/crash' % fsid
+            if os.path.exists(crash_dir):
+                mounts[crash_dir] = '/var/lib/ceph/crash:z'
+            if daemon_type != 'crash' and should_log_to_journald(ctx):
+                journald_sock_dir = '/run/systemd/journal'
+                mounts[journald_sock_dir] = journald_sock_dir
+
+    if daemon_type in Ceph.daemons and daemon_id:
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        if daemon_type == 'rgw':
+            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
+        else:
+            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
+        if daemon_type != 'crash':
+            mounts[data_dir] = cdata_dir + ':z'
+        if not no_config:
+            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
+        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
+            # these do not search for their keyrings in a data directory
+            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
+
+    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
+        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
+        mounts['/run/udev'] = '/run/udev'
+    if daemon_type in ['osd', 'clusterless-ceph-volume']:
+        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
+        mounts['/run/lvm'] = '/run/lvm'
+        mounts['/run/lock/lvm'] = '/run/lock/lvm'
+    if daemon_type == 'osd':
+        # selinux-policy in the container may not match the host.
+        if HostFacts(ctx).selinux_enabled:
+            cluster_dir = f'{ctx.data_dir}/{fsid}'
+            selinux_folder = f'{cluster_dir}/selinux'
+            if os.path.exists(cluster_dir):
+                if not os.path.exists(selinux_folder):
+                    os.makedirs(selinux_folder, mode=0o755)
+                mounts[selinux_folder] = '/sys/fs/selinux:ro'
+            else:
+                logger.error(f'Cluster direcotry {cluster_dir} does not exist.')
+        mounts['/'] = '/rootfs'
+
+    try:
+        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
+            ceph_folder = pathify(ctx.shared_ceph_folder)
+            if os.path.exists(ceph_folder):
+                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
+                mounts[ceph_folder + '/src/cephadm/cephadm.py'] = '/usr/sbin/cephadm'
+                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
+                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
+                mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
+                mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
+            else:
+                logger.error('{}{}{}'.format(termcolor.red,
+                                             'Ceph shared source folder does not exist.',
+                                             termcolor.end))
+    except AttributeError:
+        pass
+
+    if daemon_type in Monitoring.components and daemon_id:
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        log_dir = get_log_dir(fsid, ctx.log_dir)
+        if daemon_type == 'prometheus':
+            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
+            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
+        elif daemon_type == 'loki':
+            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
+            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
+        elif daemon_type == 'promtail':
+            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
+            mounts[log_dir] = '/var/log/ceph:z'
+            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
+        elif daemon_type == 'node-exporter':
+            mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z'
+            mounts['/proc'] = '/host/proc:ro'
+            mounts['/sys'] = '/host/sys:ro'
+            mounts['/'] = '/rootfs:ro'
+        elif daemon_type == 'grafana':
+            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
+            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
+            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
+            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
+        elif daemon_type == 'alertmanager':
+            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
+
+    if daemon_type == NFSGanesha.daemon_type:
+        assert daemon_id
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+        mounts.update(nfs_ganesha.get_container_mounts(data_dir))
+
+    if daemon_type == HAproxy.daemon_type:
+        assert daemon_id
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        mounts.update(HAproxy.get_container_mounts(data_dir))
+
+    if daemon_type == CephNvmeof.daemon_type:
+        assert daemon_id
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        mounts.update(CephNvmeof.get_container_mounts(data_dir))
+
+    if daemon_type == CephIscsi.daemon_type:
+        assert daemon_id
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        # Removes ending ".tcmu" from data_dir a tcmu-runner uses the same data_dir
+        # as rbd-runner-api
+        if data_dir.endswith('.tcmu'):
+            data_dir = re.sub(r'\.tcmu$', '', data_dir)
+        log_dir = get_log_dir(fsid, ctx.log_dir)
+        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
+
+    if daemon_type == Keepalived.daemon_type:
+        assert daemon_id
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        mounts.update(Keepalived.get_container_mounts(data_dir))
+
+    if daemon_type == CustomContainer.daemon_type:
+        assert daemon_id
+        cc = CustomContainer.init(ctx, fsid, daemon_id)
+        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        mounts.update(cc.get_container_mounts(data_dir))
+
+    # Modifications podman makes to /etc/hosts causes issues with
+    # certain daemons (specifically referencing "host.containers.internal" entry
+    # being added to /etc/hosts in this case). To avoid that, but still
+    # allow users to use /etc/hosts for hostname resolution, we can
+    # mount the host's /etc/hosts file.
+    # https://tracker.ceph.com/issues/58532
+    # https://tracker.ceph.com/issues/57018
+    if isinstance(ctx.container_engine, Podman):
+        if os.path.exists('/etc/hosts'):
+            if '/etc/hosts' not in mounts:
+                mounts['/etc/hosts'] = '/etc/hosts:ro'
+
+    return mounts
+
+
+def get_ceph_volume_container(ctx: CephadmContext,
+                              privileged: bool = True,
+                              cname: str = '',
+                              volume_mounts: Dict[str, str] = {},
+                              bind_mounts: Optional[List[List[str]]] = None,
+                              args: List[str] = [],
+                              envs: Optional[List[str]] = None) -> 'CephContainer':
+    if envs is None:
+        envs = []
+    envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
+    envs.append('CEPH_VOLUME_DEBUG=1')
+
+    return CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/sbin/ceph-volume',
+        args=args,
+        volume_mounts=volume_mounts,
+        bind_mounts=bind_mounts,
+        envs=envs,
+        privileged=privileged,
+        cname=cname,
+        memory_request=ctx.memory_request,
+        memory_limit=ctx.memory_limit,
+    )
+
+
+def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None:
+    # set container's pids-limit to unlimited rather than default (Docker 4096 / Podman 2048)
+    # Useful for daemons like iscsi where the default pids-limit limits the number of luns
+    # per iscsi target or rgw where increasing the rgw_thread_pool_size to a value near
+    # the default pids-limit may cause the container to crash.
+    if (
+        isinstance(ctx.container_engine, Podman)
+        and ctx.container_engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION
+    ):
+        container_args.append('--pids-limit=-1')
+    else:
+        container_args.append('--pids-limit=0')
+
+
+def get_container(ctx: CephadmContext,
+                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
+                  privileged: bool = False,
+                  ptrace: bool = False,
+                  container_args: Optional[List[str]] = None) -> 'CephContainer':
+    entrypoint: str = ''
+    name: str = ''
+    ceph_args: List[str] = []
+    envs: List[str] = []
+    host_network: bool = True
+
+    if daemon_type in Ceph.daemons:
+        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
+    if container_args is None:
+        container_args = []
+    if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways:
+        set_pids_limit_unlimited(ctx, container_args)
+    if daemon_type in ['mon', 'osd']:
+        # mon and osd need privileged in order for libudev to query devices
+        privileged = True
+    if daemon_type == 'rgw':
+        entrypoint = '/usr/bin/radosgw'
+        name = 'client.rgw.%s' % daemon_id
+    elif daemon_type == 'rbd-mirror':
+        entrypoint = '/usr/bin/rbd-mirror'
+        name = 'client.rbd-mirror.%s' % daemon_id
+    elif daemon_type == 'cephfs-mirror':
+        entrypoint = '/usr/bin/cephfs-mirror'
+        name = 'client.cephfs-mirror.%s' % daemon_id
+    elif daemon_type == 'crash':
+        entrypoint = '/usr/bin/ceph-crash'
+        name = 'client.crash.%s' % daemon_id
+    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
+        entrypoint = '/usr/bin/ceph-' + daemon_type
+        name = '%s.%s' % (daemon_type, daemon_id)
+    elif daemon_type in Monitoring.components:
+        entrypoint = ''
+    elif daemon_type in Tracing.components:
+        entrypoint = ''
+        name = '%s.%s' % (daemon_type, daemon_id)
+        config = fetch_configs(ctx)
+        Tracing.set_configuration(config, daemon_type)
+        envs.extend(Tracing.components[daemon_type].get('envs', []))
+    elif daemon_type == NFSGanesha.daemon_type:
+        entrypoint = NFSGanesha.entrypoint
+        name = '%s.%s' % (daemon_type, daemon_id)
+        envs.extend(NFSGanesha.get_container_envs())
+    elif daemon_type == CephExporter.daemon_type:
+        entrypoint = CephExporter.entrypoint
+        name = 'client.ceph-exporter.%s' % daemon_id
+    elif daemon_type == HAproxy.daemon_type:
+        name = '%s.%s' % (daemon_type, daemon_id)
+        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
+    elif daemon_type == Keepalived.daemon_type:
+        name = '%s.%s' % (daemon_type, daemon_id)
+        envs.extend(Keepalived.get_container_envs())
+        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
+    elif daemon_type == CephNvmeof.daemon_type:
+        name = '%s.%s' % (daemon_type, daemon_id)
+        container_args.extend(['--ulimit', 'memlock=-1:-1'])
+        container_args.extend(['--ulimit', 'nofile=10240'])
+        container_args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
+    elif daemon_type == CephIscsi.daemon_type:
+        entrypoint = CephIscsi.entrypoint
+        name = '%s.%s' % (daemon_type, daemon_id)
+        # So the container can modprobe iscsi_target_mod and have write perms
+        # to configfs we need to make this a privileged container.
+        privileged = True
+    elif daemon_type == CustomContainer.daemon_type:
+        cc = CustomContainer.init(ctx, fsid, daemon_id)
+        entrypoint = cc.entrypoint
+        host_network = False
+        envs.extend(cc.get_container_envs())
+        container_args.extend(cc.get_container_args())
+
+    if daemon_type in Monitoring.components:
+        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
+        monitoring_args = [
+            '--user',
+            str(uid),
+            # FIXME: disable cpu/memory limits for the time being (not supported
+            # by ubuntu 18.04 kernel!)
+        ]
+        container_args.extend(monitoring_args)
+        if daemon_type == 'node-exporter':
+            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
+            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
+            # between the node-exporter container and the host to avoid selinux denials
+            container_args.extend(['--security-opt', 'label=disable'])
+    elif daemon_type == 'crash':
+        ceph_args = ['-n', name]
+    elif daemon_type in Ceph.daemons:
+        ceph_args = ['-n', name, '-f']
+    elif daemon_type == SNMPGateway.daemon_type:
+        sg = SNMPGateway.init(ctx, fsid, daemon_id)
+        container_args.append(
+            f'--env-file={sg.conf_file_path}'
+        )
+
+    # if using podman, set -d, --conmon-pidfile & --cidfile flags
+    # so service can have Type=Forking
+    if isinstance(ctx.container_engine, Podman):
+        runtime_dir = '/run'
+        container_args.extend([
+            '-d', '--log-driver', 'journald',
+            '--conmon-pidfile',
+            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
+            '--cidfile',
+            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
+        ])
+        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split:
+            container_args.append('--cgroups=split')
+        # if /etc/hosts doesn't exist, we can be confident
+        # users aren't using it for host name resolution
+        # and adding --no-hosts avoids bugs created in certain daemons
+        # by modifications podman makes to /etc/hosts
+        # https://tracker.ceph.com/issues/58532
+        # https://tracker.ceph.com/issues/57018
+        if not os.path.exists('/etc/hosts'):
+            container_args.extend(['--no-hosts'])
+
+    return CephContainer.for_daemon(
+        ctx,
+        fsid=fsid,
+        daemon_type=daemon_type,
+        daemon_id=str(daemon_id),
+        entrypoint=entrypoint,
+        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
+        container_args=container_args,
+        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+        envs=envs,
+        privileged=privileged,
+        ptrace=ptrace,
+        host_network=host_network,
+    )
+
+
+def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
+    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
+
+    if not img:
+        img = ctx.image
+
+    if isinstance(file_path, str):
+        paths = [file_path]
+    else:
+        paths = file_path
+
+    ex: Optional[Tuple[str, RuntimeError]] = None
+
+    for fp in paths:
+        try:
+            out = CephContainer(
+                ctx,
+                image=img,
+                entrypoint='stat',
+                args=['-c', '%u %g', fp]
+            ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+            uid, gid = out.split(' ')
+            return int(uid), int(gid)
+        except RuntimeError as e:
+            ex = (fp, e)
+    if ex:
+        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
+
+    raise RuntimeError('uid/gid not found')
+
+
+def deploy_daemon(ctx: CephadmContext, fsid: str, daemon_type: str,
+                  daemon_id: Union[int, str], c: Optional['CephContainer'],
+                  uid: int, gid: int, config: Optional[str] = None,
+                  keyring: Optional[str] = None, osd_fsid: Optional[str] = None,
+                  deployment_type: DeploymentType = DeploymentType.DEFAULT,
+                  endpoints: Optional[List[EndPoint]] = None) -> None:
+
+    endpoints = endpoints or []
+    # only check port in use if fresh deployment since service
+    # we are redeploying/reconfiguring will already be using the port
+    if deployment_type == DeploymentType.DEFAULT:
+        if any([port_in_use(ctx, e) for e in endpoints]):
+            if daemon_type == 'mgr':
+                # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
+                # tell whether that is the case here.
+                logger.warning(
+                    f"ceph-mgr TCP port(s) {','.join(map(str, endpoints))} already in use"
+                )
+            else:
+                raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, endpoints)), daemon_type))
+
+    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+    if deployment_type == DeploymentType.RECONFIG and not os.path.exists(data_dir):
+        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
+    if daemon_type == 'mon' and not os.path.exists(data_dir):
+        assert config
+        assert keyring
+        # tmp keyring file
+        tmp_keyring = write_tmp(keyring, uid, gid)
+
+        # tmp config file
+        tmp_config = write_tmp(config, uid, gid)
+
+        # --mkfs
+        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
+        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
+        log_dir = get_log_dir(fsid, ctx.log_dir)
+        CephContainer(
+            ctx,
+            image=ctx.image,
+            entrypoint='/usr/bin/ceph-mon',
+            args=[
+                '--mkfs',
+                '-i', str(daemon_id),
+                '--fsid', fsid,
+                '-c', '/tmp/config',
+                '--keyring', '/tmp/keyring',
+            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
+            volume_mounts={
+                log_dir: '/var/log/ceph:z',
+                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
+                tmp_keyring.name: '/tmp/keyring:z',
+                tmp_config.name: '/tmp/config:z',
+            },
+        ).run()
+
+        # write conf
+        with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
+            f.write(config)
+    else:
+        # dirs, conf, keyring
+        create_daemon_dirs(
+            ctx,
+            fsid, daemon_type, daemon_id,
+            uid, gid,
+            config, keyring)
+
+    # only write out unit files and start daemon
+    # with systemd if this is not a reconfig
+    if deployment_type != DeploymentType.RECONFIG:
+        if daemon_type == CephadmAgent.daemon_type:
+            config_js = fetch_configs(ctx)
+            assert isinstance(config_js, dict)
+
+            cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
+            cephadm_agent.deploy_daemon_unit(config_js)
+        else:
+            if c:
+                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
+                                    c, osd_fsid=osd_fsid, endpoints=endpoints)
+            else:
+                raise RuntimeError('attempting to deploy a daemon without a container image')
+
+    if not os.path.exists(data_dir + '/unit.created'):
+        with write_new(data_dir + '/unit.created', owner=(uid, gid)) as f:
+            f.write('mtime is time the daemon deployment was created\n')
+
+    with write_new(data_dir + '/unit.configured', owner=(uid, gid)) as f:
+        f.write('mtime is time we were last configured\n')
+
+    update_firewalld(ctx, daemon_type)
+
+    # Open ports explicitly required for the daemon
+    if endpoints:
+        fw = Firewalld(ctx)
+        fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, []))
+        fw.apply_rules()
+
+    # If this was a reconfig and the daemon is not a Ceph daemon, restart it
+    # so it can pick up potential changes to its configuration files
+    if deployment_type == DeploymentType.RECONFIG and daemon_type not in Ceph.daemons:
+        # ceph daemons do not need a restart; others (presumably) do to pick
+        # up the new config
+        call_throws(ctx, ['systemctl', 'reset-failed',
+                          get_unit_name(fsid, daemon_type, daemon_id)])
+        call_throws(ctx, ['systemctl', 'restart',
+                          get_unit_name(fsid, daemon_type, daemon_id)])
+
+
+def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
+    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
+    if comment:
+        # Sometimes adding a comment, especially if there are multiple containers in one
+        # unit file, makes it easier to read and grok.
+        file_obj.write('# ' + comment + '\n')
+    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
+    file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
+    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
+    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
+    if isinstance(ctx.container_engine, Podman):
+        file_obj.write(
+            '! '
+            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
+            + ' 2> /dev/null\n')
+        file_obj.write(
+            '! '
+            + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
+            + ' 2> /dev/null\n')
+
+    # container run command
+    file_obj.write(
+        ' '.join([shlex.quote(a) for a in container.run_cmd()])
+        + (' &' if background else '') + '\n')
+
+
+def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
+    # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
+    # see https://tracker.ceph.com/issues/50998
+
+    CGROUPV2_PATH = Path('/sys/fs/cgroup')
+    if not (CGROUPV2_PATH / 'system.slice').exists():
+        # Only unified cgroup is affected, skip if not the case
+        return
+
+    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
+    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
+    if not cg_path.exists():
+        return
+
+    def cg_trim(path: Path) -> None:
+        for p in path.iterdir():
+            if p.is_dir():
+                cg_trim(p)
+        path.rmdir()
+    try:
+        cg_trim(cg_path)
+    except OSError:
+        logger.warning(f'Failed to trim old cgroups {cg_path}')
+
+
+def deploy_daemon_units(
+    ctx: CephadmContext,
+    fsid: str,
+    uid: int,
+    gid: int,
+    daemon_type: str,
+    daemon_id: Union[int, str],
+    c: 'CephContainer',
+    enable: bool = True,
+    start: bool = True,
+    osd_fsid: Optional[str] = None,
+    endpoints: Optional[List[EndPoint]] = None,
+) -> None:
+    # cmd
+
+    def add_stop_actions(f: TextIO, timeout: Optional[int]) -> None:
+        # following generated script basically checks if the container exists
+        # before stopping it. Exit code will be success either if it doesn't
+        # exist or if it exists and is stopped successfully.
+        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
+        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True, timeout=timeout))} \n')
+        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd(timeout=timeout))} \n')
+
+    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+    run_file_path = data_dir + '/unit.run'
+    meta_file_path = data_dir + '/unit.meta'
+    with write_new(run_file_path) as f, write_new(meta_file_path) as metaf:
+
+        f.write('set -e\n')
+
+        if daemon_type in Ceph.daemons:
+            install_path = find_program('install')
+            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
+
+        # pre-start cmd(s)
+        if daemon_type == 'osd':
+            # osds have a pre-start step
+            assert osd_fsid
+            simple_fn = os.path.join('/etc/ceph/osd',
+                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
+            if os.path.exists(simple_fn):
+                f.write('# Simple OSDs need chown on startup:\n')
+                for n in ['block', 'block.db', 'block.wal']:
+                    p = os.path.join(data_dir, n)
+                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
+            else:
+                # if ceph-volume does not support 'ceph-volume activate', we must
+                # do 'ceph-volume lvm activate'.
+                test_cv = get_ceph_volume_container(
+                    ctx,
+                    args=['activate', '--bad-option'],
+                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
+                )
+                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
+                #  bad: ceph-volume: error: unrecognized arguments: activate --bad-option
+                # good: ceph-volume: error: unrecognized arguments: --bad-option
+                if 'unrecognized arguments: activate' in err:
+                    # older ceph-volume without top-level activate or --no-tmpfs
+                    cmd = [
+                        'lvm', 'activate',
+                        str(daemon_id), osd_fsid,
+                        '--no-systemd',
+                    ]
+                else:
+                    cmd = [
+                        'activate',
+                        '--osd-id', str(daemon_id),
+                        '--osd-uuid', osd_fsid,
+                        '--no-systemd',
+                        '--no-tmpfs',
+                    ]
+
+                prestart = get_ceph_volume_container(
+                    ctx,
+                    args=cmd,
+                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
+                )
+                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
+        elif daemon_type == CephIscsi.daemon_type:
+            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
+            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
+            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
+
+        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
+
+        # some metadata about the deploy
+        meta: Dict[str, Any] = fetch_meta(ctx)
+        meta.update({
+            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
+            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
+        })
+        if not meta.get('ports'):
+            if endpoints:
+                meta['ports'] = [e.port for e in endpoints]
+            else:
+                meta['ports'] = []
+        metaf.write(json.dumps(meta, indent=4) + '\n')
+
+    timeout = 30 if daemon_type == 'osd' else None
+    # post-stop command(s)
+    with write_new(data_dir + '/unit.poststop') as f:
+        # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop,
+        # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242.
+        add_stop_actions(cast(TextIO, f), timeout)
+        if daemon_type == 'osd':
+            assert osd_fsid
+            poststop = get_ceph_volume_container(
+                ctx,
+                args=[
+                    'lvm', 'deactivate',
+                    str(daemon_id), osd_fsid,
+                ],
+                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
+                                                    daemon_id),
+            )
+            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
+        elif daemon_type == CephIscsi.daemon_type:
+            # make sure we also stop the tcmu container
+            runtime_dir = '/run'
+            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
+            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
+            f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
+            f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
+            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
+
+    # post-stop command(s)
+    with write_new(data_dir + '/unit.stop') as f:
+        add_stop_actions(cast(TextIO, f), timeout)
+
+    if c:
+        with write_new(data_dir + '/unit.image') as f:
+            f.write(c.image + '\n')
+
+    # sysctl
+    install_sysctl(ctx, fsid, daemon_type)
+
+    # systemd
+    install_base_units(ctx, fsid)
+    unit = get_unit_file(ctx, fsid)
+    unit_file = 'ceph-%s@.service' % (fsid)
+    with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
+        f.write(unit)
+    call_throws(ctx, ['systemctl', 'daemon-reload'])
+
+    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
+    call(ctx, ['systemctl', 'stop', unit_name],
+         verbosity=CallVerbosity.DEBUG)
+    call(ctx, ['systemctl', 'reset-failed', unit_name],
+         verbosity=CallVerbosity.DEBUG)
+    if enable:
+        call_throws(ctx, ['systemctl', 'enable', unit_name])
+    if start:
+        clean_cgroup(ctx, fsid, unit_name)
+        call_throws(ctx, ['systemctl', 'start', unit_name])
+
+
+class Firewalld(object):
+
+    # for specifying ports we should always open when opening
+    # ports for a daemon of that type. Main use case is for ports
+    # that we should open when deploying the daemon type but that
+    # the daemon itself may not necessarily need to bind to the port.
+    # This needs to be handed differently as we don't want to fail
+    # deployment if the port cannot be bound to but we still want to
+    # open the port in the firewall.
+    external_ports: Dict[str, List[int]] = {
+        'iscsi': [3260]  # 3260 is the well known iSCSI port
+    }
+
+    def __init__(self, ctx):
+        # type: (CephadmContext) -> None
+        self.ctx = ctx
+        self.available = self.check()
+
+    def check(self):
+        # type: () -> bool
+        self.cmd = find_executable('firewall-cmd')
+        if not self.cmd:
+            logger.debug('firewalld does not appear to be present')
+            return False
+        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
+        if not enabled:
+            logger.debug('firewalld.service is not enabled')
+            return False
+        if state != 'running':
+            logger.debug('firewalld.service is not running')
+            return False
+
+        logger.info('firewalld ready')
+        return True
+
+    def enable_service_for(self, daemon_type):
+        # type: (str) -> None
+        if not self.available:
+            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
+            return
+
+        if daemon_type == 'mon':
+            svc = 'ceph-mon'
+        elif daemon_type in ['mgr', 'mds', 'osd']:
+            svc = 'ceph'
+        elif daemon_type == NFSGanesha.daemon_type:
+            svc = 'nfs'
+        else:
+            return
+
+        if not self.cmd:
+            raise RuntimeError('command not defined')
+
+        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
+        if ret:
+            logger.info('Enabling firewalld service %s in current zone...' % svc)
+            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
+            if ret:
+                raise RuntimeError(
+                    'unable to add service %s to current zone: %s' % (svc, err))
+        else:
+            logger.debug('firewalld service %s is enabled in current zone' % svc)
+
+    def open_ports(self, fw_ports):
+        # type: (List[int]) -> None
+        if not self.available:
+            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
+            return
+
+        if not self.cmd:
+            raise RuntimeError('command not defined')
+
+        for port in fw_ports:
+            tcp_port = str(port) + '/tcp'
+            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
+            if ret:
+                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
+                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
+                if ret:
+                    raise RuntimeError('unable to add port %s to current zone: %s' %
+                                       (tcp_port, err))
+            else:
+                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
+
+    def close_ports(self, fw_ports):
+        # type: (List[int]) -> None
+        if not self.available:
+            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
+            return
+
+        if not self.cmd:
+            raise RuntimeError('command not defined')
+
+        for port in fw_ports:
+            tcp_port = str(port) + '/tcp'
+            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
+            if not ret:
+                logger.info('Disabling port %s in current zone...' % tcp_port)
+                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
+                if ret:
+                    raise RuntimeError('unable to remove port %s from current zone: %s' %
+                                       (tcp_port, err))
+                else:
+                    logger.info(f'Port {tcp_port} disabled')
+            else:
+                logger.info(f'firewalld port {tcp_port} already closed')
+
+    def apply_rules(self):
+        # type: () -> None
+        if not self.available:
+            return
+
+        if not self.cmd:
+            raise RuntimeError('command not defined')
+
+        call_throws(self.ctx, [self.cmd, '--reload'])
+
+
+def update_firewalld(ctx, daemon_type):
+    # type: (CephadmContext, str) -> None
+    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
+        firewall = Firewalld(ctx)
+        firewall.enable_service_for(daemon_type)
+        firewall.apply_rules()
+
+
+def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
+    """
+    Set up sysctl settings
+    """
+    def _write(conf: Path, lines: List[str]) -> None:
+        lines = [
+            '# created by cephadm',
+            '',
+            *lines,
+            '',
+        ]
+        with write_new(conf, owner=None, perms=None) as f:
+            f.write('\n'.join(lines))
+
+    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
+    lines: List = []
+
+    if daemon_type == 'osd':
+        lines = OSD.get_sysctl_settings()
+    elif daemon_type == 'haproxy':
+        lines = HAproxy.get_sysctl_settings()
+    elif daemon_type == 'keepalived':
+        lines = Keepalived.get_sysctl_settings()
+    elif daemon_type == CephNvmeof.daemon_type:
+        lines = CephNvmeof.get_sysctl_settings()
+    lines = filter_sysctl_settings(ctx, lines)
+
+    # apply the sysctl settings
+    if lines:
+        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
+        _write(conf, lines)
+        call_throws(ctx, ['sysctl', '--system'])
+
+
+def sysctl_get(ctx: CephadmContext, variable: str) -> Union[str, None]:
+    """
+    Read a sysctl setting by executing 'sysctl -b {variable}'
+    """
+    out, err, code = call(ctx, ['sysctl', '-b', variable])
+    return out or None
+
+
+def filter_sysctl_settings(ctx: CephadmContext, lines: List[str]) -> List[str]:
+    """
+    Given a list of sysctl settings, examine the system's current configuration
+    and return those which are not currently set as described.
+    """
+    def test_setting(desired_line: str) -> bool:
+        # Remove any comments
+        comment_start = desired_line.find('#')
+        if comment_start != -1:
+            desired_line = desired_line[:comment_start]
+        desired_line = desired_line.strip()
+        if not desired_line or desired_line.isspace():
+            return False
+        setting, desired_value = map(lambda s: s.strip(), desired_line.split('='))
+        if not setting or not desired_value:
+            return False
+        actual_value = sysctl_get(ctx, setting)
+        return desired_value != actual_value
+    return list(filter(test_setting, lines))
+
+
+def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
+    """
+    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
+    This moves it to '/etc/sysctl.d'.
+    """
+    deprecated_location: str = '/usr/lib/sysctl.d'
+    deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
+    if not deprecated_confs:
+        return
+
+    file_count: int = len(deprecated_confs)
+    logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.')
+    for conf in deprecated_confs:
+        try:
+            shutil.move(conf, ctx.sysctl_dir)
+            file_count -= 1
+        except shutil.Error as err:
+            if str(err).endswith('already exists'):
+                logger.warning(f'Destination file already exists. Deleting {conf}.')
+                try:
+                    os.unlink(conf)
+                    file_count -= 1
+                except OSError as del_err:
+                    logger.warning(f'Could not remove {conf}: {del_err}.')
+            else:
+                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')
+
+    # Log successful migration
+    if file_count == 0:
+        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
+        return
+
+    # Log partially successful / unsuccessful migration
+    files_processed: int = len(deprecated_confs)
+    if file_count < files_processed:
+        status: str = f'partially successful (failed {file_count}/{files_processed})'
+    elif file_count == files_processed:
+        status = 'unsuccessful'
+    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
+
+
+def install_base_units(ctx, fsid):
+    # type: (CephadmContext, str) -> None
+    """
+    Set up ceph.target and ceph-$fsid.target units.
+    """
+    # global unit
+    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
+    with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
+        f.write('[Unit]\n'
+                'Description=All Ceph clusters and services\n'
+                '\n'
+                '[Install]\n'
+                'WantedBy=multi-user.target\n')
+    if not existed:
+        # we disable before enable in case a different ceph.target
+        # (from the traditional package) is present; while newer
+        # systemd is smart enough to disable the old
+        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
+        # some older versions of systemd error out with EEXIST.
+        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
+        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
+        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
+
+    # cluster unit
+    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
+    with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
+        f.write(
+            '[Unit]\n'
+            'Description=Ceph cluster {fsid}\n'
+            'PartOf=ceph.target\n'
+            'Before=ceph.target\n'
+            '\n'
+            '[Install]\n'
+            'WantedBy=multi-user.target ceph.target\n'.format(
+                fsid=fsid)
+        )
+    if not existed:
+        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
+        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
+
+    # don't overwrite file in order to allow users to manipulate it
+    if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
+        return
+
+    # logrotate for the cluster
+    with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
+        """
+        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
+        in all containers, but I don't see an elegant way to send SIGHUP *just* to
+        the daemons for this cluster.  (1) systemd kill -s will get the signal to
+        podman, but podman will exit.  (2) podman kill will get the signal to the
+        first child (bash), but that isn't the ceph daemon.  This is simpler and
+        should be harmless.
+        """
+        targets: List[str] = [
+            'ceph-mon',
+            'ceph-mgr',
+            'ceph-mds',
+            'ceph-osd',
+            'ceph-fuse',
+            'radosgw',
+            'rbd-mirror',
+            'cephfs-mirror',
+            'tcmu-runner'
+        ]
+
+        f.write("""# created by cephadm
+/var/log/ceph/%s/*.log {
+    rotate 7
+    daily
+    compress
+    sharedscripts
+    postrotate
+        killall -q -1 %s || pkill -1 -x '%s' || true
+    endscript
+    missingok
+    notifempty
+    su root root
+}
+""" % (fsid, ' '.join(targets), '|'.join(targets)))
+
+
+def get_unit_file(ctx, fsid):
+    # type: (CephadmContext, str) -> str
+    extra_args = ''
+    if isinstance(ctx.container_engine, Podman):
+        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
+                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
+                      'Type=forking\n'
+                      'PIDFile=%t/%n-pid\n')
+        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
+            extra_args += 'Delegate=yes\n'
+
+    docker = isinstance(ctx.container_engine, Docker)
+    u = """# generated by cephadm
+[Unit]
+Description=Ceph %i for {fsid}
+
+# According to:
+#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
+# these can be removed once ceph-mon will dynamically change network
+# configuration.
+After=network-online.target local-fs.target time-sync.target{docker_after}
+Wants=network-online.target local-fs.target time-sync.target
+{docker_requires}
+
+PartOf=ceph-{fsid}.target
+Before=ceph-{fsid}.target
+
+[Service]
+LimitNOFILE=1048576
+LimitNPROC=1048576
+EnvironmentFile=-/etc/environment
+ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
+ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
+ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
+KillMode=none
+Restart=on-failure
+RestartSec=10s
+TimeoutStartSec=200
+TimeoutStopSec=120
+StartLimitInterval=30min
+StartLimitBurst=5
+{extra_args}
+[Install]
+WantedBy=ceph-{fsid}.target
+""".format(fsid=fsid,
+           data_dir=ctx.data_dir,
+           extra_args=extra_args,
+           # if docker, we depend on docker.service
+           docker_after=' docker.service' if docker else '',
+           docker_requires='Requires=docker.service\n' if docker else '')
+
+    return u
+
+##################################
+
+
+class CephContainer:
+    def __init__(self,
+                 ctx: CephadmContext,
+                 image: str,
+                 entrypoint: str,
+                 args: List[str] = [],
+                 volume_mounts: Dict[str, str] = {},
+                 cname: str = '',
+                 container_args: List[str] = [],
+                 envs: Optional[List[str]] = None,
+                 privileged: bool = False,
+                 ptrace: bool = False,
+                 bind_mounts: Optional[List[List[str]]] = None,
+                 init: Optional[bool] = None,
+                 host_network: bool = True,
+                 memory_request: Optional[str] = None,
+                 memory_limit: Optional[str] = None,
+                 ) -> None:
+        self.ctx = ctx
+        self.image = image
+        self.entrypoint = entrypoint
+        self.args = args
+        self.volume_mounts = volume_mounts
+        self._cname = cname
+        self.container_args = container_args
+        self.envs = envs
+        self.privileged = privileged
+        self.ptrace = ptrace
+        self.bind_mounts = bind_mounts if bind_mounts else []
+        self.init = init if init else ctx.container_init
+        self.host_network = host_network
+        self.memory_request = memory_request
+        self.memory_limit = memory_limit
+
+    @classmethod
+    def for_daemon(cls,
+                   ctx: CephadmContext,
+                   fsid: str,
+                   daemon_type: str,
+                   daemon_id: str,
+                   entrypoint: str,
+                   args: List[str] = [],
+                   volume_mounts: Dict[str, str] = {},
+                   container_args: List[str] = [],
+                   envs: Optional[List[str]] = None,
+                   privileged: bool = False,
+                   ptrace: bool = False,
+                   bind_mounts: Optional[List[List[str]]] = None,
+                   init: Optional[bool] = None,
+                   host_network: bool = True,
+                   memory_request: Optional[str] = None,
+                   memory_limit: Optional[str] = None,
+                   ) -> 'CephContainer':
+        return cls(
+            ctx,
+            image=ctx.image,
+            entrypoint=entrypoint,
+            args=args,
+            volume_mounts=volume_mounts,
+            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
+            container_args=container_args,
+            envs=envs,
+            privileged=privileged,
+            ptrace=ptrace,
+            bind_mounts=bind_mounts,
+            init=init,
+            host_network=host_network,
+            memory_request=memory_request,
+            memory_limit=memory_limit,
+        )
+
+    @property
+    def cname(self) -> str:
+        """
+        podman adds the current container name to the /etc/hosts
+        file. Turns out, python's `socket.getfqdn()` differs from
+        `hostname -f`, when we have the container names containing
+        dots in it.:
+
+        # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
+        [root@sebastians-laptop /]# cat /etc/hosts
+        127.0.0.1   localhost
+        ::1         localhost
+        127.0.1.1   sebastians-laptop foo.bar.baz.com
+        [root@sebastians-laptop /]# hostname -f
+        sebastians-laptop
+        [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
+        foo.bar.baz.com
+
+        Fascinatingly, this doesn't happen when using dashes.
+        """
+        return self._cname.replace('.', '-')
+
+    @cname.setter
+    def cname(self, val: str) -> None:
+        self._cname = val
+
+    @property
+    def old_cname(self) -> str:
+        return self._cname
+
+    def run_cmd(self) -> List[str]:
+        cmd_args: List[str] = [
+            str(self.ctx.container_engine.path),
+            'run',
+            '--rm',
+            '--ipc=host',
+            # some containers (ahem, haproxy) override this, but we want a fast
+            # shutdown always (and, more importantly, a successful exit even if we
+            # fall back to SIGKILL).
+            '--stop-signal=SIGTERM',
+        ]
+
+        if isinstance(self.ctx.container_engine, Podman):
+            if os.path.exists('/etc/ceph/podman-auth.json'):
+                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
+
+        if isinstance(self.ctx.container_engine, Docker):
+            cmd_args.extend(['--ulimit', 'nofile=1048576'])
+
+        envs: List[str] = [
+            '-e', 'CONTAINER_IMAGE=%s' % self.image,
+            '-e', 'NODE_NAME=%s' % get_hostname(),
+        ]
+        vols: List[str] = []
+        binds: List[str] = []
+
+        if self.memory_request:
+            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
+        if self.memory_limit:
+            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
+            cmd_args.extend(['--memory', str(self.memory_limit)])
+
+        if self.host_network:
+            cmd_args.append('--net=host')
+        if self.entrypoint:
+            cmd_args.extend(['--entrypoint', self.entrypoint])
+        if self.privileged:
+            cmd_args.extend([
+                '--privileged',
+                # let OSD etc read block devs that haven't been chowned
+                '--group-add=disk'])
+        if self.ptrace and not self.privileged:
+            # if privileged, the SYS_PTRACE cap is already added
+            # in addition, --cap-add and --privileged are mutually
+            # exclusive since podman >= 2.0
+            cmd_args.append('--cap-add=SYS_PTRACE')
+        if self.init:
+            cmd_args.append('--init')
+            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
+        if self.cname:
+            cmd_args.extend(['--name', self.cname])
+        if self.envs:
+            for env in self.envs:
+                envs.extend(['-e', env])
+
+        vols = sum(
+            [['-v', '%s:%s' % (host_dir, container_dir)]
+             for host_dir, container_dir in self.volume_mounts.items()], [])
+        binds = sum([['--mount', '{}'.format(','.join(bind))]
+                     for bind in self.bind_mounts], [])
+
+        return \
+            cmd_args + self.container_args + \
+            envs + vols + binds + \
+            [self.image] + self.args  # type: ignore
+
+    def shell_cmd(self, cmd: List[str]) -> List[str]:
+        cmd_args: List[str] = [
+            str(self.ctx.container_engine.path),
+            'run',
+            '--rm',
+            '--ipc=host',
+        ]
+        envs: List[str] = [
+            '-e', 'CONTAINER_IMAGE=%s' % self.image,
+            '-e', 'NODE_NAME=%s' % get_hostname(),
+        ]
+        vols: List[str] = []
+        binds: List[str] = []
+
+        if self.host_network:
+            cmd_args.append('--net=host')
+        if self.ctx.no_hosts:
+            cmd_args.append('--no-hosts')
+        if self.privileged:
+            cmd_args.extend([
+                '--privileged',
+                # let OSD etc read block devs that haven't been chowned
+                '--group-add=disk',
+            ])
+        if self.init:
+            cmd_args.append('--init')
+            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
+        if self.envs:
+            for env in self.envs:
+                envs.extend(['-e', env])
+
+        vols = sum(
+            [['-v', '%s:%s' % (host_dir, container_dir)]
+             for host_dir, container_dir in self.volume_mounts.items()], [])
+        binds = sum([['--mount', '{}'.format(','.join(bind))]
+                     for bind in self.bind_mounts], [])
+
+        return cmd_args + self.container_args + envs + vols + binds + [
+            '--entrypoint', cmd[0],
+            self.image,
+        ] + cmd[1:]
+
+    def exec_cmd(self, cmd):
+        # type: (List[str]) -> List[str]
+        cname = get_running_container_name(self.ctx, self)
+        if not cname:
+            raise Error('unable to find container "{}"'.format(self.cname))
+        return [
+            str(self.ctx.container_engine.path),
+            'exec',
+        ] + self.container_args + [
+            self.cname,
+        ] + cmd
+
+    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
+        ret = [
+            str(self.ctx.container_engine.path),
+            'rm', '-f',
+        ]
+        if storage:
+            ret.append('--storage')
+        if old_cname:
+            ret.append(self.old_cname)
+        else:
+            ret.append(self.cname)
+        return ret
+
+    def stop_cmd(self, old_cname: bool = False, timeout: Optional[int] = None) -> List[str]:
+        if timeout is None:
+            ret = [
+                str(self.ctx.container_engine.path),
+                'stop', self.old_cname if old_cname else self.cname,
+            ]
+        else:
+            ret = [
+                str(self.ctx.container_engine.path),
+                'stop', '-t', f'{timeout}',
+                self.old_cname if old_cname else self.cname,
+            ]
+        return ret
+
+    def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
+        # type: (Optional[int], CallVerbosity) -> str
+        out, _, _ = call_throws(self.ctx, self.run_cmd(),
+                                desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
+        return out
+
+
+#####################################
+
+class MgrListener(Thread):
+    def __init__(self, agent: 'CephadmAgent') -> None:
+        self.agent = agent
+        self.stop = False
+        super(MgrListener, self).__init__(target=self.run)
+
+    def run(self) -> None:
+        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
+        listenSocket.settimeout(60)
+        listenSocket.listen(1)
+        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
+        ssl_ctx.load_verify_locations(self.agent.ca_path)
+        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
+        while not self.stop:
+            try:
+                try:
+                    conn, _ = secureListenSocket.accept()
+                except socket.timeout:
+                    continue
+                try:
+                    length: int = int(conn.recv(10).decode())
+                except Exception as e:
+                    err_str = f'Failed to extract length of payload from message: {e}'
+                    conn.send(err_str.encode())
+                    logger.error(err_str)
+                    continue
+                while True:
+                    payload = conn.recv(length).decode()
+                    if not payload:
+                        break
+                    try:
+                        data: Dict[Any, Any] = json.loads(payload)
+                        self.handle_json_payload(data)
+                    except Exception as e:
+                        err_str = f'Failed to extract json payload from message: {e}'
+                        conn.send(err_str.encode())
+                        logger.error(err_str)
+                    else:
+                        conn.send(b'ACK')
+                        if 'config' in data:
+                            self.agent.wakeup()
+                        self.agent.ls_gatherer.wakeup()
+                        self.agent.volume_gatherer.wakeup()
+                        logger.debug(f'Got mgr message {data}')
+            except Exception as e:
+                logger.error(f'Mgr Listener encountered exception: {e}')
+
+    def shutdown(self) -> None:
+        self.stop = True
+
+    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
+        self.agent.ack = int(data['counter'])
+        if 'config' in data:
+            logger.info('Received new config from mgr')
+            config = data['config']
+            for filename in config:
+                if filename in self.agent.required_files:
+                    file_path = os.path.join(self.agent.daemon_dir, filename)
+                    with write_new(file_path) as f:
+                        f.write(config[filename])
+            self.agent.pull_conf_settings()
+            self.agent.wakeup()
+
+
+class CephadmAgent():
+
+    daemon_type = 'agent'
+    default_port = 8498
+    loop_interval = 30
+    stop = False
+
+    required_files = [
+        'agent.json',
+        'keyring',
+        'root_cert.pem',
+        'listener.crt',
+        'listener.key',
+    ]
+
+    def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.starting_port = 14873
+        self.target_ip = ''
+        self.target_port = ''
+        self.host = ''
+        self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
+        self.config_path = os.path.join(self.daemon_dir, 'agent.json')
+        self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
+        self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
+        self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
+        self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
+        self.listener_port = ''
+        self.ack = 1
+        self.event = Event()
+        self.mgr_listener = MgrListener(self)
+        self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
+        self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
+        self.device_enhanced_scan = False
+        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
+        self.recent_iteration_index: int = 0
+        self.cached_ls_values: Dict[str, Dict[str, str]] = {}
+
+    def validate(self, config: Dict[str, str] = {}) -> None:
+        # check for the required files
+        for fname in self.required_files:
+            if fname not in config:
+                raise Error('required file missing from config: %s' % fname)
+
+    def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
+        if not config:
+            raise Error('Agent needs a config')
+        assert isinstance(config, dict)
+        self.validate(config)
+
+        # Create the required config files in the daemons dir, with restricted permissions
+        for filename in config:
+            if filename in self.required_files:
+                file_path = os.path.join(self.daemon_dir, filename)
+                with write_new(file_path) as f:
+                    f.write(config[filename])
+
+        unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
+        with write_new(unit_run_path) as f:
+            f.write(self.unit_run())
+
+        meta: Dict[str, Any] = fetch_meta(self.ctx)
+        meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
+        with write_new(meta_file_path) as f:
+            f.write(json.dumps(meta, indent=4) + '\n')
+
+        unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
+        with write_new(unit_file_path) as f:
+            f.write(self.unit_file())
+
+        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
+        call(self.ctx, ['systemctl', 'stop', self.unit_name()],
+             verbosity=CallVerbosity.DEBUG)
+        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
+             verbosity=CallVerbosity.DEBUG)
+        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])
+
+    def unit_name(self) -> str:
+        return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))
+
+    def unit_run(self) -> str:
+        py3 = shutil.which('python3')
+        binary_path = os.path.realpath(sys.argv[0])
+        return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
+
+    def unit_file(self) -> str:
+        return """#generated by cephadm
+[Unit]
+Description=cephadm agent for cluster {fsid}
+
+PartOf=ceph-{fsid}.target
+Before=ceph-{fsid}.target
+
+[Service]
+Type=forking
+ExecStart=/bin/bash {data_dir}/unit.run
+Restart=on-failure
+RestartSec=10s
+
+[Install]
+WantedBy=ceph-{fsid}.target
+""".format(
+            fsid=self.fsid,
+            data_dir=self.daemon_dir
+        )
+
+    def shutdown(self) -> None:
+        self.stop = True
+        if self.mgr_listener.is_alive():
+            self.mgr_listener.shutdown()
+        if self.ls_gatherer.is_alive():
+            self.ls_gatherer.shutdown()
+        if self.volume_gatherer.is_alive():
+            self.volume_gatherer.shutdown()
+
+    def wakeup(self) -> None:
+        self.event.set()
+
+    def pull_conf_settings(self) -> None:
+        try:
+            with open(self.config_path, 'r') as f:
+                config = json.load(f)
+                self.target_ip = config['target_ip']
+                self.target_port = config['target_port']
+                self.loop_interval = int(config['refresh_period'])
+                self.starting_port = int(config['listener_port'])
+                self.host = config['host']
+                use_lsm = config['device_enhanced_scan']
+        except Exception as e:
+            self.shutdown()
+            raise Error(f'Failed to get agent target ip and port from config: {e}')
+
+        try:
+            with open(self.keyring_path, 'r') as f:
+                self.keyring = f.read()
+        except Exception as e:
+            self.shutdown()
+            raise Error(f'Failed to get agent keyring: {e}')
+
+        assert self.target_ip and self.target_port
+
+        self.device_enhanced_scan = False
+        if use_lsm.lower() == 'true':
+            self.device_enhanced_scan = True
+        self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))
+
+    def run(self) -> None:
+        self.pull_conf_settings()
+
+        try:
+            for _ in range(1001):
+                if not port_in_use(self.ctx, EndPoint('0.0.0.0', self.starting_port)):
+                    self.listener_port = str(self.starting_port)
+                    break
+                self.starting_port += 1
+            if not self.listener_port:
+                raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
+        except Exception as e:
+            raise Error(f'Failed to pick port for agent to listen on: {e}')
+
+        if not self.mgr_listener.is_alive():
+            self.mgr_listener.start()
+
+        if not self.ls_gatherer.is_alive():
+            self.ls_gatherer.start()
+
+        if not self.volume_gatherer.is_alive():
+            self.volume_gatherer.start()
+
+        ssl_ctx = ssl.create_default_context()
+        ssl_ctx.check_hostname = True
+        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+        ssl_ctx.load_verify_locations(self.ca_path)
+
+        while not self.stop:
+            start_time = time.monotonic()
+            ack = self.ack
+
+            # part of the networks info is returned as a set which is not JSON
+            # serializable. The set must be converted to a list
+            networks = list_networks(self.ctx)
+            networks_list: Dict[str, Dict[str, List[str]]] = {}
+            for key in networks.keys():
+                networks_list[key] = {}
+                for k, v in networks[key].items():
+                    networks_list[key][k] = list(v)
+
+            data = json.dumps({'host': self.host,
+                               'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
+                                      and self.ls_gatherer.data is not None else []),
+                               'networks': networks_list,
+                               'facts': HostFacts(self.ctx).dump(),
+                               'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
+                                          and self.volume_gatherer.data is not None else ''),
+                               'ack': str(ack),
+                               'keyring': self.keyring,
+                               'port': self.listener_port})
+            data = data.encode('ascii')
+
+            url = f'https://{self.target_ip}:{self.target_port}/data/'
+            try:
+                req = Request(url, data, {'Content-Type': 'application/json'})
+                send_time = time.monotonic()
+                with urlopen(req, context=ssl_ctx) as response:
+                    response_str = response.read()
+                    response_json = json.loads(response_str)
+                    total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
+                    logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
+            except Exception as e:
+                logger.error(f'Failed to send metadata to mgr: {e}')
+
+            end_time = time.monotonic()
+            run_time = datetime.timedelta(seconds=(end_time - start_time))
+            self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
+            self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
+            run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
+
+            self.event.wait(max(self.loop_interval - int(run_time_average), 0))
+            self.event.clear()
+
+    def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
+        self.ctx.command = 'inventory --format=json'.split()
+        if enhanced:
+            self.ctx.command.append('--with-lsm')
+        self.ctx.fsid = self.fsid
+
+        stream = io.StringIO()
+        with redirect_stdout(stream):
+            command_ceph_volume(self.ctx)
+
+        stdout = stream.getvalue()
+
+        if stdout:
+            return (stdout, False)
+        else:
+            raise Exception('ceph-volume returned empty value')
+
+    def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
+        # gets a subset of ls info quickly. The results of this will tell us if our
+        # cached info is still good or if we need to run the full ls again.
+        # for legacy containers, we just grab the full info. For cephadmv1 containers,
+        # we only grab enabled, state, mem_usage and container id. If container id has
+        # not changed for any daemon, we assume our cached info is good.
+        daemons: Dict[str, Dict[str, Any]] = {}
+        data_dir = self.ctx.data_dir
+        seen_memusage = {}  # type: Dict[str, int]
+        out, err, code = call(
+            self.ctx,
+            [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
+            verbosity=CallVerbosity.DEBUG
+        )
+        seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
+        # we need a mapping from container names to ids. Later we will convert daemon
+        # names to container names to get daemons container id to see if it has changed
+        out, err, code = call(
+            self.ctx,
+            [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
+            verbosity=CallVerbosity.DEBUG
+        )
+        name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
+        for i in os.listdir(data_dir):
+            if i in ['mon', 'osd', 'mds', 'mgr']:
+                daemon_type = i
+                for j in os.listdir(os.path.join(data_dir, i)):
+                    if '-' not in j:
+                        continue
+                    (cluster, daemon_id) = j.split('-', 1)
+                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+                    (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
+                    daemons[f'{daemon_type}.{daemon_id}'] = {
+                        'style': 'legacy',
+                        'name': '%s.%s' % (daemon_type, daemon_id),
+                        'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
+                        'systemd_unit': legacy_unit_name,
+                        'enabled': 'true' if enabled else 'false',
+                        'state': state,
+                    }
+            elif is_fsid(i):
+                fsid = str(i)  # convince mypy that fsid is a str here
+                for j in os.listdir(os.path.join(data_dir, i)):
+                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
+                        (daemon_type, daemon_id) = j.split('.', 1)
+                        unit_name = get_unit_name(fsid, daemon_type, daemon_id)
+                        (enabled, state, _) = check_unit(self.ctx, unit_name)
+                        daemons[j] = {
+                            'style': 'cephadm:v1',
+                            'systemd_unit': unit_name,
+                            'enabled': 'true' if enabled else 'false',
+                            'state': state,
+                        }
+                        c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
+                        container_id: Optional[str] = None
+                        for name in (c.cname, c.old_cname):
+                            if name in name_id_mapping:
+                                container_id = name_id_mapping[name]
+                                break
+                        daemons[j]['container_id'] = container_id
+                        if container_id:
+                            daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
+        return daemons
+
+    def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
+        # map container names to ids from ps output
+        name_id_mapping = {}  # type: Dict[str, str]
+        if not code:
+            for line in out.splitlines():
+                id, name = line.split(',')
+                name_id_mapping[name] = id
+        return name_id_mapping
+
+    def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
+        if not self.cached_ls_values:
+            logger.info('No cached ls output. Running full daemon ls')
+            ls = list_daemons(self.ctx)
+            for d in ls:
+                self.cached_ls_values[d['name']] = d
+            return (ls, True)
+        else:
+            ls_subset = self._daemon_ls_subset()
+            need_full_ls = False
+            state_change = False
+            if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
+                # case for a new daemon in ls or an old daemon no longer appearing.
+                # If that happens we need a full ls
+                logger.info('Change detected in state of daemons. Running full daemon ls')
+                self.cached_ls_values = {}
+                ls = list_daemons(self.ctx)
+                for d in ls:
+                    self.cached_ls_values[d['name']] = d
+                return (ls, True)
+            for daemon, info in self.cached_ls_values.items():
+                if info['style'] == 'legacy':
+                    # for legacy containers, ls_subset just grabs all the info
+                    self.cached_ls_values[daemon] = ls_subset[daemon]
+                else:
+                    if info['container_id'] != ls_subset[daemon]['container_id']:
+                        # case for container id having changed. We need full ls as
+                        # info we didn't grab like version and start time could have changed
+                        need_full_ls = True
+                        break
+
+                    # want to know if a daemons state change because in those cases we want
+                    # to report back quicker
+                    if (
+                        self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
+                        or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
+                    ):
+                        state_change = True
+                    # if we reach here, container id matched. Update the few values we do track
+                    # from ls subset: state, enabled, memory_usage.
+                    self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
+                    self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
+                    if 'memory_usage' in ls_subset[daemon]:
+                        self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
+            if need_full_ls:
+                logger.info('Change detected in state of daemons. Running full daemon ls')
+                ls = list_daemons(self.ctx)
+                self.cached_ls_values = {}
+                for d in ls:
+                    self.cached_ls_values[d['name']] = d
+                return (ls, True)
+            else:
+                ls = [info for daemon, info in self.cached_ls_values.items()]
+                return (ls, state_change)
+
+
+class AgentGatherer(Thread):
+    def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
+        self.agent = agent
+        self.func = func
+        self.gatherer_type = gatherer_type
+        self.ack = initial_ack
+        self.event = Event()
+        self.data: Any = None
+        self.stop = False
+        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
+        self.recent_iteration_index: int = 0
+        super(AgentGatherer, self).__init__(target=self.run)
+
+    def run(self) -> None:
+        while not self.stop:
+            try:
+                start_time = time.monotonic()
+
+                ack = self.agent.ack
+                change = False
+                try:
+                    self.data, change = self.func()
+                except Exception as e:
+                    logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
+                    self.data = None
+                if ack != self.ack or change:
+                    self.ack = ack
+                    self.agent.wakeup()
+
+                end_time = time.monotonic()
+                run_time = datetime.timedelta(seconds=(end_time - start_time))
+                self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
+                self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
+                run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
+
+                self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
+                self.event.clear()
+            except Exception as e:
+                logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')
+
+    def shutdown(self) -> None:
+        self.stop = True
+
+    def wakeup(self) -> None:
+        self.event.set()
+
+    def update_func(self, func: Callable) -> None:
+        self.func = func
+
+
+def command_agent(ctx: CephadmContext) -> None:
+    agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)
+
+    if not os.path.isdir(agent.daemon_dir):
+        raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
+
+    agent.run()
+
+
+##################################
+
+@executes_early
+def command_version(ctx):
+    # type: (CephadmContext) -> int
+    import importlib
+
+    try:
+        vmod = importlib.import_module('_version')
+    except ImportError:
+        print('cephadm version UNKNOWN')
+        return 1
+    _unset = '<UNSET>'
+    print('cephadm version {0} ({1}) {2} ({3})'.format(
+        getattr(vmod, 'CEPH_GIT_NICE_VER', _unset),
+        getattr(vmod, 'CEPH_GIT_VER', _unset),
+        getattr(vmod, 'CEPH_RELEASE_NAME', _unset),
+        getattr(vmod, 'CEPH_RELEASE_TYPE', _unset),
+    ))
+    return 0
+
+##################################
+
+
+@default_image
+def command_pull(ctx):
+    # type: (CephadmContext) -> int
+
+    try:
+        _pull_image(ctx, ctx.image, ctx.insecure)
+    except UnauthorizedRegistryError:
+        err_str = 'Failed to pull container image. Check that host(s) are logged into the registry'
+        logger.debug(f'Pulling image for `command_pull` failed: {err_str}')
+        raise Error(err_str)
+    return command_inspect_image(ctx)
+
+
+def _pull_image(ctx, image, insecure=False):
+    # type: (CephadmContext, str, bool) -> None
+    logger.info('Pulling container image %s...' % image)
+
+    ignorelist = [
+        'error creating read-write layer with ID',
+        'net/http: TLS handshake timeout',
+        'Digest did not match, expected',
+    ]
+
+    cmd = [ctx.container_engine.path, 'pull', image]
+    if isinstance(ctx.container_engine, Podman):
+        if insecure:
+            cmd.append('--tls-verify=false')
+
+        if os.path.exists('/etc/ceph/podman-auth.json'):
+            cmd.append('--authfile=/etc/ceph/podman-auth.json')
+    cmd_str = ' '.join(cmd)
+
+    for sleep_secs in [1, 4, 25]:
+        out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+        if not ret:
+            return
+
+        if 'unauthorized' in err:
+            raise UnauthorizedRegistryError()
+
+        if not any(pattern in err for pattern in ignorelist):
+            raise Error('Failed command: %s' % cmd_str)
+
+        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
+        time.sleep(sleep_secs)
+
+    raise Error('Failed command: %s: maximum retries reached' % cmd_str)
+
+##################################
+
+
+@require_image
+@infer_image
+def command_inspect_image(ctx):
+    # type: (CephadmContext) -> int
+    out, err, ret = call_throws(ctx, [
+        ctx.container_engine.path, 'inspect',
+        '--format', '{{.ID}},{{.RepoDigests}}',
+        ctx.image])
+    if ret:
+        return errno.ENOENT
+    info_from = get_image_info_from_inspect(out.strip(), ctx.image)
+
+    ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
+    info_from['ceph_version'] = ver
+
+    print(json.dumps(info_from, indent=4, sort_keys=True))
+    return 0
+
+
+def normalize_image_digest(digest: str) -> str:
+    """
+    Normal case:
+    >>> normalize_image_digest('ceph/ceph', 'docker.io')
+    'docker.io/ceph/ceph'
+
+    No change:
+    >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io')
+    'quay.ceph.io/ceph/ceph'
+
+    >>> normalize_image_digest('docker.io/ubuntu', 'docker.io')
+    'docker.io/ubuntu'
+
+    >>> normalize_image_digest('localhost/ceph', 'docker.io')
+    'localhost/ceph'
+    """
+    known_shortnames = [
+        'ceph/ceph',
+        'ceph/daemon',
+        'ceph/daemon-base',
+    ]
+    for image in known_shortnames:
+        if digest.startswith(image):
+            return f'{DEFAULT_REGISTRY}/{digest}'
+    return digest
+
+
+def get_image_info_from_inspect(out, image):
+    # type: (str, str) -> Dict[str, Union[str,List[str]]]
+    image_id, digests = out.split(',', 1)
+    if not out:
+        raise Error('inspect {}: empty result'.format(image))
+    r = {
+        'image_id': normalize_container_id(image_id)
+    }  # type: Dict[str, Union[str,List[str]]]
+    if digests:
+        r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
+    return r
+
+##################################
+
+
+def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
+    """Determine whether the given string is a valid subnet
+
+    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
+    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
+    """
+
+    rc = 0
+    versions = set()
+    errors = []
+    subnet_list = subnets.split(',')
+    for subnet in subnet_list:
+        # ensure the format of the string is as expected address/netmask
+        subnet = subnet.strip()
+        if not re.search(r'\/\d+$', subnet):
+            rc = 1
+            errors.append(f'{subnet} is not in CIDR format (address/netmask)')
+            continue
+        try:
+            v = ipaddress.ip_network(subnet).version
+            versions.add(v)
+        except ValueError as e:
+            rc = 1
+            errors.append(f'{subnet} invalid: {str(e)}')
+
+    return rc, list(versions), ', '.join(errors)
+
+
+def unwrap_ipv6(address):
+    # type: (str) -> str
+    if address.startswith('[') and address.endswith(']'):
+        return address[1: -1]
+    return address
+
+
+def wrap_ipv6(address):
+    # type: (str) -> str
+
+    # We cannot assume it's already wrapped or even an IPv6 address if
+    # it's already wrapped it'll not pass (like if it's a hostname) and trigger
+    # the ValueError
+    try:
+        if ipaddress.ip_address(address).version == 6:
+            return f'[{address}]'
+    except ValueError:
+        pass
+
+    return address
+
+
+def is_ipv6(address):
+    # type: (str) -> bool
+    address = unwrap_ipv6(address)
+    try:
+        return ipaddress.ip_address(address).version == 6
+    except ValueError:
+        logger.warning('Address: {} is not a valid IP address'.format(address))
+        return False
+
+
+def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
+    """Determine if the ip_addr belongs to any of the subnets list."""
+    subnet_list = [x.strip() for x in subnets.split(',')]
+    for subnet in subnet_list:
+        ip_address = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
+        if ipaddress.ip_address(ip_address) in ipaddress.ip_network(subnet):
+            return True
+    return False
+
+
+def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
+    """Parse mon-addrv param into a list of mon end points."""
+    r = re.compile(r':(\d+)$')
+    addrv_args = []
+    addr_arg = addrv_arg
+    if addr_arg[0] != '[' or addr_arg[-1] != ']':
+        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')
+
+    for addr in addr_arg[1: -1].split(','):
+        hasport = r.findall(addr)
+        if not hasport:
+            raise Error(f'--mon-addrv value {addr_arg} must include port number')
+        port_str = hasport[0]
+        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
+        base_ip = addr[0:-(len(port_str)) - 1]
+        addrv_args.append(EndPoint(base_ip, int(port_str)))
+
+    return addrv_args
+
+
+def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
+    """Parse mon-ip param into a list of mon end points."""
+    r = re.compile(r':(\d+)$')
+    addrv_args = []
+    hasport = r.findall(mon_ip)
+    if hasport:
+        port_str = hasport[0]
+        base_ip = mon_ip[0:-(len(port_str)) - 1]
+        addrv_args.append(EndPoint(base_ip, int(port_str)))
+    else:
+        # No port provided: use fixed ports for ceph monitor
+        addrv_args.append(EndPoint(mon_ip, 3300))
+        addrv_args.append(EndPoint(mon_ip, 6789))
+
+    return addrv_args
+
+
+def build_addrv_params(addrv: List[EndPoint]) -> str:
+    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
+    if len(addrv) > 2:
+        raise Error('Detected a local mon-addrv list with more than 2 entries.')
+    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
+    addr_arg_list: List[str] = []
+    for ep in addrv:
+        if ep.port in port_to_ver:
+            ver = port_to_ver[ep.port]
+        else:
+            ver = 'v2'  # default mon protocol version if port is not provided
+            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
+        addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')
+
+    addr_arg = '[{0}]'.format(','.join(addr_arg_list))
+    return addr_arg
+
+
+def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
+    """Get mon public network from configuration file."""
+    cp = read_config(ctx.config)
+    if not cp.has_option('global', 'public_network'):
+        return None
+
+    # Ensure all public CIDR networks are valid
+    public_network = cp.get('global', 'public_network').strip('"').strip("'")
+    rc, _, err_msg = check_subnet(public_network)
+    if rc:
+        raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')
+
+    # Ensure all public CIDR networks are configured locally
+    configured_subnets = set([x.strip() for x in public_network.split(',')])
+    local_subnets = set([x[0] for x in list_networks(ctx).items()])
+    valid_public_net = False
+    for net in configured_subnets:
+        if net in local_subnets:
+            valid_public_net = True
+        else:
+            logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
+    if not valid_public_net:
+        raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')
+
+    # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
+    if ctx.mon_ip:
+        if not ip_in_subnets(ctx.mon_ip, public_network):
+            raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
+    elif ctx.mon_addrv:
+        addrv_args = parse_mon_addrv(ctx.mon_addrv)
+        for addrv in addrv_args:
+            if not ip_in_subnets(addrv.ip, public_network):
+                raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')
+
+    logger.debug(f'Using mon public network from configuration file {public_network}')
+    return public_network
+
+
+def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
+    """Infer mon public network from local network."""
+    # Make sure IP is configured locally, and then figure out the CIDR network
+    mon_networks = []
+    for net, ifaces in list_networks(ctx).items():
+        # build local_ips list for the specified network
+        local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
+        for _, ls in ifaces.items():
+            local_ips.extend([ipaddress.ip_address(ip) for ip in ls])
+
+        # check if any of mon ips belong to this net
+        for mon_ep in mon_eps:
+            try:
+                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
+                    mon_networks.append(net)
+                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
+            except ValueError as e:
+                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')
+
+    if not mon_networks:
+        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
+    else:
+        logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')
+
+    mon_networks = list(set(mon_networks))  # remove duplicates
+    return ','.join(mon_networks)
+
+
+def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
+    """Get mon public network configuration."""
+    ipv6 = False
+    addrv_args: List[EndPoint] = []
+    mon_addrv: str = ''  # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]
+
+    if ctx.mon_ip:
+        ipv6 = is_ipv6(ctx.mon_ip)
+        if ipv6:
+            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
+        addrv_args = parse_mon_ip(ctx.mon_ip)
+        mon_addrv = build_addrv_params(addrv_args)
+    elif ctx.mon_addrv:
+        ipv6 = ctx.mon_addrv.count('[') > 1
+        addrv_args = parse_mon_addrv(ctx.mon_addrv)
+        mon_addrv = ctx.mon_addrv
+    else:
+        raise Error('must specify --mon-ip or --mon-addrv')
+
+    if addrv_args:
+        for end_point in addrv_args:
+            check_ip_port(ctx, end_point)
+
+    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
+    mon_network = None
+    if not ctx.skip_mon_network:
+        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)
+
+    return (mon_addrv, ipv6, mon_network)
+
+
+def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
+    # the cluster network may not exist on this node, so all we can do is
+    # validate that the address given is valid ipv4 or ipv6 subnet
+    ipv6_cluster_network = False
+    cp = read_config(ctx.config)
+    cluster_network = ctx.cluster_network
+    if cluster_network is None and cp.has_option('global', 'cluster_network'):
+        cluster_network = cp.get('global', 'cluster_network').strip('"').strip("'")
+
+    if cluster_network:
+        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
+        local_subnets = set([x[0] for x in list_networks(ctx).items()])
+        for net in cluster_nets:
+            if net not in local_subnets:
+                logger.warning(f'The cluster CIDR network {net} is not configured locally.')
+
+        rc, versions, err_msg = check_subnet(cluster_network)
+        if rc:
+            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
+        ipv6_cluster_network = True if 6 in versions else False
+    else:
+        logger.info('Internal network (--cluster-network) has not '
+                    'been provided, OSD replication will default to '
+                    'the public_network')
+
+    return cluster_network, ipv6_cluster_network
+
+
+def create_initial_keys(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    mgr_id: str
+) -> Tuple[str, str, str, Any, Any]:  # type: ignore
+
+    _image = ctx.image
+
+    # create some initial keys
+    logger.info('Creating initial keys...')
+    mon_key = CephContainer(
+        ctx,
+        image=_image,
+        entrypoint='/usr/bin/ceph-authtool',
+        args=['--gen-print-key'],
+    ).run().strip()
+    admin_key = CephContainer(
+        ctx,
+        image=_image,
+        entrypoint='/usr/bin/ceph-authtool',
+        args=['--gen-print-key'],
+    ).run().strip()
+    mgr_key = CephContainer(
+        ctx,
+        image=_image,
+        entrypoint='/usr/bin/ceph-authtool',
+        args=['--gen-print-key'],
+    ).run().strip()
+
+    keyring = ('[mon.]\n'
+               '\tkey = %s\n'
+               '\tcaps mon = allow *\n'
+               '[client.admin]\n'
+               '\tkey = %s\n'
+               '\tcaps mon = allow *\n'
+               '\tcaps mds = allow *\n'
+               '\tcaps mgr = allow *\n'
+               '\tcaps osd = allow *\n'
+               '[mgr.%s]\n'
+               '\tkey = %s\n'
+               '\tcaps mon = profile mgr\n'
+               '\tcaps mds = allow *\n'
+               '\tcaps osd = allow *\n'
+               % (mon_key, admin_key, mgr_id, mgr_key))
+
+    admin_keyring = write_tmp('[client.admin]\n'
+                              '\tkey = ' + admin_key + '\n',
+                              uid, gid)
+
+    # tmp keyring file
+    bootstrap_keyring = write_tmp(keyring, uid, gid)
+    return (mon_key, mgr_key, admin_key,
+            bootstrap_keyring, admin_keyring)
+
+
+def create_initial_monmap(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    fsid: str,
+    mon_id: str, mon_addr: str
+) -> Any:
+    logger.info('Creating initial monmap...')
+    monmap = write_tmp('', 0, 0)
+    out = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/bin/monmaptool',
+        args=[
+            '--create',
+            '--clobber',
+            '--fsid', fsid,
+            '--addv', mon_id, mon_addr,
+            '/tmp/monmap'
+        ],
+        volume_mounts={
+            monmap.name: '/tmp/monmap:z',
+        },
+    ).run()
+    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')
+
+    # pass monmap file to ceph user for use by ceph-mon --mkfs below
+    os.fchown(monmap.fileno(), uid, gid)
+    return monmap
+
+
+def prepare_create_mon(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    fsid: str, mon_id: str,
+    bootstrap_keyring_path: str,
+    monmap_path: str
+) -> Tuple[str, str]:
+    logger.info('Creating mon...')
+    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
+    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
+    log_dir = get_log_dir(fsid, ctx.log_dir)
+    out = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/bin/ceph-mon',
+        args=[
+            '--mkfs',
+            '-i', mon_id,
+            '--fsid', fsid,
+            '-c', '/dev/null',
+            '--monmap', '/tmp/monmap',
+            '--keyring', '/tmp/keyring',
+        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
+        volume_mounts={
+            log_dir: '/var/log/ceph:z',
+            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
+            bootstrap_keyring_path: '/tmp/keyring:z',
+            monmap_path: '/tmp/monmap:z',
+        },
+    ).run()
+    logger.debug(f'create mon.{mon_id} on {out}')
+    return (mon_dir, log_dir)
+
+
+def create_mon(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    fsid: str, mon_id: str
+) -> None:
+    mon_c = get_container(ctx, fsid, 'mon', mon_id)
+    ctx.meta_properties = {'service_name': 'mon'}
+    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
+                  config=None, keyring=None)
+
+
+def wait_for_mon(
+    ctx: CephadmContext,
+    mon_id: str, mon_dir: str,
+    admin_keyring_path: str, config_path: str
+) -> None:
+    logger.info('Waiting for mon to start...')
+    c = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/bin/ceph',
+        args=[
+            'status'],
+        volume_mounts={
+            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
+            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
+            config_path: '/etc/ceph/ceph.conf:z',
+        },
+    )
+
+    # wait for the service to become available
+    def is_mon_available():
+        # type: () -> bool
+        timeout = ctx.timeout if ctx.timeout else 60  # seconds
+        out, err, ret = call(ctx, c.run_cmd(),
+                             desc=c.entrypoint,
+                             timeout=timeout,
+                             verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+        return ret == 0
+
+    is_available(ctx, 'mon', is_mon_available)
+
+
+def create_mgr(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    fsid: str, mgr_id: str, mgr_key: str,
+    config: str, clifunc: Callable
+) -> None:
+    logger.info('Creating mgr...')
+    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
+    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
+    # Note:the default port used by the Prometheus node exporter is opened in fw
+    ctx.meta_properties = {'service_name': 'mgr'}
+    endpoints = [EndPoint('0.0.0.0', 9283), EndPoint('0.0.0.0', 8765)]
+    if not ctx.skip_monitoring_stack:
+        endpoints.append(EndPoint('0.0.0.0', 8443))
+    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
+                  config=config, keyring=mgr_keyring, endpoints=endpoints)
+
+    # wait for the service to become available
+    logger.info('Waiting for mgr to start...')
+
+    def is_mgr_available():
+        # type: () -> bool
+        timeout = ctx.timeout if ctx.timeout else 60  # seconds
+        try:
+            out = clifunc(['status', '-f', 'json-pretty'],
+                          timeout=timeout,
+                          verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+            j = json.loads(out)
+            return j.get('mgrmap', {}).get('available', False)
+        except Exception as e:
+            logger.debug('status failed: %s' % e)
+            return False
+
+    is_available(ctx, 'mgr', is_mgr_available)
+
+
+def prepare_ssh(
+    ctx: CephadmContext,
+    cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+    cli(['cephadm', 'set-user', ctx.ssh_user])
+
+    if ctx.ssh_config:
+        logger.info('Using provided ssh config...')
+        mounts = {
+            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
+        }
+        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
+
+    if ctx.ssh_private_key and ctx.ssh_public_key:
+        logger.info('Using provided ssh keys...')
+        mounts = {
+            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
+            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
+        }
+        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
+        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
+        ssh_pub = cli(['cephadm', 'get-pub-key'])
+        authorize_ssh_key(ssh_pub, ctx.ssh_user)
+    elif ctx.ssh_private_key and ctx.ssh_signed_cert:
+        logger.info('Using provided ssh private key and signed cert ...')
+        mounts = {
+            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
+            pathify(ctx.ssh_signed_cert.name): '/tmp/cephadm-ssh-key-cert.pub:z'
+        }
+        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
+        cli(['cephadm', 'set-signed-cert', '-i', '/tmp/cephadm-ssh-key-cert.pub'], extra_mounts=mounts)
+    else:
+        logger.info('Generating ssh key...')
+        cli(['cephadm', 'generate-key'])
+        ssh_pub = cli(['cephadm', 'get-pub-key'])
+        with open(ctx.output_pub_ssh_key, 'w') as f:
+            f.write(ssh_pub)
+        logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)
+        authorize_ssh_key(ssh_pub, ctx.ssh_user)
+
+    host = get_hostname()
+    logger.info('Adding host %s...' % host)
+    try:
+        args = ['orch', 'host', 'add', host]
+        if ctx.mon_ip:
+            args.append(unwrap_ipv6(ctx.mon_ip))
+        elif ctx.mon_addrv:
+            addrv_args = parse_mon_addrv(ctx.mon_addrv)
+            args.append(unwrap_ipv6(addrv_args[0].ip))
+        cli(args)
+    except RuntimeError as e:
+        raise Error('Failed to add host <%s>: %s' % (host, e))
+
+    for t in ['mon', 'mgr']:
+        if not ctx.orphan_initial_daemons:
+            logger.info('Deploying %s service with default placement...' % t)
+            cli(['orch', 'apply', t])
+        else:
+            logger.info('Deploying unmanaged %s service...' % t)
+            cli(['orch', 'apply', t, '--unmanaged'])
+
+    if not ctx.orphan_initial_daemons:
+        logger.info('Deploying crash service with default placement...')
+        cli(['orch', 'apply', 'crash'])
+
+    if not ctx.skip_monitoring_stack:
+        for t in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']:
+            logger.info('Deploying %s service with default placement...' % t)
+            try:
+                cli(['orch', 'apply', t])
+            except RuntimeError:
+                ctx.error_code = -errno.EINVAL
+                logger.error(f'Failed to apply service type {t}. '
+                             'Perhaps the ceph version being bootstrapped does not support it')
+
+    if ctx.with_centralized_logging:
+        for t in ['loki', 'promtail']:
+            logger.info('Deploying %s service with default placement...' % t)
+            try:
+                cli(['orch', 'apply', t])
+            except RuntimeError:
+                ctx.error_code = -errno.EINVAL
+                logger.error(f'Failed to apply service type {t}. '
+                             'Perhaps the ceph version being bootstrapped does not support it')
+
+
+def enable_cephadm_mgr_module(
+    cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+    logger.info('Enabling cephadm module...')
+    cli(['mgr', 'module', 'enable', 'cephadm'])
+    wait_for_mgr_restart()
+    logger.info('Setting orchestrator backend to cephadm...')
+    cli(['orch', 'set', 'backend', 'cephadm'])
+
+
+def prepare_dashboard(
+    ctx: CephadmContext,
+    uid: int, gid: int,
+    cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
+    # if the user does not want to use SSL he can change this setting once the cluster is up
+    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])
+
+    # configuring dashboard parameters
+    logger.info('Enabling the dashboard module...')
+    cli(['mgr', 'module', 'enable', 'dashboard'])
+    wait_for_mgr_restart()
+
+    # dashboard crt and key
+    if ctx.dashboard_key and ctx.dashboard_crt:
+        logger.info('Using provided dashboard certificate...')
+        mounts = {
+            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
+            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
+        }
+        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
+        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
+    else:
+        logger.info('Generating a dashboard self-signed certificate...')
+        cli(['dashboard', 'create-self-signed-cert'])
+
+    logger.info('Creating initial admin user...')
+    password = ctx.initial_dashboard_password or generate_password()
+    tmp_password_file = write_tmp(password, uid, gid)
+    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
+    if not ctx.dashboard_password_noupdate:
+        cmd.append('--pwd-update-required')
+    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
+    logger.info('Fetching dashboard port number...')
+    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
+    port = int(out)
+
+    # Open dashboard port
+    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
+        fw = Firewalld(ctx)
+        fw.open_ports([port])
+        fw.apply_rules()
+
+    logger.info('Ceph Dashboard is now available at:\n\n'
+                '\t     URL: https://%s:%s/\n'
+                '\t    User: %s\n'
+                '\tPassword: %s\n' % (
+                    get_fqdn(), port,
+                    ctx.initial_dashboard_user,
+                    password))
+
+
+def prepare_bootstrap_config(
+    ctx: CephadmContext,
+    fsid: str, mon_addr: str, image: str
+
+) -> str:
+
+    cp = read_config(ctx.config)
+    if not cp.has_section('global'):
+        cp.add_section('global')
+    cp.set('global', 'fsid', fsid)
+    cp.set('global', 'mon_host', mon_addr)
+    cp.set('global', 'container_image', image)
+
+    if not cp.has_section('mon'):
+        cp.add_section('mon')
+    if (
+            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
+            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
+    ):
+        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
+
+    if ctx.single_host_defaults:
+        logger.info('Adjusting default settings to suit single-host cluster...')
+        # replicate across osds, not hosts
+        if (
+                not cp.has_option('global', 'osd_crush_chooseleaf_type')
+                and not cp.has_option('global', 'osd crush chooseleaf type')
+        ):
+            cp.set('global', 'osd_crush_chooseleaf_type', '0')
+        # replica 2x
+        if (
+                not cp.has_option('global', 'osd_pool_default_size')
+                and not cp.has_option('global', 'osd pool default size')
+        ):
+            cp.set('global', 'osd_pool_default_size', '2')
+        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
+        if not cp.has_section('mgr'):
+            cp.add_section('mgr')
+        if (
+                not cp.has_option('mgr', 'mgr_standby_modules')
+                and not cp.has_option('mgr', 'mgr standby modules')
+        ):
+            cp.set('mgr', 'mgr_standby_modules', 'false')
+    if ctx.log_to_file:
+        cp.set('global', 'log_to_file', 'true')
+        cp.set('global', 'log_to_stderr', 'false')
+        cp.set('global', 'log_to_journald', 'false')
+        cp.set('global', 'mon_cluster_log_to_file', 'true')
+        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
+        cp.set('global', 'mon_cluster_log_to_journald', 'false')
+
+    cpf = StringIO()
+    cp.write(cpf)
+    config = cpf.getvalue()
+
+    if ctx.registry_json or ctx.registry_url:
+        command_registry_login(ctx)
+
+    return config
+
+
+def finish_bootstrap_config(
+    ctx: CephadmContext,
+    fsid: str,
+    config: str,
+    mon_id: str, mon_dir: str,
+    mon_network: Optional[str], ipv6: bool,
+    cli: Callable,
+    cluster_network: Optional[str], ipv6_cluster_network: bool
+
+) -> None:
+    if not ctx.no_minimize_config:
+        logger.info('Assimilating anything we can from ceph.conf...')
+        cli([
+            'config', 'assimilate-conf',
+            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
+        ], {
+            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
+        })
+        logger.info('Generating new minimal ceph.conf...')
+        cli([
+            'config', 'generate-minimal-conf',
+            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
+        ], {
+            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
+        })
+        # re-read our minimized config
+        with open(mon_dir + '/config', 'r') as f:
+            config = f.read()
+        logger.info('Restarting the monitor...')
+        call_throws(ctx, [
+            'systemctl',
+            'restart',
+            get_unit_name(fsid, 'mon', mon_id)
+        ])
+    elif 'image' in ctx and ctx.image:
+        # we still want to assimilate the given container image if provided
+        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])
+
+    if mon_network:
+        cp = read_config(ctx.config)
+        cfg_section = 'global' if cp.has_option('global', 'public_network') else 'mon'
+        logger.info(f'Setting public_network to {mon_network} in {cfg_section} config section')
+        cli(['config', 'set', cfg_section, 'public_network', mon_network])
+
+    if cluster_network:
+        logger.info(f'Setting cluster_network to {cluster_network}')
+        cli(['config', 'set', 'global', 'cluster_network', cluster_network])
+
+    if ipv6 or ipv6_cluster_network:
+        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
+        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
+
+    with open(ctx.output_config, 'w') as f:
+        f.write(config)
+    logger.info('Wrote config to %s' % ctx.output_config)
+    pass
+
+
+def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
+    # overall goal of this function is to go through an applied spec and find
+    # the hostname (and addr is provided) for each host spec in the applied spec.
+    # Generally, we should be able to just pass the spec to the mgr module where
+    # proper yaml parsing can happen, but for host specs in particular we want to
+    # be able to distribute ssh keys, which requires finding the hostname (and addr
+    # if possible) for each potential host spec in the applied spec.
+
+    specs: List[List[str]] = []
+    current_spec: List[str] = []
+    for line in f:
+        if re.search(r'^---\s+', line):
+            if current_spec:
+                specs.append(current_spec)
+            current_spec = []
+        else:
+            line = line.strip()
+            if line:
+                current_spec.append(line)
+    if current_spec:
+        specs.append(current_spec)
+
+    host_specs: List[List[str]] = []
+    for spec in specs:
+        for line in spec:
+            if 'service_type' in line:
+                try:
+                    _, type = line.split(':')
+                    type = type.strip()
+                    if type == 'host':
+                        host_specs.append(spec)
+                except ValueError as e:
+                    spec_str = '\n'.join(spec)
+                    logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
+                break
+            spec_str = '\n'.join(spec)
+            logger.error(f'Failed to find service_type within spec:\n{spec_str}')
+
+    host_dicts = []
+    for s in host_specs:
+        host_dict = _extract_host_info_from_spec(s)
+        # if host_dict is empty here, we failed to pull the hostname
+        # for the host from the spec. This should have already been logged
+        # so at this point we just don't want to include it in our output
+        if host_dict:
+            host_dicts.append(host_dict)
+
+    return host_dicts
+
+
+def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
+    # note:for our purposes here, we only really want the hostname
+    # and address of the host from each of these specs in order to
+    # be able to distribute ssh keys. We will later apply the spec
+    # through the mgr module where proper yaml parsing can be done
+    # The returned dicts from this function should only contain
+    # one or two entries, one (required) for hostname, one (optional) for addr
+    # {
+    #   hostname: <hostname>
+    #   addr: <ip-addr>
+    # }
+    # if we fail to find the hostname, an empty dict is returned
+
+    host_dict = {}  # type: Dict[str, str]
+    for line in host_spec:
+        for field in ['hostname', 'addr']:
+            if field in line:
+                try:
+                    _, field_value = line.split(':')
+                    field_value = field_value.strip()
+                    host_dict[field] = field_value
+                except ValueError as e:
+                    spec_str = '\n'.join(host_spec)
+                    logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}')
+
+    if 'hostname' not in host_dict:
+        spec_str = '\n'.join(host_spec)
+        logger.error(f'Could not find hostname in host spec:\n{spec_str}')
+        return {}
+    return host_dict
+
+
+def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
+    # copy ssh key to hosts in host spec (used for apply spec)
+    ssh_key = CEPH_DEFAULT_PUBKEY
+    if ctx.ssh_public_key:
+        ssh_key = ctx.ssh_public_key.name
+
+    if bootstrap_hostname != host_info['hostname']:
+        if 'addr' in host_info:
+            addr = host_info['addr']
+        else:
+            addr = host_info['hostname']
+        out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
+        if code:
+            logger.error('\nCopying ssh key to host %s at address %s failed!\n' % (host_info['hostname'], addr))
+            return 1
+        else:
+            logger.info('Added ssh key to host %s at address %s' % (host_info['hostname'], addr))
+    return 0
+
+
+def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
+    """Save cluster configuration to the per fsid directory """
+    def copy_file(src: str, dst: str) -> None:
+        if src:
+            shutil.copyfile(src, dst)
+
+    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
+    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
+    if os.path.exists(conf_dir):
+        logger.info(f'Saving cluster configuration to {conf_dir} directory')
+        copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
+        copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
+        # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
+        if (os.path.exists(ctx.output_pub_ssh_key)):
+            copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
+    else:
+        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
+
+
+def rollback(func: FuncT) -> FuncT:
+    """
+    """
+    @wraps(func)
+    def _rollback(ctx: CephadmContext) -> Any:
+        try:
+            return func(ctx)
+        except ClusterAlreadyExists:
+            # another cluster with the provided fsid already exists: don't remove.
+            raise
+        except (KeyboardInterrupt, Exception) as e:
+            logger.error(f'{type(e).__name__}: {e}')
+            if ctx.cleanup_on_failure:
+                logger.info('\n\n'
+                            '\t***************\n'
+                            '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically,\n'
+                            '\tto disable this behaviour do not pass the --cleanup-on-failure flag. In case of any previous\n'
+                            '\tbroken installation user must use the following command to completely delete the broken cluster:\n\n'
+                            '\t> cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
+                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
+                            '\t***************\n\n')
+                _rm_cluster(ctx, keep_logs=False, zap_osds=False)
+            else:
+                logger.info('\n\n'
+                            '\t***************\n'
+                            '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change\n'
+                            '\tthis behaviour you can pass the --cleanup-on-failure. To remove this broken cluster manually please run:\n\n'
+                            f'\t   > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
+                            '\tin case of any previous broken installation user must use the rm-cluster command to delete the broken cluster:\n\n'
+                            '\t   > cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
+                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
+                            '\t***************\n\n')
+            raise
+    return cast(FuncT, _rollback)
+
+
+@rollback
+@default_image
+def command_bootstrap(ctx):
+    # type: (CephadmContext) -> int
+
+    ctx.error_code = 0
+
+    if not ctx.output_config:
+        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
+    if not ctx.output_keyring:
+        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
+    if not ctx.output_pub_ssh_key:
+        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)
+
+    if (
+        (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key))
+        and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))
+    ):
+        raise Error('--ssh-private-key must be passed with either --ssh-public-key in the case of standard pubkey '
+                    'authentication or with --ssh-signed-cert in the case of CA signed signed keys or not provided at all.')
+
+    if (bool(ctx.ssh_public_key) and bool(ctx.ssh_signed_cert)):
+        raise Error('--ssh-public-key and --ssh-signed-cert are mututally exclusive. --ssh-public-key is intended '
+                    'for standard pubkey encryption where the public key is set as an authorized key on cluster hosts. '
+                    '--ssh-signed-cert is intended for the CA signed keys use case where cluster hosts are configured to trust '
+                    'a CA pub key and authentication during SSH is done by authenticating the signed cert, requiring no '
+                    'public key to be installed on the cluster hosts.')
+
+    if ctx.fsid:
+        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
+        if os.path.exists(data_dir_base):
+            raise ClusterAlreadyExists(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
+        else:
+            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')
+
+    # initial vars
+    ctx.fsid = ctx.fsid or make_fsid()
+    fsid = ctx.fsid
+    if not is_fsid(fsid):
+        raise Error('not an fsid: %s' % fsid)
+
+    # verify output files
+    for f in [ctx.output_config, ctx.output_keyring, ctx.output_pub_ssh_key]:
+        if not ctx.allow_overwrite:
+            if os.path.exists(f):
+                raise ClusterAlreadyExists('%s already exists; delete or pass --allow-overwrite to overwrite' % f)
+        dirname = os.path.dirname(f)
+        if dirname and not os.path.exists(dirname):
+            fname = os.path.basename(f)
+            logger.info(f'Creating directory {dirname} for {fname}')
+            try:
+                # use makedirs to create intermediate missing dirs
+                os.makedirs(dirname, 0o755)
+            except PermissionError:
+                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
+
+    (user_conf, _) = get_config_and_keyring(ctx)
+
+    if ctx.ssh_user != 'root':
+        check_ssh_connectivity(ctx)
+
+    if not ctx.skip_prepare_host:
+        command_prepare_host(ctx)
+    else:
+        logger.info('Skip prepare_host')
+
+    logger.info('Cluster fsid: %s' % fsid)
+    hostname = get_hostname()
+    if '.' in hostname and not ctx.allow_fqdn_hostname:
+        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
+    mon_id = ctx.mon_id or get_short_hostname()
+    mgr_id = ctx.mgr_id or generate_service_id()
+
+    lock = FileLock(ctx, fsid)
+    lock.acquire()
+
+    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
+    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)
+
+    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)
+
+    if not ctx.skip_pull:
+        try:
+            _pull_image(ctx, ctx.image)
+        except UnauthorizedRegistryError:
+            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
+            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
+            raise Error(err_str)
+
+    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
+    logger.info(f'Ceph version: {image_ver}')
+
+    if not ctx.allow_mismatched_release:
+        image_release = image_ver.split()[4]
+        if image_release not in \
+                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
+            raise Error(
+                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
+                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
+            )
+
+    logger.info('Extracting ceph user uid/gid from container image...')
+    (uid, gid) = extract_uid_gid(ctx)
+
+    # create some initial keys
+    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)
+
+    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
+    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
+                                            bootstrap_keyring.name, monmap.name)
+
+    with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
+        f.write(config)
+
+    make_var_run(ctx, fsid, uid, gid)
+    create_mon(ctx, uid, gid, fsid, mon_id)
+
+    # config to issue various CLI commands
+    tmp_config = write_tmp(config, uid, gid)
+
+    # a CLI helper to reduce our typing
+    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
+        # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
+        mounts = {
+            log_dir: '/var/log/ceph:z',
+            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
+            tmp_config.name: '/etc/ceph/ceph.conf:z',
+        }
+        for k, v in extra_mounts.items():
+            mounts[k] = v
+        timeout = timeout or ctx.timeout
+        return CephContainer(
+            ctx,
+            image=ctx.image,
+            entrypoint='/usr/bin/ceph',
+            args=cmd,
+            volume_mounts=mounts,
+        ).run(timeout=timeout, verbosity=verbosity)
+
+    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)
+
+    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
+                            mon_network, ipv6, cli,
+                            cluster_network, ipv6_cluster_network)
+
+    # output files
+    with write_new(ctx.output_keyring) as f:
+        f.write('[client.admin]\n'
+                '\tkey = ' + admin_key + '\n')
+    logger.info('Wrote keyring to %s' % ctx.output_keyring)
+
+    # create mgr
+    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
+
+    if user_conf:
+        # user given config settings were already assimilated earlier
+        # but if the given settings contained any attributes in
+        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
+        # they don't seem to be stored if there isn't a mgr yet.
+        # Since re-assimilating the same conf settings should be
+        # idempotent we can just do it again here.
+        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
+            tmp.write(user_conf.encode('utf-8'))
+            cli(['config', 'assimilate-conf',
+                 '-i', '/var/lib/ceph/user.conf'],
+                {tmp.name: '/var/lib/ceph/user.conf:z'})
+
+    # wait for mgr to restart (after enabling a module)
+    def wait_for_mgr_restart() -> None:
+        # first get latest mgrmap epoch from the mon.  try newer 'mgr
+        # stat' command first, then fall back to 'mgr dump' if
+        # necessary
+        try:
+            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
+        except Exception:
+            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
+        epoch = j['epoch']
+
+        # wait for mgr to have it
+        logger.info('Waiting for the mgr to restart...')
+
+        def mgr_has_latest_epoch():
+            # type: () -> bool
+            try:
+                out = cli(['tell', 'mgr', 'mgr_status'])
+                j = json.loads(out)
+                return j['mgrmap_epoch'] >= epoch
+            except Exception as e:
+                logger.debug('tell mgr mgr_status failed: %s' % e)
+                return False
+        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)
+
+    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)
+
+    # ssh
+    if not ctx.skip_ssh:
+        prepare_ssh(ctx, cli, wait_for_mgr_restart)
+
+    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
+        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
+        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])
+
+    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])
+
+    if not ctx.skip_dashboard:
+        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
+
+    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
+        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
+        try:
+            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
+            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
+        except Exception:
+            logger.info('Unable to set up "admin" label; assuming older version of Ceph')
+
+    if ctx.apply_spec:
+        logger.info('Applying %s to cluster' % ctx.apply_spec)
+        # copy ssh key to hosts in spec file
+        with open(ctx.apply_spec) as f:
+            host_dicts = _extract_host_info_from_applied_spec(f)
+            for h in host_dicts:
+                if ctx.ssh_signed_cert:
+                    logger.info('Key distribution is not supported for signed CA key setups. Skipping ...')
+                else:
+                    _distribute_ssh_keys(ctx, h, hostname)
+
+        mounts = {}
+        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
+        try:
+            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
+            logger.info(out)
+        except Exception:
+            ctx.error_code = -errno.EINVAL
+            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)
+
+    save_cluster_config(ctx, uid, gid, fsid)
+
+    # enable autotune for osd_memory_target
+    logger.info('Enabling autotune for osd_memory_target')
+    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
+
+    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
+    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
+
+    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
+                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
+                    sys.argv[0],
+                    fsid,
+                    ctx.output_config,
+                    ctx.output_keyring))
+
+    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))
+
+    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
+                '\tceph telemetry on\n\n'
+                'For more information see:\n\n'
+                '\thttps://docs.ceph.com/en/latest/mgr/telemetry/\n')
+    logger.info('Bootstrap complete.')
+    return ctx.error_code
+
+##################################
+
+
+def command_registry_login(ctx: CephadmContext) -> int:
+    if ctx.registry_json:
+        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
+        d = get_parm(ctx.registry_json)
+        if d.get('url') and d.get('username') and d.get('password'):
+            ctx.registry_url = d.get('url')
+            ctx.registry_username = d.get('username')
+            ctx.registry_password = d.get('password')
+            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
+        else:
+            raise Error('json provided for custom registry login did not include all necessary fields. '
+                        'Please setup json file as\n'
+                        '{\n'
+                        ' "url": "REGISTRY_URL",\n'
+                        ' "username": "REGISTRY_USERNAME",\n'
+                        ' "password": "REGISTRY_PASSWORD"\n'
+                        '}\n')
+    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
+        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
+    else:
+        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
+                    '--registry-url, --registry-username and --registry-password '
+                    'options or --registry-json option')
+    return 0
+
+
+def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
+    logger.info('Logging into custom registry.')
+    try:
+        engine = ctx.container_engine
+        cmd = [engine.path, 'login',
+               '-u', username, '-p', password,
+               url]
+        if isinstance(engine, Podman):
+            cmd.append('--authfile=/etc/ceph/podman-auth.json')
+        out, _, _ = call_throws(ctx, cmd)
+        if isinstance(engine, Podman):
+            os.chmod('/etc/ceph/podman-auth.json', DEFAULT_MODE)
+    except Exception:
+        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
+
+##################################
+
+
+def extract_uid_gid_monitoring(ctx, daemon_type):
+    # type: (CephadmContext, str) -> Tuple[int, int]
+
+    if daemon_type == 'prometheus':
+        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
+    elif daemon_type == 'node-exporter':
+        uid, gid = 65534, 65534
+    elif daemon_type == 'grafana':
+        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
+    elif daemon_type == 'loki':
+        uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
+    elif daemon_type == 'promtail':
+        uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
+    elif daemon_type == 'alertmanager':
+        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
+    else:
+        raise Error('{} not implemented yet'.format(daemon_type))
+    return uid, gid
+
+
+def get_deployment_container(ctx: CephadmContext,
+                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
+                             privileged: bool = False,
+                             ptrace: bool = False,
+                             container_args: Optional[List[str]] = None) -> 'CephContainer':
+    # wrapper for get_container specifically for containers made during the `cephadm deploy`
+    # command. Adds some extra things such as extra container args and custom config files
+    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
+    if 'extra_container_args' in ctx and ctx.extra_container_args:
+        c.container_args.extend(ctx.extra_container_args)
+    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
+        c.args.extend(ctx.extra_entrypoint_args)
+    ccfiles = fetch_custom_config_files(ctx)
+    if ccfiles:
+        mandatory_keys = ['mount_path', 'content']
+        for conf in ccfiles:
+            if all(k in conf for k in mandatory_keys):
+                mount_path = conf['mount_path']
+                file_path = os.path.join(
+                    ctx.data_dir,
+                    fsid,
+                    'custom_config_files',
+                    f'{daemon_type}.{daemon_id}',
+                    os.path.basename(mount_path)
+                )
+                c.volume_mounts[file_path] = mount_path
+    return c
+
+
+def get_deployment_type(ctx: CephadmContext, daemon_type: str, daemon_id: str) -> DeploymentType:
+    deployment_type: DeploymentType = DeploymentType.DEFAULT
+    if ctx.reconfig:
+        deployment_type = DeploymentType.RECONFIG
+    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
+    (_, state, _) = check_unit(ctx, unit_name)
+    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
+        # if reconfig was set, that takes priority over redeploy. If
+        # this is considered a fresh deployment at this stage,
+        # mark it as a redeploy to avoid port checking
+        if deployment_type == DeploymentType.DEFAULT:
+            deployment_type = DeploymentType.REDEPLOY
+
+    logger.info(f'{deployment_type.value} daemon {ctx.name} ...')
+
+    return deployment_type
+
+
+@default_image
+@deprecated_command
+def command_deploy(ctx):
+    # type: (CephadmContext) -> None
+    _common_deploy(ctx)
+
+
+def read_configuration_source(ctx: CephadmContext) -> Dict[str, Any]:
+    """Read a JSON configuration based on the `ctx.source` value."""
+    source = '-'
+    if 'source' in ctx and ctx.source:
+        source = ctx.source
+    if source == '-':
+        config_data = json.load(sys.stdin)
+    else:
+        with open(source, 'rb') as fh:
+            config_data = json.load(fh)
+    logger.debug('Loaded deploy configuration: %r', config_data)
+    return config_data
+
+
+def apply_deploy_config_to_ctx(
+    config_data: Dict[str, Any],
+    ctx: CephadmContext,
+) -> None:
+    """Bind properties taken from the config_data dictionary to our ctx,
+    similar to how cli options on `deploy` are bound to the context.
+    """
+    ctx.name = config_data['name']
+    image = config_data.get('image', '')
+    if image:
+        ctx.image = image
+    if 'fsid' in config_data:
+        ctx.fsid = config_data['fsid']
+    if 'meta' in config_data:
+        ctx.meta_properties = config_data['meta']
+    if 'config_blobs' in config_data:
+        ctx.config_blobs = config_data['config_blobs']
+
+    # many functions don't check that an attribute is set on the ctx
+    # (with getattr or the '__contains__' func on ctx).
+    # This reuses the defaults from the CLI options so we don't
+    # have to repeat things and they can stay in sync.
+    facade = ArgumentFacade()
+    _add_deploy_parser_args(facade)
+    facade.apply(ctx)
+    for key, value in config_data.get('params', {}).items():
+        if key not in facade.defaults:
+            logger.warning('unexpected parameter: %r=%r', key, value)
+        setattr(ctx, key, value)
+    update_default_image(ctx)
+    logger.debug('Determined image: %r', ctx.image)
+
+
+def command_deploy_from(ctx: CephadmContext) -> None:
+    """The deploy-from command is similar to deploy but sources nearly all
+    configuration parameters from an input JSON configuration file.
+    """
+    config_data = read_configuration_source(ctx)
+    apply_deploy_config_to_ctx(config_data, ctx)
+    _common_deploy(ctx)
+
+
+def _common_deploy(ctx: CephadmContext) -> None:
+    daemon_type, daemon_id = ctx.name.split('.', 1)
+    if daemon_type not in get_supported_daemons():
+        raise Error('daemon type %s not recognized' % daemon_type)
+
+    lock = FileLock(ctx, ctx.fsid)
+    lock.acquire()
+
+    deployment_type = get_deployment_type(ctx, daemon_type, daemon_id)
+
+    # Migrate sysctl conf files from /usr/lib to /etc
+    migrate_sysctl_dir(ctx, ctx.fsid)
+
+    # Get and check ports explicitly required to be opened
+    endpoints = fetch_tcp_ports(ctx)
+    _dispatch_deploy(ctx, daemon_type, daemon_id, endpoints, deployment_type)
+
+
+def _dispatch_deploy(
+    ctx: CephadmContext,
+    daemon_type: str,
+    daemon_id: str,
+    daemon_endpoints: List[EndPoint],
+    deployment_type: DeploymentType,
+) -> None:
+    if daemon_type in Ceph.daemons:
+        config, keyring = get_config_and_keyring(ctx)
+        uid, gid = extract_uid_gid(ctx)
+        make_var_run(ctx, ctx.fsid, uid, gid)
+
+        config_json = fetch_configs(ctx)
+
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
+                                     ptrace=ctx.allow_ptrace)
+
+        if daemon_type == 'mon' and config_json is not None:
+            if 'crush_location' in config_json:
+                c_loc = config_json['crush_location']
+                # was originally "c.args.extend(['--set-crush-location', c_loc])"
+                # but that doesn't seem to persist in the object after it's passed
+                # in further function calls
+                c.args = c.args + ['--set-crush-location', c_loc]
+
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      config=config, keyring=keyring,
+                      osd_fsid=ctx.osd_fsid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type in Monitoring.components:
+        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
+        # Default Checks
+        # make sure provided config-json is sufficient
+        config = fetch_configs(ctx)  # type: ignore
+        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
+        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
+        if required_files:
+            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
+                raise Error('{} deployment requires config-json which must '
+                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
+        if required_args:
+            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
+                raise Error('{} deployment requires config-json which must '
+                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))
+
+        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == NFSGanesha.daemon_type:
+        # only check ports if this is a fresh deployment
+        if deployment_type == DeploymentType.DEFAULT and not daemon_endpoints:
+            nfs_ports = list(NFSGanesha.port_map.values())
+            daemon_endpoints = [EndPoint('0.0.0.0', p) for p in nfs_ports]
+
+        config, keyring = get_config_and_keyring(ctx)
+        # TODO: extract ganesha uid/gid (997, 994) ?
+        uid, gid = extract_uid_gid(ctx)
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      config=config, keyring=keyring,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == CephIscsi.daemon_type:
+        config, keyring = get_config_and_keyring(ctx)
+        uid, gid = extract_uid_gid(ctx)
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      config=config, keyring=keyring,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+    elif daemon_type == CephNvmeof.daemon_type:
+        config, keyring = get_config_and_keyring(ctx)
+        uid, gid = 167, 167  # TODO: need to get properly the uid/gid
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      config=config, keyring=keyring,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+    elif daemon_type in Tracing.components:
+        uid, gid = 65534, 65534
+        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+    elif daemon_type == HAproxy.daemon_type:
+        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
+        uid, gid = haproxy.extract_uid_gid_haproxy()
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == Keepalived.daemon_type:
+        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
+        uid, gid = keepalived.extract_uid_gid_keepalived()
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == CustomContainer.daemon_type:
+        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
+        # only check ports if this is a fresh deployment
+        if deployment_type == DeploymentType.DEFAULT:
+            daemon_endpoints.extend([EndPoint('0.0.0.0', p) for p in cc.ports])
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
+                                     privileged=cc.privileged,
+                                     ptrace=ctx.allow_ptrace)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
+                      uid=cc.uid, gid=cc.gid, config=None,
+                      keyring=None,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == CephadmAgent.daemon_type:
+        # get current user gid and uid
+        uid = os.getuid()
+        gid = os.getgid()
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
+                      uid, gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    elif daemon_type == SNMPGateway.daemon_type:
+        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
+        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
+                      sc.uid, sc.gid,
+                      deployment_type=deployment_type,
+                      endpoints=daemon_endpoints)
+
+    else:
+        raise Error('daemon type {} not implemented in command_deploy function'
+                    .format(daemon_type))
+
+##################################
+
+
+@infer_image
+def command_run(ctx):
+    # type: (CephadmContext) -> int
+    (daemon_type, daemon_id) = ctx.name.split('.', 1)
+    c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
+    command = c.run_cmd()
+    return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+@infer_config
+@infer_image
+@validate_fsid
+def command_shell(ctx):
+    # type: (CephadmContext) -> int
+    cp = read_config(ctx.config)
+    if cp.has_option('global', 'fsid') and \
+       cp.get('global', 'fsid') != ctx.fsid:
+        raise Error('fsid does not match ceph.conf')
+
+    if ctx.name:
+        if '.' in ctx.name:
+            (daemon_type, daemon_id) = ctx.name.split('.', 1)
+        else:
+            daemon_type = ctx.name
+            daemon_id = None
+    else:
+        daemon_type = 'osd'  # get the most mounts
+        daemon_id = None
+
+    if ctx.fsid and daemon_type in Ceph.daemons:
+        make_log_dir(ctx, ctx.fsid)
+
+    if daemon_id and not ctx.fsid:
+        raise Error('must pass --fsid to specify cluster')
+
+    # in case a dedicated keyring for the specified fsid is found we us it.
+    # Otherwise, use /etc/ceph files by default, if present.  We do this instead of
+    # making these defaults in the arg parser because we don't want an error
+    # if they don't exist.
+    if not ctx.keyring:
+        keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
+        if os.path.exists(keyring_file):
+            ctx.keyring = keyring_file
+        elif os.path.exists(CEPH_DEFAULT_KEYRING):
+            ctx.keyring = CEPH_DEFAULT_KEYRING
+
+    container_args: List[str] = ['-i']
+    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
+                                  no_config=True if ctx.config else False)
+    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
+    if ctx.config:
+        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
+    if ctx.keyring:
+        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
+    if ctx.mount:
+        for _mount in ctx.mount:
+            split_src_dst = _mount.split(':')
+            mount = pathify(split_src_dst[0])
+            filename = os.path.basename(split_src_dst[0])
+            if len(split_src_dst) > 1:
+                dst = split_src_dst[1]
+                if len(split_src_dst) == 3:
+                    dst = '{}:{}'.format(dst, split_src_dst[2])
+                mounts[mount] = dst
+            else:
+                mounts[mount] = '/mnt/{}'.format(filename)
+    if ctx.command:
+        command = ctx.command
+    else:
+        command = ['bash']
+        container_args += [
+            '-t',
+            '-e', 'LANG=C',
+            '-e', 'PS1=%s' % CUSTOM_PS1,
+        ]
+        if ctx.fsid:
+            home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
+            if not os.path.exists(home):
+                logger.debug('Creating root home at %s' % home)
+                makedirs(home, 0, 0, 0o660)
+                if os.path.exists('/etc/skel'):
+                    for f in os.listdir('/etc/skel'):
+                        if f.startswith('.bash'):
+                            shutil.copyfile(os.path.join('/etc/skel', f),
+                                            os.path.join(home, f))
+            mounts[home] = '/root'
+
+    for i in ctx.volume:
+        a, b = i.split(':', 1)
+        mounts[a] = b
+
+    c = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='doesnotmatter',
+        args=[],
+        container_args=container_args,
+        volume_mounts=mounts,
+        bind_mounts=binds,
+        envs=ctx.env,
+        privileged=True)
+    command = c.shell_cmd(command)
+
+    if ctx.dry_run:
+        print(' '.join(shlex.quote(arg) for arg in command))
+        return 0
+
+    return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+def command_enter(ctx):
+    # type: (CephadmContext) -> int
+    if not ctx.fsid:
+        raise Error('must pass --fsid to specify cluster')
+    (daemon_type, daemon_id) = ctx.name.split('.', 1)
+    container_args = ['-i']  # type: List[str]
+    if ctx.command:
+        command = ctx.command
+    else:
+        command = ['sh']
+        container_args += [
+            '-t',
+            '-e', 'LANG=C',
+            '-e', 'PS1=%s' % CUSTOM_PS1,
+        ]
+    c = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='doesnotmatter',
+        container_args=container_args,
+        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
+    )
+    command = c.exec_cmd(command)
+    return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+@infer_image
+@validate_fsid
+def command_ceph_volume(ctx):
+    # type: (CephadmContext) -> None
+    cp = read_config(ctx.config)
+    if cp.has_option('global', 'fsid') and \
+       cp.get('global', 'fsid') != ctx.fsid:
+        raise Error('fsid does not match ceph.conf')
+
+    if ctx.fsid:
+        make_log_dir(ctx, ctx.fsid)
+
+        lock = FileLock(ctx, ctx.fsid)
+        lock.acquire()
+
+    (uid, gid) = (0, 0)  # ceph-volume runs as root
+    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)
+
+    tmp_config = None
+    tmp_keyring = None
+
+    (config, keyring) = get_config_and_keyring(ctx)
+
+    if config:
+        # tmp config file
+        tmp_config = write_tmp(config, uid, gid)
+        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
+
+    if keyring:
+        # tmp keyring file
+        tmp_keyring = write_tmp(keyring, uid, gid)
+        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
+
+    c = get_ceph_volume_container(
+        ctx,
+        envs=ctx.env,
+        args=ctx.command,
+        volume_mounts=mounts,
+    )
+
+    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+    if not code:
+        print(out)
+
+##################################
+
+
+@infer_fsid
+def command_unit(ctx):
+    # type: (CephadmContext) -> int
+    if not ctx.fsid:
+        raise Error('must pass --fsid to specify cluster')
+
+    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+    _, _, code = call(
+        ctx,
+        ['systemctl', ctx.command, unit_name],
+        verbosity=CallVerbosity.VERBOSE,
+        desc=''
+    )
+    return code
+
+##################################
+
+
+@infer_fsid
+def command_logs(ctx):
+    # type: (CephadmContext) -> None
+    if not ctx.fsid:
+        raise Error('must pass --fsid to specify cluster')
+
+    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+    cmd = [find_program('journalctl')]
+    cmd.extend(['-u', unit_name])
+    if ctx.command:
+        cmd.extend(ctx.command)
+
+    # call this directly, without our wrapper, so that we get an unmolested
+    # stdout with logger prefixing.
+    logger.debug('Running command: %s' % ' '.join(cmd))
+    subprocess.call(cmd, env=os.environ.copy())  # type: ignore
+
+##################################
+
+
+def list_networks(ctx):
+    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
+
+    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
+    # so we'll need to use a regex to parse 'ip' command output.
+    #
+    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
+    # j = json.loads(out)
+    # for x in j:
+    res = _list_ipv4_networks(ctx)
+    res.update(_list_ipv6_networks(ctx))
+    return res
+
+
+def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
+    execstr: Optional[str] = find_executable('ip')
+    if not execstr:
+        raise FileNotFoundError("unable to find 'ip' command")
+    out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+    return _parse_ipv4_route(out)
+
+
+def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
+    r = {}  # type: Dict[str, Dict[str, Set[str]]]
+    p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
+    for line in out.splitlines():
+        m = p.findall(line)
+        if not m:
+            continue
+        net = m[0][0]
+        if '/' not in net:  # aggregate /32 mask for single host sub-networks
+            net += '/32'
+        iface = m[0][1]
+        ip = m[0][4]
+        if net not in r:
+            r[net] = {}
+        if iface not in r[net]:
+            r[net][iface] = set()
+        r[net][iface].add(ip)
+    return r
+
+
+def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
+    execstr: Optional[str] = find_executable('ip')
+    if not execstr:
+        raise FileNotFoundError("unable to find 'ip' command")
+    routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+    ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+    return _parse_ipv6_route(routes, ips)
+
+
+def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
+    r = {}  # type: Dict[str, Dict[str, Set[str]]]
+    route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
+    ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
+    iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
+    for line in routes.splitlines():
+        m = route_p.findall(line)
+        if not m or m[0][0].lower() == 'default':
+            continue
+        net = m[0][0]
+        if '/' not in net:  # aggregate /128 mask for single host sub-networks
+            net += '/128'
+        iface = m[0][1]
+        if iface == 'lo':  # skip loopback devices
+            continue
+        if net not in r:
+            r[net] = {}
+        if iface not in r[net]:
+            r[net][iface] = set()
+
+    iface = None
+    for line in ips.splitlines():
+        m = ip_p.findall(line)
+        if not m:
+            m = iface_p.findall(line)
+            if m:
+                # drop @... suffix, if present
+                iface = m[0][1].split('@')[0]
+            continue
+        ip = m[0][0]
+        # find the network it belongs to
+        net = [n for n in r.keys()
+               if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
+        if net and iface in r[net[0]]:
+            assert iface
+            r[net[0]][iface].add(ip)
+
+    return r
+
+
+def command_list_networks(ctx):
+    # type: (CephadmContext) -> None
+    r = list_networks(ctx)
+
+    def serialize_sets(obj: Any) -> Any:
+        return list(obj) if isinstance(obj, set) else obj
+
+    print(json.dumps(r, indent=4, default=serialize_sets))
+
+##################################
+
+
+def command_ls(ctx):
+    # type: (CephadmContext) -> None
+    ls = list_daemons(ctx, detail=not ctx.no_detail,
+                      legacy_dir=ctx.legacy_dir)
+    print(json.dumps(ls, indent=4))
+
+
+def with_units_to_int(v: str) -> int:
+    if v.endswith('iB'):
+        v = v[:-2]
+    elif v.endswith('B'):
+        v = v[:-1]
+    mult = 1
+    if v[-1].upper() == 'K':
+        mult = 1024
+        v = v[:-1]
+    elif v[-1].upper() == 'M':
+        mult = 1024 * 1024
+        v = v[:-1]
+    elif v[-1].upper() == 'G':
+        mult = 1024 * 1024 * 1024
+        v = v[:-1]
+    elif v[-1].upper() == 'T':
+        mult = 1024 * 1024 * 1024 * 1024
+        v = v[:-1]
+    return int(float(v) * mult)
+
+
+def list_daemons(ctx, detail=True, legacy_dir=None):
+    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
+    host_version: Optional[str] = None
+    ls = []
+    container_path = ctx.container_engine.path
+
+    data_dir = ctx.data_dir
+    if legacy_dir is not None:
+        data_dir = os.path.abspath(legacy_dir + data_dir)
+
+    # keep track of ceph versions we see
+    seen_versions = {}  # type: Dict[str, Optional[str]]
+
+    # keep track of image digests
+    seen_digests = {}   # type: Dict[str, List[str]]
+
+    # keep track of memory and cpu usage we've seen
+    seen_memusage = {}  # type: Dict[str, int]
+    seen_cpuperc = {}  # type: Dict[str, str]
+    out, err, code = call(
+        ctx,
+        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
+        verbosity=CallVerbosity.QUIET
+    )
+    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
+
+    out, err, code = call(
+        ctx,
+        [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
+        verbosity=CallVerbosity.QUIET
+    )
+    seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)
+
+    # /var/lib/ceph
+    if os.path.exists(data_dir):
+        for i in os.listdir(data_dir):
+            if i in ['mon', 'osd', 'mds', 'mgr']:
+                daemon_type = i
+                for j in os.listdir(os.path.join(data_dir, i)):
+                    if '-' not in j:
+                        continue
+                    (cluster, daemon_id) = j.split('-', 1)
+                    fsid = get_legacy_daemon_fsid(ctx,
+                                                  cluster, daemon_type, daemon_id,
+                                                  legacy_dir=legacy_dir)
+                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+                    val: Dict[str, Any] = {
+                        'style': 'legacy',
+                        'name': '%s.%s' % (daemon_type, daemon_id),
+                        'fsid': fsid if fsid is not None else 'unknown',
+                        'systemd_unit': legacy_unit_name,
+                    }
+                    if detail:
+                        (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
+                        if not host_version:
+                            try:
+                                out, err, code = call(ctx,
+                                                      ['ceph', '-v'],
+                                                      verbosity=CallVerbosity.QUIET)
+                                if not code and out.startswith('ceph version '):
+                                    host_version = out.split(' ')[2]
+                            except Exception:
+                                pass
+                        val['host_version'] = host_version
+                    ls.append(val)
+            elif is_fsid(i):
+                fsid = str(i)  # convince mypy that fsid is a str here
+                for j in os.listdir(os.path.join(data_dir, i)):
+                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
+                        name = j
+                        (daemon_type, daemon_id) = j.split('.', 1)
+                        unit_name = get_unit_name(fsid,
+                                                  daemon_type,
+                                                  daemon_id)
+                    else:
+                        continue
+                    val = {
+                        'style': 'cephadm:v1',
+                        'name': name,
+                        'fsid': fsid,
+                        'systemd_unit': unit_name,
+                    }
+                    if detail:
+                        # get container id
+                        (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
+                        container_id = None
+                        image_name = None
+                        image_id = None
+                        image_digests = None
+                        version = None
+                        start_stamp = None
+
+                        out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
+                        if not code:
+                            (container_id, image_name, image_id, start,
+                             version) = out.strip().split(',')
+                            image_id = normalize_container_id(image_id)
+                            daemon_type = name.split('.', 1)[0]
+                            start_stamp = try_convert_datetime(start)
+
+                            # collect digests for this image id
+                            image_digests = seen_digests.get(image_id)
+                            if not image_digests:
+                                out, err, code = call(
+                                    ctx,
+                                    [
+                                        container_path, 'image', 'inspect', image_id,
+                                        '--format', '{{.RepoDigests}}',
+                                    ],
+                                    verbosity=CallVerbosity.QUIET)
+                                if not code:
+                                    image_digests = list(set(map(
+                                        normalize_image_digest,
+                                        out.strip()[1:-1].split(' '))))
+                                    seen_digests[image_id] = image_digests
+
+                            # identify software version inside the container (if we can)
+                            if not version or '.' not in version:
+                                version = seen_versions.get(image_id, None)
+                            if daemon_type == NFSGanesha.daemon_type:
+                                version = NFSGanesha.get_version(ctx, container_id)
+                            if daemon_type == CephIscsi.daemon_type:
+                                version = CephIscsi.get_version(ctx, container_id)
+                            if daemon_type == CephNvmeof.daemon_type:
+                                version = CephNvmeof.get_version(ctx, container_id)
+                            elif not version:
+                                if daemon_type in Ceph.daemons:
+                                    out, err, code = call(ctx,
+                                                          [container_path, 'exec', container_id,
+                                                           'ceph', '-v'],
+                                                          verbosity=CallVerbosity.QUIET)
+                                    if not code and \
+                                       out.startswith('ceph version '):
+                                        version = out.split(' ')[2]
+                                        seen_versions[image_id] = version
+                                elif daemon_type == 'grafana':
+                                    out, err, code = call(ctx,
+                                                          [container_path, 'exec', container_id,
+                                                           'grafana-server', '-v'],
+                                                          verbosity=CallVerbosity.QUIET)
+                                    if not code and \
+                                       out.startswith('Version '):
+                                        version = out.split(' ')[1]
+                                        seen_versions[image_id] = version
+                                elif daemon_type in ['prometheus',
+                                                     'alertmanager',
+                                                     'node-exporter',
+                                                     'loki',
+                                                     'promtail']:
+                                    version = Monitoring.get_version(ctx, container_id, daemon_type)
+                                    seen_versions[image_id] = version
+                                elif daemon_type == 'haproxy':
+                                    out, err, code = call(ctx,
+                                                          [container_path, 'exec', container_id,
+                                                           'haproxy', '-v'],
+                                                          verbosity=CallVerbosity.QUIET)
+                                    if not code and \
+                                       out.startswith('HA-Proxy version ') or \
+                                       out.startswith('HAProxy version '):
+                                        version = out.split(' ')[2]
+                                        seen_versions[image_id] = version
+                                elif daemon_type == 'keepalived':
+                                    out, err, code = call(ctx,
+                                                          [container_path, 'exec', container_id,
+                                                           'keepalived', '--version'],
+                                                          verbosity=CallVerbosity.QUIET)
+                                    if not code and \
+                                       err.startswith('Keepalived '):
+                                        version = err.split(' ')[1]
+                                        if version[0] == 'v':
+                                            version = version[1:]
+                                        seen_versions[image_id] = version
+                                elif daemon_type == CustomContainer.daemon_type:
+                                    # Because a custom container can contain
+                                    # everything, we do not know which command
+                                    # to execute to get the version.
+                                    pass
+                                elif daemon_type == SNMPGateway.daemon_type:
+                                    version = SNMPGateway.get_version(ctx, fsid, daemon_id)
+                                    seen_versions[image_id] = version
+                                else:
+                                    logger.warning('version for unknown daemon type %s' % daemon_type)
+                        else:
+                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
+                            try:
+                                with open(vfile, 'r') as f:
+                                    image_name = f.read().strip() or None
+                            except IOError:
+                                pass
+
+                        # unit.meta?
+                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
+                        try:
+                            with open(mfile, 'r') as f:
+                                meta = json.loads(f.read())
+                                val.update(meta)
+                        except IOError:
+                            pass
+
+                        val['container_id'] = container_id
+                        val['container_image_name'] = image_name
+                        val['container_image_id'] = image_id
+                        val['container_image_digests'] = image_digests
+                        if container_id:
+                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
+                            val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
+                        val['version'] = version
+                        val['started'] = start_stamp
+                        val['created'] = get_file_timestamp(
+                            os.path.join(data_dir, fsid, j, 'unit.created')
+                        )
+                        val['deployed'] = get_file_timestamp(
+                            os.path.join(data_dir, fsid, j, 'unit.image'))
+                        val['configured'] = get_file_timestamp(
+                            os.path.join(data_dir, fsid, j, 'unit.configured'))
+                    ls.append(val)
+
+    return ls
+
+
+def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
+    # keep track of memory usage we've seen
+    seen_memusage = {}  # type: Dict[str, int]
+    seen_memusage_cid_len = 0
+    if not code:
+        for line in out.splitlines():
+            (cid, usage) = line.split(',')
+            (used, limit) = usage.split(' / ')
+            try:
+                seen_memusage[cid] = with_units_to_int(used)
+                if not seen_memusage_cid_len:
+                    seen_memusage_cid_len = len(cid)
+            except ValueError:
+                logger.info('unable to parse memory usage line\n>{}'.format(line))
+                pass
+    return seen_memusage_cid_len, seen_memusage
+
+
+def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]:
+    seen_cpuperc = {}
+    seen_cpuperc_cid_len = 0
+    if not code:
+        for line in out.splitlines():
+            (cid, cpuperc) = line.split(',')
+            try:
+                seen_cpuperc[cid] = cpuperc
+                if not seen_cpuperc_cid_len:
+                    seen_cpuperc_cid_len = len(cid)
+            except ValueError:
+                logger.info('unable to parse cpu percentage line\n>{}'.format(line))
+                pass
+    return seen_cpuperc_cid_len, seen_cpuperc
+
+
+def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
+    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
+
+    for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
+        if d['fsid'] != fsid:
+            continue
+        if d['name'] != name:
+            continue
+        return d
+    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
+
+
+def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
+    c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
+    out, err, code = '', '', -1
+    for name in (c.cname, c.old_cname):
+        cmd = [
+            container_path, 'inspect',
+            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
+            name
+        ]
+        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
+        if not code:
+            break
+    return out, err, code
+
+##################################
+
+
+@default_image
+def command_adopt(ctx):
+    # type: (CephadmContext) -> None
+
+    if not ctx.skip_pull:
+        try:
+            _pull_image(ctx, ctx.image)
+        except UnauthorizedRegistryError:
+            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
+            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
+            raise Error(err_str)
+
+    (daemon_type, daemon_id) = ctx.name.split('.', 1)
+
+    # legacy check
+    if ctx.style != 'legacy':
+        raise Error('adoption of style %s not implemented' % ctx.style)
+
+    # lock
+    fsid = get_legacy_daemon_fsid(ctx,
+                                  ctx.cluster,
+                                  daemon_type,
+                                  daemon_id,
+                                  legacy_dir=ctx.legacy_dir)
+    if not fsid:
+        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
+    lock = FileLock(ctx, fsid)
+    lock.acquire()
+
+    # call correct adoption
+    if daemon_type in Ceph.daemons:
+        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
+    elif daemon_type == 'prometheus':
+        command_adopt_prometheus(ctx, daemon_id, fsid)
+    elif daemon_type == 'grafana':
+        command_adopt_grafana(ctx, daemon_id, fsid)
+    elif daemon_type == 'node-exporter':
+        raise Error('adoption of node-exporter not implemented')
+    elif daemon_type == 'alertmanager':
+        command_adopt_alertmanager(ctx, daemon_id, fsid)
+    else:
+        raise Error('daemon type %s not recognized' % daemon_type)
+
+
+class AdoptOsd(object):
+    def __init__(self, ctx, osd_data_dir, osd_id):
+        # type: (CephadmContext, str, str) -> None
+        self.ctx = ctx
+        self.osd_data_dir = osd_data_dir
+        self.osd_id = osd_id
+
+    def check_online_osd(self):
+        # type: () -> Tuple[Optional[str], Optional[str]]
+
+        osd_fsid, osd_type = None, None
+
+        path = os.path.join(self.osd_data_dir, 'fsid')
+        try:
+            with open(path, 'r') as f:
+                osd_fsid = f.read().strip()
+            logger.info('Found online OSD at %s' % path)
+        except IOError:
+            logger.info('Unable to read OSD fsid from %s' % path)
+        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
+            with open(os.path.join(self.osd_data_dir, 'type')) as f:
+                osd_type = f.read().strip()
+        else:
+            logger.info('"type" file missing for OSD data dir')
+
+        return osd_fsid, osd_type
+
+    def check_offline_lvm_osd(self):
+        # type: () -> Tuple[Optional[str], Optional[str]]
+        osd_fsid, osd_type = None, None
+
+        c = get_ceph_volume_container(
+            self.ctx,
+            args=['lvm', 'list', '--format=json'],
+        )
+        out, err, code = call_throws(self.ctx, c.run_cmd())
+        if not code:
+            try:
+                js = json.loads(out)
+                if self.osd_id in js:
+                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
+                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
+                    for device in js[self.osd_id]:
+                        if device['tags']['ceph.type'] == 'block':
+                            osd_type = 'bluestore'
+                            break
+                        if device['tags']['ceph.type'] == 'data':
+                            osd_type = 'filestore'
+                            break
+            except ValueError as e:
+                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))
+
+        return osd_fsid, osd_type
+
+    def check_offline_simple_osd(self):
+        # type: () -> Tuple[Optional[str], Optional[str]]
+        osd_fsid, osd_type = None, None
+
+        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
+        if len(osd_file) == 1:
+            with open(osd_file[0], 'r') as f:
+                try:
+                    js = json.loads(f.read())
+                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
+                    osd_fsid = js['fsid']
+                    osd_type = js['type']
+                    if osd_type != 'filestore':
+                        # need this to be mounted for the adopt to work, as it
+                        # needs to move files from this directory
+                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
+                except ValueError as e:
+                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))
+
+        return osd_fsid, osd_type
+
+    def change_cluster_name(self) -> None:
+        logger.info('Attempting to convert osd cluster name to ceph . . .')
+        c = get_ceph_volume_container(
+            self.ctx,
+            args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
+        )
+        out, err, code = call_throws(self.ctx, c.run_cmd())
+        if code:
+            raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
+        try:
+            js = json.loads(out)
+            if not js:
+                raise RuntimeError(f'Failed to find osd.{self.osd_id}')
+            device: Optional[Dict[Any, Any]] = None
+            for d in js[self.osd_id]:
+                if d['type'] == 'block':
+                    device = d
+                    break
+            if not device:
+                raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
+            vg = device['vg_name']
+            out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
+            if code:
+                raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
+            out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
+            if code:
+                raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
+            logger.info('Successfully converted osd cluster name')
+        except (Exception, RuntimeError) as e:
+            logger.info(f'Failed to convert osd cluster name: {e}')
+
+
+def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
+    # type: (CephadmContext, str, str, str) -> None
+
+    (uid, gid) = extract_uid_gid(ctx)
+
+    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
+                    (daemon_type, ctx.cluster, daemon_id))
+    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)
+
+    if not os.path.exists(data_dir_src):
+        raise Error("{}.{} data directory '{}' does not exist.  "
+                    'Incorrect ID specified, or daemon already adopted?'.format(
+                        daemon_type, daemon_id, data_dir_src))
+
+    osd_fsid = None
+    if daemon_type == 'osd':
+        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
+        osd_fsid, osd_type = adopt_osd.check_online_osd()
+        if not osd_fsid:
+            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
+        if not osd_fsid:
+            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
+        if not osd_fsid:
+            raise Error('Unable to find OSD {}'.format(daemon_id))
+        elif ctx.cluster != 'ceph':
+            adopt_osd.change_cluster_name()
+        logger.info('objectstore_type is %s' % osd_type)
+        assert osd_type
+        if osd_type == 'filestore':
+            raise Error('FileStore is not supported by cephadm')
+
+    # NOTE: implicit assumption here that the units correspond to the
+    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
+    # CLUSTER field.
+    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+    (enabled, state, _) = check_unit(ctx, unit_name)
+    if state == 'running':
+        logger.info('Stopping old systemd unit %s...' % unit_name)
+        call_throws(ctx, ['systemctl', 'stop', unit_name])
+    if enabled:
+        logger.info('Disabling old systemd unit %s...' % unit_name)
+        call_throws(ctx, ['systemctl', 'disable', unit_name])
+
+    # data
+    logger.info('Moving data...')
+    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+                                 uid=uid, gid=gid)
+    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
+               data_dir_dst,
+               uid=uid, gid=gid)
+    logger.debug('Remove dir `%s`' % (data_dir_src))
+    if os.path.ismount(data_dir_src):
+        call_throws(ctx, ['umount', data_dir_src])
+    os.rmdir(data_dir_src)
+
+    logger.info('Chowning content...')
+    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
+
+    if daemon_type == 'mon':
+        # rename *.ldb -> *.sst, in case they are coming from ubuntu
+        store = os.path.join(data_dir_dst, 'store.db')
+        num_renamed = 0
+        if os.path.exists(store):
+            for oldf in os.listdir(store):
+                if oldf.endswith('.ldb'):
+                    newf = oldf.replace('.ldb', '.sst')
+                    oldp = os.path.join(store, oldf)
+                    newp = os.path.join(store, newf)
+                    logger.debug('Renaming %s -> %s' % (oldp, newp))
+                    os.rename(oldp, newp)
+        if num_renamed:
+            logger.info('Renamed %d leveldb *.ldb files to *.sst',
+                        num_renamed)
+    if daemon_type == 'osd':
+        for n in ['block', 'block.db', 'block.wal']:
+            p = os.path.join(data_dir_dst, n)
+            if os.path.exists(p):
+                logger.info('Chowning %s...' % p)
+                os.chown(p, uid, gid)
+        # disable the ceph-volume 'simple' mode files on the host
+        simple_fn = os.path.join('/etc/ceph/osd',
+                                 '%s-%s.json' % (daemon_id, osd_fsid))
+        if os.path.exists(simple_fn):
+            new_fn = simple_fn + '.adopted-by-cephadm'
+            logger.info('Renaming %s -> %s', simple_fn, new_fn)
+            os.rename(simple_fn, new_fn)
+            logger.info('Disabling host unit ceph-volume@ simple unit...')
+            call(ctx, ['systemctl', 'disable',
+                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
+        else:
+            # assume this is an 'lvm' c-v for now, but don't error
+            # out if it's not.
+            logger.info('Disabling host unit ceph-volume@ lvm unit...')
+            call(ctx, ['systemctl', 'disable',
+                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
+
+    # config
+    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
+    config_src = os.path.abspath(ctx.legacy_dir + config_src)
+    config_dst = os.path.join(data_dir_dst, 'config')
+    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+    # logs
+    logger.info('Moving logs...')
+    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
+                   (ctx.cluster, daemon_type, daemon_id))
+    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
+    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
+    move_files(ctx, glob(log_dir_src),
+               log_dir_dst,
+               uid=uid, gid=gid)
+
+    logger.info('Creating new units...')
+    make_var_run(ctx, fsid, uid, gid)
+    c = get_container(ctx, fsid, daemon_type, daemon_id)
+    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
+                        enable=True,  # unconditionally enable the new unit
+                        start=(state == 'running' or ctx.force_start),
+                        osd_fsid=osd_fsid)
+    update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_prometheus(ctx, daemon_id, fsid):
+    # type: (CephadmContext, str, str) -> None
+    daemon_type = 'prometheus'
+    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+    # should try to set the ports we know cephadm defaults
+    # to for these services in the firewall.
+    ports = Monitoring.port_map['prometheus']
+    endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+    _stop_and_disable(ctx, 'prometheus')
+
+    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+                                 uid=uid, gid=gid)
+
+    # config
+    config_src = '/etc/prometheus/prometheus.yml'
+    config_src = os.path.abspath(ctx.legacy_dir + config_src)
+    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
+    makedirs(config_dst, uid, gid, 0o755)
+    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+    # data
+    data_src = '/var/lib/prometheus/metrics/'
+    data_src = os.path.abspath(ctx.legacy_dir + data_src)
+    data_dst = os.path.join(data_dir_dst, 'data')
+    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+    make_var_run(ctx, fsid, uid, gid)
+    c = get_container(ctx, fsid, daemon_type, daemon_id)
+    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+                  deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+    update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_grafana(ctx, daemon_id, fsid):
+    # type: (CephadmContext, str, str) -> None
+
+    daemon_type = 'grafana'
+    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+    # should try to set the ports we know cephadm defaults
+    # to for these services in the firewall.
+    ports = Monitoring.port_map['grafana']
+    endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+    _stop_and_disable(ctx, 'grafana-server')
+
+    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+                                 uid=uid, gid=gid)
+
+    # config
+    config_src = '/etc/grafana/grafana.ini'
+    config_src = os.path.abspath(ctx.legacy_dir + config_src)
+    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
+    makedirs(config_dst, uid, gid, 0o755)
+    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+    prov_src = '/etc/grafana/provisioning/'
+    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
+    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
+    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)
+
+    # cert
+    cert = '/etc/grafana/grafana.crt'
+    key = '/etc/grafana/grafana.key'
+    if os.path.exists(cert) and os.path.exists(key):
+        cert_src = '/etc/grafana/grafana.crt'
+        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
+        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
+        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
+        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)
+
+        key_src = '/etc/grafana/grafana.key'
+        key_src = os.path.abspath(ctx.legacy_dir + key_src)
+        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
+        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)
+
+        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
+    else:
+        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))
+
+    # data - possible custom dashboards/plugins
+    data_src = '/var/lib/grafana/'
+    data_src = os.path.abspath(ctx.legacy_dir + data_src)
+    data_dst = os.path.join(data_dir_dst, 'data')
+    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+    make_var_run(ctx, fsid, uid, gid)
+    c = get_container(ctx, fsid, daemon_type, daemon_id)
+    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+                  deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+    update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_alertmanager(ctx, daemon_id, fsid):
+    # type: (CephadmContext, str, str) -> None
+
+    daemon_type = 'alertmanager'
+    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+    # should try to set the ports we know cephadm defaults
+    # to for these services in the firewall.
+    ports = Monitoring.port_map['alertmanager']
+    endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+    _stop_and_disable(ctx, 'prometheus-alertmanager')
+
+    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+                                 uid=uid, gid=gid)
+
+    # config
+    config_src = '/etc/prometheus/alertmanager.yml'
+    config_src = os.path.abspath(ctx.legacy_dir + config_src)
+    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
+    makedirs(config_dst, uid, gid, 0o755)
+    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+    # data
+    data_src = '/var/lib/prometheus/alertmanager/'
+    data_src = os.path.abspath(ctx.legacy_dir + data_src)
+    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
+    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+    make_var_run(ctx, fsid, uid, gid)
+    c = get_container(ctx, fsid, daemon_type, daemon_id)
+    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+                  deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+    update_firewalld(ctx, daemon_type)
+
+
+def _adjust_grafana_ini(filename):
+    # type: (str) -> None
+
+    # Update cert_file, cert_key pathnames in server section
+    # ConfigParser does not preserve comments
+    try:
+        with open(filename, 'r') as grafana_ini:
+            lines = grafana_ini.readlines()
+        with write_new(filename, perms=None) as grafana_ini:
+            server_section = False
+            for line in lines:
+                if line.startswith('['):
+                    server_section = False
+                if line.startswith('[server]'):
+                    server_section = True
+                if server_section:
+                    line = re.sub(r'^cert_file.*',
+                                  'cert_file = /etc/grafana/certs/cert_file', line)
+                    line = re.sub(r'^cert_key.*',
+                                  'cert_key = /etc/grafana/certs/cert_key', line)
+                grafana_ini.write(line)
+    except OSError as err:
+        raise Error('Cannot update {}: {}'.format(filename, err))
+
+
+def _stop_and_disable(ctx, unit_name):
+    # type: (CephadmContext, str) -> None
+
+    (enabled, state, _) = check_unit(ctx, unit_name)
+    if state == 'running':
+        logger.info('Stopping old systemd unit %s...' % unit_name)
+        call_throws(ctx, ['systemctl', 'stop', unit_name])
+    if enabled:
+        logger.info('Disabling old systemd unit %s...' % unit_name)
+        call_throws(ctx, ['systemctl', 'disable', unit_name])
+
+##################################
+
+
+def command_rm_daemon(ctx):
+    # type: (CephadmContext) -> None
+    lock = FileLock(ctx, ctx.fsid)
+    lock.acquire()
+
+    (daemon_type, daemon_id) = ctx.name.split('.', 1)
+    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+    if daemon_type in ['mon', 'osd'] and not ctx.force:
+        raise Error('must pass --force to proceed: '
+                    'this command may destroy precious data!')
+
+    call(ctx, ['systemctl', 'stop', unit_name],
+         verbosity=CallVerbosity.DEBUG)
+    call(ctx, ['systemctl', 'reset-failed', unit_name],
+         verbosity=CallVerbosity.DEBUG)
+    call(ctx, ['systemctl', 'disable', unit_name],
+         verbosity=CallVerbosity.DEBUG)
+
+    # force remove rgw admin socket file if leftover
+    if daemon_type in ['rgw']:
+        rgw_asok_path = f'/var/run/ceph/{ctx.fsid}/ceph-client.{ctx.name}.*.asok'
+        call(ctx, ['rm', '-rf', rgw_asok_path],
+             verbosity=CallVerbosity.DEBUG)
+
+    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
+    if daemon_type in ['mon', 'osd', 'prometheus'] and \
+       not ctx.force_delete_data:
+        # rename it out of the way -- do not delete
+        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
+        if not os.path.exists(backup_dir):
+            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
+        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
+                                datetime.datetime.utcnow().strftime(DATEFMT))
+        os.rename(data_dir,
+                  os.path.join(backup_dir, dirname))
+    else:
+        call_throws(ctx, ['rm', '-rf', data_dir])
+
+    endpoints = fetch_tcp_ports(ctx)
+    ports: List[int] = [e.port for e in endpoints]
+    if ports:
+        try:
+            fw = Firewalld(ctx)
+            fw.close_ports(ports)
+            fw.apply_rules()
+        except RuntimeError as e:
+            # in case we cannot close the ports we will remove
+            # the daemon but keep them open.
+            logger.warning(f' Error when trying to close ports: {e}')
+
+
+##################################
+
+
+def _zap(ctx: CephadmContext, what: str) -> None:
+    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+    c = get_ceph_volume_container(ctx,
+                                  args=['lvm', 'zap', '--destroy', what],
+                                  volume_mounts=mounts,
+                                  envs=ctx.env)
+    logger.info(f'Zapping {what}...')
+    out, err, code = call_throws(ctx, c.run_cmd())
+
+
+@infer_image
+def _zap_osds(ctx: CephadmContext) -> None:
+    # assume fsid lock already held
+
+    # list
+    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+    c = get_ceph_volume_container(ctx,
+                                  args=['inventory', '--format', 'json'],
+                                  volume_mounts=mounts,
+                                  envs=ctx.env)
+    out, err, code = call_throws(ctx, c.run_cmd())
+    if code:
+        raise Error('failed to list osd inventory')
+    try:
+        ls = json.loads(out)
+    except ValueError as e:
+        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')
+
+    for i in ls:
+        matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
+        if any(matches) and all(matches):
+            _zap(ctx, i.get('path'))
+        elif any(matches):
+            lv_names = [lv['name'] for lv in i.get('lvs', [])]
+            # TODO: we need to map the lv_names back to device paths (the vg
+            # id isn't part of the output here!)
+            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
+
+
+def command_zap_osds(ctx: CephadmContext) -> None:
+    if not ctx.force:
+        raise Error('must pass --force to proceed: '
+                    'this command may destroy precious data!')
+
+    lock = FileLock(ctx, ctx.fsid)
+    lock.acquire()
+
+    _zap_osds(ctx)
+
+##################################
+
+
+def get_ceph_cluster_count(ctx: CephadmContext) -> int:
+    return len([c for c in os.listdir(ctx.data_dir) if is_fsid(c)])
+
+
+def command_rm_cluster(ctx: CephadmContext) -> None:
+    if not ctx.force:
+        raise Error('must pass --force to proceed: '
+                    'this command may destroy precious data!')
+
+    lock = FileLock(ctx, ctx.fsid)
+    lock.acquire()
+    _rm_cluster(ctx, ctx.keep_logs, ctx.zap_osds)
+
+
+def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
+
+    if not ctx.fsid:
+        raise Error('must select the cluster to delete by passing --fsid to proceed')
+
+    def disable_systemd_service(unit_name: str) -> None:
+        call(ctx, ['systemctl', 'stop', unit_name],
+             verbosity=CallVerbosity.DEBUG)
+        call(ctx, ['systemctl', 'reset-failed', unit_name],
+             verbosity=CallVerbosity.DEBUG)
+        call(ctx, ['systemctl', 'disable', unit_name],
+             verbosity=CallVerbosity.DEBUG)
+
+    logger.info(f'Deleting cluster with fsid: {ctx.fsid}')
+
+    # stop + disable individual daemon units
+    for d in list_daemons(ctx, detail=False):
+        if d['fsid'] != ctx.fsid:
+            continue
+        if d['style'] != 'cephadm:v1':
+            continue
+        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))
+
+    # cluster units
+    for unit_name in ['ceph-%s.target' % ctx.fsid]:
+        disable_systemd_service(unit_name)
+
+    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
+    call(ctx, ['systemctl', 'stop', slice_name],
+         verbosity=CallVerbosity.DEBUG)
+
+    # osds?
+    if zap_osds:
+        _zap_osds(ctx)
+
+    # rm units
+    call_throws(ctx, ['rm', '-f', ctx.unit_dir
+                      + '/ceph-%s@.service' % ctx.fsid])
+    call_throws(ctx, ['rm', '-f', ctx.unit_dir
+                      + '/ceph-%s.target' % ctx.fsid])
+    call_throws(ctx, ['rm', '-rf',
+                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
+    # rm data
+    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])
+
+    if not keep_logs:
+        # rm logs
+        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
+        call_throws(ctx, ['rm', '-rf', ctx.log_dir
+                          + '/*.wants/ceph-%s@*' % ctx.fsid])
+
+    # rm logrotate config
+    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
+
+    # if last cluster on host remove shared files
+    if get_ceph_cluster_count(ctx) == 0:
+        disable_systemd_service('ceph.target')
+
+        # rm shared ceph target files
+        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
+        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])
+
+        # rm cephadm logrotate config
+        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
+
+        if not keep_logs:
+            # remove all cephadm logs
+            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
+                os.remove(fname)
+
+    # rm sysctl settings
+    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
+
+    for sysctl_dir in sysctl_dirs:
+        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
+            p.unlink()
+
+    # cleanup remaining ceph directories
+    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/cephadm-{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
+    for dd in ceph_dirs:
+        shutil.rmtree(dd, ignore_errors=True)
+
+    # clean up config, keyring, and pub key files
+    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
+    if os.path.exists(files[0]):
+        valid_fsid = False
+        with open(files[0]) as f:
+            if ctx.fsid in f.read():
+                valid_fsid = True
+        if valid_fsid:
+            # rm configuration files on /etc/ceph
+            for n in range(0, len(files)):
+                if os.path.exists(files[n]):
+                    os.remove(files[n])
+
+##################################
+
+
+def check_time_sync(ctx, enabler=None):
+    # type: (CephadmContext, Optional[Packager]) -> bool
+    units = [
+        'chrony.service',  # 18.04 (at least)
+        'chronyd.service',  # el / opensuse
+        'systemd-timesyncd.service',
+        'ntpd.service',  # el7 (at least)
+        'ntp.service',  # 18.04 (at least)
+        'ntpsec.service',  # 20.04 (at least) / buster
+        'openntpd.service',  # ubuntu / debian
+    ]
+    if not check_units(ctx, units, enabler):
+        logger.warning('No time sync service is running; checked for %s' % units)
+        return False
+    return True
+
+
+def command_check_host(ctx: CephadmContext) -> None:
+    errors = []
+    commands = ['systemctl', 'lvcreate']
+
+    try:
+        engine = check_container_engine(ctx)
+        logger.info(f'{engine} is present')
+    except Error as e:
+        errors.append(str(e))
+
+    for command in commands:
+        try:
+            find_program(command)
+            logger.info('%s is present' % command)
+        except ValueError:
+            errors.append('%s binary does not appear to be installed' % command)
+
+    # check for configured+running chronyd or ntp
+    if not check_time_sync(ctx):
+        errors.append('No time synchronization is active')
+
+    if 'expect_hostname' in ctx and ctx.expect_hostname:
+        if get_hostname().lower() != ctx.expect_hostname.lower():
+            errors.append('hostname "%s" does not match expected hostname "%s"' % (
+                get_hostname(), ctx.expect_hostname))
+        else:
+            logger.info('Hostname "%s" matches what is expected.',
+                        ctx.expect_hostname)
+
+    if errors:
+        raise Error('\nERROR: '.join(errors))
+
+    logger.info('Host looks OK')
+
+##################################
+
+
+def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
+    try:
+        s_pwd = pwd.getpwnam(ssh_user)
+    except KeyError:
+        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))
+
+    ssh_uid = s_pwd.pw_uid
+    ssh_gid = s_pwd.pw_gid
+    ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
+    return ssh_uid, ssh_gid, ssh_dir
+
+
+def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
+    """Authorize the public key for the provided ssh user"""
+
+    def key_in_file(path: str, key: str) -> bool:
+        if not os.path.exists(path):
+            return False
+        with open(path) as f:
+            lines = f.readlines()
+            for line in lines:
+                if line.strip() == key.strip():
+                    return True
+        return False
+
+    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
+    if ssh_pub_key is None or ssh_pub_key.isspace():
+        raise Error('Trying to authorize an empty ssh key')
+
+    ssh_pub_key = ssh_pub_key.strip()
+    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
+    if not os.path.exists(ssh_dir):
+        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
+
+    auth_keys_file = '%s/authorized_keys' % ssh_dir
+    if key_in_file(auth_keys_file, ssh_pub_key):
+        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
+        return False
+
+    add_newline = False
+    if os.path.exists(auth_keys_file):
+        with open(auth_keys_file, 'r') as f:
+            f.seek(0, os.SEEK_END)
+            if f.tell() > 0:
+                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
+                if f.read() != '\n':
+                    add_newline = True
+
+    with open(auth_keys_file, 'a') as f:
+        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
+        os.fchmod(f.fileno(), DEFAULT_MODE)  # just in case we created it
+        if add_newline:
+            f.write('\n')
+        f.write(ssh_pub_key + '\n')
+
+    return True
+
+
+def revoke_ssh_key(key: str, ssh_user: str) -> None:
+    """Revoke the public key authorization for the ssh user"""
+    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
+    auth_keys_file = '%s/authorized_keys' % ssh_dir
+    deleted = False
+    if os.path.exists(auth_keys_file):
+        with open(auth_keys_file, 'r') as f:
+            lines = f.readlines()
+        _, filename = tempfile.mkstemp()
+        with open(filename, 'w') as f:
+            os.fchown(f.fileno(), ssh_uid, ssh_gid)
+            os.fchmod(f.fileno(), DEFAULT_MODE)  # secure access to the keys file
+            for line in lines:
+                if line.strip() == key.strip():
+                    deleted = True
+                else:
+                    f.write(line)
+
+    if deleted:
+        shutil.move(filename, auth_keys_file)
+    else:
+        logger.warning('Cannot find the ssh key to be deleted')
+
+
+def check_ssh_connectivity(ctx: CephadmContext) -> None:
+
+    def cmd_is_available(cmd: str) -> bool:
+        if shutil.which(cmd) is None:
+            logger.warning(f'Command not found: {cmd}')
+            return False
+        return True
+
+    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
+        logger.warning('Cannot check ssh connectivity. Skipping...')
+        return
+
+    ssh_priv_key_path = ''
+    ssh_pub_key_path = ''
+    ssh_signed_cert_path = ''
+    if ctx.ssh_private_key and ctx.ssh_public_key:
+        # let's use the keys provided by the user
+        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
+        ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
+    elif ctx.ssh_private_key and ctx.ssh_signed_cert:
+        # CA signed keys use case
+        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
+        ssh_signed_cert_path = pathify(ctx.ssh_signed_cert.name)
+    else:
+        # no custom keys, let's generate some random keys just for this check
+        ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
+        ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
+        ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
+        _, _, code = call(ctx, ssh_key_gen_cmd)
+        if code != 0:
+            logger.warning('Cannot generate keys to check ssh connectivity.')
+            return
+
+    if ssh_signed_cert_path:
+        logger.info('Verification for CA signed keys authentication not implemented. Skipping ...')
+    elif ssh_pub_key_path:
+        logger.info('Verifying ssh connectivity using standard pubkey authentication ...')
+        with open(ssh_pub_key_path, 'r') as f:
+            key = f.read().strip()
+        new_key = authorize_ssh_key(key, ctx.ssh_user)
+        ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
+        _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
+                                *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
+                                '-o PasswordAuthentication=no',
+                                f'{ctx.ssh_user}@{get_hostname()}',
+                                'sudo echo'])
+
+        # we only remove the key if it's a new one. In case the user has provided
+        # some already existing key then we don't alter authorized_keys file
+        if new_key:
+            revoke_ssh_key(key, ctx.ssh_user)
+
+        pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
+        prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
+        ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
+        err_msg = f"""
+** Please verify your user's ssh configuration and make sure:
+- User {ctx.ssh_user} must have passwordless sudo access
+{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
+"""
+        if code != 0:
+            raise Error(err_msg)
+
+
+def command_prepare_host(ctx: CephadmContext) -> None:
+    logger.info('Verifying podman|docker is present...')
+    pkg = None
+    try:
+        check_container_engine(ctx)
+    except Error as e:
+        logger.warning(str(e))
+        if not pkg:
+            pkg = create_packager(ctx)
+        pkg.install_podman()
+
+    logger.info('Verifying lvm2 is present...')
+    if not find_executable('lvcreate'):
+        if not pkg:
+            pkg = create_packager(ctx)
+        pkg.install(['lvm2'])
+
+    logger.info('Verifying time synchronization is in place...')
+    if not check_time_sync(ctx):
+        if not pkg:
+            pkg = create_packager(ctx)
+        pkg.install(['chrony'])
+        # check again, and this time try to enable
+        # the service
+        check_time_sync(ctx, enabler=pkg)
+
+    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
+        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
+        call_throws(ctx, ['hostname', ctx.expect_hostname])
+        with open('/etc/hostname', 'w') as f:
+            f.write(ctx.expect_hostname + '\n')
+
+    logger.info('Repeating the final host check...')
+    command_check_host(ctx)
+
+##################################
+
+
+class CustomValidation(argparse.Action):
+
+    def _check_name(self, values: str) -> None:
+        try:
+            (daemon_type, daemon_id) = values.split('.', 1)
+        except ValueError:
+            raise argparse.ArgumentError(self,
+                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')
+
+        daemons = get_supported_daemons()
+        if daemon_type not in daemons:
+            raise argparse.ArgumentError(self,
+                                         'name must declare the type of daemon e.g. '
+                                         '{}'.format(', '.join(daemons)))
+
+    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
+                 option_string: Optional[str] = None) -> None:
+        assert isinstance(values, str)
+        if self.dest == 'name':
+            self._check_name(values)
+            setattr(namespace, self.dest, values)
+
+##################################
+
+
+def get_distro():
+    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
+    distro = None
+    distro_version = None
+    distro_codename = None
+    with open('/etc/os-release', 'r') as f:
+        for line in f.readlines():
+            line = line.strip()
+            if '=' not in line or line.startswith('#'):
+                continue
+            (var, val) = line.split('=', 1)
+            if val[0] == '"' and val[-1] == '"':
+                val = val[1:-1]
+            if var == 'ID':
+                distro = val.lower()
+            elif var == 'VERSION_ID':
+                distro_version = val.lower()
+            elif var == 'VERSION_CODENAME':
+                distro_codename = val.lower()
+    return distro, distro_version, distro_codename
+
+
+class Packager(object):
+    def __init__(self, ctx: CephadmContext,
+                 stable: Optional[str] = None, version: Optional[str] = None,
+                 branch: Optional[str] = None, commit: Optional[str] = None):
+        assert \
+            (stable and not version and not branch and not commit) or \
+            (not stable and version and not branch and not commit) or \
+            (not stable and not version and branch) or \
+            (not stable and not version and not branch and not commit)
+        self.ctx = ctx
+        self.stable = stable
+        self.version = version
+        self.branch = branch
+        self.commit = commit
+
+    def validate(self) -> None:
+        """Validate parameters before writing any state to disk."""
+        pass
+
+    def add_repo(self) -> None:
+        raise NotImplementedError
+
+    def rm_repo(self) -> None:
+        raise NotImplementedError
+
+    def install(self, ls: List[str]) -> None:
+        raise NotImplementedError
+
+    def install_podman(self) -> None:
+        raise NotImplementedError
+
+    def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
+        # query shaman
+        logger.info('Fetching repo metadata from shaman and chacra...')
+        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
+            distro=distro,
+            distro_version=distro_version,
+            branch=branch,
+            sha1=commit or 'latest',
+            arch=get_arch()
+        )
+        try:
+            shaman_response = urlopen(shaman_url)
+        except HTTPError as err:
+            logger.error('repository not found in shaman (might not be available yet)')
+            raise Error('%s, failed to fetch %s' % (err, shaman_url))
+        chacra_url = ''
+        try:
+            chacra_url = shaman_response.geturl()
+            chacra_response = urlopen(chacra_url)
+        except HTTPError as err:
+            logger.error('repository not found in chacra (might not be available yet)')
+            raise Error('%s, failed to fetch %s' % (err, chacra_url))
+        return chacra_response.read().decode('utf-8')
+
+    def repo_gpgkey(self) -> Tuple[str, str]:
+        if self.ctx.gpg_url:
+            return self.ctx.gpg_url, 'manual'
+        if self.stable or self.version:
+            return 'https://download.ceph.com/keys/release.gpg', 'release'
+        else:
+            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'
+
+    def enable_service(self, service: str) -> None:
+        """
+        Start and enable the service (typically using systemd).
+        """
+        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
+
+
+class Apt(Packager):
+    DISTRO_NAMES = {
+        'ubuntu': 'ubuntu',
+        'debian': 'debian',
+    }
+
+    def __init__(self, ctx: CephadmContext,
+                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
+        super(Apt, self).__init__(ctx, stable=stable, version=version,
+                                  branch=branch, commit=commit)
+        assert distro
+        self.ctx = ctx
+        self.distro = self.DISTRO_NAMES[distro]
+        self.distro_codename = distro_codename
+        self.distro_version = distro_version
+
+    def repo_path(self) -> str:
+        return '/etc/apt/sources.list.d/ceph.list'
+
+    def add_repo(self) -> None:
+
+        url, name = self.repo_gpgkey()
+        logger.info('Installing repo GPG key from %s...' % url)
+        try:
+            response = urlopen(url)
+        except HTTPError as err:
+            logger.error('failed to fetch GPG repo key from %s: %s' % (
+                url, err))
+            raise Error('failed to fetch GPG key')
+        key = response.read()
+        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
+            f.write(key)
+
+        if self.version:
+            content = 'deb %s/debian-%s/ %s main\n' % (
+                self.ctx.repo_url, self.version, self.distro_codename)
+        elif self.stable:
+            content = 'deb %s/debian-%s/ %s main\n' % (
+                self.ctx.repo_url, self.stable, self.distro_codename)
+        else:
+            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
+                                        self.commit)
+
+        logger.info('Installing repo file at %s...' % self.repo_path())
+        with open(self.repo_path(), 'w') as f:
+            f.write(content)
+
+        self.update()
+
+    def rm_repo(self) -> None:
+        for name in ['autobuild', 'release', 'manual']:
+            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
+            if os.path.exists(p):
+                logger.info('Removing repo GPG key %s...' % p)
+                os.unlink(p)
+        if os.path.exists(self.repo_path()):
+            logger.info('Removing repo at %s...' % self.repo_path())
+            os.unlink(self.repo_path())
+
+        if self.distro == 'ubuntu':
+            self.rm_kubic_repo()
+
+    def install(self, ls: List[str]) -> None:
+        logger.info('Installing packages %s...' % ls)
+        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)
+
+    def update(self) -> None:
+        logger.info('Updating package list...')
+        call_throws(self.ctx, ['apt-get', 'update'])
+
+    def install_podman(self) -> None:
+        if self.distro == 'ubuntu':
+            logger.info('Setting up repo for podman...')
+            self.add_kubic_repo()
+            self.update()
+
+        logger.info('Attempting podman install...')
+        try:
+            self.install(['podman'])
+        except Error:
+            logger.info('Podman did not work.  Falling back to docker...')
+            self.install(['docker.io'])
+
+    def kubic_repo_url(self) -> str:
+        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
+               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version
+
+    def kubic_repo_path(self) -> str:
+        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
+
+    def kubic_repo_gpgkey_url(self) -> str:
+        return '%s/Release.key' % self.kubic_repo_url()
+
+    def kubic_repo_gpgkey_path(self) -> str:
+        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
+
+    def add_kubic_repo(self) -> None:
+        url = self.kubic_repo_gpgkey_url()
+        logger.info('Installing repo GPG key from %s...' % url)
+        try:
+            response = urlopen(url)
+        except HTTPError as err:
+            logger.error('failed to fetch GPG repo key from %s: %s' % (
+                url, err))
+            raise Error('failed to fetch GPG key')
+        key = response.read().decode('utf-8')
+        tmp_key = write_tmp(key, 0, 0)
+        keyring = self.kubic_repo_gpgkey_path()
+        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])
+
+        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
+        content = 'deb %s /\n' % self.kubic_repo_url()
+        with open(self.kubic_repo_path(), 'w') as f:
+            f.write(content)
+
+    def rm_kubic_repo(self) -> None:
+        keyring = self.kubic_repo_gpgkey_path()
+        if os.path.exists(keyring):
+            logger.info('Removing repo GPG key %s...' % keyring)
+            os.unlink(keyring)
+
+        p = self.kubic_repo_path()
+        if os.path.exists(p):
+            logger.info('Removing repo at %s...' % p)
+            os.unlink(p)
+
+
+class YumDnf(Packager):
+    DISTRO_NAMES = {
+        'centos': ('centos', 'el'),
+        'rhel': ('centos', 'el'),
+        'scientific': ('centos', 'el'),
+        'rocky': ('centos', 'el'),
+        'almalinux': ('centos', 'el'),
+        'ol': ('centos', 'el'),
+        'fedora': ('fedora', 'fc'),
+        'mariner': ('mariner', 'cm'),
+    }
+
+    def __init__(self, ctx: CephadmContext,
+                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+                 distro: Optional[str], distro_version: Optional[str]) -> None:
+        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
+                                     branch=branch, commit=commit)
+        assert distro
+        assert distro_version
+        self.ctx = ctx
+        self.major = int(distro_version.split('.')[0])
+        self.distro_normalized = self.DISTRO_NAMES[distro][0]
+        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
+        if (self.distro_code == 'fc' and self.major >= 30) or \
+           (self.distro_code == 'el' and self.major >= 8):
+            self.tool = 'dnf'
+        elif (self.distro_code == 'cm'):
+            self.tool = 'tdnf'
+        else:
+            self.tool = 'yum'
+
+    def custom_repo(self, **kw: Any) -> str:
+        """
+        Repo files need special care in that a whole line should not be present
+        if there is no value for it. Because we were using `format()` we could
+        not conditionally add a line for a repo file. So the end result would
+        contain a key with a missing value (say if we were passing `None`).
+
+        For example, it could look like::
+
+        [ceph repo]
+        name= ceph repo
+        proxy=
+        gpgcheck=
+
+        Which breaks. This function allows us to conditionally add lines,
+        preserving an order and be more careful.
+
+        Previously, and for historical purposes, this is how the template used
+        to look::
+
+        custom_repo =
+        [{repo_name}]
+        name={name}
+        baseurl={baseurl}
+        enabled={enabled}
+        gpgcheck={gpgcheck}
+        type={_type}
+        gpgkey={gpgkey}
+        proxy={proxy}
+
+        """
+        lines = []
+
+        # by using tuples (vs a dict) we preserve the order of what we want to
+        # return, like starting with a [repo name]
+        tmpl = (
+            ('reponame', '[%s]'),
+            ('name', 'name=%s'),
+            ('baseurl', 'baseurl=%s'),
+            ('enabled', 'enabled=%s'),
+            ('gpgcheck', 'gpgcheck=%s'),
+            ('_type', 'type=%s'),
+            ('gpgkey', 'gpgkey=%s'),
+            ('proxy', 'proxy=%s'),
+            ('priority', 'priority=%s'),
+        )
+
+        for line in tmpl:
+            tmpl_key, tmpl_value = line  # key values from tmpl
+
+            # ensure that there is an actual value (not None nor empty string)
+            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
+                lines.append(tmpl_value % kw.get(tmpl_key))
+
+        return '\n'.join(lines)
+
+    def repo_path(self) -> str:
+        return '/etc/yum.repos.d/ceph.repo'
+
+    def repo_baseurl(self) -> str:
+        assert self.stable or self.version
+        if self.version:
+            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
+                                     self.distro_code)
+        else:
+            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
+                                     self.distro_code)
+
+    def validate(self) -> None:
+        if self.distro_code.startswith('fc'):
+            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
+        if self.distro_code == 'el7':
+            if self.stable and self.stable >= 'pacific':
+                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
+            if self.version and self.version.split('.')[0] >= '16':
+                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')
+
+        if self.stable or self.version:
+            # we know that yum & dnf require there to be a
+            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
+            # is gettable in order to validate the inputs
+            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
+            try:
+                urlopen(test_url)
+            except HTTPError as err:
+                logger.error('unable to fetch repo metadata: %r', err)
+                raise Error('failed to fetch repository metadata. please check'
+                            ' the provided parameters are correct and try again')
+
+    def add_repo(self) -> None:
+        if self.stable or self.version:
+            content = ''
+            for n, t in {
+                    'Ceph': '$basearch',
+                    'Ceph-noarch': 'noarch',
+                    'Ceph-source': 'SRPMS'}.items():
+                content += '[%s]\n' % (n)
+                content += self.custom_repo(
+                    name='Ceph %s' % t,
+                    baseurl=self.repo_baseurl() + '/' + t,
+                    enabled=1,
+                    gpgcheck=1,
+                    gpgkey=self.repo_gpgkey()[0],
+                )
+                content += '\n\n'
+        else:
+            content = self.query_shaman(self.distro_normalized, self.major,
+                                        self.branch,
+                                        self.commit)
+
+        logger.info('Writing repo to %s...' % self.repo_path())
+        with open(self.repo_path(), 'w') as f:
+            f.write(content)
+
+        if self.distro_code.startswith('el'):
+            logger.info('Enabling EPEL...')
+            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])
+
+    def rm_repo(self) -> None:
+        if os.path.exists(self.repo_path()):
+            os.unlink(self.repo_path())
+
+    def install(self, ls: List[str]) -> None:
+        logger.info('Installing packages %s...' % ls)
+        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)
+
+    def install_podman(self) -> None:
+        self.install(['podman'])
+
+
+class Zypper(Packager):
+    DISTRO_NAMES = [
+        'sles',
+        'opensuse-tumbleweed',
+        'opensuse-leap'
+    ]
+
+    def __init__(self, ctx: CephadmContext,
+                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+                 distro: Optional[str], distro_version: Optional[str]) -> None:
+        super(Zypper, self).__init__(ctx, stable=stable, version=version,
+                                     branch=branch, commit=commit)
+        assert distro is not None
+        self.ctx = ctx
+        self.tool = 'zypper'
+        self.distro = 'opensuse'
+        self.distro_version = '15.1'
+        if 'tumbleweed' not in distro and distro_version is not None:
+            self.distro_version = distro_version
+
+    def custom_repo(self, **kw: Any) -> str:
+        """
+        See YumDnf for format explanation.
+        """
+        lines = []
+
+        # by using tuples (vs a dict) we preserve the order of what we want to
+        # return, like starting with a [repo name]
+        tmpl = (
+            ('reponame', '[%s]'),
+            ('name', 'name=%s'),
+            ('baseurl', 'baseurl=%s'),
+            ('enabled', 'enabled=%s'),
+            ('gpgcheck', 'gpgcheck=%s'),
+            ('_type', 'type=%s'),
+            ('gpgkey', 'gpgkey=%s'),
+            ('proxy', 'proxy=%s'),
+            ('priority', 'priority=%s'),
+        )
+
+        for line in tmpl:
+            tmpl_key, tmpl_value = line  # key values from tmpl
+
+            # ensure that there is an actual value (not None nor empty string)
+            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
+                lines.append(tmpl_value % kw.get(tmpl_key))
+
+        return '\n'.join(lines)
+
+    def repo_path(self) -> str:
+        return '/etc/zypp/repos.d/ceph.repo'
+
+    def repo_baseurl(self) -> str:
+        assert self.stable or self.version
+        if self.version:
+            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
+                                     self.stable, self.distro)
+        else:
+            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
+                                     self.stable, self.distro)
+
+    def add_repo(self) -> None:
+        if self.stable or self.version:
+            content = ''
+            for n, t in {
+                    'Ceph': '$basearch',
+                    'Ceph-noarch': 'noarch',
+                    'Ceph-source': 'SRPMS'}.items():
+                content += '[%s]\n' % (n)
+                content += self.custom_repo(
+                    name='Ceph %s' % t,
+                    baseurl=self.repo_baseurl() + '/' + t,
+                    enabled=1,
+                    gpgcheck=1,
+                    gpgkey=self.repo_gpgkey()[0],
+                )
+                content += '\n\n'
+        else:
+            content = self.query_shaman(self.distro, self.distro_version,
+                                        self.branch,
+                                        self.commit)
+
+        logger.info('Writing repo to %s...' % self.repo_path())
+        with open(self.repo_path(), 'w') as f:
+            f.write(content)
+
+    def rm_repo(self) -> None:
+        if os.path.exists(self.repo_path()):
+            os.unlink(self.repo_path())
+
+    def install(self, ls: List[str]) -> None:
+        logger.info('Installing packages %s...' % ls)
+        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)
+
+    def install_podman(self) -> None:
+        self.install(['podman'])
+
+
+def create_packager(ctx: CephadmContext,
+                    stable: Optional[str] = None, version: Optional[str] = None,
+                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
+    distro, distro_version, distro_codename = get_distro()
+    if distro in YumDnf.DISTRO_NAMES:
+        return YumDnf(ctx, stable=stable, version=version,
+                      branch=branch, commit=commit,
+                      distro=distro, distro_version=distro_version)
+    elif distro in Apt.DISTRO_NAMES:
+        return Apt(ctx, stable=stable, version=version,
+                   branch=branch, commit=commit,
+                   distro=distro, distro_version=distro_version,
+                   distro_codename=distro_codename)
+    elif distro in Zypper.DISTRO_NAMES:
+        return Zypper(ctx, stable=stable, version=version,
+                      branch=branch, commit=commit,
+                      distro=distro, distro_version=distro_version)
+    raise Error('Distro %s version %s not supported' % (distro, distro_version))
+
+
+def command_add_repo(ctx: CephadmContext) -> None:
+    if ctx.version and ctx.release:
+        raise Error('you can specify either --release or --version but not both')
+    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
+        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
+    if ctx.version:
+        try:
+            (x, y, z) = ctx.version.split('.')
+        except Exception:
+            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
+    if ctx.release:
+        # Pacific =/= pacific in this case, set to undercase to avoid confusion
+        ctx.release = ctx.release.lower()
+
+    pkg = create_packager(ctx, stable=ctx.release,
+                          version=ctx.version,
+                          branch=ctx.dev,
+                          commit=ctx.dev_commit)
+    pkg.validate()
+    pkg.add_repo()
+    logger.info('Completed adding repo.')
+
+
+def command_rm_repo(ctx: CephadmContext) -> None:
+    pkg = create_packager(ctx)
+    pkg.rm_repo()
+
+
+def command_install(ctx: CephadmContext) -> None:
+    pkg = create_packager(ctx)
+    pkg.install(ctx.packages)
+
+
+def command_rescan_disks(ctx: CephadmContext) -> str:
+
+    def probe_hba(scan_path: str) -> None:
+        """Tell the adapter to rescan"""
+        with open(scan_path, 'w') as f:
+            f.write('- - -')
+
+    cmd = ctx.func.__name__.replace('command_', '')
+    logger.info(f'{cmd}: starting')
+    start = time.time()
+
+    all_scan_files = glob('/sys/class/scsi_host/*/scan')
+    scan_files = []
+    skipped = []
+    for scan_path in all_scan_files:
+        adapter_name = os.path.basename(os.path.dirname(scan_path))
+        proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
+        if proc_name in ['unknown', 'usb-storage']:
+            skipped.append(os.path.basename(scan_path))
+            logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
+            continue
+
+        scan_files.append(scan_path)
+
+    if not scan_files:
+        logger.info(f'{cmd}: no compatible HBAs found')
+        return 'Ok. No compatible HBAs found'
+
+    responses = async_run(concurrent_tasks(probe_hba, scan_files))
+    failures = [r for r in responses if r]
+
+    logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')
+
+    elapsed = time.time() - start
+    if failures:
+        plural = 's' if len(failures) > 1 else ''
+        if len(failures) == len(scan_files):
+            return f'Failed. All {len(scan_files)} rescan requests failed'
+        else:
+            return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'
+
+    return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
+
+##################################
+
+
+def get_ipv4_address(ifname):
+    # type: (str) -> str
+    def _extract(sock: socket.socket, offset: int) -> str:
+        return socket.inet_ntop(
+            socket.AF_INET,
+            fcntl.ioctl(
+                sock.fileno(),
+                offset,
+                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
+            )[20:24])
+
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    try:
+        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
+        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
+    except OSError:
+        # interface does not have an ipv4 address
+        return ''
+
+    dec_mask = sum([bin(int(i)).count('1')
+                    for i in dq_mask.split('.')])
+    return '{}/{}'.format(addr, dec_mask)
+
+
+def get_ipv6_address(ifname):
+    # type: (str) -> str
+    if not os.path.exists('/proc/net/if_inet6'):
+        return ''
+
+    raw = read_file(['/proc/net/if_inet6'])
+    data = raw.splitlines()
+    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
+    # field 0 is ipv6, field 2 is scope
+    for iface_setting in data:
+        field = iface_setting.split()
+        if field[-1] == ifname:
+            ipv6_raw = field[0]
+            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
+            # apply naming rules using ipaddress module
+            ipv6 = ipaddress.ip_address(ipv6_fmtd)
+            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
+    return ''
+
+
+def bytes_to_human(num, mode='decimal'):
+    # type: (float, str) -> str
+    """Convert a bytes value into it's human-readable form.
+
+    :param num: number, in bytes, to convert
+    :param mode: Either decimal (default) or binary to determine divisor
+    :returns: string representing the bytes value in a more readable format
+    """
+    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
+    divisor = 1000.0
+    yotta = 'YB'
+
+    if mode == 'binary':
+        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
+        divisor = 1024.0
+        yotta = 'YiB'
+
+    for unit in unit_list:
+        if abs(num) < divisor:
+            return '%3.1f%s' % (num, unit)
+        num /= divisor
+    return '%.1f%s' % (num, yotta)
+
+
+def read_file(path_list, file_name=''):
+    # type: (List[str], str) -> str
+    """Returns the content of the first file found within the `path_list`
+
+    :param path_list: list of file paths to search
+    :param file_name: optional file_name to be applied to a file path
+    :returns: content of the file or 'Unknown'
+    """
+    for path in path_list:
+        if file_name:
+            file_path = os.path.join(path, file_name)
+        else:
+            file_path = path
+        if os.path.exists(file_path):
+            with open(file_path, 'rb') as f:
+                try:
+                    content = f.read().decode('utf-8', 'ignore').strip()
+                except OSError:
+                    # sysfs may populate the file, but for devices like
+                    # virtio reads can fail
+                    return 'Unknown'
+                else:
+                    return content
+    return 'Unknown'
+
+##################################
+
+
+class Enclosure:
+    def __init__(self, enc_id: str, enc_path: str, dev_path: str):
+        """External disk enclosure metadata
+
+        Args:
+        :param enc_id: enclosure id (normally a WWN)
+        :param enc_path: sysfs path to HBA attached to the enclosure
+                         e.g. /sys/class/scsi_generic/sg11/device/enclosure/0:0:9:0
+        :param dev_path: sysfs path to the generic scsi device for the enclosure HBA
+                         e.g. /sys/class/scsi_generic/sg2
+        """
+        self._path: str = dev_path
+        self._dev_path: str = os.path.join(dev_path, 'device')
+        self._enc_path: str = enc_path
+        self.ses_paths: List[str] = []
+        self.path_count: int = 0
+        self.vendor: str = ''
+        self.model: str = ''
+        self.enc_id: str = enc_id
+        self.components: Union[int, str] = 0
+        self.device_lookup: Dict[str, str] = {}
+        self.device_count: int = 0
+        self.slot_map: Dict[str, Dict[str, str]] = {}
+
+        self._probe()
+
+    def _probe(self) -> None:
+        """Analyse the dev paths to identify enclosure related information"""
+
+        self.vendor = read_file([os.path.join(self._dev_path, 'vendor')])
+        self.model = read_file([os.path.join(self._dev_path, 'model')])
+        self.components = read_file([os.path.join(self._enc_path, 'components')])
+        slot_paths = glob(os.path.join(self._enc_path, '*', 'slot'))
+        for slot_path in slot_paths:
+            slot = read_file([slot_path])
+            serial_path = os.path.join(os.path.dirname(slot_path), 'device', 'vpd_pg80')
+            serial = ''
+            if os.path.exists(serial_path):
+                serial_raw = read_file([serial_path])
+                serial = (''.join(char for char in serial_raw if char in string.printable)).strip()
+                self.device_lookup[serial] = slot
+            slot_dir = os.path.dirname(slot_path)
+            self.slot_map[slot] = {
+                'status': read_file([os.path.join(slot_dir, 'status')]),
+                'fault': read_file([os.path.join(slot_dir, 'fault')]),
+                'locate': read_file([os.path.join(slot_dir, 'locate')]),
+                'serial': serial,
+            }
+
+        self.device_count = len(self.device_lookup)
+        self.update(os.path.basename(self._path))
+
+    def update(self, dev_id: str) -> None:
+        """Update an enclosure object with a related sg device name
+
+        :param dev_id (str): device name e.g. sg2
+        """
+        self.ses_paths.append(dev_id)
+        self.path_count = len(self.ses_paths)
+
+    def _dump(self) -> Dict[str, Any]:
+        """Return a dict representation of the object"""
+        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
+
+    def __str__(self) -> str:
+        """Return a formatted json representation of the object as a string"""
+        return json.dumps(self._dump(), indent=2)
+
+    def __repr__(self) -> str:
+        """Return a json representation of the object as a string"""
+        return json.dumps(self._dump())
+
+    def as_json(self) -> Dict[str, Any]:
+        """Return a dict representing the object"""
+        return self._dump()
+
+
+class HostFacts():
+    _dmi_path_list = ['/sys/class/dmi/id']
+    _nic_path_list = ['/sys/class/net']
+    _apparmor_path_list = ['/etc/apparmor']
+    _disk_vendor_workarounds = {
+        '0x1af4': 'Virtio Block Device'
+    }
+    _excluded_block_devices = ('sr', 'zram', 'dm-', 'loop', 'md')
+    _sg_generic_glob = '/sys/class/scsi_generic/*'
+
+    def __init__(self, ctx: CephadmContext):
+        self.ctx: CephadmContext = ctx
+        self.cpu_model: str = 'Unknown'
+        self.sysctl_options: Dict[str, str] = self._populate_sysctl_options()
+        self.cpu_count: int = 0
+        self.cpu_cores: int = 0
+        self.cpu_threads: int = 0
+        self.interfaces: Dict[str, Any] = {}
+
+        self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
+        self._get_cpuinfo()
+        self._process_nics()
+        self.arch: str = platform.processor()
+        self.kernel: str = platform.release()
+        self._enclosures = self._discover_enclosures()
+        self._block_devices = self._get_block_devs()
+        self._device_list = self._get_device_info()
+
+    def _populate_sysctl_options(self) -> Dict[str, str]:
+        sysctl_options = {}
+        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+        if out:
+            for line in out.splitlines():
+                option, value = line.split('=')
+                sysctl_options[option.strip()] = value.strip()
+        return sysctl_options
+
+    def _discover_enclosures(self) -> Dict[str, Enclosure]:
+        """Build a dictionary of discovered scsi enclosures
+
+        Enclosures are detected by walking the scsi generic sysfs hierarchy.
+        Any device tree that holds an 'enclosure' subdirectory is interpreted as
+        an enclosure. Once identified the enclosure directory is analysis to
+        identify key descriptors that will help relate disks to enclosures and
+        disks to enclosure slots.
+
+        :return: Dict[str, Enclosure]: a map of enclosure id (hex) to enclosure object
+        """
+        sg_paths: List[str] = glob(HostFacts._sg_generic_glob)
+        enclosures: Dict[str, Enclosure] = {}
+
+        for sg_path in sg_paths:
+            enc_path = os.path.join(sg_path, 'device', 'enclosure')
+            if os.path.exists(enc_path):
+                enc_dirs = glob(os.path.join(enc_path, '*'))
+                if len(enc_dirs) != 1:
+                    # incomplete enclosure spec - expecting ONE dir in the format
+                    # host(adapter):bus:target:lun e.g. 16:0:0:0
+                    continue
+                enc_path = enc_dirs[0]
+                enc_id = read_file([os.path.join(enc_path, 'id')])
+                if enc_id in enclosures:
+                    enclosures[enc_id].update(os.path.basename(sg_path))
+                    continue
+
+                enclosure = Enclosure(enc_id, enc_path, sg_path)
+                enclosures[enc_id] = enclosure
+
+        return enclosures
+
+    @property
+    def enclosures(self) -> Dict[str, Dict[str, Any]]:
+        """Dump the enclosure objects as dicts"""
+        return {k: v._dump() for k, v in self._enclosures.items()}
+
+    @property
+    def enclosure_count(self) -> int:
+        """Return the number of enclosures detected"""
+        return len(self._enclosures.keys())
+
+    def _get_cpuinfo(self):
+        # type: () -> None
+        """Determine cpu information via /proc/cpuinfo"""
+        raw = read_file(['/proc/cpuinfo'])
+        output = raw.splitlines()
+        cpu_set = set()
+
+        for line in output:
+            field = [f.strip() for f in line.split(':')]
+            if 'model name' in line:
+                self.cpu_model = field[1]
+            if 'physical id' in line:
+                cpu_set.add(field[1])
+            if 'siblings' in line:
+                self.cpu_threads = int(field[1].strip())
+            if 'cpu cores' in line:
+                self.cpu_cores = int(field[1].strip())
+            pass
+        self.cpu_count = len(cpu_set)
+
+    def _get_block_devs(self):
+        # type: () -> List[str]
+        """Determine the list of block devices by looking at /sys/block"""
+        return [dev for dev in os.listdir('/sys/block')
+                if not dev.startswith(HostFacts._excluded_block_devices)]
+
+    @property
+    def operating_system(self):
+        # type: () -> str
+        """Determine OS version"""
+        raw_info = read_file(['/etc/os-release'])
+        os_release = raw_info.splitlines()
+        rel_str = 'Unknown'
+        rel_dict = dict()
+
+        for line in os_release:
+            if '=' in line:
+                var_name, var_value = line.split('=')
+                rel_dict[var_name] = var_value.strip('"')
+
+        # Would normally use PRETTY_NAME, but NAME and VERSION are more
+        # consistent
+        if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
+            rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
+        return rel_str
+
+    @property
+    def hostname(self):
+        # type: () -> str
+        """Return the hostname"""
+        return platform.node()
+
+    @property
+    def shortname(self) -> str:
+        return platform.node().split('.', 1)[0]
+
+    @property
+    def fqdn(self) -> str:
+        return get_fqdn()
+
+    @property
+    def subscribed(self):
+        # type: () -> str
+        """Highlevel check to see if the host is subscribed to receive updates/support"""
+        def _red_hat():
+            # type: () -> str
+            # RHEL 7 and RHEL 8
+            entitlements_dir = '/etc/pki/entitlement'
+            if os.path.exists(entitlements_dir):
+                pems = glob('{}/*.pem'.format(entitlements_dir))
+                if len(pems) >= 2:
+                    return 'Yes'
+
+            return 'No'
+
+        os_name = self.operating_system
+        if os_name.upper().startswith('RED HAT'):
+            return _red_hat()
+
+        return 'Unknown'
+
+    @property
+    def hdd_count(self):
+        # type: () -> int
+        """Return a count of HDDs (spinners)"""
+        return len(self.hdd_list)
+
+    def _get_capacity(self, dev):
+        # type: (str) -> int
+        """Determine the size of a given device
+
+        The kernel always bases device size calculations based on a 512 byte
+        sector. For more information see
+        https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/linux/types.h?h=v5.15.63#n120
+        """
+        size_path = os.path.join('/sys/block', dev, 'size')
+        size_blocks = int(read_file([size_path]))
+        return size_blocks * 512
+
+    def _get_capacity_by_type(self, disk_type='hdd'):
+        # type: (str) -> int
+        """Return the total capacity of a category of device (flash or hdd)"""
+        capacity: int = 0
+        for dev in self._device_list:
+            if dev['disk_type'] == disk_type:
+                disk_capacity = cast(int, dev.get('disk_size_bytes', 0))
+                capacity += disk_capacity
+        return capacity
+
+    def _get_device_info(self):
+        # type: () -> List[Dict[str, object]]
+        """Return a 'pretty' name list for each unique device in the `dev_list`"""
+        disk_list = list()
+
+        # serial_num_lookup is a dict of serial number -> List of devices with that serial number
+        serial_num_lookup: Dict[str, List[str]] = {}
+
+        # make a map of devname -> disk path. this path name may indicate the physical slot
+        # of a drive (phyXX)
+        disk_path_map: Dict[str, str] = {}
+        for path in glob('/dev/disk/by-path/*'):
+            tgt_raw = Path(path).resolve()
+            tgt = os.path.basename(str(tgt_raw))
+            disk_path_map[tgt] = path
+
+        # make a map of holder (dm-XX) -> full mpath name
+        dm_device_map: Dict[str, str] = {}
+        for mpath in glob('/dev/mapper/mpath*'):
+            tgt_raw = Path(mpath).resolve()
+            tgt = os.path.basename(str(tgt_raw))
+            dm_device_map[tgt] = mpath
+
+        # main loop to process all eligible block devices
+        for dev in self._block_devices:
+            enclosure_id = ''
+            enclosure_slot = ''
+            scsi_addr = ''
+            mpath = ''
+
+            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
+            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
+            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
+            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
+            rotational = read_file(['/sys/block/{}/queue/rotational'.format(dev)])
+            holders_raw = glob('/sys/block/{}/holders/*'.format(dev))
+            if len(holders_raw) == 1:
+                # mpath will have 1 holder entry
+                holder = os.path.basename(holders_raw[0])
+                mpath = dm_device_map.get(holder, '')
+
+            disk_type = 'hdd' if rotational == '1' else 'flash'
+            scsi_addr_path = glob('/sys/block/{}/device/bsg/*'.format(dev))
+            if len(scsi_addr_path) == 1:
+                scsi_addr = os.path.basename(scsi_addr_path[0])
+
+            # vpd_pg80 isn't guaranteed (libvirt, vmware for example)
+            serial_raw = read_file(['/sys/block/{}/device/vpd_pg80'.format(dev)])
+            serial = (''.join(i for i in serial_raw if i in string.printable)).strip()
+            if serial.lower() == 'unknown':
+                serial = ''
+            else:
+                if serial in serial_num_lookup:
+                    serial_num_lookup[serial].append(dev)
+                else:
+                    serial_num_lookup[serial] = [dev]
+                for enc_id, enclosure in self._enclosures.items():
+                    if serial in enclosure.device_lookup.keys():
+                        enclosure_id = enc_id
+                        enclosure_slot = enclosure.device_lookup[serial]
+
+            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
+            disk_size_bytes = self._get_capacity(dev)
+            disk_list.append({
+                'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
+                'vendor': disk_vendor,
+                'model': disk_model,
+                'rev': disk_rev,
+                'wwid': disk_wwid,
+                'dev_name': dev,
+                'disk_size_bytes': disk_size_bytes,
+                'disk_type': disk_type,
+                'serial': serial,
+                'alt_dev_name': '',
+                'scsi_addr': scsi_addr,
+                'enclosure_id': enclosure_id,
+                'enclosure_slot': enclosure_slot,
+                'path_id': disk_path_map.get(dev, ''),
+                'mpath': mpath,
+            })
+
+        # process the devices to drop duplicate physical devs based on matching
+        # the unique serial number
+        disk_list_unique: List[Dict[str, Any]] = []
+        serials_seen: List[str] = []
+        for dev in disk_list:
+            serial = str(dev['serial'])
+            if serial:
+                if serial in serials_seen:
+                    continue
+                else:
+                    serials_seen.append(serial)
+                    devs = serial_num_lookup[serial].copy()
+                    devs.remove(str(dev['dev_name']))
+                    dev['alt_dev_name'] = ','.join(devs)
+            disk_list_unique.append(dev)
+
+        return disk_list_unique
+
+    @property
+    def hdd_list(self):
+        # type: () -> List[Dict[str, object]]
+        """Return a list of devices that are HDDs (spinners)"""
+        return [dev for dev in self._device_list if dev['disk_type'] == 'hdd']
+
+    @property
+    def flash_list(self):
+        # type: () -> List[Dict[str, object]]
+        """Return a list of devices that are flash based (SSD, NVMe)"""
+        return [dev for dev in self._device_list if dev['disk_type'] == 'flash']
+
+    @property
+    def hdd_capacity_bytes(self):
+        # type: () -> int
+        """Return the total capacity for all HDD devices (bytes)"""
+        return self._get_capacity_by_type(disk_type='hdd')
+
+    @property
+    def hdd_capacity(self):
+        # type: () -> str
+        """Return the total capacity for all HDD devices (human readable format)"""
+        return bytes_to_human(self.hdd_capacity_bytes)
+
+    @property
+    def cpu_load(self):
+        # type: () -> Dict[str, float]
+        """Return the cpu load average data for the host"""
+        raw = read_file(['/proc/loadavg']).strip()
+        data = raw.split()
+        return {
+            '1min': float(data[0]),
+            '5min': float(data[1]),
+            '15min': float(data[2]),
+        }
+
+    @property
+    def flash_count(self):
+        # type: () -> int
+        """Return the number of flash devices in the system (SSD, NVMe)"""
+        return len(self.flash_list)
+
+    @property
+    def flash_capacity_bytes(self):
+        # type: () -> int
+        """Return the total capacity for all flash devices (bytes)"""
+        return self._get_capacity_by_type(disk_type='flash')
+
+    @property
+    def flash_capacity(self):
+        # type: () -> str
+        """Return the total capacity for all Flash devices (human readable format)"""
+        return bytes_to_human(self.flash_capacity_bytes)
+
+    def _process_nics(self):
+        # type: () -> None
+        """Look at the NIC devices and extract network related metadata"""
+        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
+        hw_lookup = {
+            '1': 'ethernet',
+            '32': 'infiniband',
+            '772': 'loopback',
+        }
+
+        for nic_path in HostFacts._nic_path_list:
+            if not os.path.exists(nic_path):
+                continue
+            for iface in os.listdir(nic_path):
+
+                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
+                    nic_type = 'bridge'
+                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
+                    nic_type = 'bonding'
+                else:
+                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')
+
+                if nic_type == 'loopback':  # skip loopback devices
+                    continue
+
+                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
+                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]
+
+                try:
+                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
+                except ValueError:
+                    mtu = 0
+
+                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
+                try:
+                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
+                except (OSError, ValueError):
+                    # OSError : device doesn't support the ethtool get_link_ksettings
+                    # ValueError : raised when the read fails, and returns Unknown
+                    #
+                    # Either way, we show a -1 when speed isn't available
+                    speed = -1
+
+                dev_link = os.path.join(nic_path, iface, 'device')
+                if os.path.exists(dev_link):
+                    iftype = 'physical'
+                    driver_path = os.path.join(dev_link, 'driver')
+                    if os.path.exists(driver_path):
+                        driver = os.path.basename(os.path.realpath(driver_path))
+                    else:
+                        driver = 'Unknown'
+
+                else:
+                    iftype = 'logical'
+                    driver = ''
+
+                self.interfaces[iface] = {
+                    'mtu': mtu,
+                    'upper_devs_list': upper_devs_list,
+                    'lower_devs_list': lower_devs_list,
+                    'operstate': operstate,
+                    'iftype': iftype,
+                    'nic_type': nic_type,
+                    'driver': driver,
+                    'speed': speed,
+                    'ipv4_address': get_ipv4_address(iface),
+                    'ipv6_address': get_ipv6_address(iface),
+                }
+
+    @property
+    def nic_count(self):
+        # type: () -> int
+        """Return a total count of all physical NICs detected in the host"""
+        phys_devs = []
+        for iface in self.interfaces:
+            if self.interfaces[iface]['iftype'] == 'physical':
+                phys_devs.append(iface)
+        return len(phys_devs)
+
+    def _get_mem_data(self, field_name):
+        # type: (str) -> int
+        for line in self._meminfo:
+            if line.startswith(field_name):
+                _d = line.split()
+                return int(_d[1])
+        return 0
+
+    @property
+    def memory_total_kb(self):
+        # type: () -> int
+        """Determine the memory installed (kb)"""
+        return self._get_mem_data('MemTotal')
+
+    @property
+    def memory_free_kb(self):
+        # type: () -> int
+        """Determine the memory free (not cache, immediately usable)"""
+        return self._get_mem_data('MemFree')
+
+    @property
+    def memory_available_kb(self):
+        # type: () -> int
+        """Determine the memory available to new applications without swapping"""
+        return self._get_mem_data('MemAvailable')
+
+    @property
+    def vendor(self):
+        # type: () -> str
+        """Determine server vendor from DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'sys_vendor')
+
+    @property
+    def model(self):
+        # type: () -> str
+        """Determine server model information from DMI data in sysfs"""
+        family = read_file(HostFacts._dmi_path_list, 'product_family')
+        product = read_file(HostFacts._dmi_path_list, 'product_name')
+        if family == 'Unknown' and product:
+            return '{}'.format(product)
+
+        return '{} ({})'.format(family, product)
+
+    @property
+    def bios_version(self):
+        # type: () -> str
+        """Determine server BIOS version from  DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'bios_version')
+
+    @property
+    def bios_date(self):
+        # type: () -> str
+        """Determine server BIOS date from  DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'bios_date')
+
+    @property
+    def chassis_serial(self):
+        # type: () -> str
+        """Determine chassis serial number from DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'chassis_serial')
+
+    @property
+    def board_serial(self):
+        # type: () -> str
+        """Determine mainboard serial number from DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'board_serial')
+
+    @property
+    def product_serial(self):
+        # type: () -> str
+        """Determine server's serial number from DMI data in sysfs"""
+        return read_file(HostFacts._dmi_path_list, 'product_serial')
+
+    @property
+    def timestamp(self):
+        # type: () -> float
+        """Return the current time as Epoch seconds"""
+        return time.time()
+
+    @property
+    def system_uptime(self):
+        # type: () -> float
+        """Return the system uptime (in secs)"""
+        raw_time = read_file(['/proc/uptime'])
+        up_secs, _ = raw_time.split()
+        return float(up_secs)
+
+    @property
+    def kernel_security(self):
+        # type: () -> Dict[str, str]
+        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
+        def _fetch_selinux() -> Dict[str, str]:
+            """Get the selinux status"""
+            security = {}
+            try:
+                out, err, code = call(self.ctx, ['sestatus'],
+                                      verbosity=CallVerbosity.QUIET)
+                security['type'] = 'SELinux'
+                status, mode, policy = '', '', ''
+                for line in out.split('\n'):
+                    if line.startswith('SELinux status:'):
+                        k, v = line.split(':')
+                        status = v.strip()
+                    elif line.startswith('Current mode:'):
+                        k, v = line.split(':')
+                        mode = v.strip()
+                    elif line.startswith('Loaded policy name:'):
+                        k, v = line.split(':')
+                        policy = v.strip()
+                if status == 'disabled':
+                    security['description'] = 'SELinux: Disabled'
+                else:
+                    security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
+            except Exception as e:
+                logger.info('unable to get selinux status: %s' % e)
+            return security
+
+        def _fetch_apparmor() -> Dict[str, str]:
+            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
+            security = {}
+            for apparmor_path in HostFacts._apparmor_path_list:
+                if os.path.exists(apparmor_path):
+                    security['type'] = 'AppArmor'
+                    security['description'] = 'AppArmor: Enabled'
+                    try:
+                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
+                        if len(profiles) == 0:
+                            return {}
+                    except OSError:
+                        pass
+                    else:
+                        summary = {}  # type: Dict[str, int]
+                        for line in profiles.split('\n'):
+                            item, mode = line.split(' ')
+                            mode = mode.strip('()')
+                            if mode in summary:
+                                summary[mode] += 1
+                            else:
+                                summary[mode] = 0
+                        summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
+                        security = {**security, **summary}  # type: ignore
+                        security['description'] += '({})'.format(summary_str)
+
+                    return security
+            return {}
+
+        ret = {}
+        if os.path.exists('/sys/kernel/security/lsm'):
+            lsm = read_file(['/sys/kernel/security/lsm']).strip()
+            if 'selinux' in lsm:
+                ret = _fetch_selinux()
+            elif 'apparmor' in lsm:
+                ret = _fetch_apparmor()
+            else:
+                return {
+                    'type': 'Unknown',
+                    'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
+                }
+
+        if ret:
+            return ret
+
+        return {
+            'type': 'None',
+            'description': 'Linux Security Module framework is not available'
+        }
+
+    @property
+    def selinux_enabled(self) -> bool:
+        return (self.kernel_security['type'] == 'SELinux') and \
+               (self.kernel_security['description'] != 'SELinux: Disabled')
+
+    @property
+    def kernel_parameters(self):
+        # type: () -> Dict[str, str]
+        """Get kernel parameters required/used in Ceph clusters"""
+
+        k_param = {}
+        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
+        if out:
+            param_list = out.split('\n')
+            param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}
+
+            # return only desired parameters
+            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
+                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
+
+        return k_param
+
+    @staticmethod
+    def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
+        listening_ports = []
+        # Connections state documentation
+        # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
+        # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
+        listening_state = {
+            'tcp': '0A',
+            'udp': '07'
+        }
+
+        if protocol not in listening_state.keys():
+            return []
+
+        if os.path.exists(tcp_file):
+            with open(tcp_file) as f:
+                tcp_data = f.readlines()[1:]
+
+            for con in tcp_data:
+                con_info = con.strip().split()
+                if con_info[3] == listening_state[protocol]:
+                    local_port = int(con_info[1].split(':')[1], 16)
+                    listening_ports.append(local_port)
+
+        return listening_ports
+
+    @property
+    def tcp_ports_used(self) -> List[int]:
+        return HostFacts._process_net_data('/proc/net/tcp')
+
+    @property
+    def tcp6_ports_used(self) -> List[int]:
+        return HostFacts._process_net_data('/proc/net/tcp6')
+
+    @property
+    def udp_ports_used(self) -> List[int]:
+        return HostFacts._process_net_data('/proc/net/udp', 'udp')
+
+    @property
+    def udp6_ports_used(self) -> List[int]:
+        return HostFacts._process_net_data('/proc/net/udp6', 'udp')
+
+    def dump(self):
+        # type: () -> str
+        """Return the attributes of this HostFacts object as json"""
+        data = {
+            k: getattr(self, k) for k in dir(self)
+            if not k.startswith('_')
+            and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
+        }
+        return json.dumps(data, indent=2, sort_keys=True)
+
+##################################
+
+
+def command_gather_facts(ctx: CephadmContext) -> None:
+    """gather_facts is intended to provide host related metadata to the caller"""
+    host = HostFacts(ctx)
+    print(host.dump())
+
+
+##################################
+
+
+def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
+    # TODO: UNITTEST
+    return os.path.exists(
+        os.path.join(
+            ctx.unit_dir,
+            f'{subsystem}.target.wants',
+            target_name
+        )
+    )
+
+
+def target_exists(ctx: CephadmContext) -> bool:
+    return os.path.exists(ctx.unit_dir + '/ceph.target')
+
+
+@infer_fsid
+def command_maintenance(ctx: CephadmContext) -> str:
+    if not ctx.fsid:
+        raise Error('failed - must pass --fsid to specify cluster')
+
+    target = f'ceph-{ctx.fsid}.target'
+
+    if ctx.maintenance_action.lower() == 'enter':
+        logger.info('Requested to place host into maintenance')
+        if systemd_target_state(ctx, target):
+            _out, _err, code = call(ctx,
+                                    ['systemctl', 'disable', target],
+                                    verbosity=CallVerbosity.DEBUG)
+            if code:
+                logger.error(f'Failed to disable the {target} target')
+                return 'failed - to disable the target'
+            else:
+                # stopping a target waits by default
+                _out, _err, code = call(ctx,
+                                        ['systemctl', 'stop', target],
+                                        verbosity=CallVerbosity.DEBUG)
+                if code:
+                    logger.error(f'Failed to stop the {target} target')
+                    return 'failed - to disable the target'
+                else:
+                    return f'success - systemd target {target} disabled'
+
+        else:
+            return 'skipped - target already disabled'
+
+    else:
+        logger.info('Requested to exit maintenance state')
+        # if we've never deployed a daemon on this host there will be no systemd
+        # target to disable so attempting a disable will fail. We still need to
+        # return success here or host will be permanently stuck in maintenance mode
+        # as no daemons can be deployed so no systemd target will ever exist to disable.
+        if not target_exists(ctx):
+            return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
+        # exit maintenance request
+        if not systemd_target_state(ctx, target):
+            _out, _err, code = call(ctx,
+                                    ['systemctl', 'enable', target],
+                                    verbosity=CallVerbosity.DEBUG)
+            if code:
+                logger.error(f'Failed to enable the {target} target')
+                return 'failed - unable to enable the target'
+            else:
+                # starting a target waits by default
+                _out, _err, code = call(ctx,
+                                        ['systemctl', 'start', target],
+                                        verbosity=CallVerbosity.DEBUG)
+                if code:
+                    logger.error(f'Failed to start the {target} target')
+                    return 'failed - unable to start the target'
+                else:
+                    return f'success - systemd target {target} enabled and started'
+        return f'success - systemd target {target} enabled and started'
+
+##################################
+
+
+class ArgumentFacade:
+    def __init__(self) -> None:
+        self.defaults: Dict[str, Any] = {}
+
+    def add_argument(self, *args: Any, **kwargs: Any) -> None:
+        if not args:
+            raise ValueError('expected at least one argument')
+        name = args[0]
+        if not name.startswith('--'):
+            raise ValueError(f'expected long option, got: {name!r}')
+        name = name[2:].replace('-', '_')
+        value = kwargs.pop('default', None)
+        self.defaults[name] = value
+
+    def apply(self, ctx: CephadmContext) -> None:
+        for key, value in self.defaults.items():
+            setattr(ctx, key, value)
+
+
+def _add_deploy_parser_args(
+    parser_deploy: Union[argparse.ArgumentParser, ArgumentFacade],
+) -> None:
+    parser_deploy.add_argument(
+        '--config', '-c',
+        help='config file for new daemon')
+    parser_deploy.add_argument(
+        '--config-json',
+        help='Additional configuration information in JSON format')
+    parser_deploy.add_argument(
+        '--keyring',
+        help='keyring for new daemon')
+    parser_deploy.add_argument(
+        '--key',
+        help='key for new daemon')
+    parser_deploy.add_argument(
+        '--osd-fsid',
+        help='OSD uuid, if creating an OSD container')
+    parser_deploy.add_argument(
+        '--skip-firewalld',
+        action='store_true',
+        help='Do not configure firewalld')
+    parser_deploy.add_argument(
+        '--tcp-ports',
+        help='List of tcp ports to open in the host firewall')
+    parser_deploy.add_argument(
+        '--port-ips',
+        help='JSON dict mapping ports to IPs they need to be bound on'
+    )
+    parser_deploy.add_argument(
+        '--reconfig',
+        action='store_true',
+        help='Reconfigure a previously deployed daemon')
+    parser_deploy.add_argument(
+        '--allow-ptrace',
+        action='store_true',
+        help='Allow SYS_PTRACE on daemon container')
+    parser_deploy.add_argument(
+        '--container-init',
+        action='store_true',
+        default=CONTAINER_INIT,
+        help=argparse.SUPPRESS)
+    parser_deploy.add_argument(
+        '--memory-request',
+        help='Container memory request/target'
+    )
+    parser_deploy.add_argument(
+        '--memory-limit',
+        help='Container memory hard limit'
+    )
+    parser_deploy.add_argument(
+        '--meta-json',
+        help='JSON dict of additional metadata'
+    )
+    parser_deploy.add_argument(
+        '--extra-container-args',
+        action='append',
+        default=[],
+        help='Additional container arguments to apply to daemon'
+    )
+    parser_deploy.add_argument(
+        '--extra-entrypoint-args',
+        action='append',
+        default=[],
+        help='Additional entrypoint arguments to apply to deamon'
+    )
+
+
+def _get_parser():
+    # type: () -> argparse.ArgumentParser
+    parser = argparse.ArgumentParser(
+        description='Bootstrap Ceph daemons with systemd and containers.',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '--image',
+        help='container image. Can also be set via the "CEPHADM_IMAGE" '
+        'env var')
+    parser.add_argument(
+        '--docker',
+        action='store_true',
+        help='use docker instead of podman')
+    parser.add_argument(
+        '--data-dir',
+        default=DATA_DIR,
+        help='base directory for daemon data')
+    parser.add_argument(
+        '--log-dir',
+        default=LOG_DIR,
+        help='base directory for daemon logs')
+    parser.add_argument(
+        '--logrotate-dir',
+        default=LOGROTATE_DIR,
+        help='location of logrotate configuration files')
+    parser.add_argument(
+        '--sysctl-dir',
+        default=SYSCTL_DIR,
+        help='location of sysctl configuration files')
+    parser.add_argument(
+        '--unit-dir',
+        default=UNIT_DIR,
+        help='base directory for systemd units')
+    parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='Show debug-level log messages')
+    parser.add_argument(
+        '--timeout',
+        type=int,
+        default=DEFAULT_TIMEOUT,
+        help='timeout in seconds')
+    parser.add_argument(
+        '--retry',
+        type=int,
+        default=DEFAULT_RETRY,
+        help='max number of retries')
+    parser.add_argument(
+        '--env', '-e',
+        action='append',
+        default=[],
+        help='set environment variable')
+    parser.add_argument(
+        '--no-container-init',
+        action='store_true',
+        default=not CONTAINER_INIT,
+        help='Do not run podman/docker with `--init`')
+    parser.add_argument(
+        '--no-cgroups-split',
+        action='store_true',
+        default=False,
+        help='Do not run containers with --cgroups=split (currently only relevant when using podman)')
+
+    subparsers = parser.add_subparsers(help='sub-command')
+
+    parser_version = subparsers.add_parser(
+        'version', help='get cephadm version')
+    parser_version.set_defaults(func=command_version)
+
+    parser_pull = subparsers.add_parser(
+        'pull', help='pull the default container image')
+    parser_pull.set_defaults(func=command_pull)
+    parser_pull.add_argument(
+        '--insecure',
+        action='store_true',
+        help=argparse.SUPPRESS,
+    )
+
+    parser_inspect_image = subparsers.add_parser(
+        'inspect-image', help='inspect local container image')
+    parser_inspect_image.set_defaults(func=command_inspect_image)
+
+    parser_ls = subparsers.add_parser(
+        'ls', help='list daemon instances on this host')
+    parser_ls.set_defaults(func=command_ls)
+    parser_ls.add_argument(
+        '--no-detail',
+        action='store_true',
+        help='Do not include daemon status')
+    parser_ls.add_argument(
+        '--legacy-dir',
+        default='/',
+        help='base directory for legacy daemon data')
+
+    parser_list_networks = subparsers.add_parser(
+        'list-networks', help='list IP networks')
+    parser_list_networks.set_defaults(func=command_list_networks)
+
+    parser_adopt = subparsers.add_parser(
+        'adopt', help='adopt daemon deployed with a different tool')
+    parser_adopt.set_defaults(func=command_adopt)
+    parser_adopt.add_argument(
+        '--name', '-n',
+        required=True,
+        help='daemon name (type.id)')
+    parser_adopt.add_argument(
+        '--style',
+        required=True,
+        help='deployment style (legacy, ...)')
+    parser_adopt.add_argument(
+        '--cluster',
+        default='ceph',
+        help='cluster name')
+    parser_adopt.add_argument(
+        '--legacy-dir',
+        default='/',
+        help='base directory for legacy daemon data')
+    parser_adopt.add_argument(
+        '--config-json',
+        help='Additional configuration information in JSON format')
+    parser_adopt.add_argument(
+        '--skip-firewalld',
+        action='store_true',
+        help='Do not configure firewalld')
+    parser_adopt.add_argument(
+        '--skip-pull',
+        action='store_true',
+        help='do not pull the default image before adopting')
+    parser_adopt.add_argument(
+        '--force-start',
+        action='store_true',
+        help='start newly adopted daemon, even if it was not running previously')
+    parser_adopt.add_argument(
+        '--container-init',
+        action='store_true',
+        default=CONTAINER_INIT,
+        help=argparse.SUPPRESS)
+
+    parser_rm_daemon = subparsers.add_parser(
+        'rm-daemon', help='remove daemon instance')
+    parser_rm_daemon.set_defaults(func=command_rm_daemon)
+    parser_rm_daemon.add_argument(
+        '--name', '-n',
+        required=True,
+        action=CustomValidation,
+        help='daemon name (type.id)')
+    parser_rm_daemon.add_argument(
+        '--tcp-ports',
+        help='List of tcp ports to close in the host firewall')
+    parser_rm_daemon.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    parser_rm_daemon.add_argument(
+        '--force',
+        action='store_true',
+        help='proceed, even though this may destroy valuable data')
+    parser_rm_daemon.add_argument(
+        '--force-delete-data',
+        action='store_true',
+        help='delete valuable daemon data instead of making a backup')
+
+    parser_rm_cluster = subparsers.add_parser(
+        'rm-cluster', help='remove all daemons for a cluster')
+    parser_rm_cluster.set_defaults(func=command_rm_cluster)
+    parser_rm_cluster.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    parser_rm_cluster.add_argument(
+        '--force',
+        action='store_true',
+        help='proceed, even though this may destroy valuable data')
+    parser_rm_cluster.add_argument(
+        '--keep-logs',
+        action='store_true',
+        help='do not remove log files')
+    parser_rm_cluster.add_argument(
+        '--zap-osds',
+        action='store_true',
+        help='zap OSD devices for this cluster')
+
+    parser_run = subparsers.add_parser(
+        'run', help='run a ceph daemon, in a container, in the foreground')
+    parser_run.set_defaults(func=command_run)
+    parser_run.add_argument(
+        '--name', '-n',
+        required=True,
+        help='daemon name (type.id)')
+    parser_run.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+
+    parser_shell = subparsers.add_parser(
+        'shell', help='run an interactive shell inside a daemon container')
+    parser_shell.set_defaults(func=command_shell)
+    parser_shell.add_argument(
+        '--shared_ceph_folder',
+        metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
+    parser_shell.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_shell.add_argument(
+        '--name', '-n',
+        help='daemon name (type.id)')
+    parser_shell.add_argument(
+        '--config', '-c',
+        help='ceph.conf to pass through to the container')
+    parser_shell.add_argument(
+        '--keyring', '-k',
+        help='ceph.keyring to pass through to the container')
+    parser_shell.add_argument(
+        '--mount', '-m',
+        help=('mount a file or directory in the container. '
+              'Support multiple mounts. '
+              'ie: `--mount /foo /bar:/bar`. '
+              'When no destination is passed, default is /mnt'),
+        nargs='+')
+    parser_shell.add_argument(
+        '--env', '-e',
+        action='append',
+        default=[],
+        help='set environment variable')
+    parser_shell.add_argument(
+        '--volume', '-v',
+        action='append',
+        default=[],
+        help='set environment variable')
+    parser_shell.add_argument(
+        'command', nargs=argparse.REMAINDER,
+        help='command (optional)')
+    parser_shell.add_argument(
+        '--no-hosts',
+        action='store_true',
+        help='dont pass /etc/hosts through to the container')
+    parser_shell.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='print, but do not execute, the container command to start the shell')
+
+    parser_enter = subparsers.add_parser(
+        'enter', help='run an interactive shell inside a running daemon container')
+    parser_enter.set_defaults(func=command_enter)
+    parser_enter.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_enter.add_argument(
+        '--name', '-n',
+        required=True,
+        help='daemon name (type.id)')
+    parser_enter.add_argument(
+        'command', nargs=argparse.REMAINDER,
+        help='command')
+
+    parser_ceph_volume = subparsers.add_parser(
+        'ceph-volume', help='run ceph-volume inside a container')
+    parser_ceph_volume.set_defaults(func=command_ceph_volume)
+    parser_ceph_volume.add_argument(
+        '--shared_ceph_folder',
+        metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
+    parser_ceph_volume.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_ceph_volume.add_argument(
+        '--config-json',
+        help='JSON file with config and (client.bootstrap-osd) key')
+    parser_ceph_volume.add_argument(
+        '--config', '-c',
+        help='ceph conf file')
+    parser_ceph_volume.add_argument(
+        '--keyring', '-k',
+        help='ceph.keyring to pass through to the container')
+    parser_ceph_volume.add_argument(
+        'command', nargs=argparse.REMAINDER,
+        help='command')
+
+    parser_zap_osds = subparsers.add_parser(
+        'zap-osds', help='zap all OSDs associated with a particular fsid')
+    parser_zap_osds.set_defaults(func=command_zap_osds)
+    parser_zap_osds.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    parser_zap_osds.add_argument(
+        '--force',
+        action='store_true',
+        help='proceed, even though this may destroy valuable data')
+
+    parser_unit = subparsers.add_parser(
+        'unit', help="operate on the daemon's systemd unit")
+    parser_unit.set_defaults(func=command_unit)
+    parser_unit.add_argument(
+        'command',
+        help='systemd command (start, stop, restart, enable, disable, ...)')
+    parser_unit.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_unit.add_argument(
+        '--name', '-n',
+        required=True,
+        help='daemon name (type.id)')
+
+    parser_logs = subparsers.add_parser(
+        'logs', help='print journald logs for a daemon container')
+    parser_logs.set_defaults(func=command_logs)
+    parser_logs.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_logs.add_argument(
+        '--name', '-n',
+        required=True,
+        help='daemon name (type.id)')
+    parser_logs.add_argument(
+        'command', nargs='*',
+        help='additional journalctl args')
+
+    parser_bootstrap = subparsers.add_parser(
+        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
+    parser_bootstrap.set_defaults(func=command_bootstrap)
+    parser_bootstrap.add_argument(
+        '--config', '-c',
+        help='ceph conf file to incorporate')
+    parser_bootstrap.add_argument(
+        '--mon-id',
+        required=False,
+        help='mon id (default: local hostname)')
+    group = parser_bootstrap.add_mutually_exclusive_group()
+    group.add_argument(
+        '--mon-addrv',
+        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
+    group.add_argument(
+        '--mon-ip',
+        help='mon IP')
+    parser_bootstrap.add_argument(
+        '--mgr-id',
+        required=False,
+        help='mgr id (default: randomly generated)')
+    parser_bootstrap.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_bootstrap.add_argument(
+        '--output-dir',
+        default='/etc/ceph',
+        help='directory to write config, keyring, and pub key files')
+    parser_bootstrap.add_argument(
+        '--output-keyring',
+        help='location to write keyring file with new cluster admin and mon keys')
+    parser_bootstrap.add_argument(
+        '--output-config',
+        help='location to write conf file to connect to new cluster')
+    parser_bootstrap.add_argument(
+        '--output-pub-ssh-key',
+        help="location to write the cluster's public SSH key")
+    parser_bootstrap.add_argument(
+        '--skip-admin-label',
+        action='store_true',
+        help='do not create admin label for ceph.conf and client.admin keyring distribution')
+    parser_bootstrap.add_argument(
+        '--skip-ssh',
+        action='store_true',
+        help='skip setup of ssh key on local host')
+    parser_bootstrap.add_argument(
+        '--initial-dashboard-user',
+        default='admin',
+        help='Initial user for the dashboard')
+    parser_bootstrap.add_argument(
+        '--initial-dashboard-password',
+        help='Initial password for the initial dashboard user')
+    parser_bootstrap.add_argument(
+        '--ssl-dashboard-port',
+        type=int,
+        default=8443,
+        help='Port number used to connect with dashboard using SSL')
+    parser_bootstrap.add_argument(
+        '--dashboard-key',
+        type=argparse.FileType('r'),
+        help='Dashboard key')
+    parser_bootstrap.add_argument(
+        '--dashboard-crt',
+        type=argparse.FileType('r'),
+        help='Dashboard certificate')
+
+    parser_bootstrap.add_argument(
+        '--ssh-config',
+        type=argparse.FileType('r'),
+        help='SSH config')
+    parser_bootstrap.add_argument(
+        '--ssh-private-key',
+        type=argparse.FileType('r'),
+        help='SSH private key')
+    parser_bootstrap.add_argument(
+        '--ssh-public-key',
+        type=argparse.FileType('r'),
+        help='SSH public key')
+    parser_bootstrap.add_argument(
+        '--ssh-signed-cert',
+        type=argparse.FileType('r'),
+        help='Signed cert for setups using CA signed SSH keys')
+    parser_bootstrap.add_argument(
+        '--ssh-user',
+        default='root',
+        help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
+    parser_bootstrap.add_argument(
+        '--skip-mon-network',
+        action='store_true',
+        help='set mon public_network based on bootstrap mon ip')
+    parser_bootstrap.add_argument(
+        '--skip-dashboard',
+        action='store_true',
+        help='do not enable the Ceph Dashboard')
+    parser_bootstrap.add_argument(
+        '--dashboard-password-noupdate',
+        action='store_true',
+        help='stop forced dashboard password change')
+    parser_bootstrap.add_argument(
+        '--no-minimize-config',
+        action='store_true',
+        help='do not assimilate and minimize the config file')
+    parser_bootstrap.add_argument(
+        '--skip-ping-check',
+        action='store_true',
+        help='do not verify that mon IP is pingable')
+    parser_bootstrap.add_argument(
+        '--skip-pull',
+        action='store_true',
+        help='do not pull the default image before bootstrapping')
+    parser_bootstrap.add_argument(
+        '--skip-firewalld',
+        action='store_true',
+        help='Do not configure firewalld')
+    parser_bootstrap.add_argument(
+        '--allow-overwrite',
+        action='store_true',
+        help='allow overwrite of existing --output-* config/keyring/ssh files')
+    parser_bootstrap.add_argument(
+        '--cleanup-on-failure',
+        action='store_true',
+        default=False,
+        help='Delete cluster files in case of a failed installation')
+    parser_bootstrap.add_argument(
+        '--allow-fqdn-hostname',
+        action='store_true',
+        help='allow hostname that is fully-qualified (contains ".")')
+    parser_bootstrap.add_argument(
+        '--allow-mismatched-release',
+        action='store_true',
+        help="allow bootstrap of ceph that doesn't match this version of cephadm")
+    parser_bootstrap.add_argument(
+        '--skip-prepare-host',
+        action='store_true',
+        help='Do not prepare host')
+    parser_bootstrap.add_argument(
+        '--orphan-initial-daemons',
+        action='store_true',
+        help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
+    parser_bootstrap.add_argument(
+        '--skip-monitoring-stack',
+        action='store_true',
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
+    parser_bootstrap.add_argument(
+        '--with-centralized-logging',
+        action='store_true',
+        help='Automatically provision centralized logging (promtail, loki)')
+    parser_bootstrap.add_argument(
+        '--apply-spec',
+        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
+    parser_bootstrap.add_argument(
+        '--shared_ceph_folder',
+        metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
+
+    parser_bootstrap.add_argument(
+        '--registry-url',
+        help='url for custom registry')
+    parser_bootstrap.add_argument(
+        '--registry-username',
+        help='username for custom registry')
+    parser_bootstrap.add_argument(
+        '--registry-password',
+        help='password for custom registry')
+    parser_bootstrap.add_argument(
+        '--registry-json',
+        help='json file with custom registry login info (URL, Username, Password)')
+    parser_bootstrap.add_argument(
+        '--container-init',
+        action='store_true',
+        default=CONTAINER_INIT,
+        help=argparse.SUPPRESS)
+    parser_bootstrap.add_argument(
+        '--cluster-network',
+        help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
+    parser_bootstrap.add_argument(
+        '--single-host-defaults',
+        action='store_true',
+        help='adjust configuration defaults to suit a single-host cluster')
+    parser_bootstrap.add_argument(
+        '--log-to-file',
+        action='store_true',
+        help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
+
+    parser_deploy = subparsers.add_parser(
+        'deploy', help='deploy a daemon')
+    parser_deploy.set_defaults(func=command_deploy)
+    parser_deploy.add_argument(
+        '--name',
+        required=True,
+        action=CustomValidation,
+        help='daemon name (type.id)')
+    parser_deploy.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    _add_deploy_parser_args(parser_deploy)
+
+    parser_orch = subparsers.add_parser(
+        '_orch',
+    )
+    subparsers_orch = parser_orch.add_subparsers(
+        title='Orchestrator Driven Commands',
+        description='Commands that are typically only run by cephadm mgr module',
+    )
+
+    parser_deploy_from = subparsers_orch.add_parser(
+        'deploy', help='deploy a daemon')
+    parser_deploy_from.set_defaults(func=command_deploy_from)
+    # currently cephadm mgr module passes an fsid option on the CLI too
+    # TODO: remove this and always source fsid from the JSON?
+    parser_deploy_from.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_deploy_from.add_argument(
+        'source',
+        default='-',
+        nargs='?',
+        help='Configuration input source file',
+    )
+
+    parser_check_host = subparsers.add_parser(
+        'check-host', help='check host configuration')
+    parser_check_host.set_defaults(func=command_check_host)
+    parser_check_host.add_argument(
+        '--expect-hostname',
+        help='Check that hostname matches an expected value')
+
+    parser_prepare_host = subparsers.add_parser(
+        'prepare-host', help='prepare a host for cephadm use')
+    parser_prepare_host.set_defaults(func=command_prepare_host)
+    parser_prepare_host.add_argument(
+        '--expect-hostname',
+        help='Set hostname')
+
+    parser_add_repo = subparsers.add_parser(
+        'add-repo', help='configure package repository')
+    parser_add_repo.set_defaults(func=command_add_repo)
+    parser_add_repo.add_argument(
+        '--release',
+        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
+    parser_add_repo.add_argument(
+        '--version',
+        help='use specific upstream version (x.y.z)')
+    parser_add_repo.add_argument(
+        '--dev',
+        help='use specified bleeding edge build from git branch or tag')
+    parser_add_repo.add_argument(
+        '--dev-commit',
+        help='use specified bleeding edge build from git commit')
+    parser_add_repo.add_argument(
+        '--gpg-url',
+        help='specify alternative GPG key location')
+    parser_add_repo.add_argument(
+        '--repo-url',
+        default='https://download.ceph.com',
+        help='specify alternative repo location')
+    # TODO: proxy?
+
+    parser_rm_repo = subparsers.add_parser(
+        'rm-repo', help='remove package repository configuration')
+    parser_rm_repo.set_defaults(func=command_rm_repo)
+
+    parser_install = subparsers.add_parser(
+        'install', help='install ceph package(s)')
+    parser_install.set_defaults(func=command_install)
+    parser_install.add_argument(
+        'packages', nargs='*',
+        default=['cephadm'],
+        help='packages')
+
+    parser_registry_login = subparsers.add_parser(
+        'registry-login', help='log host into authenticated registry')
+    parser_registry_login.set_defaults(func=command_registry_login)
+    parser_registry_login.add_argument(
+        '--registry-url',
+        help='url for custom registry')
+    parser_registry_login.add_argument(
+        '--registry-username',
+        help='username for custom registry')
+    parser_registry_login.add_argument(
+        '--registry-password',
+        help='password for custom registry')
+    parser_registry_login.add_argument(
+        '--registry-json',
+        help='json file with custom registry login info (URL, Username, Password)')
+    parser_registry_login.add_argument(
+        '--fsid',
+        help='cluster FSID')
+
+    parser_gather_facts = subparsers.add_parser(
+        'gather-facts', help='gather and return host related information (JSON format)')
+    parser_gather_facts.set_defaults(func=command_gather_facts)
+
+    parser_maintenance = subparsers.add_parser(
+        'host-maintenance', help='Manage the maintenance state of a host')
+    parser_maintenance.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    parser_maintenance.add_argument(
+        'maintenance_action',
+        type=str,
+        choices=['enter', 'exit'],
+        help='Maintenance action - enter maintenance, or exit maintenance')
+    parser_maintenance.set_defaults(func=command_maintenance)
+
+    parser_agent = subparsers.add_parser(
+        'agent', help='start cephadm agent')
+    parser_agent.set_defaults(func=command_agent)
+    parser_agent.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    parser_agent.add_argument(
+        '--daemon-id',
+        help='daemon id for agent')
+
+    parser_disk_rescan = subparsers.add_parser(
+        'disk-rescan', help='rescan all HBAs to detect new/removed devices')
+    parser_disk_rescan.set_defaults(func=command_rescan_disks)
+
+    return parser
+
+
+def _parse_args(av: List[str]) -> argparse.Namespace:
+    parser = _get_parser()
+
+    args = parser.parse_args(av)
+    if 'command' in args and args.command and args.command[0] == '--':
+        args.command.pop(0)
+
+    # workaround argparse to deprecate the subparser `--container-init` flag
+    # container_init and no_container_init must always be mutually exclusive
+    container_init_args = ('--container-init', '--no-container-init')
+    if set(container_init_args).issubset(av):
+        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
+    elif '--container-init' in av:
+        args.no_container_init = not args.container_init
+    else:
+        args.container_init = not args.no_container_init
+    assert args.container_init is not args.no_container_init
+
+    return args
+
+
+def cephadm_init_ctx(args: List[str]) -> CephadmContext:
+    ctx = CephadmContext()
+    ctx.set_args(_parse_args(args))
+    return ctx
+
+
+def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
+    """Configure the logging for cephadm as well as updating the system
+    to have the expected log dir and logrotate configuration.
+    """
+    logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
+    global logger
+    if not os.path.exists(LOG_DIR):
+        os.makedirs(LOG_DIR)
+    operations = ['bootstrap', 'rm-cluster']
+    if any(op in args for op in operations):
+        dictConfig(interactive_logging_config)
+    else:
+        dictConfig(logging_config)
+
+    logger = logging.getLogger()
+    logger.setLevel(QUIET_LOG_LEVEL)
+
+    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
+        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
+            f.write("""# created by cephadm
+/var/log/ceph/cephadm.log {
+    rotate 7
+    daily
+    compress
+    missingok
+    notifempty
+    su root root
+}
+""")
+
+    if ctx.verbose:
+        for handler in logger.handlers:
+            if handler.name in ['console', 'log_file', 'console_stdout']:
+                handler.setLevel(QUIET_LOG_LEVEL)
+    logger.debug('%s\ncephadm %s' % ('-' * 80, args))
+
+
+def cephadm_require_root() -> None:
+    """Exit if the process is not running as root."""
+    if os.geteuid() != 0:
+        sys.stderr.write('ERROR: cephadm should be run as root\n')
+        sys.exit(1)
+
+
+def main() -> None:
+    av: List[str] = []
+    av = sys.argv[1:]
+
+    ctx = cephadm_init_ctx(av)
+    if not ctx.has_function():
+        sys.stderr.write('No command specified; pass -h or --help for usage\n')
+        sys.exit(1)
+
+    if ctx.has_function() and getattr(ctx.func, '_execute_early', False):
+        try:
+            sys.exit(ctx.func(ctx))
+        except Error as e:
+            if ctx.verbose:
+                raise
+            logger.error('ERROR: %s' % e)
+            sys.exit(1)
+
+    cephadm_require_root()
+    cephadm_init_logging(ctx, av)
+    try:
+        # podman or docker?
+        ctx.container_engine = find_container_engine(ctx)
+        if ctx.func not in \
+                [
+                    command_check_host,
+                    command_prepare_host,
+                    command_add_repo,
+                    command_rm_repo,
+                    command_install
+                ]:
+            check_container_engine(ctx)
+        # command handler
+        r = ctx.func(ctx)
+    except (Error, ClusterAlreadyExists) as e:
+        if ctx.verbose:
+            raise
+        logger.error('ERROR: %s' % e)
+        sys.exit(1)
+    if not r:
+        r = 0
+    sys.exit(r)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/cephadm/containers/keepalived/Dockerfile b/src/cephadm/containers/keepalived/Dockerfile
new file mode 100644
index 000000000..ac305f72b
--- /dev/null
+++ b/src/cephadm/containers/keepalived/Dockerfile
@@ -0,0 +1,24 @@
+FROM registry.access.redhat.com/ubi8/ubi-minimal:latest
+
+RUN microdnf install --nodocs \
+    bash       \
+    curl       \
+    iproute    \
+    keepalived-2.1.5 \
+ && rm /etc/keepalived/keepalived.conf && microdnf clean all
+
+COPY /skel /
+
+RUN chmod +x init.sh
+
+CMD ["./init.sh"]
+
+# Build specific labels
+LABEL maintainer="Guillaume Abrioux <gabrioux@redhat.com>"
+LABEL com.redhat.component="keepalived-container"
+LABEL version=2.1.5
+LABEL name="keepalived"
+LABEL description="keepalived for Ceph"
+LABEL summary="Provides keepalived on RHEL 8 for Ceph."
+LABEL io.k8s.display-name="Keepalived on RHEL 8"
+LABEL io.openshift.tags="Ceph keepalived"
diff --git a/src/cephadm/containers/keepalived/LICENSE b/src/cephadm/containers/keepalived/LICENSE
new file mode 100644
index 000000000..74b10b143
--- /dev/null
+++ b/src/cephadm/containers/keepalived/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 University of Michigan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/cephadm/containers/keepalived/README.md b/src/cephadm/containers/keepalived/README.md
new file mode 100644
index 000000000..bd7b605ac
--- /dev/null
+++ b/src/cephadm/containers/keepalived/README.md
@@ -0,0 +1,233 @@
+# quay.io/ceph/keepalived
+
+A small [ubi8-minimal](https://catalog.redhat.com/software/containers/registry/registry.access.redhat.com/repository/ubi8/ubi-minimal) based Docker container that provides a method of IP high availability via [keepalived](http://www.keepalived.org/) (VRRP failover), and optional Kubernetes API Server monitoring. If allowed to auto configure (default behaviour) it will automatically generate a unicast based failover configuration with a minimal amount of user supplied information.
+
+For specific information on Keepalived, please see the man page on [keepalived.conf](http://linux.die.net/man/5/keepalived.conf) or the [Keepalived User Guide](http://www.keepalived.org/pdf/UserGuide.pdf).
+
+
+## Index
+- [quay.io/ceph/keepalived](#cephkeepalived)
+  - [Index](#index)
+  - [Prerequisites](#prerequisites)
+  - [Configuration](#configuration)
+    - [Execution Control](#execution-control)
+    - [Autoconfiguration Options](#autoconfiguration-options)
+    - [Kubernetes Options](#kubernetes-options)
+    - [Suggested Kubernetes Settings](#suggested-kubernetes-settings)
+    - [Example Keepalived Configs](#example-keepalived-configs)
+        - [Example Autogenerated Keepalived Master Config](#example-autogenerated-keepalived-master-config)
+        - [Example Autogenerated Keepalived Backup Config](#example-autogenerated-keepalived-backup-config)
+  - [Example Run Commands](#example-run-commands)
+        - [Example Master Run Command](#example-master-run-command)
+        - [Example Backup Run Command](#example-backup-run-command)
+
+
+## Prerequisites
+
+Before attempting to deploy the keepalived container, the host must allow non local binding of ipv4 addresses. To do this, configure the sysctl tunable `net.ipv4.ip_nonlocal_bind=1`.
+
+In addition to enabling the nonlocal binds, the container must be run with both host networking (`--net=host`) and security setting CAP_NET_ADMIN (`--cap-add NET_ADMIN`) capability. These allow the container to manage the host's networking configuration, and this is essential to the function of keepalived.
+
+
+## Configuration
+### Execution Control
+
+|        Variable       |                      Default                     |
+|:---------------------:|:------------------------------------------------:|
+| `KEEPALIVED_AUTOCONF` |                      `true`                      |
+|   `KEEPALIVED_CONF`   |         `/etc/keepalived/keepalived.conf`        |
+|    `KEEPALIVED_CMD`   | `/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF` |
+|   `KEEPALIVED_DEBUG`  |                      `false`                     |
+
+* `KEEPALIVED_AUTOCONF` -  Enables or disables the auto-configuration of keepalived.
+
+* `KEEPALIVED_CONF` - The path to the keepalived configuration file.
+
+* `KEEPALIVED_CMD` - The command called to execute keepalived.
+
+* `KEEPALIVED_DEBUG` - Enables or disables debug level logging for keepalived (adds `-D` to `KEEPALIVED_CMD`.
+
+
+### Autoconfiguration Options
+
+|                   Variable                  |               Default              |
+|:-------------------------------------------:|:----------------------------------:|
+|           `KEEPALIVED_ADVERT_INT`           |                 `1`                |
+|            `KEEPALIVED_AUTH_PASS`           | `pwd$KEEPALIVED_VIRTUAL_ROUTER_ID` |
+|            `KEEPALIVED_INTERFACE`           |               `eth0`               |
+|            `KEEPALIVED_PRIORITY`            |                `200`               |
+|              `KEEPALIVED_STATE`             |              `MASTER`              |
+|       `KEEPALIVED_TRACK_INTERFACE_###`      |                                    |
+|         `KEEPALIVED_UNICAST_SRC_IP`         |                                    |
+|        `KEEPALIVED_UNICAST_PEER_###`        |                                    |
+|      `KEEPALIVED_VIRTUAL_IPADDRESS_###`     |                                    |
+| `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` |                                    |
+|        `KEEPALIVED_VIRTUAL_ROUTER_ID`       |                 `1`                |
+|      `KEEPALIVED_KUBE_APISERVER_CHECK`      |               `false`              |
+
+* `KEEPALIVED_ADVERT_INT` - The VRRP advertisement interval (in seconds).
+
+* `KEEPALIVED_AUTH_PASS` - A shared password used to authenticate each node in a VRRP group (**Note:** If password is longer than 8 characters, only the first 8 characters are used).
+
+* `KEEPALIVED_INTERFACE` - The host interface that keepalived will monitor and use for VRRP traffic.
+
+* `KEEPALIVED_PRIORITY` - Election value, the server configured with the highest priority will become the Master.
+
+* `KEEPALIVED_STATE` - Defines the server role as Master or Backup. (**Options:** `MASTER` or `BACKUP`).
+
+* `KEEPALIVED_TRACK_INTERFACE_###` - An interface that's state should be monitored (e.g. eth0). More than one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_UNICAST_SRC_IP` - The IP on the host that the keepalived daemon should bind to. **Note:** If not specified, it will be the first IP bound to the interface specified in `KEEPALIVED_INTERFACE`.
+
+* `KEEPALIVED_UNICAST_PEER_###` - An IP of a peer participating in the VRRP group. More tha one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_VIRTUAL_IPADDRESS_###` - An instance of an address that will be monitored and failed over from one host to another. These should be a quoted string in the form of: `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>` At a minimum the ip address, mask and device should be specified e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.2/24 dev eth0"`. More than one can be supplied as long as the variable name ends in a number from 0-999. **Note:** Keepalived has a hard limit of **20** addresses that can be monitored. More can be failed over with the monitored addresses via `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###`.
+
+
+* `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` - An instance of an address that will be failed over with the monitored addresses supplied via `KEEPALIVED_VIRTUAL_IPADDRESS_###`.  These should be a quoted string in the form of: `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>` At a minimum the ip address, mask and device should be specified e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1"`. More than one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_VIRTUAL_ROUTER_ID` - A unique number from 0 to 255 that should identify the VRRP group. Master and Backup should have the same value. Multiple instances of keepalived can be run on the same host, but each pair **MUST** have a unique virtual router id.
+
+* `KEEPALIVED_KUBE_APISERVER_CHECK` -  If enabled it configures a simple check script for the Kubernetes API-Server. For more information on this feature, please see the [Kubernetes Options](#kubernetes-options) section.
+
+
+### Kubernetes Options
+
+
+|          **Variable**         |                   **Default**                  |
+|:-----------------------------:|:----------------------------------------------:|
+|    `KUBE_APISERVER_ADDRESS`   | parsed from `KEEPALIVED_VIRTUAL_IPADDRESS_###` |
+|     `KUBE_APISERVER_PORT`     |                     `6443`                     |
+| `KUBE_APISERVER_CHK_INTERVAL` |                       `3`                      |
+|   `KUBE_APISERVER_CHK_FALL`   |                      `10`                      |
+|   `KUBE_APISERVER_CHK_RISE`   |                       `2`                      |
+|  `KUBE_APISERVER_CHK_WEIGHT`  |                      `-50`                     |
+
+
+
+* `KUBE_APISERVER_ADDRESS` - The Virtual IP being used for the Kube API Server. If none is supplied, it is assumed to be the lowest numbered entry in the `KEEPALIVED_VIRTUAL_IPADDRESS_###` variables.
+
+* `KUBE_APISERVER_PORT` - The port to use in conjunction with the `KUBE_APISERVER_ADDRESS`.
+
+* `KUBE_APISERVER_CHK_INTERVAL` - The interval in seconds between calling the script.
+
+* `KUBE_APISERVER_CHK_FALL` - The number of consecutive non-zero script exits before setting the state to `FAULT`.
+
+* `KUBE_APISERVER_CHK_RISE` - The number of consecutive zero script exits before exiting the `FAULT` state.
+
+* `KUBE_APISERVER_CHK_WEIGHT` - The weight to apply to the priority when the service enters the `FAULT` state.
+
+
+
+---
+
+### Suggested Kubernetes Settings
+
+Assuming there are three nodes running the kube-apiserver, you cannot rely on setting just the`KEEPALIVED_STATE` parameter to manage failover across the nodes.
+
+To manage kube-apiserver failover, enable the healthcheck option with `KEEPALIVED_KUBE_APISERVER_CHECK`, and set the `KEEPALIVED_PRIORITY` manually for the three instances.
+
+| **Node** | **Priority** |
+|:--------:|:------------:|
+|  node-01 |      200     |
+|  node-02 |      190     |
+|  node-03 |      180     |
+
+With the default weight of `-50`, if `node-01` has an issue, it's priority will drop to `150` and allow `node-02` to take over, the same is repeated if `node-02` has a failure dropping it's weight to `140` and `node-03` takes over.
+
+Recovery occurs in the same order with the system with the highest priority being promoted to master.
+
+### Example Keepalived Configs
+
+##### Example Autogenerated Keepalived Master Config
+```
+vrrp_instance MAIN {
+  state MASTER
+  interface eth0
+  virtual_router_id 2
+  priority 200
+  advert_int 1
+  unicast_src_ip 10.10.0.21
+  unicast_peer {
+    10.10.0.22
+  }
+  authentication {
+    auth_type PASS
+    auth_pass pwd1
+  }
+  virtual_ipaddress {
+    10.10.0.2/24 dev eth0
+  }
+  virtual_ipaddress_excluded {
+    172.16.1.20/24 dev eth1
+  }
+  track_interface {
+    eth0
+    eth1
+  }
+}
+```
+
+##### Example Autogenerated Keepalived Backup Config
+```
+vrrp_instance MAIN {
+  state BACKUP
+  interface eth0
+  virtual_router_id 2
+  priority 100
+  advert_int 1
+  unicast_src_ip 10.10.0.22
+  unicast_peer {
+    10.10.0.21
+  }
+  authentication {
+    auth_type PASS
+    auth_pass pwd1
+  }
+  virtual_ipaddress {
+    10.10.0.2/24 dev eth0
+  }
+  virtual_ipaddress_excluded {
+    172.16.1.20/24 dev eth1
+  }
+  track_interface {
+    eth0
+    eth1
+  }
+}
+
+```
+
+
+## Example Run Commands
+##### Example Master Run Command
+```bash
+docker run -d --net=host --cap-add NET_ADMIN \
+-e KEEPALIVED_AUTOCONF=true                  \
+-e KEEPALIVED_STATE=MASTER                   \
+-e KEEPALIVED_INTERFACE=eth0                 \
+-e KEEPALIVED_VIRTUAL_ROUTER_ID=2            \
+-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.21      \
+-e KEEPALIVED_UNICAST_PEER_0=10.10.0.22      \
+-e KEEPALIVED_TRACK_INTERFACE_1=eth0         \
+-e KEEPALIVED_TRACK_INTERFACE_2=eth1         \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \
+quay.io/ceph/keepalived
+```
+
+##### Example Backup Run Command
+```bash
+docker run -d --net=host --cap-add NET_ADMIN \
+-e KEEPALIVED_AUTOCONF=true                  \
+-e KEEPALIVED_STATE=BACKUP                   \
+-e KEEPALIVED_INTERFACE=eth0                 \
+-e KEEPALIVED_VIRTUAL_ROUTER_ID=2            \
+-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.22      \
+-e KEEPALIVED_UNICAST_PEER_0=10.10.0.21      \
+-e KEEPALIVED_TRACK_INTERFACE_1=eth0         \
+-e KEEPALIVED_TRACK_INTERFACE_2=eth1         \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \
+quay.io/ceph/keepalived
+```
diff --git a/src/cephadm/containers/keepalived/skel/init.sh b/src/cephadm/containers/keepalived/skel/init.sh
new file mode 100755
index 000000000..9c86cfad2
--- /dev/null
+++ b/src/cephadm/containers/keepalived/skel/init.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+
+KEEPALIVED_DEBUG=${KEEPALIVED_DEBUG:-false}
+KEEPALIVED_KUBE_APISERVER_CHECK=${KEEPALIVED_KUBE_APISERVER_CHECK:-false}
+KEEPALIVED_CONF=${KEEPALIVED_CONF:-/etc/keepalived/keepalived.conf}
+KEEPALIVED_VAR_RUN=${KEEPALIVED_VAR_RUN:-/var/run/keepalived}
+
+if [[ ${KEEPALIVED_DEBUG,,} == 'true' ]]; then
+  kd_cmd="/usr/sbin/keepalived -n -l -D -f $KEEPALIVED_CONF"
+else
+  kd_cmd="/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF"
+fi
+
+KEEPALIVED_CMD=${KEEPALIVED_CMD:-"$kd_cmd"}
+
+rm -fr "$KEEPALIVED_VAR_RUN"
+
+exec $KEEPALIVED_CMD
+\ No newline at end of file
diff --git a/src/cephadm/samples/alertmanager.json b/src/cephadm/samples/alertmanager.json
new file mode 100644
index 000000000..bacbad300
--- /dev/null
+++ b/src/cephadm/samples/alertmanager.json
@@ -0,0 +1,27 @@
+{
+    "files": {
+        "alertmanager.yml": [
+            "global:",
+            "  resolve_timeout: 5m",
+            "",
+            "route:",
+            "  group_by: ['alertname']",
+            "  group_wait: 10s",
+            "  group_interval: 10s",
+            "  repeat_interval: 1h",
+            "  receiver: 'web.hook'",
+            "receivers:",
+            "- name: 'web.hook'",
+            "  webhook_configs:",
+            "  - url: 'http://127.0.0.1:5001/'",
+            "inhibit_rules:",
+            "  - source_match:",
+            "      severity: 'critical'",
+            "    target_match:",
+            "      severity: 'warning'",
+            "    equal: ['alertname', 'dev', 'instance']"
+        ]
+    },
+    "peers": []
+}
+
diff --git a/src/cephadm/samples/custom_container.json b/src/cephadm/samples/custom_container.json
new file mode 100644
index 000000000..194a44d2a
--- /dev/null
+++ b/src/cephadm/samples/custom_container.json
@@ -0,0 +1,35 @@
+{
+    "image": "docker.io/prom/alertmanager:v0.20.0",
+    "ports": [9093, 9094],
+    "args": [
+        "-p", "9093:9093",
+        "-p", "9094:9094"
+    ],
+    "dirs": ["etc/alertmanager"],
+    "files": {
+        "etc/alertmanager/alertmanager.yml": [
+            "global:",
+            "  resolve_timeout: 5m",
+            "",
+            "route:",
+            "  group_by: ['alertname']",
+            "  group_wait: 10s",
+            "  group_interval: 10s",
+            "  repeat_interval: 1h",
+            "  receiver: 'web.hook'",
+            "receivers:",
+            "- name: 'web.hook'",
+            "  webhook_configs:",
+            "  - url: 'http://127.0.0.1:5001/'",
+            "inhibit_rules:",
+            "  - source_match:",
+            "      severity: 'critical'",
+            "    target_match:",
+            "      severity: 'warning'",
+            "    equal: ['alertname', 'dev', 'instance']"
+        ]
+    },
+    "volume_mounts": {
+        "etc/alertmanager": "/etc/alertmanager"
+    }
+}
diff --git a/src/cephadm/samples/grafana.json b/src/cephadm/samples/grafana.json
new file mode 100644
index 000000000..0e0689b7e
--- /dev/null
+++ b/src/cephadm/samples/grafana.json
@@ -0,0 +1,90 @@
+{
+    "files": {
+	"grafana.ini": [
+            "[users]",
+            "  default_theme = light",
+            "[auth.anonymous]",
+            "  enabled = true",
+            "  org_name = 'Main Org.'",
+            "  org_role = 'Viewer'",
+            "[server]",
+            "  domain = 'bootstrap.storage.lab'",
+            "  protocol = https",
+            "  cert_file = /etc/grafana/certs/cert_file",
+            "  cert_key = /etc/grafana/certs/cert_key",
+            "  http_port = 3000",
+            "  http_addr = localhost",
+            "[security]",
+            "  admin_user = admin",
+            "  admin_password = admin",
+            "  allow_embedding = true"
+	],
+	"provisioning/datasources/ceph-dashboard.yml": [
+            "deleteDatasources:",
+            "  - name: 'Dashboard'",
+            "    orgId: 1",
+            " ",
+            "datasources:",
+            "  - name: 'Dashboard'",
+            "    type: 'prometheus'",
+            "    access: 'proxy'",
+            "    orgId: 1",
+            "    url: 'http://localhost:9095'",
+            "    basicAuth: false",
+            "    isDefault: true",
+            "    editable: false"
+	],
+	"certs/cert_file": [
+            "-----BEGIN CERTIFICATE-----",
+            "MIIDLTCCAhWgAwIBAgIUEH0mq6u93LKsWlNXst5pxWcuqkQwDQYJKoZIhvcNAQEL",
+            "BQAwJjELMAkGA1UECgwCSVQxFzAVBgNVBAMMDmNlcGgtZGFzaGJvYXJkMB4XDTIw",
+            "MDEwNTIyNDYyMFoXDTMwMDEwMjIyNDYyMFowJjELMAkGA1UECgwCSVQxFzAVBgNV",
+            "BAMMDmNlcGgtZGFzaGJvYXJkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC",
+            "AQEAqxh6eO0NTZJe+DoKZG/kozJCf+83eB3gWzwXoNinRmV/49f5WPR20DIxAe0R",
+            "saO6XynJXTrhvXT1bsARUq+LSmjWNFoYXopFuOJhGdWn4dmpuHwtpcFv2kjzNOKj",
+            "U2EG8j6bsRp1jFAzn7kdbSWT0UHySRXp9DPAjDiF3LjykMXiJMReccFXrB1pRi93",
+            "nJxED8d6oT5GazGB44svb+Zi6ABamZu5SDJC1Fr/O5rWFNQkH4hQEqDPj1817H9O",
+            "sm0mZiNy77ZQuAzOgZN153L3QOsyJismwNHfAMGMH9mzPKOjyhc13VlZyeEzml8p",
+            "ZpWQ2gi8P2r/FAr8bFL3MFnHKwIDAQABo1MwUTAdBgNVHQ4EFgQUZg3v7MX4J+hx",
+            "w3HENCrUkMK8tbwwHwYDVR0jBBgwFoAUZg3v7MX4J+hxw3HENCrUkMK8tbwwDwYD",
+            "VR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAaR/XPGKwUgVwH3KXAb6+",
+            "s9NTAt6lCmFdQz1ngoqFSizW7KGSXnOgd6xTiUCR0Tjjo2zKCwhIINaI6mwqMbrg",
+            "BOjb7diaqwFaitRs27AtdmaqMGndUqEBUn/k64Ld3VPGL4p0W2W+tXsyzZg1qQIn",
+            "JXb7c4+oWzXny7gHFheYQTwnHzDcNOf9vJiMGyYYvU1xTOGucu6dwtOVDDe1Z4Nq",
+            "AyIYWDScRr2FeAOXyx4aW2v5bjpTxvP+79/OOBbQ+p4y5F4PDrPeOSweGoo6huTR",
+            "+T+YI9Jfw2XCgV7NHWhfdt3fHHwUQzO6WszWU557pmCODLvXWsQ8P+GRiG7Nywm3",
+            "uA==",
+            "-----END CERTIFICATE-----"
+	],
+	"certs/cert_key": [
+            "-----BEGIN PRIVATE KEY-----",
+            "MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCrGHp47Q1Nkl74",
+            "Ogpkb+SjMkJ/7zd4HeBbPBeg2KdGZX/j1/lY9HbQMjEB7RGxo7pfKcldOuG9dPVu",
+            "wBFSr4tKaNY0WhheikW44mEZ1afh2am4fC2lwW/aSPM04qNTYQbyPpuxGnWMUDOf",
+            "uR1tJZPRQfJJFen0M8CMOIXcuPKQxeIkxF5xwVesHWlGL3ecnEQPx3qhPkZrMYHj",
+            "iy9v5mLoAFqZm7lIMkLUWv87mtYU1CQfiFASoM+PXzXsf06ybSZmI3LvtlC4DM6B",
+            "k3XncvdA6zImKybA0d8AwYwf2bM8o6PKFzXdWVnJ4TOaXylmlZDaCLw/av8UCvxs",
+            "UvcwWccrAgMBAAECggEAeBv0BiYrm5QwdUORfhaKxAIJavRM1Vbr5EBYOgM90o54",
+            "bEN2ePsM2XUSsE5ziGfu8tVL1dX7GNwdW8UbpBc1ymO0VAYXa27YKUVKcy9o7oS1",
+            "v5v1E5Kq6esiSLL9gw/vJ2nKNFblxD2dL/hs7u1dSp5n7uSiW1tlRUp8toljRzts",
+            "1Cenp0J/a82HwWDE8j/H9NvitTOZ2cdwJ76V8GkBynlvr2ARjRfZGx0WXEJmoZYD",
+            "YUQVU303DB6Q2tkFco4LbPofkuhhMPhXsz3fZ/blHj/c78tqP9L5sQ29oqoPE1pS",
+            "DBOwKC/eoi5FY34RdLNL0dKq9MzbuYqEcCfZOJgxoQKBgQDf+5XF+aXQz2OmSaj6",
+            "1Yr+3KAKdfX/AYp22X1Wy4zWcZlgujgwQ1FG0zay8HVBM0/xn4UgOtcKCoXibePh",
+            "ag1t8aZINdRE1JcMzKmZoSvU9Xk30CNvygizuJVEKsJFPDbPzCpauDSplzcQb4pZ",
+            "wepucPuowkPMBx0iU3x0qSThWwKBgQDDjYs7d30xxSqWWXyCOZshy7UtHMNfqP15",
+            "kDfTXIZzuHvDf6ZNci10VY1eDZbpZfHgc6x1ElbKv2H4dYsgkENJZUi1YQDpVPKq",
+            "4N5teNykgAuagiR7dRFltSju3S7hIE6HInTv3hShaFPymlEE7zuBMuEUcuvYz5YN",
+            "RjxsvypKcQKBgCuuV+Y1KqZPW8K5SNAqRyIvCrMfkCr8NPG6tpvvtHa5zsyzZHPd",
+            "HQOv+1HoXSWrCSM5FfBUKU3XAYdIIRH76cSQRPp+LPiDcTXY0Baa/P5aJRrCZ7bM",
+            "cugBznJt2FdCR/o8eeIZXIPabq2w4w1gKQUC2cFuqWQn2wGvwGzL89pTAoGAAfpx",
+            "mSVpT9KVzrWTC+I3To04BP/QfixAfDVYSzwZZBxOrDijXw8zpISlDHmIuE2+t62T",
+            "5g9Mb3qmLBRMVwT+mUR8CtGzZ6jjV5U0yti5KrTc6TA93D3f8i51/oygR8jC4p0X",
+            "n8GYZdWfW8nx3eHpsTHpkwJinmvjMbkvLU51yBECgYAnUAMyhNOWjbYS5QWd8i1W",
+            "SFQansVDeeT98RebrzmGwlgrCImHItJz0Tz8gkNB3+S2B2balqT0WHaDxQ8vCtwX",
+            "xB4wd+gMomgdYtHGRnRwj1UyRXDk0c1TgGdRjOn3URaezBMibHTQSbFgPciJgAuU",
+            "mEl75h1ToBX9yvnH39o50g==",
+            "-----END PRIVATE KEY-----"
+	]
+    }
+}
diff --git a/src/cephadm/samples/nfs.json b/src/cephadm/samples/nfs.json
new file mode 100644
index 000000000..2e6625101
--- /dev/null
+++ b/src/cephadm/samples/nfs.json
@@ -0,0 +1,14 @@
+{
+    "pool" : "nfs-ganesha",
+    "namespace" : "nfs-ns",
+    "files": {
+        "ganesha.conf": [
+            "RADOS_URLS {",
+            "        userid = admin;",
+            "}",
+            "",
+            "%url    rados://nfs-ganesha/nfs-ns/conf-nfs.a",
+            ""
+        ]
+    }
+}
diff --git a/src/cephadm/samples/prometheus.json b/src/cephadm/samples/prometheus.json
new file mode 100644
index 000000000..64727fb59
--- /dev/null
+++ b/src/cephadm/samples/prometheus.json
@@ -0,0 +1,17 @@
+{
+    "files": {
+	"prometheus.yml": [
+	    "global:",
+	    "  scrape_interval: 5s",
+	    "  evaluation_interval: 10s",
+	    "",
+	    "rule_files: ",
+	    "  - '/etc/prometheus/alerting/*'",
+	    "",
+	    "scrape_configs:",
+	    "  - job_name: 'prometheus'",
+	    "    static_configs:",
+	    "      - targets: ['localhost:9095']"
+	]
+    }
+}
diff --git a/src/cephadm/samples/rgw_ssl.json b/src/cephadm/samples/rgw_ssl.json
new file mode 100644
index 000000000..3fe6fea1c
--- /dev/null
+++ b/src/cephadm/samples/rgw_ssl.json
@@ -0,0 +1,101 @@
+{
+  "rgw_realm": "default",
+  "rgw_zone": "default",
+  "service_type": "rgw",
+  "placement": {
+    "hosts": [{
+      "hostname": "ironic-moliver",
+      "name": "",
+      "network": ""
+    }],
+    "count": 1
+  },
+  "ssl": true,
+  "rgw_frontend_port": 4343,
+  "rgw_frontend_ssl_certificate": [
+      "-----BEGIN CERTIFICATE-----",
+      "MIIFmjCCA4KgAwIBAgIJAIZ2n35bmwXTMA0GCSqGSIb3DQEBCwUAMGIxCzAJBgNV",
+      "BAYTAkFVMQwwCgYDVQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBp",
+      "bmMuMSYwJAYDVQQDDB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTAeFw0y",
+      "MDAyMDcwMDEzNTFaFw0zMDAyMDQwMDEzNTFaMGIxCzAJBgNVBAYTAkFVMQwwCgYD",
+      "VQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBpbmMuMSYwJAYDVQQD",
+      "DB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTCCAiIwDQYJKoZIhvcNAQEB",
+      "BQADggIPADCCAgoCggIBAMptGJ523QkEbc37za8iuCTahj0Zr6hy+ToSX/Vfdzxj",
+      "iYHuD2PiZZyJB7t2eOqiA8sQ5N513EUtf2ZIBwtnnqFIzD5TqI3BxRajUTlOyXUX",
+      "onMwQwXu2ifDUy3LCmuQfzanOTWvVLac1NmkWbJHpJCXYbUnPb1Nvd0QjTTEH1jt",
+      "5bDHhfxwCIYK6PY+MqC72a09wB2ZF+EKsSdqghOKmibfJHtoJdsqGeLrysBLrzUJ",
+      "e/5ZW3V4Z85T2lja5KZnWgRofrUy5TmJV10HO4Hht92xvWvEi/rmjg2AVYZFUQQx",
+      "xKXpUBbF5T46eSVmaT7IH88Yp5ytgBTaigym7ETCjohp/DfCaK1DUehh0ce7iUq2",
+      "yCLviZsX4WdPYxzkoLflNrqm4YZP6iKcZSUR/A+qPKCzCXgMXFNA1JxilDwEq35F",
+      "zGN++ehJqdNmOQ1eQScsLwZQa6mC97d+upWdCvyntf1+S6vNcXhtRQpjNM4W37oW",
+      "r5nicsGA3/0rpDEHZW85KlkdWO1uCS/6ftgt8UUMaf5ew3PigzusqymBWTlMOjtW",
+      "uAQXxgZZvkRp+xdspn/uTCAP+bNShGD6Q+TO3U6IjTqHk83sGKCvg2dyU/dqgPr9",
+      "2IIzgQBFGk0W0nM/E83E8hUSwX17COLL3drhPZb4VRMChQ8PAa6u9nIymkX2wSVv",
+      "AgMBAAGjUzBRMB0GA1UdDgQWBBSsZHuY7KK80RrZHp+Gx+k16skuRDAfBgNVHSME",
+      "GDAWgBSsZHuY7KK80RrZHp+Gx+k16skuRDAPBgNVHRMBAf8EBTADAQH/MA0GCSqG",
+      "SIb3DQEBCwUAA4ICAQAE+BLtnu0p8FtK7vrBCRcCdvycWaSFGJUt7r5Nm8TD7sKw",
+      "bWeDLgXrRouyA7n6yt/JqQbXYcxt4MLAM0P6NQd5BlNrrnDk4rBnJiJgejppNE+S",
+      "BazR7Dv0uYcs8kPT4DPpwzv4aJ2aXCBaxYrq8Rx2xOqANCPVOrtPUk9yGpaQ5adU",
+      "GfxkVbpgIEz1c71PeQuK1KUU/Wpk7cpm+FQCizl9ftP2lHWsGhSLCuyWoMTjt68P",
+      "gYEWoV54eo/bzwj2ei6TcfNo+uHyzEiiG2qEvMh/cnYUFzs8O1t0mN19WPB1pSh1",
+      "faci5lGdtkRbLgP0g5RvpagE7Lw3mCc5Om8jmHs4mPfuVkssBVV23CrFpqLLrDX3",
+      "Acwb/zRGvA7T4WESBTJMYFOLgm0W0Y+AN8RcYNU9QbDhe++Te0uz/3Sy3GN2Xg5z",
+      "MxfD1+34x6KvMfCh8NjII2mFQ9ukcfrhcfO3oWDLlwsqlVbhkZxNiUOEIx9nzHcF",
+      "kWpZ2ypBDH45h2o3LyqvGjsu/BFkeG6JpEDCWbClKWcjKxOrLVDufhSDduffDjja",
+      "zOsgQJg0Yf//Ubb5p0c54GjHM/XDXEcV3m3sEtbmMYz6xGwuag4bx8P2E/QY8sFp",
+      "JxgIdS8vdl6YhDCjKJ2XzI30JwCdftgDIAiWSE0ivoDc+8+gG1nb11GT52HFzA==",
+      "-----END CERTIFICATE-----",
+      "-----BEGIN PRIVATE KEY-----",
+      "MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDKbRiedt0JBG3N",
+      "+82vIrgk2oY9Ga+ocvk6El/1X3c8Y4mB7g9j4mWciQe7dnjqogPLEOTeddxFLX9m",
+      "SAcLZ56hSMw+U6iNwcUWo1E5Tsl1F6JzMEMF7tonw1MtywprkH82pzk1r1S2nNTZ",
+      "pFmyR6SQl2G1Jz29Tb3dEI00xB9Y7eWwx4X8cAiGCuj2PjKgu9mtPcAdmRfhCrEn",
+      "aoITipom3yR7aCXbKhni68rAS681CXv+WVt1eGfOU9pY2uSmZ1oEaH61MuU5iVdd",
+      "BzuB4bfdsb1rxIv65o4NgFWGRVEEMcSl6VAWxeU+OnklZmk+yB/PGKecrYAU2ooM",
+      "puxEwo6Iafw3wmitQ1HoYdHHu4lKtsgi74mbF+FnT2Mc5KC35Ta6puGGT+oinGUl",
+      "EfwPqjygswl4DFxTQNScYpQ8BKt+RcxjfvnoSanTZjkNXkEnLC8GUGupgve3frqV",
+      "nQr8p7X9fkurzXF4bUUKYzTOFt+6Fq+Z4nLBgN/9K6QxB2VvOSpZHVjtbgkv+n7Y",
+      "LfFFDGn+XsNz4oM7rKspgVk5TDo7VrgEF8YGWb5EafsXbKZ/7kwgD/mzUoRg+kPk",
+      "zt1OiI06h5PN7Bigr4NnclP3aoD6/diCM4EARRpNFtJzPxPNxPIVEsF9ewjiy93a",
+      "4T2W+FUTAoUPDwGurvZyMppF9sElbwIDAQABAoICAQC4sATwP563pXTRpNYq3lCI",
+      "P2COyqq70/qUA0PNygYt8Nr60srz5RG0WknVvefgm2U+lvFaDsqjyzkbhsf2ndnb",
+      "aWH/07BLdeluGB/5W2rvDFtJIVVlSmF8OffgJgohzbpjkPrfglKWMkz5LbwwrrD0",
+      "w0mAUIdB+nYqBfnvlKjNKHCSc9hJU6ZTNg0K7gCfKgUWzOpFlvJ0fp7XSZPYZHL0",
+      "2E6e0Y0Ig0cPBPb9r4/xoe+hRsHtUafUVik3PK+1K0K0FurUQ9VkQ2yUEg83F0v8",
+      "Vzht5OuaRVSB+P8O/JtIamfywAY0YOYhepQhjWikwU5UUzhJ+PqNDD87/+g9bA1B",
+      "xC25eoDxThiQlgDmRoH18ZsWDVf9TuJnm4cpxnZYX6ip+BLm/aidT39auZo0Fl+r",
+      "cJxRn0Qlm0Vm4Tc/6ZG6PQWB+Q6CjVFdoxeOvEQcTSuKA6VZBStLmqX++5In1Lmj",
+      "hVr3/aueHiZvXS5bNIdd2IfzatR+nP+uxzM/ryJRvGO2B2XTS00Cvv/lH84BDJYV",
+      "yt1PJIBoM9Dh7aUAHmKNVfRt83xzvcSPZx9VmSzA6wwqCQcO1GJk6keAuxOuligu",
+      "YdSFcfChOg90WvBcl+NzMblLkwrFSBQR7kgG0+dedv+Wkm4xO4T7B4W2G5+VIJKG",
+      "mrEAq6XQMFnfEJzNVg7JUQKCAQEA91eMvphoVVz+cxa4Ew7OokNXk5kSlvmQ8+Ij",
+      "ngFBvniXPZecxVzFEJglSthH5KI2ZqxwF3GJhKjxUihwf6K13Hx54EM7S/qV57ie",
+      "kVeKdAs+SGv+hRk1gQOoPBInbtKGKTni1V8T7iNginLueC/YikFugzv6IxiliBSG",
+      "3R7zjRepOW69aEoCPecx9amU4CkAwgeLJgBloBoqWD8sKM+bl7p5juQCU2sQ9D4/",
+      "kLnpG9+zPRUNjI4sog3L1wql3zthI6/4gf0TNuDhJTZ68vpMSi02pOUkVa0MmVOA",
+      "ex16luIp0BhxG/sUAeoevFL4KqR0CBbyAstbt2E/oPYOWMJ4MwKCAQEA0YMNXY7I",
+      "RNFOmiZ2Wn8kENCyJguqbOMd/li2+ercgp3MaSgTjC5KDFvZqTwXXlrURPu9hcyv",
+      "sJBSjp45g1T1LsUUq8UJgOIQgxykurIstGg33TAR+TN9VUu/xonLQF23GD8M6Vzd",
+      "EcZEVlBY33hgNXw4mRcBPnaoG5FZCBfHOgdBCExoYKW/RNKcmu0q+h9uhDBCbopv",
+      "04ROzw+HW1qc5qvNPR47buZ9+5QdonVK8s2bguMJ0phXwdSxL21wsjIsXyAO9m7w",
+      "qLHOq/hVokM0Fki09Exg4ppB8cLHC2ITpsVSgn4Dcz5zRtyvhozSKX4R9kMC64a0",
+      "AgMPVMllmGlR1QKCAQBIIGCrh7gNBIQyvXJKJGw/RxH3uZCBNB9/7vbh8Y3hZDr+",
+      "PAL8TpQsxaFCVRWJ53+jqy84tuQaKkXM5nv/zEvqEuZbbl+RRW6HVv/udC+srUap",
+      "Scy7tWEz0QQzGDwlhgCXbwjlnccrD2fsl51QsOsdTf1TCZ9ksqA6sXmua4MsJrUz",
+      "SUa0bbh/oraf46bFQ0+0RQzftQftixPEDg/rirbdpQQjlfvTpYoZHzncE0qV1ULo",
+      "UgZUcXU0gH9rovBBy4gFJyB5j3oV67fb6SorRrAOhWbE6QkSbtcYsw/pVuxTqXn1",
+      "89qwBSSNdl8mHa++h5xKa56BEBobvKEYaAhA+9yfAoIBAQDPFEE5n87Cdj7CjhGd",
+      "EN2M4Tmz8QPZ7AHRS85O5pxdXaqUpU/s1jPKU16nrwVJ9WypYkjI3q8oTP3MiQc/",
+      "j9FnENSFkpL6GHdJoB4Rido11myg6spZDVNr4xsCGWATlo1KIceZQHghAV66EWBG",
+      "QKyXMNigN+S64Hz4AomFPjtkV5cnpJ3mKO0MET9IwfIglsCdVzXSHHK7FaLvdeHL",
+      "oZxDQrvxFNiZnKgY6SUBVf1mT2LN06n5xSm4I4md3wXsmzrQKtefK7gihNxJjYLW",
+      "hqYNAIAalwOL9fwIAQTLc30I8S/EWtj+J1O5TpcO3lE7QahvR3yzXsi81Flq7ETG",
+      "iBKhAoIBAGHGpnjrLlCarNk9axh4Dw1OjgEvwPlEqsiWXt2tylLeab0OGC47MmJx",
+      "RmKwgVukMuxkQb8v4ANSRtih7R+E+qXfexjEFYtzh/uaRP1Z7ZrO/oqq0oLbPpsx",
+      "yTSRDL1i5/fgdIlKVH3N4IF7E8Pc3REgYIwLQxYjTdgVHEAM65XegQ2Lkpr4iae3",
+      "hm4IsD2PrsVITrlsLg65XnfcbsCs/OfQ5GuUp+xUBw5e0bQBmsWEiCaCjrq/EHJa",
+      "/oeJRqS7lyGYDC+wiSsE70x4dvu1um2F+V1Jw4LWjhu8Z8dNSXPSf8vLqXGkWAlk",
+      "805lq+iy7Mkhb+dlr4R9WhMWDyGwgYs=",
+      "-----END PRIVATE KEY-----"
+  ]
+}
diff --git a/src/cephadm/tests/__init__.py b/src/cephadm/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/tests/__init__.py
diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py
new file mode 100644
index 000000000..76ac0b44c
--- /dev/null
+++ b/src/cephadm/tests/fixtures.py
@@ -0,0 +1,162 @@
+import mock
+import os
+import pytest
+import time
+
+from contextlib import contextmanager
+from pyfakefs import fake_filesystem
+
+from typing import Dict, List, Optional
+
+
+def import_cephadm():
+    """Import cephadm as a module."""
+    import cephadm as _cephadm
+
+    return _cephadm
+
+
+def mock_docker():
+    _cephadm = import_cephadm()
+    docker = mock.Mock(_cephadm.Docker)
+    docker.path = '/usr/bin/docker'
+    return docker
+
+
+def mock_podman():
+    _cephadm = import_cephadm()
+    podman = mock.Mock(_cephadm.Podman)
+    podman.path = '/usr/bin/podman'
+    podman.version = (2, 1, 0)
+    return podman
+
+
+def _daemon_path():
+    return os.getcwd()
+
+
+def mock_bad_firewalld():
+    def raise_bad_firewalld():
+        raise Exception('Called bad firewalld')
+
+    _cephadm = import_cephadm()
+    f = mock.Mock(_cephadm.Firewalld)
+    f.enable_service_for = lambda _: raise_bad_firewalld()
+    f.apply_rules = lambda: raise_bad_firewalld()
+    f.open_ports = lambda _: raise_bad_firewalld()
+
+
+def _mock_scrape_host(obj, interval):
+    try:
+        raise ValueError("wah")
+    except Exception as e:
+        obj._handle_thread_exception(e, 'host')
+
+
+def _mock_run(obj):
+    t = obj._create_thread(obj._scrape_host_facts, 'host', 5)
+    time.sleep(1)
+    if not t.is_alive():
+        obj.cephadm_cache.update_health('host', "inactive", "host thread stopped")
+
+
+@pytest.fixture()
+def cephadm_fs(
+    fs: fake_filesystem.FakeFilesystem,
+):
+    """
+    use pyfakefs to stub filesystem calls
+    """
+    uid = os.getuid()
+    gid = os.getgid()
+
+    def fchown(fd, _uid, _gid):
+        """pyfakefs doesn't provide a working fchown or fchmod.
+        In order to get permissions working generally across renames
+        we need to provide our own implemenation.
+        """
+        file_obj = fs.get_open_file(fd).get_object()
+        file_obj.st_uid = _uid
+        file_obj.st_gid = _gid
+
+    _cephadm = import_cephadm()
+    with mock.patch('os.fchown', side_effect=fchown), \
+         mock.patch('os.fchmod'), \
+         mock.patch('platform.processor', return_value='x86_64'), \
+         mock.patch('cephadm.extract_uid_gid', return_value=(uid, gid)):
+
+        try:
+            if not fake_filesystem.is_root():
+                fake_filesystem.set_uid(0)
+        except AttributeError:
+            pass
+
+        fs.create_dir(_cephadm.DATA_DIR)
+        fs.create_dir(_cephadm.LOG_DIR)
+        fs.create_dir(_cephadm.LOCK_DIR)
+        fs.create_dir(_cephadm.LOGROTATE_DIR)
+        fs.create_dir(_cephadm.UNIT_DIR)
+        fs.create_dir('/sys/block')
+
+        yield fs
+
+
+@pytest.fixture()
+def host_sysfs(fs: fake_filesystem.FakeFilesystem):
+    """Create a fake filesystem to represent sysfs"""
+    enc_path = '/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0'
+    dev_path = '/sys/class/scsi_generic/sg2/device'
+    slot_count = 12
+    fs.create_dir(dev_path)
+    fs.create_file(os.path.join(dev_path, 'vendor'), contents="EnclosuresInc")
+    fs.create_file(os.path.join(dev_path, 'model'), contents="D12")
+    fs.create_file(os.path.join(enc_path, 'id'), contents='1')
+    fs.create_file(os.path.join(enc_path, 'components'), contents=str(slot_count))
+    for slot_num in range(slot_count):
+        slot_dir = os.path.join(enc_path, str(slot_num))
+        fs.create_file(os.path.join(slot_dir, 'locate'), contents='0')
+        fs.create_file(os.path.join(slot_dir, 'fault'), contents='0')
+        fs.create_file(os.path.join(slot_dir, 'slot'), contents=str(slot_num))
+        if slot_num < 6:
+            fs.create_file(os.path.join(slot_dir, 'status'), contents='Ok')
+            slot_dev = os.path.join(slot_dir, 'device')
+            fs.create_dir(slot_dev)
+            fs.create_file(os.path.join(slot_dev, 'vpd_pg80'), contents=f'fake{slot_num:0>3}')
+        else:
+            fs.create_file(os.path.join(slot_dir, 'status'), contents='not installed')
+
+    yield fs
+
+
+@contextmanager
+def with_cephadm_ctx(
+    cmd: List[str],
+    list_networks: Optional[Dict[str, Dict[str, List[str]]]] = None,
+    hostname: Optional[str] = None,
+):
+    """
+    :param cmd: cephadm command argv
+    :param list_networks: mock 'list-networks' return
+    :param hostname: mock 'socket.gethostname' return
+    """
+    if not hostname:
+        hostname = 'host1'
+
+    _cephadm = import_cephadm()
+    with mock.patch('cephadm.attempt_bind'), \
+         mock.patch('cephadm.call', return_value=('', '', 0)), \
+         mock.patch('cephadm.call_timeout', return_value=0), \
+         mock.patch('cephadm.find_executable', return_value='foo'), \
+         mock.patch('cephadm.get_container_info', return_value=None), \
+         mock.patch('cephadm.is_available', return_value=True), \
+         mock.patch('cephadm.json_loads_retry', return_value={'epoch' : 1}), \
+         mock.patch('cephadm.logger'), \
+         mock.patch('socket.gethostname', return_value=hostname):
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(cmd)
+        ctx.container_engine = mock_podman()
+        if list_networks is not None:
+            with mock.patch('cephadm.list_networks', return_value=list_networks):
+                yield ctx
+        else:
+            yield ctx
+
diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py
new file mode 100644
index 000000000..f9cf201e2
--- /dev/null
+++ b/src/cephadm/tests/test_agent.py
@@ -0,0 +1,800 @@
+from unittest import mock
+import copy, datetime, json, os, socket, threading
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+from typing import Optional
+
+_cephadm = import_cephadm()
+
+
+FSID = "beefbeef-beef-beef-1234-beefbeefbeef"
+AGENT_ID = 'host1'
+AGENT_DIR = f'/var/lib/ceph/{FSID}/agent.{AGENT_ID}'
+
+
+def test_agent_validate():
+    required_files = _cephadm.CephadmAgent.required_files
+    with with_cephadm_ctx([]) as ctx:
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        for i in range(len(required_files)):
+            incomplete_files = {s: 'text' for s in [f for j, f in enumerate(required_files) if j != i]}
+            with pytest.raises(_cephadm.Error, match=f'required file missing from config: {required_files[i]}'):
+                agent.validate(incomplete_files)
+        all_files = {s: 'text' for s in required_files}
+        agent.validate(all_files)
+
+
+def _check_file(path, content):
+    assert os.path.exists(path)
+    with open(path) as f:
+        fcontent = f.read()
+        assert fcontent == content
+
+
+@mock.patch('cephadm.call_throws')
+def test_agent_deploy_daemon_unit(_call_throws, cephadm_fs):
+    _call_throws.return_value = ('', '', 0)
+    agent_id = AGENT_ID
+
+    with with_cephadm_ctx([]) as ctx:
+        ctx.meta_json = json.dumps({'meta': 'data'})
+        agent = _cephadm.CephadmAgent(ctx, FSID, agent_id)
+        cephadm_fs.create_dir(AGENT_DIR)
+
+        with pytest.raises(_cephadm.Error, match='Agent needs a config'):
+            agent.deploy_daemon_unit()
+
+        config = {s: f'text for {s}' for s in _cephadm.CephadmAgent.required_files}
+        config['not-required-file.txt'] = 'don\'t write me'
+
+        agent.deploy_daemon_unit(config)
+
+        # check required config file were all created
+        for fname in _cephadm.CephadmAgent.required_files:
+            _check_file(f'{AGENT_DIR}/{fname}', f'text for {fname}')
+
+        # assert non-required file was not written
+        assert not os.path.exists(f'{AGENT_DIR}/not-required-file.txt')
+
+        # check unit.run file was created correctly
+        _check_file(f'{AGENT_DIR}/unit.run', agent.unit_run())
+
+        # check unit.meta file created correctly
+        _check_file(f'{AGENT_DIR}/unit.meta', json.dumps({'meta': 'data'}, indent=4) + '\n')
+
+        # check unit file was created correctly
+        _check_file(f'{ctx.unit_dir}/{agent.unit_name()}', agent.unit_file())
+
+        expected_call_throws_calls = [
+            mock.call(ctx, ['systemctl', 'daemon-reload']),
+            mock.call(ctx, ['systemctl', 'enable', '--now', agent.unit_name()]),
+        ]
+        _call_throws.assert_has_calls(expected_call_throws_calls)
+
+        expected_call_calls = [
+            mock.call(ctx, ['systemctl', 'stop', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
+            mock.call(ctx, ['systemctl', 'reset-failed', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
+        ]
+        _cephadm.call.assert_has_calls(expected_call_calls)
+
+
+@mock.patch('threading.Thread.is_alive')
+def test_agent_shutdown(_is_alive):
+    with with_cephadm_ctx([]) as ctx:
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        _is_alive.return_value = True
+        assert agent.stop == False
+        assert agent.mgr_listener.stop == False
+        assert agent.ls_gatherer.stop == False
+        assert agent.volume_gatherer.stop == False
+        agent.shutdown()
+        assert agent.stop == True
+        assert agent.mgr_listener.stop == True
+        assert agent.ls_gatherer.stop == True
+        assert agent.volume_gatherer.stop == True
+
+
+def test_agent_wakeup():
+    with with_cephadm_ctx([]) as ctx:
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        assert agent.event.is_set() == False
+        agent.wakeup()
+        assert agent.event.is_set() == True
+
+
+@mock.patch("cephadm.CephadmAgent.shutdown")
+@mock.patch("cephadm.AgentGatherer.update_func")
+def test_pull_conf_settings(_update_func, _shutdown, cephadm_fs):
+    target_ip = '192.168.0.0'
+    target_port = 9876
+    refresh_period = 20
+    listener_port = 5678
+    host = AGENT_ID
+    device_enhanced_scan = 'True'
+    with with_cephadm_ctx([]) as ctx:
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        full_config = {
+            'target_ip': target_ip,
+            'target_port': target_port,
+            'refresh_period': refresh_period,
+            'listener_port': listener_port,
+            'host': host,
+            'device_enhanced_scan': device_enhanced_scan
+        }
+        cephadm_fs.create_dir(AGENT_DIR)
+        with open(agent.config_path, 'w') as f:
+            f.write(json.dumps(full_config))
+
+        with pytest.raises(_cephadm.Error, match="Failed to get agent keyring:"):
+            agent.pull_conf_settings()
+        _shutdown.assert_called()
+        with open(agent.keyring_path, 'w') as f:
+            f.write('keyring')
+
+        assert agent.device_enhanced_scan == False
+        agent.pull_conf_settings()
+        assert agent.host == host
+        assert agent.target_ip == target_ip
+        assert agent.target_port == target_port
+        assert agent.loop_interval == refresh_period
+        assert agent.starting_port == listener_port
+        assert agent.device_enhanced_scan == True
+        assert agent.keyring == 'keyring'
+        _update_func.assert_called()
+
+        full_config.pop('target_ip')
+        with open(agent.config_path, 'w') as f:
+            f.write(json.dumps(full_config))
+        with pytest.raises(_cephadm.Error, match="Failed to get agent target ip and port from config:"):
+            agent.pull_conf_settings()
+
+
+@mock.patch("cephadm.command_ceph_volume")
+def test_agent_ceph_volume(_ceph_volume):
+
+    def _ceph_volume_outputter(_):
+        print("ceph-volume output")
+
+    def _ceph_volume_empty(_):
+        pass
+
+    with with_cephadm_ctx([]) as ctx:
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+
+        _ceph_volume.side_effect = _ceph_volume_outputter
+        out, _ = agent._ceph_volume(False)
+        assert ctx.command == ['inventory', '--format=json']
+        assert out == "ceph-volume output\n"
+
+        out, _ = agent._ceph_volume(True)
+        assert ctx.command == ['inventory', '--format=json', '--with-lsm']
+        assert out == "ceph-volume output\n"
+
+        _ceph_volume.side_effect = _ceph_volume_empty
+        with pytest.raises(Exception, match='ceph-volume returned empty value'):
+            out, _ = agent._ceph_volume(False)
+
+
+def test_agent_daemon_ls_subset(cephadm_fs):
+    # Basing part of this test on some actual sample output
+
+    # Some sample "podman stats --format '{{.ID}},{{.MemUsage}}' --no-stream" output
+    # 3f2b31d19ecd,456.4MB / 41.96GB
+    # 5aca2499e0f8,7.082MB / 41.96GB
+    # fe0cef07d5f7,35.91MB / 41.96GB
+
+    # Sample "podman ps --format '{{.ID}},{{.Names}}' --no-trunc" output with the same containers
+    # fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mon-host1
+    # 3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mgr-host1-pntmho
+    # 5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-crash-host1
+
+    # Some of the components from that output
+    mgr_cid = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f'
+    mon_cid = 'fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30'
+    crash_cid = '5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a'
+    mgr_short_cid = mgr_cid[0:12]
+    mon_short_cid = mon_cid[0:12]
+    crash_short_cid = crash_cid[0:12]
+
+    #Rebuilding the output but with our testing FSID and components (to allow alteration later for whatever reason)
+    mem_out = f"""{mgr_short_cid},456.4MB / 41.96GB
+{crash_short_cid},7.082MB / 41.96GB
+{mon_short_cid},35.91MB / 41.96GB"""
+
+    ps_out = f"""{mon_cid},ceph-{FSID}-mon-host1
+{mgr_cid},ceph-{FSID}-mgr-host1-pntmho
+{crash_cid},ceph-{FSID}-crash-host1"""
+
+    def _fake_call(ctx, cmd, desc=None, verbosity=_cephadm.CallVerbosity.VERBOSE_ON_FAILURE, timeout=_cephadm.DEFAULT_TIMEOUT, **kwargs):
+        if 'stats' in cmd:
+            return (mem_out, '', 0)
+        elif 'ps' in cmd:
+            return (ps_out, '', 0)
+        return ('out', 'err', 0)
+
+    cephadm_fs.create_dir(AGENT_DIR)
+    cephadm_fs.create_dir(f'/var/lib/ceph/mon/ceph-host1')  # legacy daemon
+    cephadm_fs.create_dir(f'/var/lib/ceph/osd/nothing')  # improper directory, should be skipped
+    cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/mgr.host1.pntmho')  # cephadm daemon
+    cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/crash.host1')  # cephadm daemon
+
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        _cephadm.call.side_effect = _fake_call
+        daemons = agent._daemon_ls_subset()
+
+        assert 'agent.host1' in daemons
+        assert 'mgr.host1.pntmho' in daemons
+        assert 'crash.host1' in daemons
+        assert 'mon.host1' in daemons
+
+        assert daemons['mon.host1']['style'] == 'legacy'
+        assert daemons['mgr.host1.pntmho']['style'] == 'cephadm:v1'
+        assert daemons['crash.host1']['style'] == 'cephadm:v1'
+        assert daemons['agent.host1']['style'] == 'cephadm:v1'
+
+        assert daemons['mgr.host1.pntmho']['systemd_unit'] == f'ceph-{FSID}@mgr.host1.pntmho'
+        assert daemons['agent.host1']['systemd_unit'] == f'ceph-{FSID}@agent.host1'
+        assert daemons['crash.host1']['systemd_unit'] == f'ceph-{FSID}@crash.host1'
+
+        assert daemons['mgr.host1.pntmho']['container_id'] == mgr_cid
+        assert daemons['crash.host1']['container_id'] == crash_cid
+
+        assert daemons['mgr.host1.pntmho']['memory_usage'] == 478570086  # 456.4 MB
+        assert daemons['crash.host1']['memory_usage'] == 7426015  # 7.082 MB
+
+
+@mock.patch("cephadm.list_daemons")
+@mock.patch("cephadm.CephadmAgent._daemon_ls_subset")
+def test_agent_get_ls(_ls_subset, _ls, cephadm_fs):
+    ls_out = [{
+        "style": "cephadm:v1",
+        "name": "mgr.host1.pntmho",
+        "fsid": FSID,
+        "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho",
+        "enabled": True,
+        "state": "running",
+        "service_name": "mgr",
+        "memory_request": None,
+        "memory_limit": None,
+        "ports": [
+            9283,
+            8765
+        ],
+        "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f",
+        "container_image_name": "quay.io/ceph/ceph:testing",
+        "container_image_id": "3300e39269f0c13ae45026cf233d8b3fff1303d52f2598a69c7fba0bb8405164",
+        "container_image_digests": [
+            "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30"
+        ],
+        "memory_usage": 507510784,
+        "cpu_percentage": "5.95%",
+        "version": "18.0.0-556-gb4d1a199",
+        "started": "2022-10-27T14:19:36.086664Z",
+        "created": "2022-10-27T14:19:36.282281Z",
+        "deployed": "2022-10-27T14:19:35.377275Z",
+        "configured": "2022-10-27T14:22:40.316912Z"
+    },{
+        "style": "cephadm:v1",
+        "name": "agent.host1",
+        "fsid": FSID,
+        "systemd_unit": f"ceph-{FSID}@agent.host1",
+        "enabled": True,
+        "state": "running",
+        "service_name": "agent",
+        "ports": [],
+        "ip": None,
+        "deployed_by": [
+            "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30"
+        ],
+        "rank": None,
+        "rank_generation": None,
+        "extra_container_args": None,
+        "container_id": None,
+        "container_image_name": None,
+        "container_image_id": None,
+        "container_image_digests": None,
+        "version": None,
+        "started": None,
+        "created": "2022-10-27T19:46:49.751594Z",
+        "deployed": None,
+        "configured": "2022-10-27T19:46:49.751594Z"
+    }, {
+        "style": "legacy",
+        "name": "mon.host1",
+        "fsid": FSID,
+        "systemd_unit": "ceph-mon@host1",
+        "enabled": False,
+        "state": "stopped",
+        "host_version": None
+    }]
+
+    ls_subset_out = {
+    'mgr.host1.pntmho': {
+        "style": "cephadm:v1",
+        "fsid": FSID,
+        "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho",
+        "enabled": True,
+        "state": "running",
+        "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f",
+        "memory_usage": 507510784,
+    },
+    'agent.host1': {
+        "style": "cephadm:v1",
+        "fsid": FSID,
+        "systemd_unit": f"ceph-{FSID}@agent.host1",
+        "enabled": True,
+        "state": "running",
+        "container_id": None
+    }, 'mon.host1': {
+        "style": "legacy",
+        "name": "mon.host1",
+        "fsid": FSID,
+        "systemd_unit": "ceph-mon@host1",
+        "enabled": False,
+        "state": "stopped",
+        "host_version": None
+    }}
+
+    _ls.return_value = ls_out
+    _ls_subset.return_value = ls_subset_out
+
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+
+        # first pass, no cached daemon metadata
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out
+        assert changed
+
+        # second pass, should recognize that daemons have not changed and just keep cached values
+        daemons, changed = agent._get_ls()
+        assert daemons == daemons
+        assert not changed
+
+        # change a container id so it needs to get more info
+        ls_subset_out2 = copy.deepcopy(ls_subset_out)
+        ls_out2 = copy.deepcopy(ls_out)
+        ls_subset_out2['mgr.host1.pntmho']['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa'
+        ls_out2[0]['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa'
+        _ls.return_value = ls_out2
+        _ls_subset.return_value = ls_subset_out2
+        assert agent.cached_ls_values['mgr.host1.pntmho']['container_id'] == "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f"
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out2
+        assert changed
+
+        # run again with the same data so it should use cached values
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out2
+        assert not changed
+
+        # change the state of a container so new daemon metadata is needed
+        ls_subset_out3 = copy.deepcopy(ls_subset_out2)
+        ls_out3 = copy.deepcopy(ls_out2)
+        ls_subset_out3['mgr.host1.pntmho']['enabled'] = False
+        ls_out3[0]['enabled'] = False
+        _ls.return_value = ls_out3
+        _ls_subset.return_value = ls_subset_out3
+        assert agent.cached_ls_values['mgr.host1.pntmho']['enabled'] == True
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out3
+        assert changed
+
+        # run again with the same data so it should use cached values
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out3
+        assert not changed
+
+        # remove a daemon so new metadats is needed
+        ls_subset_out4 = copy.deepcopy(ls_subset_out3)
+        ls_out4 = copy.deepcopy(ls_out3)
+        ls_subset_out4.pop('mon.host1')
+        ls_out4.pop()
+        _ls.return_value = ls_out4
+        _ls_subset.return_value = ls_subset_out4
+        assert 'mon.host1' in agent.cached_ls_values
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out4
+        assert changed
+
+        # run again with the same data so it should use cached values
+        daemons, changed = agent._get_ls()
+        assert daemons == ls_out4
+        assert not changed
+
+
+@mock.patch("threading.Event.clear")
+@mock.patch("threading.Event.wait")
+@mock.patch("urllib.request.Request.__init__")
+@mock.patch("cephadm.urlopen")
+@mock.patch("cephadm.list_networks")
+@mock.patch("cephadm.HostFacts.dump")
+@mock.patch("cephadm.HostFacts.__init__", lambda _, __: None)
+@mock.patch("ssl.SSLContext.load_verify_locations")
+@mock.patch("threading.Thread.is_alive")
+@mock.patch("cephadm.MgrListener.start")
+@mock.patch("cephadm.AgentGatherer.start")
+@mock.patch("cephadm.port_in_use")
+@mock.patch("cephadm.CephadmAgent.pull_conf_settings")
+def test_agent_run(_pull_conf_settings, _port_in_use, _gatherer_start,
+                   _listener_start, _is_alive, _load_verify_locations,
+                    _HF_dump, _list_networks, _urlopen, _RQ_init, _wait, _clear):
+    target_ip = '192.168.0.0'
+    target_port = '9999'
+    refresh_period = 20
+    listener_port = 7770
+    open_listener_port = 7777
+    host = AGENT_ID
+    device_enhanced_scan = False
+
+    def _fake_port_in_use(ctx, endpoint):
+        if endpoint.port == open_listener_port:
+            return False
+        return True
+
+    network_data: Dict[str, Dict[str, Set[str]]] = {
+        "10.2.1.0/24": {
+            "eth1": set(["10.2.1.122"])
+        },
+        "192.168.122.0/24": {
+            "eth0": set(["192.168.122.221"])
+        },
+        "fe80::/64": {
+            "eth0": set(["fe80::5054:ff:fe3f:d94e"]),
+            "eth1": set(["fe80::5054:ff:fe3f:aa4a"]),
+        }
+    }
+
+    # the json serializable version of the networks data
+    # we expect the agent to actually send
+    network_data_no_sets: Dict[str, Dict[str, List[str]]] = {
+        "10.2.1.0/24": {
+            "eth1": ["10.2.1.122"]
+        },
+        "192.168.122.0/24": {
+            "eth0": ["192.168.122.221"]
+        },
+        "fe80::/64": {
+            "eth0": ["fe80::5054:ff:fe3f:d94e"],
+            "eth1": ["fe80::5054:ff:fe3f:aa4a"],
+        }
+    }
+
+    class FakeHTTPResponse():
+        def __init__(self):
+            pass
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, type, value, tb):
+            pass
+
+        def read(self):
+            return json.dumps({'valid': 'output', 'result': '400'})
+
+    _port_in_use.side_effect = _fake_port_in_use
+    _is_alive.return_value = False
+    _HF_dump.return_value = 'Host Facts'
+    _list_networks.return_value = network_data
+    _urlopen.side_effect = lambda *args, **kwargs: FakeHTTPResponse()
+    _RQ_init.side_effect = lambda *args, **kwargs: None
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        agent.keyring = 'agent keyring'
+        agent.ack = 7
+        agent.volume_gatherer.ack = 7
+        agent.volume_gatherer.data = 'ceph-volume inventory data'
+        agent.ls_gatherer.ack = 7
+        agent.ls_gatherer.data = [{'valid_daemon': 'valid_metadata'}]
+
+        def _set_conf():
+            agent.target_ip = target_ip
+            agent.target_port = target_port
+            agent.loop_interval = refresh_period
+            agent.starting_port = listener_port
+            agent.host = host
+            agent.device_enhanced_scan = device_enhanced_scan
+        _pull_conf_settings.side_effect = _set_conf
+
+        # technically the run function loops forever unless the agent
+        # is told to stop. To get around that we're going to have the
+        # event.wait() (which happens at the end of the loop) to throw
+        # a special exception type. If we catch this exception we can
+        # consider it as being a "success" run
+        class EventCleared(Exception):
+            pass
+
+        _clear.side_effect = EventCleared('SUCCESS')
+        with pytest.raises(EventCleared, match='SUCCESS'):
+            agent.run()
+
+        expected_data = {
+           'host': host,
+           'ls': [{'valid_daemon': 'valid_metadata'}],
+           'networks': network_data_no_sets,
+           'facts': 'Host Facts',
+           'volume': 'ceph-volume inventory data',
+           'ack': str(7),
+           'keyring': 'agent keyring',
+           'port': str(open_listener_port)
+        }
+        _RQ_init.assert_called_with(
+            f'https://{target_ip}:{target_port}/data/',
+            json.dumps(expected_data).encode('ascii'),
+            {'Content-Type': 'application/json'}
+        )
+        _listener_start.assert_called()
+        _gatherer_start.assert_called()
+        _urlopen.assert_called()
+
+        # agent should not go down if connections fail
+        _urlopen.side_effect = Exception()
+        with pytest.raises(EventCleared, match='SUCCESS'):
+            agent.run()
+
+        # should fail if no ports are open for listener
+        _port_in_use.side_effect = lambda _, __: True
+        agent.listener_port = None
+        with pytest.raises(Exception, match='Failed to pick port for agent to listen on: All 1000 ports starting at 7770 taken.'):
+            agent.run()
+
+
+@mock.patch("cephadm.CephadmAgent.pull_conf_settings")
+@mock.patch("cephadm.CephadmAgent.wakeup")
+def test_mgr_listener_handle_json_payload(_agent_wakeup, _pull_conf_settings, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        cephadm_fs.create_dir(AGENT_DIR)
+
+        data_no_config = {
+            'counter': 7
+        }
+        agent.mgr_listener.handle_json_payload(data_no_config)
+        _agent_wakeup.assert_not_called()
+        _pull_conf_settings.assert_not_called()
+        assert not any(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files)
+
+        data_with_config = {
+            'counter': 7,
+            'config': {
+                'unrequired-file': 'unrequired-text'
+            }
+        }
+        data_with_config['config'].update({s: f'{s} text' for s in agent.required_files if s != agent.required_files[2]})
+        agent.mgr_listener.handle_json_payload(data_with_config)
+        _agent_wakeup.assert_called()
+        _pull_conf_settings.assert_called()
+        assert all(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files if s != agent.required_files[2])
+        assert not os.path.exists(os.path.join(AGENT_DIR, agent.required_files[2]))
+        assert not os.path.exists(os.path.join(AGENT_DIR, 'unrequired-file'))
+
+
+@mock.patch("socket.socket")
+@mock.patch("ssl.SSLContext.wrap_socket")
+@mock.patch("cephadm.MgrListener.handle_json_payload")
+@mock.patch("ssl.SSLContext.load_verify_locations")
+@mock.patch("ssl.SSLContext.load_cert_chain")
+def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json_payload,
+                          _wrap_context, _socket, cephadm_fs):
+
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        cephadm_fs.create_dir(AGENT_DIR)
+
+        payload = json.dumps({'counter': 3,
+                              'config': {s: f'{s} text' for s in agent.required_files if s != agent.required_files[1]}})
+
+        class FakeSocket:
+
+            def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, fileno=None):
+                self.family = family
+                self.type = type
+
+            def bind(*args, **kwargs):
+                return
+
+            def settimeout(*args, **kwargs):
+                return
+
+            def listen(*args, **kwargs):
+                return
+
+        class FakeSecureSocket:
+
+            def __init__(self, pload):
+                self.payload = pload
+                self._conn = FakeConn(self.payload)
+                self.accepted = False
+
+            def accept(self):
+                # to make mgr listener run loop stop running,
+                # set it to stop after accepting a "connection"
+                # on our fake socket so only one iteration of the loop
+                # actually happens
+                agent.mgr_listener.stop = True
+                accepted = True
+                return self._conn, None
+
+            def load_cert_chain(*args, **kwargs):
+                return
+
+            def load_verify_locations(*args, **kwargs):
+                return
+
+        class FakeConn:
+
+            def __init__(self, payload: str = ''):
+                payload_len_str = str(len(payload.encode('utf-8')))
+                while len(payload_len_str.encode('utf-8')) < 10:
+                    payload_len_str = '0' + payload_len_str
+                self.payload = (payload_len_str + payload).encode('utf-8')
+                self.buffer_len = len(self.payload)
+
+            def recv(self, len: Optional[int] = None):
+                if not len or len >= self.buffer_len:
+                    ret = self.payload
+                    self.payload = b''
+                    self.buffer_len = 0
+                    return ret
+                else:
+                    ret = self.payload[:len]
+                    self.payload = self.payload[len:]
+                    self.buffer_len = self.buffer_len - len
+                    return ret
+
+        FSS_good_data = FakeSecureSocket(payload)
+        FSS_bad_json = FakeSecureSocket('bad json')
+        _socket = FakeSocket
+        agent.listener_port = 7777
+
+        # first run, should successfully receive properly structured json payload
+        _wrap_context.side_effect = [FSS_good_data]
+        agent.mgr_listener.stop = False
+        FakeConn.send = mock.Mock(return_value=None)
+        agent.mgr_listener.run()
+
+        # verify payload was correctly extracted
+        assert _handle_json_payload.called_with(json.loads(payload))
+        FakeConn.send.assert_called_once_with(b'ACK')
+
+        # second run, with bad json data received
+        _wrap_context.side_effect = [FSS_bad_json]
+        agent.mgr_listener.stop = False
+        FakeConn.send = mock.Mock(return_value=None)
+        agent.mgr_listener.run()
+        FakeConn.send.assert_called_once_with(b'Failed to extract json payload from message: Expecting value: line 1 column 1 (char 0)')
+
+        # third run, no proper length as beginning og payload
+        FSS_no_length = FakeSecureSocket(payload)
+        FSS_no_length.payload = FSS_no_length.payload[10:]
+        FSS_no_length._conn.payload = FSS_no_length._conn.payload[10:]
+        FSS_no_length._conn.buffer_len -= 10
+        _wrap_context.side_effect = [FSS_no_length]
+        agent.mgr_listener.stop = False
+        FakeConn.send = mock.Mock(return_value=None)
+        agent.mgr_listener.run()
+        FakeConn.send.assert_called_once_with(b'Failed to extract length of payload from message: invalid literal for int() with base 10: \'{"counter"\'')
+
+        # some exception handling for full coverage
+        FSS_exc_testing = FakeSecureSocket(payload)
+        FSS_exc_testing.accept = mock.MagicMock()
+
+        def _accept(*args, **kwargs):
+            if not FSS_exc_testing.accepted:
+                FSS_exc_testing.accepted = True
+                raise socket.timeout()
+            else:
+                agent.mgr_listener.stop = True
+                raise Exception()
+
+        FSS_exc_testing.accept.side_effect = _accept
+        _wrap_context.side_effect = [FSS_exc_testing]
+        agent.mgr_listener.stop = False
+        FakeConn.send = mock.Mock(return_value=None)
+        agent.mgr_listener.run()
+        FakeConn.send.assert_not_called()
+        FSS_exc_testing.accept.call_count == 3
+
+
+@mock.patch("cephadm.CephadmAgent._get_ls")
+def test_gatherer_update_func(_get_ls, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        cephadm_fs.create_dir(AGENT_DIR)
+
+        def _sample_func():
+            return 7
+
+        agent.ls_gatherer.func()
+        _get_ls.assert_called()
+
+        _get_ls = mock.MagicMock()
+        agent.ls_gatherer.update_func(_sample_func)
+        out = agent.ls_gatherer.func()
+        assert out == 7
+        _get_ls.assert_not_called()
+
+
+@mock.patch("cephadm.CephadmAgent.wakeup")
+@mock.patch("time.monotonic")
+@mock.patch("threading.Event.wait")
+def test_gatherer_run(_wait, _time, _agent_wakeup, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+        cephadm_fs.create_dir(AGENT_DIR)
+        agent.loop_interval = 30
+        agent.ack = 23
+
+        _sample_func = lambda *args, **kwargs: ('sample out', True)
+        agent.ls_gatherer.update_func(_sample_func)
+        agent.ls_gatherer.ack = 20
+        agent.ls_gatherer.stop = False
+
+        def _fake_clear(*args, **kwargs):
+            agent.ls_gatherer.stop = True
+
+        _time.side_effect = [0, 20, 0, 20, 0, 20]  # start at time 0, complete at time 20
+        _wait.return_value = None
+
+        with mock.patch("threading.Event.clear") as _clear:
+            _clear.side_effect = _fake_clear
+            agent.ls_gatherer.run()
+
+            _wait.assert_called_with(10)  # agent loop_interval - run time
+            assert agent.ls_gatherer.data == 'sample out'
+            assert agent.ls_gatherer.ack == 23
+            _agent_wakeup.assert_called_once()
+            _clear.assert_called_once()
+
+        _exc_func = lambda *args, **kwargs: Exception()
+        agent.ls_gatherer.update_func(_exc_func)
+        agent.ls_gatherer.ack = 20
+        agent.ls_gatherer.stop = False
+
+        with mock.patch("threading.Event.clear") as _clear:
+            _clear.side_effect = _fake_clear
+            agent.ls_gatherer.run()
+            assert agent.ls_gatherer.data is None
+            assert agent.ls_gatherer.ack == agent.ack
+            # should have run full loop despite exception
+            _clear.assert_called_once()
+
+        # test general exception for full coverage
+        _agent_wakeup.side_effect = [Exception()]
+        agent.ls_gatherer.update_func(_sample_func)
+        agent.ls_gatherer.stop = False
+        # just to force only one iteration
+        _time.side_effect = _fake_clear
+        with mock.patch("threading.Event.clear") as _clear:
+            _clear.side_effect = Exception()
+            agent.ls_gatherer.run()
+            assert agent.ls_gatherer.data == 'sample out'
+            assert agent.ls_gatherer.ack == agent.ack
+            # should not have gotten to end of loop
+            _clear.assert_not_called()
+
+
+@mock.patch("cephadm.CephadmAgent.run")
+def test_command_agent(_agent_run, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        ctx.fsid = FSID
+        ctx.daemon_id = AGENT_ID
+
+        with pytest.raises(Exception, match=f"Agent daemon directory {AGENT_DIR} does not exist. Perhaps agent was never deployed?"):
+            _cephadm.command_agent(ctx)
+
+        cephadm_fs.create_dir(AGENT_DIR)
+        _cephadm.command_agent(ctx)
+        _agent_run.assert_called()
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
new file mode 100644
index 000000000..d310215f6
--- /dev/null
+++ b/src/cephadm/tests/test_cephadm.py
@@ -0,0 +1,2708 @@
+# type: ignore
+
+import errno
+import json
+import mock
+import os
+import pytest
+import socket
+import unittest
+from textwrap import dedent
+
+from .fixtures import (
+    cephadm_fs,
+    mock_docker,
+    mock_podman,
+    with_cephadm_ctx,
+    mock_bad_firewalld,
+    import_cephadm,
+)
+
+from pyfakefs import fake_filesystem
+from pyfakefs import fake_filesystem_unittest
+
+_cephadm = import_cephadm()
+
+
+def get_ceph_conf(
+        fsid='00000000-0000-0000-0000-0000deadbeef',
+        mon_host='[v2:192.168.1.1:3300/0,v1:192.168.1.1:6789/0]'):
+    return f'''
+# minimal ceph.conf for {fsid}
+[global]
+        fsid = {fsid}
+        mon_host = {mon_host}
+'''
+
+class TestCephAdm(object):
+
+    def test_docker_unit_file(self):
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_docker()
+        r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
+        assert 'Requires=docker.service' in r
+        ctx.container_engine = mock_podman()
+        r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
+        assert 'Requires=docker.service' not in r
+
+    @mock.patch('cephadm.logger')
+    def test_attempt_bind(self, _logger):
+        ctx = None
+        address = None
+        port = 0
+
+        def os_error(errno):
+            _os_error = OSError()
+            _os_error.errno = errno
+            return _os_error
+
+        for side_effect, expected_exception in (
+            (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError),
+            (os_error(errno.EAFNOSUPPORT), OSError),
+            (os_error(errno.EADDRNOTAVAIL), OSError),
+            (None, None),
+        ):
+            _socket = mock.Mock()
+            _socket.bind.side_effect = side_effect
+            try:
+                _cephadm.attempt_bind(ctx, _socket, address, port)
+            except Exception as e:
+                assert isinstance(e, expected_exception)
+            else:
+                if expected_exception is not None:
+                    assert False
+
+    @mock.patch('cephadm.attempt_bind')
+    @mock.patch('cephadm.logger')
+    def test_port_in_use(self, _logger, _attempt_bind):
+        empty_ctx = None
+
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+        _attempt_bind.side_effect = _cephadm.PortOccupiedError('msg')
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == True
+
+        os_error = OSError()
+        os_error.errno = errno.EADDRNOTAVAIL
+        _attempt_bind.side_effect = os_error
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+        os_error = OSError()
+        os_error.errno = errno.EAFNOSUPPORT
+        _attempt_bind.side_effect = os_error
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+    @mock.patch('cephadm.socket.socket.bind')
+    @mock.patch('cephadm.logger')
+    def test_port_in_use_special_cases(self, _logger, _bind):
+        # port_in_use has special handling for
+        # EAFNOSUPPORT and EADDRNOTAVAIL errno OSErrors.
+        # If we get those specific errors when attempting
+        # to bind to the ip:port we should not say the
+        # port is in use
+
+        def os_error(errno):
+            _os_error = OSError()
+            _os_error.errno = errno
+            return _os_error
+
+        _bind.side_effect = os_error(errno.EADDRNOTAVAIL)
+        in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+        assert in_use == False
+
+        _bind.side_effect = os_error(errno.EAFNOSUPPORT)
+        in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+        assert in_use == False
+
+        # this time, have it raise the actual port taken error
+        # so it should report the port is in use
+        _bind.side_effect = os_error(errno.EADDRINUSE)
+        in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+        assert in_use == True
+
+    @mock.patch('cephadm.attempt_bind')
+    @mock.patch('cephadm.logger')
+    def test_port_in_use_with_specific_ips(self, _logger, _attempt_bind):
+        empty_ctx = None
+
+        def _fake_attempt_bind(ctx, s: socket.socket, addr: str, port: int) -> None:
+            occupied_error = _cephadm.PortOccupiedError('msg')
+            if addr.startswith('200'):
+                raise occupied_error
+            if addr.startswith('100'):
+                if port == 4567:
+                    raise occupied_error
+
+        _attempt_bind.side_effect = _fake_attempt_bind
+
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('200.0.0.0', 9100)) == True
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 9100)) == False
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 4567)) == True
+        assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('155.0.0.0', 4567)) == False
+
+    @mock.patch('socket.socket')
+    @mock.patch('cephadm.logger')
+    def test_check_ip_port_success(self, _logger, _socket):
+        ctx = _cephadm.CephadmContext()
+        ctx.skip_ping_check = False  # enables executing port check with `check_ip_port`
+
+        for address, address_family in (
+            ('0.0.0.0', socket.AF_INET),
+            ('::', socket.AF_INET6),
+        ):
+            try:
+                _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 9100))
+            except:
+                assert False
+            else:
+                assert _socket.call_args == mock.call(address_family, socket.SOCK_STREAM)
+
+    @mock.patch('socket.socket')
+    @mock.patch('cephadm.logger')
+    def test_check_ip_port_failure(self, _logger, _socket):
+        ctx = _cephadm.CephadmContext()
+        ctx.skip_ping_check = False  # enables executing port check with `check_ip_port`
+
+        def os_error(errno):
+            _os_error = OSError()
+            _os_error.errno = errno
+            return _os_error
+
+        for address, address_family in (
+            ('0.0.0.0', socket.AF_INET),
+            ('::', socket.AF_INET6),
+        ):
+            for side_effect, expected_exception in (
+                (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError),
+                (os_error(errno.EADDRNOTAVAIL), OSError),
+                (os_error(errno.EAFNOSUPPORT), OSError),
+                (None, None),
+            ):
+                mock_socket_obj = mock.Mock()
+                mock_socket_obj.bind.side_effect = side_effect
+                _socket.return_value = mock_socket_obj
+                try:
+                    _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 9100))
+                except Exception as e:
+                    assert isinstance(e, expected_exception)
+                else:
+                    if side_effect is not None:
+                        assert False
+
+
+    def test_is_not_fsid(self):
+        assert not _cephadm.is_fsid('no-uuid')
+
+    def test_is_fsid(self):
+        assert _cephadm.is_fsid('e863154d-33c7-4350-bca5-921e0467e55b')
+
+    def test__get_parser_image(self):
+        args = _cephadm._parse_args(['--image', 'foo', 'version'])
+        assert args.image == 'foo'
+
+    def test_check_required_global_args(self):
+        ctx = _cephadm.CephadmContext()
+        mock_fn = mock.Mock()
+        mock_fn.return_value = 0
+        require_image = _cephadm.require_image(mock_fn)
+
+        with pytest.raises(_cephadm.Error, match='This command requires the global --image option to be set'):
+            require_image(ctx)
+
+        ctx.image = 'sample-image'
+        require_image(ctx)
+
+    @mock.patch('cephadm.logger')
+    def test_parse_mem_usage(self, _logger):
+        len, summary = _cephadm._parse_mem_usage(0, 'c6290e3f1489,-- / --')
+        assert summary == {}
+
+    def test_CustomValidation(self):
+        assert _cephadm._parse_args(['deploy', '--name', 'mon.a', '--fsid', 'fsid'])
+
+        with pytest.raises(SystemExit):
+            _cephadm._parse_args(['deploy', '--name', 'wrong', '--fsid', 'fsid'])
+
+    @pytest.mark.parametrize("test_input, expected", [
+        ("1.6.2", (1,6,2)),
+        ("1.6.2-stable2", (1,6,2)),
+    ])
+    def test_parse_podman_version(self, test_input, expected):
+        assert _cephadm._parse_podman_version(test_input) == expected
+
+    def test_parse_podman_version_invalid(self):
+        with pytest.raises(ValueError) as res:
+            _cephadm._parse_podman_version('inval.id')
+        assert 'inval' in str(res.value)
+
+    @mock.patch('cephadm.logger')
+    def test_is_ipv6(self, _logger):
+        for good in ("[::1]", "::1",
+                     "fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"):
+            assert _cephadm.is_ipv6(good)
+        for bad in ("127.0.0.1",
+                    "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg",
+                    "1:2:3:4:5:6:7:8:9", "fd00::1::1", "[fg::1]"):
+            assert not _cephadm.is_ipv6(bad)
+
+    def test_unwrap_ipv6(self):
+        def unwrap_test(address, expected):
+            assert _cephadm.unwrap_ipv6(address) == expected
+
+        tests = [
+            ('::1', '::1'), ('[::1]', '::1'),
+            ('[fde4:8dba:82e1:0:5054:ff:fe6a:357]', 'fde4:8dba:82e1:0:5054:ff:fe6a:357'),
+            ('can actually be any string', 'can actually be any string'),
+            ('[but needs to be stripped] ', '[but needs to be stripped] ')]
+        for address, expected in tests:
+            unwrap_test(address, expected)
+
+    def test_wrap_ipv6(self):
+        def wrap_test(address, expected):
+            assert _cephadm.wrap_ipv6(address) == expected
+
+        tests = [
+            ('::1', '[::1]'), ('[::1]', '[::1]'),
+            ('fde4:8dba:82e1:0:5054:ff:fe6a:357',
+             '[fde4:8dba:82e1:0:5054:ff:fe6a:357]'),
+            ('myhost.example.com', 'myhost.example.com'),
+            ('192.168.0.1', '192.168.0.1'),
+            ('', ''), ('fd00::1::1', 'fd00::1::1')]
+        for address, expected in tests:
+            wrap_test(address, expected)
+
+    @mock.patch('cephadm.Firewalld', mock_bad_firewalld)
+    @mock.patch('cephadm.logger')
+    def test_skip_firewalld(self, _logger, cephadm_fs):
+        """
+        test --skip-firewalld actually skips changing firewall
+        """
+
+        ctx = _cephadm.CephadmContext()
+        with pytest.raises(Exception):
+            _cephadm.update_firewalld(ctx, 'mon')
+
+        ctx.skip_firewalld = True
+        _cephadm.update_firewalld(ctx, 'mon')
+
+        ctx.skip_firewalld = False
+        with pytest.raises(Exception):
+            _cephadm.update_firewalld(ctx, 'mon')
+
+        ctx = _cephadm.CephadmContext()
+        ctx.ssl_dashboard_port = 8888
+        ctx.dashboard_key = None
+        ctx.dashboard_password_noupdate = True
+        ctx.initial_dashboard_password = 'password'
+        ctx.initial_dashboard_user = 'User'
+        with pytest.raises(Exception):
+            _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
+
+        ctx.skip_firewalld = True
+        _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
+
+        ctx.skip_firewalld = False
+        with pytest.raises(Exception):
+            _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
+
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.fetch_custom_config_files')
+    @mock.patch('cephadm.get_container')
+    def test_get_deployment_container(self, _get_container, _get_config, _logger):
+        """
+        test get_deployment_container properly makes use of extra container args and custom conf files
+        """
+
+        ctx = _cephadm.CephadmContext()
+        ctx.config_json = '-'
+        ctx.extra_container_args = [
+            '--pids-limit=12345',
+            '--something',
+        ]
+        ctx.data_dir = 'data'
+        _get_config.return_value = [
+            {
+                'mount_path': '/etc/testing.str',
+                'content': 'this\nis\na\nstring',
+            }
+        ]
+        _get_container.return_value = _cephadm.CephContainer.for_daemon(
+            ctx,
+            fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+            daemon_type='grafana',
+            daemon_id='host1',
+            entrypoint='',
+            args=[],
+            container_args=[],
+            volume_mounts={},
+            bind_mounts=[],
+            envs=[],
+            privileged=False,
+            ptrace=False,
+            host_network=True,
+        )
+        c = _cephadm.get_deployment_container(ctx,
+                                    '9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+                                    'grafana',
+                                    'host1',)
+
+        assert '--pids-limit=12345' in c.container_args
+        assert '--something' in c.container_args
+        assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts
+        assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str'
+
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.FileLock')
+    @mock.patch('cephadm.deploy_daemon')
+    @mock.patch('cephadm.fetch_configs')
+    @mock.patch('cephadm.make_var_run')
+    @mock.patch('cephadm.migrate_sysctl_dir')
+    @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
+    @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
+    @mock.patch('cephadm.get_deployment_container')
+    @mock.patch('cephadm.read_configuration_source', lambda c: {})
+    @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None)
+    @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: ('ceph', 'ceph'))
+    def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _fetch_configs, _deploy_daemon, _file_lock, _logger):
+        """
+        test that crush location for mon is set if it is included in config_json
+        """
+
+        ctx = _cephadm.CephadmContext()
+        ctx.name = 'mon.test'
+        ctx.fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+        ctx.reconfig = False
+        ctx.container_engine = mock_docker()
+        ctx.allow_ptrace = True
+        ctx.config_json = '-'
+        ctx.osd_fsid = '0'
+        ctx.tcp_ports = '3300 6789'
+        _fetch_configs.return_value = {
+            'crush_location': 'database=a'
+        }
+
+        _get_deployment_container.return_value = _cephadm.CephContainer.for_daemon(
+            ctx,
+            fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+            daemon_type='mon',
+            daemon_id='test',
+            entrypoint='',
+            args=[],
+            container_args=[],
+            volume_mounts={},
+            bind_mounts=[],
+            envs=[],
+            privileged=False,
+            ptrace=False,
+            host_network=True,
+        )
+
+        def _crush_location_checker(ctx, fsid, daemon_type, daemon_id, container, uid, gid, **kwargs):
+            print(container.args)
+            raise Exception(' '.join(container.args))
+
+        _deploy_daemon.side_effect = _crush_location_checker
+
+        with pytest.raises(Exception, match='--set-crush-location database=a'):
+            _cephadm.command_deploy_from(ctx)
+
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.fetch_custom_config_files')
+    def test_write_custom_conf_files(self, _get_config, _logger, cephadm_fs):
+        """
+        test _write_custom_conf_files writes the conf files correctly
+        """
+
+        ctx = _cephadm.CephadmContext()
+        ctx.config_json = '-'
+        ctx.data_dir = _cephadm.DATA_DIR
+        _get_config.return_value = [
+            {
+                'mount_path': '/etc/testing.str',
+                'content': 'this\nis\na\nstring',
+            },
+            {
+                'mount_path': '/etc/testing.conf',
+                'content': 'very_cool_conf_setting: very_cool_conf_value\nx: y',
+            },
+            {
+                'mount_path': '/etc/no-content.conf',
+            },
+        ]
+        _cephadm._write_custom_conf_files(ctx, 'mon', 'host1', 'fsid', 0, 0)
+        with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.str'), 'r') as f:
+            assert 'this\nis\na\nstring' == f.read()
+        with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.conf'), 'r') as f:
+            assert 'very_cool_conf_setting: very_cool_conf_value\nx: y' == f.read()
+        with pytest.raises(FileNotFoundError):
+            open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'no-content.conf'), 'r')
+
+    @mock.patch('cephadm.call_throws')
+    @mock.patch('cephadm.get_parm')
+    @mock.patch('cephadm.logger')
+    def test_registry_login(self, _logger, _get_parm, _call_throws):
+        # test normal valid login with url, username and password specified
+        _call_throws.return_value = '', '', 0
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['registry-login', '--registry-url', 'sample-url',
+            '--registry-username', 'sample-user', '--registry-password',
+            'sample-pass'])
+        ctx.container_engine = mock_docker()
+        retval = _cephadm.command_registry_login(ctx)
+        assert retval == 0
+
+        # test bad login attempt with invalid arguments given
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['registry-login', '--registry-url', 'bad-args-url'])
+        with pytest.raises(Exception) as e:
+            assert _cephadm.command_registry_login(ctx)
+        assert str(e.value) == ('Invalid custom registry arguments received. To login to a custom registry include '
+                                '--registry-url, --registry-username and --registry-password options or --registry-json option')
+
+        # test normal valid login with json file
+        _get_parm.return_value = {"url": "sample-url", "username": "sample-username", "password": "sample-password"}
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['registry-login', '--registry-json', 'sample-json'])
+        ctx.container_engine = mock_docker()
+        retval = _cephadm.command_registry_login(ctx)
+        assert retval == 0
+
+        # test bad login attempt with bad json file
+        _get_parm.return_value = {"bad-json": "bad-json"}
+        ctx: _cephadm.CephadmContext =  _cephadm.cephadm_init_ctx(
+            ['registry-login', '--registry-json', 'sample-json'])
+        with pytest.raises(Exception) as e:
+            assert _cephadm.command_registry_login(ctx)
+        assert str(e.value) == ("json provided for custom registry login did not include all necessary fields. "
+                        "Please setup json file as\n"
+                        "{\n"
+                          " \"url\": \"REGISTRY_URL\",\n"
+                          " \"username\": \"REGISTRY_USERNAME\",\n"
+                          " \"password\": \"REGISTRY_PASSWORD\"\n"
+                        "}\n")
+
+        # test login attempt with valid arguments where login command fails
+        _call_throws.side_effect = Exception
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['registry-login', '--registry-url', 'sample-url',
+            '--registry-username', 'sample-user', '--registry-password',
+            'sample-pass'])
+        with pytest.raises(Exception) as e:
+            _cephadm.command_registry_login(ctx)
+        assert str(e.value) == "Failed to login to custom registry @ sample-url as sample-user with given password"
+
+    def test_get_image_info_from_inspect(self):
+        # podman
+        out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]"""
+        r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest')
+        print(r)
+        assert r == {
+            'image_id': '204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1',
+            'repo_digests': ['docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992']
+        }
+
+        # docker
+        out = """sha256:16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552,[quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f]"""
+        r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest')
+        assert r == {
+            'image_id': '16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552',
+            'repo_digests': ['quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f']
+        }
+
+        # multiple digests (podman)
+        out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]"""
+        r = _cephadm.get_image_info_from_inspect(out, 'registry/prom/prometheus:latest')
+        assert r == {
+            'image_id': 'e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42',
+            'repo_digests': [
+                'docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4',
+                'docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a',
+            ]
+        }
+
+
+    def test_dict_get(self):
+        result = _cephadm.dict_get({'a': 1}, 'a', require=True)
+        assert result == 1
+        result = _cephadm.dict_get({'a': 1}, 'b')
+        assert result is None
+        result = _cephadm.dict_get({'a': 1}, 'b', default=2)
+        assert result == 2
+
+    def test_dict_get_error(self):
+        with pytest.raises(_cephadm.Error):
+            _cephadm.dict_get({'a': 1}, 'b', require=True)
+
+    def test_dict_get_join(self):
+        result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo')
+        assert result == 'a\nb'
+        result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo')
+        assert result == '1\n2'
+        result = _cephadm.dict_get_join({'bar': 'a'}, 'bar')
+        assert result == 'a'
+        result = _cephadm.dict_get_join({'a': 1}, 'a')
+        assert result == 1
+
+    @mock.patch('os.listdir', return_value=[])
+    @mock.patch('cephadm.logger')
+    def test_infer_local_ceph_image(self, _logger, _listdir):
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = '00000000-0000-0000-0000-0000deadbeez'
+        ctx.container_engine = mock_podman()
+
+        # make sure the right image is selected when container is found
+        cinfo = _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+                                 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+                                 '514e6a882f6e74806a5856468489eeff8d7106095557578da96935e4d0ba4d9d',
+                                 '2022-04-19 13:45:20.97146228 +0000 UTC',
+                                 '')
+        out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
+        quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
+        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+        with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+            with mock.patch('cephadm.get_container_info', return_value=cinfo):
+                image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+                assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e'
+
+        # make sure first valid image is used when no container_info is found
+        out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
+        quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
+        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+        with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+            with mock.patch('cephadm.get_container_info', return_value=None):
+                image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+                assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185'
+
+        # make sure images without digest are discarded (no container_info is found)
+        out = '''quay.ceph.io/ceph-ci/ceph@|||
+        docker.io/ceph/ceph@|||
+        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+        with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+            with mock.patch('cephadm.get_container_info', return_value=None):
+                image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+                assert image == 'docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508'
+
+
+
+    @pytest.mark.parametrize('daemon_filter, by_name, daemon_list, container_stats, output',
+        [
+            # get container info by type ('mon')
+            (
+                'mon',
+                False,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+                                 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+                                 '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+                                 '2022-04-19 13:45:20.97146228 +0000 UTC',
+                                 '')
+            ),
+            # get container info by name ('mon.ceph-node-0')
+            (
+                'mon.ceph-node-0',
+                True,
+                [
+                    {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+                                 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+                                 '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+                                 '2022-04-19 13:45:20.97146228 +0000 UTC',
+                                 '')
+            ),
+            # get container info by name (same daemon but two different fsids)
+            (
+                'mon.ceph-node-0',
+                True,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+                                 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+                                 '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+                                 '2022-04-19 13:45:20.97146228 +0000 UTC',
+                                 '')
+            ),
+            # get container info by type (bad container stats: 127 code)
+            (
+                'mon',
+                False,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-FFFF-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("",
+                 "",
+                 127),
+                None
+            ),
+            # get container info by name (bad container stats: 127 code)
+            (
+                'mon.ceph-node-0',
+                True,
+                [
+                    {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("",
+                 "",
+                 127),
+                None
+            ),
+            # get container info by invalid name (doens't contain '.')
+            (
+                'mon-ceph-node-0',
+                True,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                None
+            ),
+            # get container info by invalid name (empty)
+            (
+                '',
+                True,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                None
+            ),
+            # get container info by invalid type (empty)
+            (
+                '',
+                False,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                None
+            ),
+            # get container info by name: no match (invalid fsid)
+            (
+                'mon',
+                False,
+                [
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-1111-0000-0000-0000deadbeef'},
+                    {'name': 'mon.ceph-node-0', 'fsid': '00000000-2222-0000-0000-0000deadbeef'},
+                ],
+                ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+                 "",
+                 0),
+                None
+            ),
+            # get container info by name: no match
+            (
+                'mon.ceph-node-0',
+                True,
+                [],
+                None,
+                None
+            ),
+            # get container info by type: no match
+            (
+                'mgr',
+                False,
+                [],
+                None,
+                None
+            ),
+        ])
+    @mock.patch('cephadm.logger')
+    def test_get_container_info(self, _logger, daemon_filter, by_name, daemon_list, container_stats, output):
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = '00000000-0000-0000-0000-0000deadbeef'
+        ctx.container_engine = mock_podman()
+        with mock.patch('cephadm.list_daemons', return_value=daemon_list):
+            with mock.patch('cephadm.get_container_stats', return_value=container_stats):
+                assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output
+
+    def test_should_log_to_journald(self):
+        ctx = _cephadm.CephadmContext()
+        # explicit
+        ctx.log_to_journald = True
+        assert _cephadm.should_log_to_journald(ctx)
+
+        ctx.log_to_journald = None
+        # enable if podman support --cgroup=split
+        ctx.container_engine = mock_podman()
+        ctx.container_engine.version = (2, 1, 0)
+        assert _cephadm.should_log_to_journald(ctx)
+
+        # disable on old podman
+        ctx.container_engine.version = (2, 0, 0)
+        assert not _cephadm.should_log_to_journald(ctx)
+
+        # disable on docker
+        ctx.container_engine = mock_docker()
+        assert not _cephadm.should_log_to_journald(ctx)
+
+    def test_normalize_image_digest(self):
+        s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1'
+        assert _cephadm.normalize_image_digest(s) == s
+
+        s = 'ceph/ceph:latest'
+        assert _cephadm.normalize_image_digest(s) == f'{_cephadm.DEFAULT_REGISTRY}/{s}'
+
+    @pytest.mark.parametrize('fsid, ceph_conf, list_daemons, result, err, ',
+        [
+            (
+                None,
+                None,
+                [],
+                None,
+                None,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                [],
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                [
+                    {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+                    {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+                ],
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+            ),
+            (
+                None,
+                None,
+                [
+                    {'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+            ),
+            (
+                None,
+                None,
+                [
+                    {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+                    {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+                ],
+                None,
+                r'Cannot infer an fsid',
+            ),
+            (
+                None,
+                get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+                [],
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+            ),
+            (
+                None,
+                get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+                [
+                    {'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+                ],
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+            ),
+            (
+                None,
+                get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+                [
+                    {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+                    {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+                ],
+                None,
+                r'Cannot infer an fsid',
+            ),
+        ])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    def test_infer_fsid(self, _logger, _call, fsid, ceph_conf, list_daemons, result, err, cephadm_fs):
+        # build the context
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = fsid
+
+        # mock the decorator
+        mock_fn = mock.Mock()
+        mock_fn.return_value = 0
+        infer_fsid = _cephadm.infer_fsid(mock_fn)
+
+        # mock the ceph.conf file content
+        if ceph_conf:
+            f = cephadm_fs.create_file('ceph.conf', contents=ceph_conf)
+            ctx.config = f.path
+
+        # test
+        with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+            if err:
+                with pytest.raises(_cephadm.Error, match=err):
+                    infer_fsid(ctx)
+            else:
+                infer_fsid(ctx)
+            assert ctx.fsid == result
+
+    @pytest.mark.parametrize('fsid, other_conf_files, config, name, list_daemons, result, ',
+        [
+            # per cluster conf has more precedence than default conf
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                [_cephadm.CEPH_DEFAULT_CONF],
+                None,
+                None,
+                [],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+            ),
+            # mon daemon conf has more precedence than cluster conf and default conf
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+                 _cephadm.CEPH_DEFAULT_CONF],
+                None,
+                None,
+                [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+            ),
+            # daemon conf (--name option) has more precedence than cluster, default and mon conf
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+                 '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+                 _cephadm.CEPH_DEFAULT_CONF],
+                None,
+                'osd.0',
+                [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'},
+                 {'name': 'osd.0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config',
+            ),
+            # user provided conf ('/foo/ceph.conf') more precedence than any other conf
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+                 _cephadm.CEPH_DEFAULT_CONF,
+                 '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config'],
+                '/foo/ceph.conf',
+                None,
+                [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+                '/foo/ceph.conf',
+            ),
+        ])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    def test_infer_config_precedence(self, _logger, _call, other_conf_files, fsid, config, name, list_daemons, result, cephadm_fs):
+        # build the context
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = fsid
+        ctx.config = config
+        ctx.name = name
+
+        # mock the decorator
+        mock_fn = mock.Mock()
+        mock_fn.return_value = 0
+        infer_config = _cephadm.infer_config(mock_fn)
+
+        # mock the config file
+        cephadm_fs.create_file(result)
+
+        # mock other potential config files
+        for f in other_conf_files:
+            cephadm_fs.create_file(f)
+
+        # test
+        with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+            infer_config(ctx)
+            assert ctx.config == result
+
+    @pytest.mark.parametrize('fsid, config, name, list_daemons, result, ',
+        [
+            (
+                None,
+                '/foo/bar.conf',
+                None,
+                [],
+                '/foo/bar.conf',
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [],
+                _cephadm.CEPH_DEFAULT_CONF,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [{'name': 'mon.a', 'fsid': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'style': 'cephadm:v1'}],
+                _cephadm.CEPH_DEFAULT_CONF,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'legacy'}],
+                _cephadm.CEPH_DEFAULT_CONF,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                None,
+                [{'name': 'osd.0'}],
+                _cephadm.CEPH_DEFAULT_CONF,
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                '/foo/bar.conf',
+                'mon.a',
+                [{'name': 'mon.a', 'style': 'cephadm:v1'}],
+                '/foo/bar.conf',
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                'mon.a',
+                [],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+            ),
+            (
+                '00000000-0000-0000-0000-0000deadbeef',
+                None,
+                'osd.0',
+                [],
+                '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config',
+            ),
+            (
+                None,
+                None,
+                None,
+                [],
+                _cephadm.CEPH_DEFAULT_CONF,
+            ),
+        ])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    def test_infer_config(self, _logger, _call, fsid, config, name, list_daemons, result, cephadm_fs):
+        # build the context
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = fsid
+        ctx.config = config
+        ctx.name = name
+
+        # mock the decorator
+        mock_fn = mock.Mock()
+        mock_fn.return_value = 0
+        infer_config = _cephadm.infer_config(mock_fn)
+
+        # mock the config file
+        cephadm_fs.create_file(result)
+
+        # test
+        with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+            infer_config(ctx)
+            assert ctx.config == result
+
+    @mock.patch('cephadm.call')
+    def test_extract_uid_gid_fail(self, _call):
+        err = """Error: container_linux.go:370: starting container process caused: process_linux.go:459: container init caused: process_linux.go:422: setting cgroup config for procHooks process caused: Unit libpod-056038e1126191fba41d8a037275136f2d7aeec9710b9ee
+ff792c06d8544b983.scope not found.: OCI runtime error"""
+        _call.return_value = ('', err, 127)
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_podman()
+        with pytest.raises(_cephadm.Error, match='OCI'):
+            _cephadm.extract_uid_gid(ctx)
+
+    @pytest.mark.parametrize('test_input, expected', [
+        ([_cephadm.make_fsid(), _cephadm.make_fsid(), _cephadm.make_fsid()], 3),
+        ([_cephadm.make_fsid(), 'invalid-fsid', _cephadm.make_fsid(), '0b87e50c-8e77-11ec-b890-'], 2),
+        (['f6860ec2-8e76-11ec-', '0b87e50c-8e77-11ec-b890-', ''], 0),
+        ([], 0),
+    ])
+    def test_get_ceph_cluster_count(self, test_input, expected):
+        ctx = _cephadm.CephadmContext()
+        with mock.patch('os.listdir', return_value=test_input):
+            assert _cephadm.get_ceph_cluster_count(ctx) == expected
+
+    def test_set_image_minimize_config(self):
+        def throw_cmd(cmd):
+            raise _cephadm.Error(' '.join(cmd))
+        ctx = _cephadm.CephadmContext()
+        ctx.image = 'test_image'
+        ctx.no_minimize_config = True
+        fake_cli = lambda cmd, __=None, ___=None: throw_cmd(cmd)
+        with pytest.raises(_cephadm.Error, match='config set global container_image test_image'):
+            _cephadm.finish_bootstrap_config(
+                ctx=ctx,
+                fsid=_cephadm.make_fsid(),
+                config='',
+                mon_id='a', mon_dir='mon_dir',
+                mon_network=None, ipv6=False,
+                cli=fake_cli,
+                cluster_network=None,
+                ipv6_cluster_network=False
+            )
+
+
+class TestCustomContainer(unittest.TestCase):
+    cc: _cephadm.CustomContainer
+
+    def setUp(self):
+        self.cc = _cephadm.CustomContainer(
+            'e863154d-33c7-4350-bca5-921e0467e55b',
+            'container',
+            config_json={
+                'entrypoint': 'bash',
+                'gid': 1000,
+                'args': [
+                    '--no-healthcheck',
+                    '-p 6800:6800'
+                ],
+                'envs': ['SECRET=password'],
+                'ports': [8080, 8443],
+                'volume_mounts': {
+                    '/CONFIG_DIR': '/foo/conf',
+                    'bar/config': '/bar:ro'
+                },
+                'bind_mounts': [
+                    [
+                        'type=bind',
+                        'source=/CONFIG_DIR',
+                        'destination=/foo/conf',
+                        ''
+                    ],
+                    [
+                        'type=bind',
+                        'source=bar/config',
+                        'destination=/bar:ro',
+                        'ro=true'
+                    ]
+                ]
+            },
+            image='docker.io/library/hello-world:latest'
+        )
+
+    def test_entrypoint(self):
+        self.assertEqual(self.cc.entrypoint, 'bash')
+
+    def test_uid_gid(self):
+        self.assertEqual(self.cc.uid, 65534)
+        self.assertEqual(self.cc.gid, 1000)
+
+    def test_ports(self):
+        self.assertEqual(self.cc.ports, [8080, 8443])
+
+    def test_get_container_args(self):
+        result = self.cc.get_container_args()
+        self.assertEqual(result, [
+            '--no-healthcheck',
+            '-p 6800:6800'
+        ])
+
+    def test_get_container_envs(self):
+        result = self.cc.get_container_envs()
+        self.assertEqual(result, ['SECRET=password'])
+
+    def test_get_container_mounts(self):
+        result = self.cc.get_container_mounts('/xyz')
+        self.assertDictEqual(result, {
+            '/CONFIG_DIR': '/foo/conf',
+            '/xyz/bar/config': '/bar:ro'
+        })
+
+    def test_get_container_binds(self):
+        result = self.cc.get_container_binds('/xyz')
+        self.assertEqual(result, [
+            [
+                'type=bind',
+                'source=/CONFIG_DIR',
+                'destination=/foo/conf',
+                ''
+            ],
+            [
+                'type=bind',
+                'source=/xyz/bar/config',
+                'destination=/bar:ro',
+                'ro=true'
+            ]
+        ])
+
+
+class TestMaintenance:
+    systemd_target = "ceph.00000000-0000-0000-0000-000000c0ffee.target"
+    fsid = '0ea8cdd0-1bbf-11ec-a9c7-5254002763fa'
+
+    def test_systemd_target_OK(self, tmp_path):
+        base = tmp_path
+        wants = base / "ceph.target.wants"
+        wants.mkdir()
+        target = wants / TestMaintenance.systemd_target
+        target.touch()
+        ctx = _cephadm.CephadmContext()
+        ctx.unit_dir = str(base)
+
+        assert _cephadm.systemd_target_state(ctx, target.name)
+
+    def test_systemd_target_NOTOK(self, tmp_path):
+        base = tmp_path
+        ctx = _cephadm.CephadmContext()
+        ctx.unit_dir = str(base)
+        assert not _cephadm.systemd_target_state(ctx, TestMaintenance.systemd_target)
+
+    def test_parser_OK(self):
+        args = _cephadm._parse_args(['host-maintenance', 'enter'])
+        assert args.maintenance_action == 'enter'
+
+    def test_parser_BAD(self):
+        with pytest.raises(SystemExit):
+            _cephadm._parse_args(['host-maintenance', 'wah'])
+
+    @mock.patch('os.listdir', return_value=[])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.systemd_target_state')
+    def test_enter_failure_1(self, _target_state, _logger, _call, _listdir):
+        _call.return_value = '', '', 999
+        _target_state.return_value = True
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
+        ctx.container_engine = mock_podman()
+        retval = _cephadm.command_maintenance(ctx)
+        assert retval.startswith('failed')
+
+    @mock.patch('os.listdir', return_value=[])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.systemd_target_state')
+    def test_enter_failure_2(self, _target_state, _logger, _call, _listdir):
+        _call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)]
+        _target_state.return_value = True
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
+        ctx.container_engine = mock_podman()
+        retval = _cephadm.command_maintenance(ctx)
+        assert retval.startswith('failed')
+
+    @mock.patch('os.listdir', return_value=[])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.systemd_target_state')
+    @mock.patch('cephadm.target_exists')
+    def test_exit_failure_1(self, _target_exists, _target_state, _logger, _call, _listdir):
+        _call.return_value = '', '', 999
+        _target_state.return_value = False
+        _target_exists.return_value = True
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
+        ctx.container_engine = mock_podman()
+        retval = _cephadm.command_maintenance(ctx)
+        assert retval.startswith('failed')
+
+    @mock.patch('os.listdir', return_value=[])
+    @mock.patch('cephadm.call')
+    @mock.patch('cephadm.logger')
+    @mock.patch('cephadm.systemd_target_state')
+    @mock.patch('cephadm.target_exists')
+    def test_exit_failure_2(self, _target_exists, _target_state, _logger, _call, _listdir):
+        _call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)]
+        _target_state.return_value = False
+        _target_exists.return_value = True
+        ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+            ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
+        ctx.container_engine = mock_podman()
+        retval = _cephadm.command_maintenance(ctx)
+        assert retval.startswith('failed')
+
+
+class TestMonitoring(object):
+    @mock.patch('cephadm.call')
+    def test_get_version_alertmanager(self, _call):
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_podman()
+        daemon_type = 'alertmanager'
+
+        # binary `prometheus`
+        _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
+        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+        assert version == '0.16.1'
+
+        # binary `prometheus-alertmanager`
+        _call.side_effect = (
+            ('', '', 1),
+            ('', '{}, version 0.16.1'.format(daemon_type), 0),
+        )
+        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+        assert version == '0.16.1'
+
+    @mock.patch('cephadm.call')
+    def test_get_version_prometheus(self, _call):
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_podman()
+        daemon_type = 'prometheus'
+        _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
+        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+        assert version == '0.16.1'
+
+    def test_prometheus_external_url(self):
+        ctx = _cephadm.CephadmContext()
+        ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
+        daemon_type = 'prometheus'
+        daemon_id = 'home'
+        fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
+        args = _cephadm.get_daemon_args(ctx, fsid, daemon_type, daemon_id)
+        assert any([x.startswith('--web.external-url=http://') for x in args])
+
+    @mock.patch('cephadm.call')
+    def test_get_version_node_exporter(self, _call):
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_podman()
+        daemon_type = 'node-exporter'
+        _call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0
+        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+        assert version == '0.16.1'
+
+    def test_create_daemon_dirs_prometheus(self, cephadm_fs):
+        """
+        Ensures the required and optional files given in the configuration are
+        created and mapped correctly inside the container. Tests absolute and
+        relative file paths given in the configuration.
+        """
+
+        fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
+        daemon_type = 'prometheus'
+        uid, gid = 50, 50
+        daemon_id = 'home'
+        ctx = _cephadm.CephadmContext()
+        ctx.data_dir = '/somedir'
+        ctx.config_json = json.dumps({
+            'files': {
+                'prometheus.yml': 'foo',
+                '/etc/prometheus/alerting/ceph_alerts.yml': 'bar'
+            }
+        })
+
+        _cephadm.create_daemon_dirs(ctx,
+                              fsid,
+                              daemon_type,
+                              daemon_id,
+                              uid,
+                              gid,
+                              config=None,
+                              keyring=None)
+
+        prefix = '{data_dir}/{fsid}/{daemon_type}.{daemon_id}'.format(
+            data_dir=ctx.data_dir,
+            fsid=fsid,
+            daemon_type=daemon_type,
+            daemon_id=daemon_id
+        )
+
+        expected = {
+            'etc/prometheus/prometheus.yml': 'foo',
+            'etc/prometheus/alerting/ceph_alerts.yml': 'bar',
+        }
+
+        for file,content in expected.items():
+            file = os.path.join(prefix, file)
+            assert os.path.exists(file)
+            with open(file) as f:
+                assert f.read() == content
+
+        # assert uid/gid after redeploy
+        new_uid = uid+1
+        new_gid = gid+1
+        _cephadm.create_daemon_dirs(ctx,
+                              fsid,
+                              daemon_type,
+                              daemon_id,
+                              new_uid,
+                              new_gid,
+                              config=None,
+                              keyring=None)
+        for file,content in expected.items():
+            file = os.path.join(prefix, file)
+            assert os.stat(file).st_uid == new_uid
+            assert os.stat(file).st_gid == new_gid
+
+
+class TestBootstrap(object):
+
+    @staticmethod
+    def _get_cmd(*args):
+        return [
+            'bootstrap',
+            '--allow-mismatched-release',
+            '--skip-prepare-host',
+            '--skip-dashboard',
+            *args,
+        ]
+
+
+###############################################3
+
+    def test_config(self, cephadm_fs):
+        conf_file = 'foo'
+        cmd = self._get_cmd(
+            '--mon-ip', '192.168.1.1',
+            '--skip-mon-network',
+            '--config', conf_file,
+        )
+
+        with with_cephadm_ctx(cmd) as ctx:
+            msg = r'No such file or directory'
+            with pytest.raises(_cephadm.Error, match=msg):
+                _cephadm.command_bootstrap(ctx)
+
+        cephadm_fs.create_file(conf_file)
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_bootstrap(ctx)
+            assert retval == 0
+
+    def test_no_mon_addr(self, cephadm_fs):
+        cmd = self._get_cmd()
+        with with_cephadm_ctx(cmd) as ctx:
+            msg = r'must specify --mon-ip or --mon-addrv'
+            with pytest.raises(_cephadm.Error, match=msg):
+                _cephadm.command_bootstrap(ctx)
+
+    def test_skip_mon_network(self, cephadm_fs):
+        cmd = self._get_cmd('--mon-ip', '192.168.1.1')
+
+        with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+            msg = r'--skip-mon-network'
+            with pytest.raises(_cephadm.Error, match=msg):
+                _cephadm.command_bootstrap(ctx)
+
+        cmd += ['--skip-mon-network']
+        with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+            retval = _cephadm.command_bootstrap(ctx)
+            assert retval == 0
+
+    @pytest.mark.parametrize('mon_ip, list_networks, result',
+        [
+            # IPv4
+            (
+                'eth0',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                False,
+            ),
+            (
+                '0.0.0.0',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                False,
+            ),
+            (
+                '192.168.1.0',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                False,
+            ),
+            (
+                '192.168.1.1',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                True,
+            ),
+            (
+                '192.168.1.1:1234',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                True,
+            ),
+            (
+                '192.168.1.1:0123',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                True,
+            ),
+            # IPv6
+            (
+                '::',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                False,
+            ),
+            (
+                '::ffff:192.168.1.0',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                False,
+            ),
+            (
+                '::ffff:192.168.1.1',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                True,
+            ),
+            (
+                '::ffff:c0a8:101',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                True,
+            ),
+            (
+                '[::ffff:c0a8:101]:1234',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                True,
+            ),
+            (
+                '[::ffff:c0a8:101]:0123',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                True,
+            ),
+            (
+                '0000:0000:0000:0000:0000:FFFF:C0A8:0101',
+                {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+                True,
+            ),
+        ])
+    def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs):
+        cmd = self._get_cmd('--mon-ip', mon_ip)
+        if not result:
+            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+                msg = r'--skip-mon-network'
+                with pytest.raises(_cephadm.Error, match=msg):
+                    _cephadm.command_bootstrap(ctx)
+        else:
+            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+                retval = _cephadm.command_bootstrap(ctx)
+                assert retval == 0
+
+    @pytest.mark.parametrize('mon_addrv, list_networks, err',
+        [
+            # IPv4
+            (
+                '192.168.1.1',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                r'must use square brackets',
+            ),
+            (
+                '[192.168.1.1]',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                r'must include port number',
+            ),
+            (
+                '[192.168.1.1:1234]',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                None,
+            ),
+            (
+                '[192.168.1.1:0123]',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                None,
+            ),
+            (
+                '[v2:192.168.1.1:3300,v1:192.168.1.1:6789]',
+                {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+                None,
+            ),
+            # IPv6
+            (
+                '[::ffff:192.168.1.1:1234]',
+                {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+                None,
+            ),
+            (
+                '[::ffff:192.168.1.1:0123]',
+                {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+                None,
+            ),
+            (
+                '[0000:0000:0000:0000:0000:FFFF:C0A8:0101:1234]',
+                {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+                None,
+            ),
+            (
+                '[v2:0000:0000:0000:0000:0000:FFFF:C0A8:0101:3300,v1:0000:0000:0000:0000:0000:FFFF:C0A8:0101:6789]',
+                {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+                None,
+            ),
+        ])
+    def test_mon_addrv(self, mon_addrv, list_networks, err, cephadm_fs):
+        cmd = self._get_cmd('--mon-addrv', mon_addrv)
+        if err:
+            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+                with pytest.raises(_cephadm.Error, match=err):
+                    _cephadm.command_bootstrap(ctx)
+        else:
+            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+                retval = _cephadm.command_bootstrap(ctx)
+                assert retval == 0
+
+    def test_allow_fqdn_hostname(self, cephadm_fs):
+        hostname = 'foo.bar'
+        cmd = self._get_cmd(
+            '--mon-ip', '192.168.1.1',
+            '--skip-mon-network',
+        )
+
+        with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+            msg = r'--allow-fqdn-hostname'
+            with pytest.raises(_cephadm.Error, match=msg):
+                _cephadm.command_bootstrap(ctx)
+
+        cmd += ['--allow-fqdn-hostname']
+        with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+            retval = _cephadm.command_bootstrap(ctx)
+            assert retval == 0
+
+    @pytest.mark.parametrize('fsid, err',
+        [
+            ('', None),
+            ('00000000-0000-0000-0000-0000deadbeef', None),
+            ('00000000-0000-0000-0000-0000deadbeez', 'not an fsid'),
+        ])
+    def test_fsid(self, fsid, err, cephadm_fs):
+        cmd = self._get_cmd(
+            '--mon-ip', '192.168.1.1',
+            '--skip-mon-network',
+            '--fsid', fsid,
+        )
+
+        with with_cephadm_ctx(cmd) as ctx:
+            if err:
+                with pytest.raises(_cephadm.Error, match=err):
+                    _cephadm.command_bootstrap(ctx)
+            else:
+                retval = _cephadm.command_bootstrap(ctx)
+                assert retval == 0
+
+
+class TestShell(object):
+
+    def test_fsid(self, cephadm_fs):
+        fsid = '00000000-0000-0000-0000-0000deadbeef'
+
+        cmd = ['shell', '--fsid', fsid]
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.fsid == fsid
+
+        cmd = ['shell', '--fsid', '00000000-0000-0000-0000-0000deadbeez']
+        with with_cephadm_ctx(cmd) as ctx:
+            err = 'not an fsid'
+            with pytest.raises(_cephadm.Error, match=err):
+                retval = _cephadm.command_shell(ctx)
+                assert retval == 1
+                assert ctx.fsid == None
+
+        s = get_ceph_conf(fsid=fsid)
+        f = cephadm_fs.create_file('ceph.conf', contents=s)
+
+        cmd = ['shell', '--fsid', fsid, '--config', f.path]
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.fsid == fsid
+
+        cmd = ['shell', '--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path]
+        with with_cephadm_ctx(cmd) as ctx:
+            err = 'fsid does not match ceph.conf'
+            with pytest.raises(_cephadm.Error, match=err):
+                retval = _cephadm.command_shell(ctx)
+                assert retval == 1
+                assert ctx.fsid == None
+
+    def test_name(self, cephadm_fs):
+        cmd = ['shell', '--name', 'foo']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+
+        cmd = ['shell', '--name', 'foo.bar']
+        with with_cephadm_ctx(cmd) as ctx:
+            err = r'must pass --fsid'
+            with pytest.raises(_cephadm.Error, match=err):
+                retval = _cephadm.command_shell(ctx)
+                assert retval == 1
+
+        fsid = '00000000-0000-0000-0000-0000deadbeef'
+        cmd = ['shell', '--name', 'foo.bar', '--fsid', fsid]
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+
+    def test_config(self, cephadm_fs):
+        cmd = ['shell']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.config == None
+
+        cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_CONF)
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.config == _cephadm.CEPH_DEFAULT_CONF
+
+        cmd = ['shell', '--config', 'foo']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.config == 'foo'
+
+    def test_keyring(self, cephadm_fs):
+        cmd = ['shell']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.keyring == None
+
+        cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_KEYRING)
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.keyring == _cephadm.CEPH_DEFAULT_KEYRING
+
+        cmd = ['shell', '--keyring', 'foo']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert ctx.keyring == 'foo'
+
+    @mock.patch('cephadm.CephContainer')
+    def test_mount_no_dst(self, _ceph_container, cephadm_fs):
+        cmd = ['shell', '--mount', '/etc/foo']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/mnt/foo'
+
+    @mock.patch('cephadm.CephContainer')
+    def test_mount_with_dst_no_opt(self, _ceph_container, cephadm_fs):
+        cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar'
+
+    @mock.patch('cephadm.CephContainer')
+    def test_mount_with_dst_and_opt(self, _ceph_container, cephadm_fs):
+        cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar:Z']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_shell(ctx)
+            assert retval == 0
+            assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar:Z'
+
+class TestCephVolume(object):
+
+    @staticmethod
+    def _get_cmd(*args):
+        return [
+            'ceph-volume',
+            *args,
+            '--', 'inventory', '--format', 'json'
+        ]
+
+    def test_noop(self, cephadm_fs):
+        cmd = self._get_cmd()
+        with with_cephadm_ctx(cmd) as ctx:
+            _cephadm.command_ceph_volume(ctx)
+            assert ctx.fsid == None
+            assert ctx.config == None
+            assert ctx.keyring == None
+            assert ctx.config_json == None
+
+    def test_fsid(self, cephadm_fs):
+        fsid = '00000000-0000-0000-0000-0000deadbeef'
+
+        cmd = self._get_cmd('--fsid', fsid)
+        with with_cephadm_ctx(cmd) as ctx:
+            _cephadm.command_ceph_volume(ctx)
+            assert ctx.fsid == fsid
+
+        cmd = self._get_cmd('--fsid', '00000000-0000-0000-0000-0000deadbeez')
+        with with_cephadm_ctx(cmd) as ctx:
+            err = 'not an fsid'
+            with pytest.raises(_cephadm.Error, match=err):
+                retval = _cephadm.command_shell(ctx)
+                assert retval == 1
+                assert ctx.fsid == None
+
+        s = get_ceph_conf(fsid=fsid)
+        f = cephadm_fs.create_file('ceph.conf', contents=s)
+
+        cmd = self._get_cmd('--fsid', fsid, '--config', f.path)
+        with with_cephadm_ctx(cmd) as ctx:
+            _cephadm.command_ceph_volume(ctx)
+            assert ctx.fsid == fsid
+
+        cmd = self._get_cmd('--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path)
+        with with_cephadm_ctx(cmd) as ctx:
+            err = 'fsid does not match ceph.conf'
+            with pytest.raises(_cephadm.Error, match=err):
+                _cephadm.command_ceph_volume(ctx)
+                assert ctx.fsid == None
+
+    def test_config(self, cephadm_fs):
+        cmd = self._get_cmd('--config', 'foo')
+        with with_cephadm_ctx(cmd) as ctx:
+            err = r'No such file or directory'
+            with pytest.raises(_cephadm.Error, match=err):
+                _cephadm.command_ceph_volume(ctx)
+
+        cephadm_fs.create_file('bar')
+        cmd = self._get_cmd('--config', 'bar')
+        with with_cephadm_ctx(cmd) as ctx:
+            _cephadm.command_ceph_volume(ctx)
+            assert ctx.config == 'bar'
+
+    def test_keyring(self, cephadm_fs):
+        cmd = self._get_cmd('--keyring', 'foo')
+        with with_cephadm_ctx(cmd) as ctx:
+            err = r'No such file or directory'
+            with pytest.raises(_cephadm.Error, match=err):
+                _cephadm.command_ceph_volume(ctx)
+
+        cephadm_fs.create_file('bar')
+        cmd = self._get_cmd('--keyring', 'bar')
+        with with_cephadm_ctx(cmd) as ctx:
+            _cephadm.command_ceph_volume(ctx)
+            assert ctx.keyring == 'bar'
+
+
+class TestIscsi:
+    def test_unit_run(self, cephadm_fs):
+        fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+        config_json = {
+                'files': {'iscsi-gateway.cfg': ''}
+            }
+        with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx:
+            import json
+            ctx.container_engine = mock_docker()
+            ctx.config_json = json.dumps(config_json)
+            ctx.fsid = fsid
+            _cephadm.get_parm.return_value = config_json
+            c = _cephadm.get_container(ctx, fsid, 'iscsi', 'daemon_id')
+
+            _cephadm.make_data_dir(ctx, fsid, 'iscsi', 'daemon_id')
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'iscsi',
+                'daemon_id',
+                c,
+                True, True
+            )
+
+            with open('/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/unit.run') as f:
+                assert f.read() == """set -e
+if ! grep -qs /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs /proc/mounts; then mount -t configfs none /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs; fi
+# iscsi tcmu-runner container
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id-tcmu 2> /dev/null
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null
+/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph &
+# iscsi.daemon_id
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id 2> /dev/null
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id 2> /dev/null
+/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph
+"""
+
+    def test_get_container(self):
+        """
+        Due to a combination of socket.getfqdn() and podman's behavior to
+        add the container name into the /etc/hosts file, we cannot use periods
+        in container names. But we need to be able to detect old existing containers.
+        Assert this behaviour. I think we can remove this in Ceph R
+        """
+        fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+        with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx:
+            ctx.fsid = fsid
+            c = _cephadm.get_container(ctx, fsid, 'iscsi', 'something')
+            assert c.cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-something'
+            assert c.old_cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.something'
+
+
+class TestCheckHost:
+
+    @mock.patch('cephadm.find_executable', return_value='foo')
+    @mock.patch('cephadm.check_time_sync', return_value=True)
+    @mock.patch('cephadm.logger')
+    def test_container_engine(self, _logger, _find_executable, _check_time_sync):
+        ctx = _cephadm.CephadmContext()
+
+        ctx.container_engine = None
+        err = r'No container engine binary found'
+        with pytest.raises(_cephadm.Error, match=err):
+            _cephadm.command_check_host(ctx)
+
+        ctx.container_engine = mock_podman()
+        _cephadm.command_check_host(ctx)
+
+        ctx.container_engine = mock_docker()
+        _cephadm.command_check_host(ctx)
+
+
+class TestRmRepo:
+
+    @pytest.mark.parametrize('os_release',
+        [
+            # Apt
+            dedent("""
+            NAME="Ubuntu"
+            VERSION="20.04 LTS (Focal Fossa)"
+            ID=ubuntu
+            ID_LIKE=debian
+            PRETTY_NAME="Ubuntu 20.04 LTS"
+            VERSION_ID="20.04"
+            HOME_URL="https://www.ubuntu.com/"
+            SUPPORT_URL="https://help.ubuntu.com/"
+            BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+            PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+            VERSION_CODENAME=focal
+            UBUNTU_CODENAME=focal
+            """),
+
+            # YumDnf
+            dedent("""
+            NAME="CentOS Linux"
+            VERSION="8 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="8"
+            PLATFORM_ID="platform:el8"
+            PRETTY_NAME="CentOS Linux 8 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:8"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-8"
+            CENTOS_MANTISBT_PROJECT_VERSION="8"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="8"
+            """),
+
+            # Zypper
+            dedent("""
+            NAME="openSUSE Tumbleweed"
+            # VERSION="20210810"
+            ID="opensuse-tumbleweed"
+            ID_LIKE="opensuse suse"
+            VERSION_ID="20210810"
+            PRETTY_NAME="openSUSE Tumbleweed"
+            ANSI_COLOR="0;32"
+            CPE_NAME="cpe:/o:opensuse:tumbleweed:20210810"
+            BUG_REPORT_URL="https://bugs.opensuse.org"
+            HOME_URL="https://www.opensuse.org/"
+            DOCUMENTATION_URL="https://en.opensuse.org/Portal:Tumbleweed"
+            LOGO="distributor-logo"
+            """),
+        ])
+    @mock.patch('cephadm.find_executable', return_value='foo')
+    def test_container_engine(self, _find_executable, os_release, cephadm_fs):
+        cephadm_fs.create_file('/etc/os-release', contents=os_release)
+        ctx = _cephadm.CephadmContext()
+
+        ctx.container_engine = None
+        _cephadm.command_rm_repo(ctx)
+
+        ctx.container_engine = mock_podman()
+        _cephadm.command_rm_repo(ctx)
+
+        ctx.container_engine = mock_docker()
+        _cephadm.command_rm_repo(ctx)
+
+
+class TestValidateRepo:
+
+    @pytest.mark.parametrize('values',
+        [
+            # Apt - no checks
+            dict(
+            version="",
+            release="pacific",
+            err_text="",
+            os_release=dedent("""
+            NAME="Ubuntu"
+            VERSION="20.04 LTS (Focal Fossa)"
+            ID=ubuntu
+            ID_LIKE=debian
+            PRETTY_NAME="Ubuntu 20.04 LTS"
+            VERSION_ID="20.04"
+            HOME_URL="https://www.ubuntu.com/"
+            SUPPORT_URL="https://help.ubuntu.com/"
+            BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+            PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+            VERSION_CODENAME=focal
+            UBUNTU_CODENAME=focal
+            """)),
+
+            # YumDnf on Centos8 - OK
+            dict(
+            version="",
+            release="pacific",
+            err_text="",
+            os_release=dedent("""
+            NAME="CentOS Linux"
+            VERSION="8 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="8"
+            PLATFORM_ID="platform:el8"
+            PRETTY_NAME="CentOS Linux 8 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:8"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-8"
+            CENTOS_MANTISBT_PROJECT_VERSION="8"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="8"
+            """)),
+
+            # YumDnf on Fedora - Fedora not supported
+            dict(
+            version="",
+            release="pacific",
+            err_text="does not build Fedora",
+            os_release=dedent("""
+            NAME="Fedora Linux"
+            VERSION="35 (Cloud Edition)"
+            ID=fedora
+            VERSION_ID=35
+            VERSION_CODENAME=""
+            PLATFORM_ID="platform:f35"
+            PRETTY_NAME="Fedora Linux 35 (Cloud Edition)"
+            ANSI_COLOR="0;38;2;60;110;180"
+            LOGO=fedora-logo-icon
+            CPE_NAME="cpe:/o:fedoraproject:fedora:35"
+            HOME_URL="https://fedoraproject.org/"
+            DOCUMENTATION_URL="https://docs.fedoraproject.org/en-US/fedora/f35/system-administrators-guide/"
+            SUPPORT_URL="https://ask.fedoraproject.org/"
+            BUG_REPORT_URL="https://bugzilla.redhat.com/"
+            REDHAT_BUGZILLA_PRODUCT="Fedora"
+            REDHAT_BUGZILLA_PRODUCT_VERSION=35
+            REDHAT_SUPPORT_PRODUCT="Fedora"
+            REDHAT_SUPPORT_PRODUCT_VERSION=35
+            PRIVACY_POLICY_URL="https://fedoraproject.org/wiki/Legal:PrivacyPolicy"
+            VARIANT="Cloud Edition"
+            VARIANT_ID=cloud
+            """)),
+
+            # YumDnf on Centos 7 - no pacific
+            dict(
+            version="",
+            release="pacific",
+            err_text="does not support pacific",
+            os_release=dedent("""
+            NAME="CentOS Linux"
+            VERSION="7 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="7"
+            PRETTY_NAME="CentOS Linux 7 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:7"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-7"
+            CENTOS_MANTISBT_PROJECT_VERSION="7"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="7"
+            """)),
+
+            # YumDnf on Centos 7 - nothing after pacific
+            dict(
+            version="",
+            release="zillions",
+            err_text="does not support pacific",
+            os_release=dedent("""
+            NAME="CentOS Linux"
+            VERSION="7 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="7"
+            PRETTY_NAME="CentOS Linux 7 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:7"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-7"
+            CENTOS_MANTISBT_PROJECT_VERSION="7"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="7"
+            """)),
+
+            # YumDnf on Centos 7 - nothing v16 or higher
+            dict(
+            version="v16.1.3",
+            release="",
+            err_text="does not support",
+            os_release=dedent("""
+            NAME="CentOS Linux"
+            VERSION="7 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="7"
+            PRETTY_NAME="CentOS Linux 7 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:7"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-7"
+            CENTOS_MANTISBT_PROJECT_VERSION="7"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="7"
+            """)),
+        ])
+    @mock.patch('cephadm.find_executable', return_value='foo')
+    def test_distro_validation(self, _find_executable, values, cephadm_fs):
+        os_release = values['os_release']
+        release = values['release']
+        version = values['version']
+        err_text = values['err_text']
+
+        cephadm_fs.create_file('/etc/os-release', contents=os_release)
+        ctx = _cephadm.CephadmContext()
+        ctx.repo_url = 'http://localhost'
+        pkg = _cephadm.create_packager(ctx, stable=release, version=version)
+
+        if err_text:
+            with pytest.raises(_cephadm.Error, match=err_text):
+                pkg.validate()
+        else:
+            with mock.patch('cephadm.urlopen', return_value=None):
+                pkg.validate()
+
+    @pytest.mark.parametrize('values',
+        [
+            # Apt - not checked
+            dict(
+            version="",
+            release="pacific",
+            err_text="",
+            os_release=dedent("""
+            NAME="Ubuntu"
+            VERSION="20.04 LTS (Focal Fossa)"
+            ID=ubuntu
+            ID_LIKE=debian
+            PRETTY_NAME="Ubuntu 20.04 LTS"
+            VERSION_ID="20.04"
+            HOME_URL="https://www.ubuntu.com/"
+            SUPPORT_URL="https://help.ubuntu.com/"
+            BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+            PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+            VERSION_CODENAME=focal
+            UBUNTU_CODENAME=focal
+            """)),
+
+            # YumDnf on Centos8 - force failure
+            dict(
+            version="",
+            release="foobar",
+            err_text="failed to fetch repository metadata",
+            os_release=dedent("""
+            NAME="CentOS Linux"
+            VERSION="8 (Core)"
+            ID="centos"
+            ID_LIKE="rhel fedora"
+            VERSION_ID="8"
+            PLATFORM_ID="platform:el8"
+            PRETTY_NAME="CentOS Linux 8 (Core)"
+            ANSI_COLOR="0;31"
+            CPE_NAME="cpe:/o:centos:centos:8"
+            HOME_URL="https://www.centos.org/"
+            BUG_REPORT_URL="https://bugs.centos.org/"
+
+            CENTOS_MANTISBT_PROJECT="CentOS-8"
+            CENTOS_MANTISBT_PROJECT_VERSION="8"
+            REDHAT_SUPPORT_PRODUCT="centos"
+            REDHAT_SUPPORT_PRODUCT_VERSION="8"
+            """)),
+        ])
+    @mock.patch('cephadm.find_executable', return_value='foo')
+    @mock.patch('cephadm.logger')
+    def test_http_validation(self, _logger, _find_executable, values, cephadm_fs):
+        from urllib.error import HTTPError
+
+        os_release = values['os_release']
+        release = values['release']
+        version = values['version']
+        err_text = values['err_text']
+
+        cephadm_fs.create_file('/etc/os-release', contents=os_release)
+        ctx = _cephadm.CephadmContext()
+        ctx.repo_url = 'http://localhost'
+        pkg = _cephadm.create_packager(ctx, stable=release, version=version)
+
+        with mock.patch('cephadm.urlopen') as _urlopen:
+            _urlopen.side_effect = HTTPError(ctx.repo_url, 404, "not found", None, fp=None)
+            if err_text:
+                with pytest.raises(_cephadm.Error, match=err_text):
+                    pkg.validate()
+            else:
+                pkg.validate()
+
+
+class TestPull:
+
+    @mock.patch('time.sleep')
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
+    @mock.patch('cephadm.logger')
+    def test_error(self, _logger, _get_image_info_from_inspect, _call, _sleep):
+        ctx = _cephadm.CephadmContext()
+        ctx.container_engine = mock_podman()
+        ctx.insecure = False
+
+        _call.return_value = ('', '', 0)
+        retval = _cephadm.command_pull(ctx)
+        assert retval == 0
+
+        err = 'maximum retries reached'
+
+        _call.return_value = ('', 'foobar', 1)
+        with pytest.raises(_cephadm.Error) as e:
+            _cephadm.command_pull(ctx)
+        assert err not in str(e.value)
+
+        _call.return_value = ('', 'net/http: TLS handshake timeout', 1)
+        with pytest.raises(_cephadm.Error) as e:
+            _cephadm.command_pull(ctx)
+        assert err in str(e.value)
+
+    @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
+    @mock.patch('cephadm.infer_local_ceph_image', return_value='last_local_ceph_image')
+    def test_image(self, _infer_local_ceph_image, _get_image_info_from_inspect):
+        cmd = ['pull']
+        with with_cephadm_ctx(cmd) as ctx:
+            retval = _cephadm.command_pull(ctx)
+            assert retval == 0
+            assert ctx.image == _cephadm.DEFAULT_IMAGE
+
+        with mock.patch.dict(os.environ, {"CEPHADM_IMAGE": 'cephadm_image_environ'}):
+            cmd = ['pull']
+            with with_cephadm_ctx(cmd) as ctx:
+                retval = _cephadm.command_pull(ctx)
+                assert retval == 0
+                assert ctx.image == 'cephadm_image_environ'
+
+            cmd = ['--image',  'cephadm_image_param', 'pull']
+            with with_cephadm_ctx(cmd) as ctx:
+                retval = _cephadm.command_pull(ctx)
+                assert retval == 0
+                assert ctx.image == 'cephadm_image_param'
+
+
+class TestApplySpec:
+
+    def test_extract_host_info_from_applied_spec(self, cephadm_fs):
+        yaml = '''---
+service_type: host
+hostname: vm-00
+addr: 192.168.122.44
+labels:
+ - example1
+ - example2
+---
+service_type: host
+hostname: vm-01
+addr: 192.168.122.247
+labels:
+ - grafana
+---      
+service_type: host
+hostname: vm-02
+---
+---      
+service_type: rgw
+service_id: myrgw
+spec:
+  rgw_frontend_ssl_certificate: |
+    -----BEGIN PRIVATE KEY-----
+    V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
+    ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
+    IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
+    YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
+    ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
+    -----END PRIVATE KEY-----
+    -----BEGIN CERTIFICATE-----
+    V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
+    ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
+    IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
+    YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
+    ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
+    -----END CERTIFICATE-----
+  ssl: true
+---  
+'''
+
+        cephadm_fs.create_file('spec.yml', contents=yaml)
+        retdic = [{'hostname': 'vm-00', 'addr': '192.168.122.44'},
+                  {'hostname': 'vm-01', 'addr': '192.168.122.247'},
+                  {'hostname': 'vm-02',}]
+
+        with open('spec.yml') as f:
+            dic = _cephadm._extract_host_info_from_applied_spec(f)
+            assert dic == retdic
+
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_distribute_ssh_keys(self, _logger, _call):
+        ctx = _cephadm.CephadmContext()
+        ctx.ssh_public_key = None
+        ctx.ssh_user = 'root'
+
+        host_spec = {'service_type': 'host', 'hostname': 'vm-02', 'addr': '192.168.122.165'}
+
+        retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname')
+
+        assert retval == 0
+
+        _call.return_value = ('', '', 1)
+
+        retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname')
+
+        assert retval == 1
+
+
+class TestSNMPGateway:
+    V2c_config = {
+        'snmp_community': 'public',
+        'destination': '192.168.1.10:162',
+        'snmp_version': 'V2c',
+    }
+    V3_no_priv_config = {
+        'destination': '192.168.1.10:162',
+        'snmp_version': 'V3',
+        'snmp_v3_auth_username': 'myuser',
+        'snmp_v3_auth_password': 'mypassword',
+        'snmp_v3_auth_protocol': 'SHA',
+        'snmp_v3_engine_id': '8000C53F00000000',
+    }
+    V3_priv_config = {
+        'destination': '192.168.1.10:162',
+        'snmp_version': 'V3',
+        'snmp_v3_auth_username': 'myuser',
+        'snmp_v3_auth_password': 'mypassword',
+        'snmp_v3_auth_protocol': 'SHA',
+        'snmp_v3_priv_protocol': 'DES',
+        'snmp_v3_priv_password': 'mysecret',
+        'snmp_v3_engine_id': '8000C53F00000000',
+    }
+    no_destination_config = {
+        'snmp_version': 'V3',
+        'snmp_v3_auth_username': 'myuser',
+        'snmp_v3_auth_password': 'mypassword',
+        'snmp_v3_auth_protocol': 'SHA',
+        'snmp_v3_priv_protocol': 'DES',
+        'snmp_v3_priv_password': 'mysecret',
+        'snmp_v3_engine_id': '8000C53F00000000',
+    }
+    bad_version_config = {
+        'snmp_community': 'public',
+        'destination': '192.168.1.10:162',
+        'snmp_version': 'V1',
+    }
+
+    def test_unit_run_V2c(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.V2c_config)
+            ctx.fsid = fsid
+            ctx.tcp_ports = '9464'
+            _cephadm.get_parm.return_value = self.V2c_config
+            c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+                conf = f.read().rstrip()
+                assert conf == 'SNMP_NOTIFIER_COMMUNITY=public'
+
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'snmp-gateway',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl')
+
+    def test_unit_run_V3_noPriv(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.V3_no_priv_config)
+            ctx.fsid = fsid
+            ctx.tcp_ports = '9465'
+            _cephadm.get_parm.return_value = self.V3_no_priv_config
+            c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+                conf = f.read()
+                assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\n'
+
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'snmp-gateway',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000')
+
+    def test_unit_run_V3_Priv(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.V3_priv_config)
+            ctx.fsid = fsid
+            ctx.tcp_ports = '9464'
+            _cephadm.get_parm.return_value = self.V3_priv_config
+            c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+            _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+                conf = f.read()
+                assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\nSNMP_NOTIFIER_PRIV_PASSWORD=mysecret\n'
+
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'snmp-gateway',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES')
+
+    def test_unit_run_no_dest(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.no_destination_config)
+            ctx.fsid = fsid
+            ctx.tcp_ports = '9464'
+            _cephadm.get_parm.return_value = self.no_destination_config
+
+            with pytest.raises(Exception) as e:
+                c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+            assert str(e.value) == "config is missing destination attribute(<ip>:<port>) of the target SNMP listener"
+
+    def test_unit_run_bad_version(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.bad_version_config)
+            ctx.fsid = fsid
+            ctx.tcp_ports = '9464'
+            _cephadm.get_parm.return_value = self.bad_version_config
+
+            with pytest.raises(Exception) as e:
+                c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+            assert str(e.value) == 'not a valid snmp version: V1'
+
+class TestNetworkValidation:
+
+    def test_ipv4_subnet(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.1.0/24')
+        assert rc == 0 and v[0] == 4
+
+    def test_ipv4_subnet_list(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,10.90.90.0/24')
+        assert rc == 0 and not msg
+
+    def test_ipv4_subnet_list_with_spaces(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.1.0/24, 10.90.90.0/24 ')
+        assert rc == 0 and not msg
+
+    def test_ipv4_subnet_badlist(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,192.168.1.1')
+        assert rc == 1 and msg
+
+    def test_ipv4_subnet_mixed(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.100.0/24,fe80::/64')
+        assert rc == 0 and v == [4,6]
+
+    def test_ipv6_subnet(self):
+        rc, v, msg = _cephadm.check_subnet('fe80::/64')
+        assert rc == 0 and v[0] == 6
+
+    def test_subnet_mask_missing(self):
+        rc, v, msg = _cephadm.check_subnet('192.168.1.58')
+        assert rc == 1 and msg
+
+    def test_subnet_mask_junk(self):
+        rc, v, msg = _cephadm.check_subnet('wah')
+        assert rc == 1 and msg
+
+    def test_ip_in_subnet(self):
+        # valid ip and only one valid subnet
+        rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24')
+        assert rc is True
+
+        # valid ip and valid subnets list without spaces
+        rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24,10.90.90.0/24')
+        assert rc is True
+
+        # valid ip and valid subnets list with spaces
+        rc = _cephadm.ip_in_subnets('10.90.90.2', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24')
+        assert rc is True
+
+        # valid ip that doesn't belong to any subnet
+        rc = _cephadm.ip_in_subnets('192.168.100.2', '192.168.50.0/24, 10.90.90.0/24')
+        assert rc is False
+
+        # valid ip that doesn't belong to the subnet (only 14 hosts)
+        rc = _cephadm.ip_in_subnets('192.168.100.20', '192.168.100.0/28')
+        assert rc is False
+
+        # valid ip and valid IPV6 network
+        rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', 'fe80::/64')
+        assert rc is True
+
+        # valid wrapped ip and valid IPV6 network
+        rc = _cephadm.ip_in_subnets('[fe80::5054:ff:fef4:873a]', 'fe80::/64')
+        assert rc is True
+
+        # valid ip and that doesn't belong to IPV6 network
+        rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', '2001:db8:85a3::/64')
+        assert rc is False
+
+        # invalid IPv4 and valid subnets list
+        with pytest.raises(Exception):
+            rc = _cephadm.ip_in_sublets('10.90.200.', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24')
+
+        # invalid IPv6 and valid subnets list
+        with pytest.raises(Exception):
+            rc = _cephadm.ip_in_sublets('fe80:2030:31:24', 'fe80::/64')
+
+    @pytest.mark.parametrize("conf", [
+    """[global]
+public_network='1.1.1.0/24,2.2.2.0/24'
+cluster_network="3.3.3.0/24, 4.4.4.0/24"
+""",
+    """[global]
+public_network=" 1.1.1.0/24,2.2.2.0/24 "
+cluster_network=3.3.3.0/24, 4.4.4.0/24
+""",
+    """[global]
+    public_network= 1.1.1.0/24,  2.2.2.0/24 
+    cluster_network='3.3.3.0/24,4.4.4.0/24'
+"""])
+    @mock.patch('cephadm.list_networks')
+    @mock.patch('cephadm.logger')
+    def test_get_networks_from_conf(self, _logger, _list_networks, conf, cephadm_fs):
+        cephadm_fs.create_file('ceph.conf', contents=conf)
+        _list_networks.return_value = {'1.1.1.0/24': {'eth0': ['1.1.1.1']},
+                                       '2.2.2.0/24': {'eth1': ['2.2.2.2']},
+                                       '3.3.3.0/24': {'eth2': ['3.3.3.3']},
+                                       '4.4.4.0/24': {'eth3': ['4.4.4.4']}}
+        ctx = _cephadm.CephadmContext()
+        ctx.config = 'ceph.conf'
+        ctx.mon_ip = '1.1.1.1'
+        ctx.cluster_network = None
+        # what the cephadm module does with the public network string is
+        # [x.strip() for x in out.split(',')]
+        # so we must make sure our output, through that alteration,
+        # generates correctly formatted networks
+        def _str_to_networks(s):
+            return [x.strip() for x in s.split(',')]
+        public_network = _cephadm.get_public_net_from_cfg(ctx)
+        assert _str_to_networks(public_network) == ['1.1.1.0/24', '2.2.2.0/24']
+        cluster_network, ipv6 = _cephadm.prepare_cluster_network(ctx)
+        assert not ipv6
+        assert _str_to_networks(cluster_network) == ['3.3.3.0/24', '4.4.4.0/24']
+
+class TestSysctl:
+    @mock.patch('cephadm.sysctl_get')
+    def test_filter_sysctl_settings(self, _sysctl_get):
+        ctx = _cephadm.CephadmContext()
+        input = [
+            # comment-only lines should be ignored
+            "# just a comment",
+            # As should whitespace-only lines",
+            "   \t ",
+            "   =  \t  ",
+            # inline comments are stripped when querying
+            "something = value # inline comment",
+            "fs.aio-max-nr = 1048576",
+            "kernel.pid_max = 4194304",
+            "vm.lowmem_reserve_ratio = 256\t256\t32\t0\t0",
+            "  vm.max_map_count       =            65530    ",
+            "  vm.max_map_count       =            65530    ",
+        ]
+        _sysctl_get.side_effect = [
+            "value",
+            "1",
+            "4194304",
+            "256\t256\t32\t0\t0",
+            "65530",
+            "something else",
+        ]
+        result = _cephadm.filter_sysctl_settings(ctx, input)
+        assert len(_sysctl_get.call_args_list) == 6
+        assert _sysctl_get.call_args_list[0].args[1] == "something"
+        assert _sysctl_get.call_args_list[1].args[1] == "fs.aio-max-nr"
+        assert _sysctl_get.call_args_list[2].args[1] == "kernel.pid_max"
+        assert _sysctl_get.call_args_list[3].args[1] == "vm.lowmem_reserve_ratio"
+        assert _sysctl_get.call_args_list[4].args[1] == "vm.max_map_count"
+        assert _sysctl_get.call_args_list[5].args[1] == "vm.max_map_count"
+        assert result == [
+            "fs.aio-max-nr = 1048576",
+            "  vm.max_map_count       =            65530    ",
+        ]
+
+class TestJaeger:
+    single_es_node_conf = {
+        'elasticsearch_nodes': 'http://192.168.0.1:9200'}
+    multiple_es_nodes_conf = {
+        'elasticsearch_nodes': 'http://192.168.0.1:9200,http://192.168.0.2:9300'}
+    agent_conf = {
+        'collector_nodes': 'test:14250'}
+
+    def test_single_es(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.single_es_node_conf)
+            ctx.fsid = fsid
+            c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id')
+            _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0)
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'jaeger-collector',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200 quay.io/jaegertracing/jaeger-collector:1.29')
+
+    def test_multiple_es(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.multiple_es_nodes_conf)
+            ctx.fsid = fsid
+            c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id')
+            _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0)
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'jaeger-collector',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200,http://192.168.0.2:9300 quay.io/jaegertracing/jaeger-collector:1.29')
+
+    def test_jaeger_agent(self, cephadm_fs):
+        fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+        with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-agent:1.29'], list_networks={}) as ctx:
+            import json
+            ctx.config_json = json.dumps(self.agent_conf)
+            ctx.fsid = fsid
+            c = _cephadm.get_container(ctx, fsid, 'jaeger-agent', 'daemon_id')
+            _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-agent', 'daemon_id', 0, 0)
+            _cephadm.deploy_daemon_units(
+                ctx,
+                fsid,
+                0, 0,
+                'jaeger-agent',
+                'daemon_id',
+                c,
+                True, True
+            )
+            with open(f'/var/lib/ceph/{fsid}/jaeger-agent.daemon_id/unit.run', 'r') as f:
+                run_cmd = f.readlines()[-1].rstrip()
+                assert run_cmd.endswith('quay.io/jaegertracing/jaeger-agent:1.29 --reporter.grpc.host-port=test:14250 --processor.jaeger-compact.server-host-port=6799')
+
+class TestRescan(fake_filesystem_unittest.TestCase):
+
+    def setUp(self):
+        self.setUpPyfakefs()
+        if not fake_filesystem.is_root():
+            fake_filesystem.set_uid(0)
+
+        self.fs.create_dir('/sys/class')
+        self.ctx = _cephadm.CephadmContext()
+        self.ctx.func = _cephadm.command_rescan_disks
+
+    @mock.patch('cephadm.logger')
+    def test_no_hbas(self, _logger):
+        out = _cephadm.command_rescan_disks(self.ctx)
+        assert out == 'Ok. No compatible HBAs found'
+
+    @mock.patch('cephadm.logger')
+    def test_success(self, _logger):
+        self.fs.create_file('/sys/class/scsi_host/host0/scan')
+        self.fs.create_file('/sys/class/scsi_host/host1/scan')
+        out = _cephadm.command_rescan_disks(self.ctx)
+        assert out.startswith('Ok. 2 adapters detected: 2 rescanned, 0 skipped, 0 failed')
+
+    @mock.patch('cephadm.logger')
+    def test_skip_usb_adapter(self, _logger):
+        self.fs.create_file('/sys/class/scsi_host/host0/scan')
+        self.fs.create_file('/sys/class/scsi_host/host1/scan')
+        self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='usb-storage')
+        out = _cephadm.command_rescan_disks(self.ctx)
+        assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed')
+
+    @mock.patch('cephadm.logger')
+    def test_skip_unknown_adapter(self, _logger):
+        self.fs.create_file('/sys/class/scsi_host/host0/scan')
+        self.fs.create_file('/sys/class/scsi_host/host1/scan')
+        self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='unknown')
+        out = _cephadm.command_rescan_disks(self.ctx)
+        assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed')
diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py
new file mode 100644
index 000000000..433f01270
--- /dev/null
+++ b/src/cephadm/tests/test_container_engine.py
@@ -0,0 +1,54 @@
+from unittest import mock
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+def test_container_engine():
+    with pytest.raises(NotImplementedError):
+        _cephadm.ContainerEngine()
+
+    class PhonyContainerEngine(_cephadm.ContainerEngine):
+        EXE = "true"
+
+    with mock.patch("cephadm.find_program") as find_program:
+        find_program.return_value = "/usr/bin/true"
+        pce = PhonyContainerEngine()
+        assert str(pce) == "true (/usr/bin/true)"
+
+
+def test_podman():
+    with mock.patch("cephadm.find_program") as find_program:
+        find_program.return_value = "/usr/bin/podman"
+        pm = _cephadm.Podman()
+        find_program.assert_called()
+        with pytest.raises(RuntimeError):
+            pm.version
+        with mock.patch("cephadm.call_throws") as call_throws:
+            call_throws.return_value = ("4.9.9", None, None)
+            with with_cephadm_ctx([]) as ctx:
+                pm.get_version(ctx)
+        assert pm.version == (4, 9, 9)
+        assert str(pm) == "podman (/usr/bin/podman) version 4.9.9"
+
+
+def test_podman_badversion():
+    with mock.patch("cephadm.find_program") as find_program:
+        find_program.return_value = "/usr/bin/podman"
+        pm = _cephadm.Podman()
+        find_program.assert_called()
+        with mock.patch("cephadm.call_throws") as call_throws:
+            call_throws.return_value = ("4.10.beta2", None, None)
+            with with_cephadm_ctx([]) as ctx:
+                with pytest.raises(ValueError):
+                    pm.get_version(ctx)
+
+
+def test_docker():
+    with mock.patch("cephadm.find_program") as find_program:
+        find_program.return_value = "/usr/bin/docker"
+        docker = _cephadm.Docker()
+        assert str(docker) == "docker (/usr/bin/docker)"
diff --git a/src/cephadm/tests/test_enclosure.py b/src/cephadm/tests/test_enclosure.py
new file mode 100644
index 000000000..1ea419fb3
--- /dev/null
+++ b/src/cephadm/tests/test_enclosure.py
@@ -0,0 +1,72 @@
+import pytest
+
+from unittest import mock
+from tests.fixtures import host_sysfs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+@pytest.fixture
+def enclosure(host_sysfs):
+    e = _cephadm.Enclosure(
+        enc_id='1',
+        enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0',
+        dev_path='/sys/class/scsi_generic/sg2')
+    yield e
+
+
+class TestEnclosure:
+
+    def test_enc_metadata(self, enclosure):
+        """Check metadata for the enclosure e.g. vendor and model"""
+       
+        assert enclosure.vendor == "EnclosuresInc"
+        assert enclosure.components == '12'
+        assert enclosure.model == "D12"
+        assert enclosure.enc_id == '1'
+
+        assert enclosure.ses_paths == ['sg2']
+        assert enclosure.path_count == 1
+
+    def test_enc_slots(self, enclosure):
+        """Check slot count"""
+
+        assert len(enclosure.slot_map) == 12
+
+    def test_enc_slot_format(self, enclosure):
+        """Check the attributes of a slot are as expected"""
+
+        assert all(k in ['fault', 'locate', 'serial', 'status'] 
+                   for k, _v in enclosure.slot_map['0'].items())
+
+    def test_enc_slot_status(self, enclosure):
+        """Check the number of occupied slots is correct"""
+
+        occupied_slots = [slot_id for slot_id in enclosure.slot_map 
+                          if enclosure.slot_map[slot_id].get('status').upper() == 'OK']
+
+        assert len(occupied_slots) == 6
+
+    def test_enc_disk_count(self, enclosure):
+        """Check the disks found matches the slot info"""
+
+        assert len(enclosure.device_lookup) == 6
+        assert enclosure.device_count == 6
+
+    def test_enc_device_serial(self, enclosure):
+        """Check the device serial numbers are as expected"""
+        
+        assert all(fake_serial in enclosure.device_lookup.keys() 
+                   for fake_serial in [
+                       'fake000',
+                       'fake001',
+                       'fake002',
+                       'fake003',
+                       'fake004',
+                       'fake005'])
+
+    def test_enc_slot_to_serial(self, enclosure):
+        """Check serial number to slot matches across slot_map and device_lookup"""
+
+        for serial, slot in enclosure.device_lookup.items():
+            assert enclosure.slot_map[slot].get('serial') == serial
diff --git a/src/cephadm/tests/test_ingress.py b/src/cephadm/tests/test_ingress.py
new file mode 100644
index 000000000..798c73708
--- /dev/null
+++ b/src/cephadm/tests/test_ingress.py
@@ -0,0 +1,350 @@
+from unittest import mock
+import json
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae"
+SAMPLE_HAPROXY_IMAGE = "registry.example.net/haproxy/haproxy:latest"
+SAMPLE_KEEPALIVED_IMAGE = "registry.example.net/keepalive/keepalived:latest"
+
+
+def good_haproxy_json():
+    return haproxy_json(files=True)
+
+
+def haproxy_json(**kwargs):
+    if kwargs.get("files"):
+        return {
+            "files": {
+                "haproxy.cfg": "",
+            },
+        }
+    return {}
+
+
+def good_keepalived_json():
+    return keepalived_json(files=True)
+
+
+def keepalived_json(**kwargs):
+    if kwargs.get("files"):
+        return {
+            "files": {
+                "keepalived.conf": "",
+            },
+        }
+    return {}
+
+
+@pytest.mark.parametrize(
+    "args",
+    # args: <fsid>, <daemon_id>, <config_json>, <image>
+    [
+        # fail due to: invalid fsid
+        (["foobar", "wilma", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]),
+        # fail due to: invalid daemon_id
+        ([SAMPLE_UUID, "", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]),
+        # fail due to: invalid image
+        ([SAMPLE_UUID, "wilma", good_haproxy_json(), ""]),
+        # fail due to: no files in config_json
+        (
+            [
+                SAMPLE_UUID,
+                "wilma",
+                haproxy_json(files=False),
+                SAMPLE_HAPROXY_IMAGE,
+            ]
+        ),
+    ],
+)
+def test_haproxy_validation_errors(args):
+    with pytest.raises(_cephadm.Error):
+        with with_cephadm_ctx([]) as ctx:
+            _cephadm.HAproxy(ctx, *args)
+
+
+def test_haproxy_init():
+    with with_cephadm_ctx([]) as ctx:
+        ctx.config_json = json.dumps(good_haproxy_json())
+        ctx.image = SAMPLE_HAPROXY_IMAGE
+        hap = _cephadm.HAproxy.init(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+        )
+    assert hap.fsid == SAMPLE_UUID
+    assert hap.daemon_id == "wilma"
+    assert hap.image == SAMPLE_HAPROXY_IMAGE
+
+
+def test_haproxy_container_mounts():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        cmounts = hap.get_container_mounts("/var/tmp")
+        assert len(cmounts) == 1
+        assert cmounts["/var/tmp/haproxy"] == "/var/lib/haproxy"
+
+
+def test_haproxy_get_daemon_name():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        assert hap.get_daemon_name() == "haproxy.wilma"
+
+
+def test_haproxy_get_container_name():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        name1 = hap.get_container_name()
+        assert (
+            name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma"
+        )
+        name2 = hap.get_container_name(desc="extra")
+        assert (
+            name2
+            == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma-extra"
+        )
+
+
+def test_haproxy_get_daemon_args():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        args = hap.get_daemon_args()
+        assert args == ["haproxy", "-f", "/var/lib/haproxy/haproxy.cfg"]
+
+
+@mock.patch("cephadm.logger")
+def test_haproxy_create_daemon_dirs(_logger, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        with pytest.raises(OSError):
+            hap.create_daemon_dirs("/var/tmp", 45, 54)
+        cephadm_fs.create_dir("/var/tmp")
+        hap.create_daemon_dirs("/var/tmp", 45, 54)
+        # TODO: make assertions about the dirs created
+
+
+def test_haproxy_extract_uid_gid_haproxy():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        with mock.patch("cephadm.CephContainer") as cc:
+            cc.return_value.run.return_value = "500 500"
+            uid, gid = hap.extract_uid_gid_haproxy()
+            cc.return_value.run.assert_called()
+        assert uid == 500
+        assert gid == 500
+
+
+def test_haproxy_get_sysctl_settings():
+    with with_cephadm_ctx([]) as ctx:
+        hap = _cephadm.HAproxy(
+            ctx,
+            SAMPLE_UUID,
+            "wilma",
+            good_haproxy_json(),
+            SAMPLE_HAPROXY_IMAGE,
+        )
+        ss = hap.get_sysctl_settings()
+        assert len(ss) == 3
+
+
+@pytest.mark.parametrize(
+    "args",
+    # args: <fsid>, <daemon_id>, <config_json>, <image>
+    [
+        # fail due to: invalid fsid
+        (
+            [
+                "foobar",
+                "barney",
+                good_keepalived_json(),
+                SAMPLE_KEEPALIVED_IMAGE,
+            ]
+        ),
+        # fail due to: invalid daemon_id
+        ([SAMPLE_UUID, "", good_keepalived_json(), SAMPLE_KEEPALIVED_IMAGE]),
+        # fail due to: invalid image
+        ([SAMPLE_UUID, "barney", good_keepalived_json(), ""]),
+        # fail due to: no files in config_json
+        (
+            [
+                SAMPLE_UUID,
+                "barney",
+                keepalived_json(files=False),
+                SAMPLE_KEEPALIVED_IMAGE,
+            ]
+        ),
+    ],
+)
+def test_keepalived_validation_errors(args):
+    with pytest.raises(_cephadm.Error):
+        with with_cephadm_ctx([]) as ctx:
+            _cephadm.Keepalived(ctx, *args)
+
+
+def test_keepalived_init():
+    with with_cephadm_ctx([]) as ctx:
+        ctx.config_json = json.dumps(good_keepalived_json())
+        ctx.image = SAMPLE_KEEPALIVED_IMAGE
+        kad = _cephadm.Keepalived.init(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+        )
+    assert kad.fsid == SAMPLE_UUID
+    assert kad.daemon_id == "barney"
+    assert kad.image == SAMPLE_KEEPALIVED_IMAGE
+
+
+def test_keepalived_container_mounts():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        cmounts = kad.get_container_mounts("/var/tmp")
+        assert len(cmounts) == 1
+        assert (
+            cmounts["/var/tmp/keepalived.conf"]
+            == "/etc/keepalived/keepalived.conf"
+        )
+
+
+def test_keepalived_get_daemon_name():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        assert kad.get_daemon_name() == "keepalived.barney"
+
+
+def test_keepalived_get_container_name():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        name1 = kad.get_container_name()
+        assert (
+            name1
+            == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney"
+        )
+        name2 = kad.get_container_name(desc="extra")
+        assert (
+            name2
+            == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney-extra"
+        )
+
+
+def test_keepalived_get_container_envs():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        args = kad.get_container_envs()
+        assert args == [
+            "KEEPALIVED_AUTOCONF=false",
+            "KEEPALIVED_CONF=/etc/keepalived/keepalived.conf",
+            "KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf",
+            "KEEPALIVED_DEBUG=false",
+        ]
+
+
+@mock.patch("cephadm.logger")
+def test_keepalived_create_daemon_dirs(_logger, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        with pytest.raises(OSError):
+            kad.create_daemon_dirs("/var/tmp", 45, 54)
+        cephadm_fs.create_dir("/var/tmp")
+        kad.create_daemon_dirs("/var/tmp", 45, 54)
+        # TODO: make assertions about the dirs created
+
+
+def test_keepalived_extract_uid_gid_keepalived():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        with mock.patch("cephadm.CephContainer") as cc:
+            cc.return_value.run.return_value = "500 500"
+            uid, gid = kad.extract_uid_gid_keepalived()
+            cc.return_value.run.assert_called()
+        assert uid == 500
+        assert gid == 500
+
+
+def test_keepalived_get_sysctl_settings():
+    with with_cephadm_ctx([]) as ctx:
+        kad = _cephadm.Keepalived(
+            ctx,
+            SAMPLE_UUID,
+            "barney",
+            good_keepalived_json(),
+            SAMPLE_KEEPALIVED_IMAGE,
+        )
+        ss = kad.get_sysctl_settings()
+        assert len(ss) == 3
diff --git a/src/cephadm/tests/test_networks.py b/src/cephadm/tests/test_networks.py
new file mode 100644
index 000000000..7c0575046
--- /dev/null
+++ b/src/cephadm/tests/test_networks.py
@@ -0,0 +1,233 @@
+import json
+from textwrap import dedent
+from unittest import mock
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+class TestCommandListNetworks:
+    @pytest.mark.parametrize("test_input, expected", [
+        (
+            dedent("""
+            default via 192.168.178.1 dev enxd89ef3f34260 proto dhcp metric 100
+            10.0.0.0/8 via 10.4.0.1 dev tun0 proto static metric 50
+            10.3.0.0/21 via 10.4.0.1 dev tun0 proto static metric 50
+            10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50
+            137.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+            138.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+            139.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+            140.1.0.0/17 via 10.4.0.1 dev tun0 proto static metric 50
+            141.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+            172.16.100.34 via 172.16.100.34 dev eth1 proto kernel scope link src 172.16.100.34
+            192.168.122.1 dev ens3 proto dhcp scope link src 192.168.122.236 metric 100
+            169.254.0.0/16 dev docker0 scope link metric 1000
+            172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1
+            192.168.39.0/24 dev virbr1 proto kernel scope link src 192.168.39.1 linkdown
+            192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+            192.168.178.0/24 dev enxd89ef3f34260 proto kernel scope link src 192.168.178.28 metric 100
+            192.168.178.1 dev enxd89ef3f34260 proto static scope link metric 100
+            195.135.221.12 via 192.168.178.1 dev enxd89ef3f34260 proto static metric 100
+            """),
+            {
+                '172.16.100.34/32': {'eth1': {'172.16.100.34'}},
+                '192.168.122.1/32': {'ens3': {'192.168.122.236'}},
+                '10.4.0.1/32': {'tun0': {'10.4.0.2'}},
+                '172.17.0.0/16': {'docker0': {'172.17.0.1'}},
+                '192.168.39.0/24': {'virbr1': {'192.168.39.1'}},
+                '192.168.122.0/24': {'virbr0': {'192.168.122.1'}},
+                '192.168.178.0/24': {'enxd89ef3f34260': {'192.168.178.28'}}
+            }
+        ), (
+            dedent("""
+            default via 10.3.64.1 dev eno1 proto static metric 100
+            10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.23 metric 100
+            10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.27 metric 100
+            10.88.0.0/16 dev cni-podman0 proto kernel scope link src 10.88.0.1 linkdown
+            172.21.0.0/20 via 172.21.3.189 dev tun0
+            172.21.1.0/20 via 172.21.3.189 dev tun0
+            172.21.2.1 via 172.21.3.189 dev tun0
+            172.21.3.1 dev tun0 proto kernel scope link src 172.21.3.2
+            172.21.4.0/24 via 172.21.3.1 dev tun0
+            172.21.5.0/24 via 172.21.3.1 dev tun0
+            172.21.6.0/24 via 172.21.3.1 dev tun0
+            172.21.7.0/24 via 172.21.3.1 dev tun0
+            192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+            192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+            192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+            192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+            """),
+            {
+                '10.3.64.0/24': {'eno1': {'10.3.64.23', '10.3.64.27'}},
+                '10.88.0.0/16': {'cni-podman0': {'10.88.0.1'}},
+                '172.21.3.1/32': {'tun0': {'172.21.3.2'}},
+                '192.168.122.0/24': {'virbr0': {'192.168.122.1'}}
+            }
+        ),
+    ])
+    def test_parse_ipv4_route(self, test_input, expected):
+        assert _cephadm._parse_ipv4_route(test_input) == expected
+
+    @pytest.mark.parametrize("test_routes, test_ips, expected", [
+        (
+            dedent("""
+            ::1 dev lo proto kernel metric 256 pref medium
+            fe80::/64 dev eno1 proto kernel metric 100 pref medium
+            fe80::/64 dev br-3d443496454c proto kernel metric 256 linkdown pref medium
+            fe80::/64 dev tun0 proto kernel metric 256 pref medium
+            fe80::/64 dev br-4355f5dbb528 proto kernel metric 256 pref medium
+            fe80::/64 dev docker0 proto kernel metric 256 linkdown pref medium
+            fe80::/64 dev cni-podman0 proto kernel metric 256 linkdown pref medium
+            fe80::/64 dev veth88ba1e8 proto kernel metric 256 pref medium
+            fe80::/64 dev vethb6e5fc7 proto kernel metric 256 pref medium
+            fe80::/64 dev vethaddb245 proto kernel metric 256 pref medium
+            fe80::/64 dev vethbd14d6b proto kernel metric 256 pref medium
+            fe80::/64 dev veth13e8fd2 proto kernel metric 256 pref medium
+            fe80::/64 dev veth1d3aa9e proto kernel metric 256 pref medium
+            fe80::/64 dev vethe485ca9 proto kernel metric 256 pref medium
+            """),
+            dedent("""
+            1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+                inet6 ::1/128 scope host 
+                   valid_lft forever preferred_lft forever
+            2: eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+                inet6 fe80::225:90ff:fee5:26e8/64 scope link noprefixroute 
+                   valid_lft forever preferred_lft forever
+            6: br-3d443496454c: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN 
+                inet6 fe80::42:23ff:fe9d:ee4/64 scope link 
+                   valid_lft forever preferred_lft forever
+            7: br-4355f5dbb528: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::42:6eff:fe35:41fe/64 scope link 
+                   valid_lft forever preferred_lft forever
+            8: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN 
+                inet6 fe80::42:faff:fee6:40a0/64 scope link 
+                   valid_lft forever preferred_lft forever
+            11: tun0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 state UNKNOWN qlen 100
+                inet6 fe80::98a6:733e:dafd:350/64 scope link stable-privacy 
+                   valid_lft forever preferred_lft forever
+            28: cni-podman0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN qlen 1000
+                inet6 fe80::3449:cbff:fe89:b87e/64 scope link 
+                   valid_lft forever preferred_lft forever
+            31: vethaddb245@if30: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::90f7:3eff:feed:a6bb/64 scope link 
+                   valid_lft forever preferred_lft forever
+            33: veth88ba1e8@if32: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::d:f5ff:fe73:8c82/64 scope link 
+                   valid_lft forever preferred_lft forever
+            35: vethbd14d6b@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::b44f:8ff:fe6f:813d/64 scope link 
+                   valid_lft forever preferred_lft forever
+            37: vethb6e5fc7@if36: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::4869:c6ff:feaa:8afe/64 scope link 
+                   valid_lft forever preferred_lft forever
+            39: veth13e8fd2@if38: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::78f4:71ff:fefe:eb40/64 scope link 
+                   valid_lft forever preferred_lft forever
+            41: veth1d3aa9e@if40: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::24bd:88ff:fe28:5b18/64 scope link 
+                   valid_lft forever preferred_lft forever
+            43: vethe485ca9@if42: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP 
+                inet6 fe80::6425:87ff:fe42:b9f0/64 scope link 
+                   valid_lft forever preferred_lft forever
+            """),
+            {
+                "fe80::/64": {
+                    "eno1": {"fe80::225:90ff:fee5:26e8"},
+                    "br-3d443496454c": {"fe80::42:23ff:fe9d:ee4"},
+                    "tun0": {"fe80::98a6:733e:dafd:350"},
+                    "br-4355f5dbb528": {"fe80::42:6eff:fe35:41fe"},
+                    "docker0": {"fe80::42:faff:fee6:40a0"},
+                    "cni-podman0": {"fe80::3449:cbff:fe89:b87e"},
+                    "veth88ba1e8": {"fe80::d:f5ff:fe73:8c82"},
+                    "vethb6e5fc7": {"fe80::4869:c6ff:feaa:8afe"},
+                    "vethaddb245": {"fe80::90f7:3eff:feed:a6bb"},
+                    "vethbd14d6b": {"fe80::b44f:8ff:fe6f:813d"},
+                    "veth13e8fd2": {"fe80::78f4:71ff:fefe:eb40"},
+                    "veth1d3aa9e": {"fe80::24bd:88ff:fe28:5b18"},
+                    "vethe485ca9": {"fe80::6425:87ff:fe42:b9f0"},
+                }
+            }
+        ),
+        (
+            dedent("""
+            ::1 dev lo proto kernel metric 256 pref medium
+            2001:1458:301:eb::100:1a dev ens20f0 proto kernel metric 100 pref medium
+            2001:1458:301:eb::/64 dev ens20f0 proto ra metric 100 pref medium
+            fd01:1458:304:5e::/64 dev ens20f0 proto ra metric 100 pref medium
+            fe80::/64 dev ens20f0 proto kernel metric 100 pref medium
+            default proto ra metric 100
+                    nexthop via fe80::46ec:ce00:b8a0:d3c8 dev ens20f0 weight 1
+                    nexthop via fe80::46ec:ce00:b8a2:33c8 dev ens20f0 weight 1 pref medium
+            """),
+            dedent("""
+            1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+                inet6 ::1/128 scope host
+                   valid_lft forever preferred_lft forever
+            2: ens20f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+                inet6 2001:1458:301:eb::100:1a/128 scope global dynamic noprefixroute
+                   valid_lft 590879sec preferred_lft 590879sec
+                inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+                   valid_lft forever preferred_lft forever
+                inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+                   valid_lft forever preferred_lft forever
+                inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+                   valid_lft forever preferred_lft forever
+            """),
+            {
+                '2001:1458:301:eb::100:1a/128': {
+                    'ens20f0': {
+                        '2001:1458:301:eb::100:1a'
+                    },
+                },
+                '2001:1458:301:eb::/64': {
+                    'ens20f0': set(),
+                },
+                'fe80::/64': {
+                    'ens20f0': {'fe80::2e60:cff:fef8:da41'},
+                },
+                'fd01:1458:304:5e::/64': {
+                    'ens20f0': set()
+                },
+            }
+        ),
+        (
+            dedent("""
+            ::1 dev lo proto kernel metric 256 pref medium
+            fe80::/64 dev ceph-brx proto kernel metric 256 pref medium
+            fe80::/64 dev brx.0 proto kernel metric 256 pref medium
+            default via fe80::327c:5e00:6487:71e0 dev enp3s0f1 proto ra metric 1024 expires 1790sec hoplimit 64 pref medium            """),
+            dedent("""
+            1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+                inet6 ::1/128 scope host
+                   valid_lft forever preferred_lft forever
+            5: enp3s0f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+                inet6 fe80::ec4:7aff:fe8f:cb83/64 scope link noprefixroute
+                   valid_lft forever preferred_lft forever
+            6: ceph-brx: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+                inet6 fe80::d8a1:69ff:fede:8f58/64 scope link
+                   valid_lft forever preferred_lft forever
+            7: brx.0@eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+                inet6 fe80::a4cb:54ff:fecc:f2a2/64 scope link
+                   valid_lft forever preferred_lft forever
+            """),
+            {
+                'fe80::/64': {
+                    'brx.0': {'fe80::a4cb:54ff:fecc:f2a2'},
+                    'ceph-brx': {'fe80::d8a1:69ff:fede:8f58'}
+                }
+            }
+        ),
+    ])
+    def test_parse_ipv6_route(self, test_routes, test_ips, expected):
+        assert _cephadm._parse_ipv6_route(test_routes, test_ips) == expected
+
+    @mock.patch.object(_cephadm, 'call_throws', return_value=('10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50\n', '', ''))
+    def test_command_list_networks(self, cephadm_fs, capsys):
+        with with_cephadm_ctx([]) as ctx:
+            _cephadm.command_list_networks(ctx)
+            assert json.loads(capsys.readouterr().out) == {
+                '10.4.0.1/32': {'tun0': ['10.4.0.2']}
+            }
diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py
new file mode 100644
index 000000000..0649ef934
--- /dev/null
+++ b/src/cephadm/tests/test_nfs.py
@@ -0,0 +1,239 @@
+from unittest import mock
+import json
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae"
+
+
+def good_nfs_json():
+    return nfs_json(
+        pool=True,
+        files=True,
+    )
+
+
+def nfs_json(**kwargs):
+    result = {}
+    if kwargs.get("pool"):
+        result["pool"] = "party"
+    if kwargs.get("files"):
+        result["files"] = {
+            "ganesha.conf": "",
+        }
+    if kwargs.get("rgw_content"):
+        result["rgw"] = dict(kwargs["rgw_content"])
+    elif kwargs.get("rgw"):
+        result["rgw"] = {
+            "keyring": "foobar",
+            "user": "jsmith",
+        }
+    return result
+
+
+@pytest.mark.parametrize(
+    "args,kwargs",
+    # args: <fsid>, <daemon_id>, <config_json>; kwargs: <image>
+    [
+        # fail due to: invalid fsid
+        (["foobar", "fred", good_nfs_json()], {}),
+        # fail due to: invalid daemon_id
+        ([SAMPLE_UUID, "", good_nfs_json()], {}),
+        # fail due to: invalid image
+        (
+            [SAMPLE_UUID, "fred", good_nfs_json()],
+            {"image": ""},
+        ),
+        # fail due to: no files in config_json
+        (
+            [
+                SAMPLE_UUID,
+                "fred",
+                nfs_json(pool=True),
+            ],
+            {},
+        ),
+        # fail due to: no pool in config_json
+        (
+            [
+                SAMPLE_UUID,
+                "fred",
+                nfs_json(files=True),
+            ],
+            {},
+        ),
+        # fail due to: bad rgw content
+        (
+            [
+                SAMPLE_UUID,
+                "fred",
+                nfs_json(pool=True, files=True, rgw_content={"foo": True}),
+            ],
+            {},
+        ),
+        # fail due to: rgw keyring given but no user
+        (
+            [
+                SAMPLE_UUID,
+                "fred",
+                nfs_json(
+                    pool=True, files=True, rgw_content={"keyring": "foo"}
+                ),
+            ],
+            {},
+        ),
+    ],
+)
+def test_nfsganesha_validation_errors(args, kwargs):
+    with pytest.raises(_cephadm.Error):
+        with with_cephadm_ctx([]) as ctx:
+            _cephadm.NFSGanesha(ctx, *args, **kwargs)
+
+
+def test_nfsganesha_init():
+    with with_cephadm_ctx([]) as ctx:
+        ctx.config_json = json.dumps(good_nfs_json())
+        ctx.image = "test_image"
+        nfsg = _cephadm.NFSGanesha.init(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+        )
+    assert nfsg.fsid == SAMPLE_UUID
+    assert nfsg.daemon_id == "fred"
+    assert nfsg.pool == "party"
+
+
+def test_nfsganesha_container_mounts():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        cmounts = nfsg.get_container_mounts("/var/tmp")
+        assert len(cmounts) == 3
+        assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
+        assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
+        assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z"
+
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            nfs_json(pool=True, files=True, rgw=True),
+        )
+        cmounts = nfsg.get_container_mounts("/var/tmp")
+        assert len(cmounts) == 4
+        assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
+        assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
+        assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z"
+        assert (
+            cmounts["/var/tmp/keyring.rgw"]
+            == "/var/lib/ceph/radosgw/ceph-jsmith/keyring:z"
+        )
+
+
+def test_nfsganesha_container_envs():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        envs = nfsg.get_container_envs()
+        assert len(envs) == 1
+        assert envs[0] == "CEPH_CONF=/etc/ceph/ceph.conf"
+
+
+def test_nfsganesha_get_version():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+
+        with mock.patch("cephadm.call") as _call:
+            _call.return_value = ("NFS-Ganesha Release = V100", "", 0)
+            ver = nfsg.get_version(ctx, "fake_version")
+            _call.assert_called()
+        assert ver == "100"
+
+
+def test_nfsganesha_get_daemon_name():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        assert nfsg.get_daemon_name() == "nfs.fred"
+
+
+def test_nfsganesha_get_container_name():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        name1 = nfsg.get_container_name()
+        assert name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred"
+        name2 = nfsg.get_container_name(desc="extra")
+        assert (
+            name2 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred-extra"
+        )
+
+
+def test_nfsganesha_get_daemon_args():
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        args = nfsg.get_daemon_args()
+        assert args == ["-F", "-L", "STDERR"]
+
+
+@mock.patch("cephadm.logger")
+def test_nfsganesha_create_daemon_dirs(_logger, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            good_nfs_json(),
+        )
+        with pytest.raises(OSError):
+            nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+        cephadm_fs.create_dir("/var/tmp")
+        nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+        # TODO: make assertions about the dirs created
+
+
+@mock.patch("cephadm.logger")
+def test_nfsganesha_create_daemon_dirs_rgw(_logger, cephadm_fs):
+    with with_cephadm_ctx([]) as ctx:
+        nfsg = _cephadm.NFSGanesha(
+            ctx,
+            SAMPLE_UUID,
+            "fred",
+            nfs_json(pool=True, files=True, rgw=True),
+        )
+        cephadm_fs.create_dir("/var/tmp")
+        nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+        # TODO: make assertions about the dirs created
diff --git a/src/cephadm/tests/test_util_funcs.py b/src/cephadm/tests/test_util_funcs.py
new file mode 100644
index 000000000..270753a55
--- /dev/null
+++ b/src/cephadm/tests/test_util_funcs.py
@@ -0,0 +1,808 @@
+# Tests for various assorted utility functions found within cephadm
+#
+from unittest import mock
+
+import functools
+import io
+import os
+import sys
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+class TestCopyTree:
+    def _copy_tree(self, *args, **kwargs):
+        with with_cephadm_ctx([]) as ctx:
+            with mock.patch("cephadm.extract_uid_gid") as eug:
+                eug.return_value = (os.getuid(), os.getgid())
+                _cephadm.copy_tree(ctx, *args, **kwargs)
+
+    def test_one_dir(self, tmp_path):
+        """Copy one dir into a non-existing dest dir."""
+        src1 = tmp_path / "src1"
+        dst = tmp_path / "dst"
+        src1.mkdir(parents=True)
+
+        with (src1 / "foo.txt").open("w") as fh:
+            fh.write("hello\n")
+            fh.write("earth\n")
+
+        assert not (dst / "foo.txt").exists()
+
+        self._copy_tree([src1], dst)
+        assert (dst / "foo.txt").exists()
+
+    def test_one_existing_dir(self, tmp_path):
+        """Copy one dir into an existing dest dir."""
+        src1 = tmp_path / "src1"
+        dst = tmp_path / "dst"
+        src1.mkdir(parents=True)
+        dst.mkdir(parents=True)
+
+        with (src1 / "foo.txt").open("w") as fh:
+            fh.write("hello\n")
+            fh.write("earth\n")
+
+        assert not (dst / "src1").exists()
+
+        self._copy_tree([src1], dst)
+        assert (dst / "src1/foo.txt").exists()
+
+    def test_two_dirs(self, tmp_path):
+        """Copy two source directories into an existing dest dir."""
+        src1 = tmp_path / "src1"
+        src2 = tmp_path / "src2"
+        dst = tmp_path / "dst"
+        src1.mkdir(parents=True)
+        src2.mkdir(parents=True)
+        dst.mkdir(parents=True)
+
+        with (src1 / "foo.txt").open("w") as fh:
+            fh.write("hello\n")
+            fh.write("earth\n")
+        with (src2 / "bar.txt").open("w") as fh:
+            fh.write("goodbye\n")
+            fh.write("mars\n")
+
+        assert not (dst / "src1").exists()
+        assert not (dst / "src2").exists()
+
+        self._copy_tree([src1, src2], dst)
+        assert (dst / "src1/foo.txt").exists()
+        assert (dst / "src2/bar.txt").exists()
+
+    def test_one_dir_set_uid(self, tmp_path):
+        """Explicity pass uid/gid values and assert these are passed to chown."""
+        # Because this test will often be run by non-root users it is necessary
+        # to mock os.chown or we too easily run into perms issues.
+        src1 = tmp_path / "src1"
+        dst = tmp_path / "dst"
+        src1.mkdir(parents=True)
+
+        with (src1 / "foo.txt").open("w") as fh:
+            fh.write("hello\n")
+            fh.write("earth\n")
+
+        assert not (dst / "foo.txt").exists()
+
+        with mock.patch("os.chown") as _chown:
+            _chown.return_value = None
+            self._copy_tree([src1], dst, uid=0, gid=0)
+            assert len(_chown.mock_calls) >= 2
+            for c in _chown.mock_calls:
+                assert c == mock.call(mock.ANY, 0, 0)
+        assert (dst / "foo.txt").exists()
+
+
+class TestCopyFiles:
+    def _copy_files(self, *args, **kwargs):
+        with with_cephadm_ctx([]) as ctx:
+            with mock.patch("cephadm.extract_uid_gid") as eug:
+                eug.return_value = (os.getuid(), os.getgid())
+                _cephadm.copy_files(ctx, *args, **kwargs)
+
+    def test_one_file(self, tmp_path):
+        """Copy one file into the dest dir."""
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+        dst.mkdir(parents=True)
+
+        with file1.open("w") as fh:
+            fh.write("its test time\n")
+
+        self._copy_files([file1], dst)
+        assert (dst / "f1.txt").exists()
+
+    def test_one_file_nodest(self, tmp_path):
+        """Copy one file to the given destination path."""
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+
+        with file1.open("w") as fh:
+            fh.write("its test time\n")
+
+        self._copy_files([file1], dst)
+        assert not dst.is_dir()
+        assert dst.is_file()
+        assert dst.open("r").read() == "its test time\n"
+
+    def test_three_files(self, tmp_path):
+        """Copy one file into the dest dir."""
+        file1 = tmp_path / "f1.txt"
+        file2 = tmp_path / "f2.txt"
+        file3 = tmp_path / "f3.txt"
+        dst = tmp_path / "dst"
+        dst.mkdir(parents=True)
+
+        with file1.open("w") as fh:
+            fh.write("its test time\n")
+        with file2.open("w") as fh:
+            fh.write("f2\n")
+        with file3.open("w") as fh:
+            fh.write("f3\n")
+
+        self._copy_files([file1, file2, file3], dst)
+        assert (dst / "f1.txt").exists()
+        assert (dst / "f2.txt").exists()
+        assert (dst / "f3.txt").exists()
+
+    def test_three_files_nodest(self, tmp_path):
+        """Copy files to dest path (not a dir). This is not a useful operation."""
+        file1 = tmp_path / "f1.txt"
+        file2 = tmp_path / "f2.txt"
+        file3 = tmp_path / "f3.txt"
+        dst = tmp_path / "dst"
+
+        with file1.open("w") as fh:
+            fh.write("its test time\n")
+        with file2.open("w") as fh:
+            fh.write("f2\n")
+        with file3.open("w") as fh:
+            fh.write("f3\n")
+
+        self._copy_files([file1, file2, file3], dst)
+        assert not dst.is_dir()
+        assert dst.is_file()
+        assert dst.open("r").read() == "f3\n"
+
+    def test_one_file_set_uid(self, tmp_path):
+        """Explicity pass uid/gid values and assert these are passed to chown."""
+        # Because this test will often be run by non-root users it is necessary
+        # to mock os.chown or we too easily run into perms issues.
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+        dst.mkdir(parents=True)
+
+        with file1.open("w") as fh:
+            fh.write("its test time\n")
+
+        assert not (dst / "f1.txt").exists()
+
+        with mock.patch("os.chown") as _chown:
+            _chown.return_value = None
+            self._copy_files([file1], dst, uid=0, gid=0)
+            assert len(_chown.mock_calls) >= 1
+            for c in _chown.mock_calls:
+                assert c == mock.call(mock.ANY, 0, 0)
+        assert (dst / "f1.txt").exists()
+
+
+class TestMoveFiles:
+    def _move_files(self, *args, **kwargs):
+        with with_cephadm_ctx([]) as ctx:
+            with mock.patch("cephadm.extract_uid_gid") as eug:
+                eug.return_value = (os.getuid(), os.getgid())
+                _cephadm.move_files(ctx, *args, **kwargs)
+
+    def test_one_file(self, tmp_path):
+        """Move a named file to test dest path."""
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+
+        with file1.open("w") as fh:
+            fh.write("lets moove\n")
+
+        assert not dst.exists()
+        assert file1.is_file()
+
+        self._move_files([file1], dst)
+        assert dst.is_file()
+        assert not file1.exists()
+
+    def test_one_file_destdir(self, tmp_path):
+        """Move a file into an existing dest dir."""
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+        dst.mkdir(parents=True)
+
+        with file1.open("w") as fh:
+            fh.write("lets moove\n")
+
+        assert not (dst / "f1.txt").exists()
+        assert file1.is_file()
+
+        self._move_files([file1], dst)
+        assert (dst / "f1.txt").is_file()
+        assert not file1.exists()
+
+    def test_one_file_one_link(self, tmp_path):
+        """Move a file and a symlink to that file to a dest dir."""
+        file1 = tmp_path / "f1.txt"
+        link1 = tmp_path / "lnk"
+        dst = tmp_path / "dst"
+        dst.mkdir(parents=True)
+
+        with file1.open("w") as fh:
+            fh.write("lets moove\n")
+        os.symlink("f1.txt", link1)
+
+        assert not (dst / "f1.txt").exists()
+        assert file1.is_file()
+        assert link1.exists()
+
+        self._move_files([file1, link1], dst)
+        assert (dst / "f1.txt").is_file()
+        assert (dst / "lnk").is_symlink()
+        assert not file1.exists()
+        assert not link1.exists()
+        assert (dst / "f1.txt").open("r").read() == "lets moove\n"
+        assert (dst / "lnk").open("r").read() == "lets moove\n"
+
+    def test_one_file_set_uid(self, tmp_path):
+        """Explicity pass uid/gid values and assert these are passed to chown."""
+        # Because this test will often be run by non-root users it is necessary
+        # to mock os.chown or we too easily run into perms issues.
+        file1 = tmp_path / "f1.txt"
+        dst = tmp_path / "dst"
+
+        with file1.open("w") as fh:
+            fh.write("lets moove\n")
+
+        assert not dst.exists()
+        assert file1.is_file()
+
+        with mock.patch("os.chown") as _chown:
+            _chown.return_value = None
+            self._move_files([file1], dst, uid=0, gid=0)
+            assert len(_chown.mock_calls) >= 1
+            for c in _chown.mock_calls:
+                assert c == mock.call(mock.ANY, 0, 0)
+        assert dst.is_file()
+        assert not file1.exists()
+
+
+def test_recursive_chown(tmp_path):
+    d1 = tmp_path / "dir1"
+    d2 = d1 / "dir2"
+    f1 = d2 / "file1.txt"
+    d2.mkdir(parents=True)
+
+    with f1.open("w") as fh:
+        fh.write("low down\n")
+
+    with mock.patch("os.chown") as _chown:
+        _chown.return_value = None
+        _cephadm.recursive_chown(str(d1), uid=500, gid=500)
+    assert len(_chown.mock_calls) == 3
+    assert _chown.mock_calls[0] == mock.call(str(d1), 500, 500)
+    assert _chown.mock_calls[1] == mock.call(str(d2), 500, 500)
+    assert _chown.mock_calls[2] == mock.call(str(f1), 500, 500)
+
+
+class TestFindExecutable:
+    def test_standard_exe(self):
+        # pretty much every system will have `true` on the path. It's a safe choice
+        # for the first assertion
+        exe = _cephadm.find_executable("true")
+        assert exe.endswith("true")
+
+    def test_custom_path(self, tmp_path):
+        foo_sh = tmp_path / "foo.sh"
+        with open(foo_sh, "w") as fh:
+            fh.write("#!/bin/sh\n")
+            fh.write("echo foo\n")
+        foo_sh.chmod(0o755)
+
+        exe = _cephadm.find_executable(foo_sh)
+        assert str(exe) == str(foo_sh)
+
+    def test_no_path(self, monkeypatch):
+        monkeypatch.delenv("PATH")
+        exe = _cephadm.find_executable("true")
+        assert exe.endswith("true")
+
+    def test_no_path_no_confstr(self, monkeypatch):
+        def _fail(_):
+            raise ValueError("fail")
+
+        monkeypatch.delenv("PATH")
+        monkeypatch.setattr("os.confstr", _fail)
+        exe = _cephadm.find_executable("true")
+        assert exe.endswith("true")
+
+    def test_unset_path(self):
+        exe = _cephadm.find_executable("true", path="")
+        assert exe is None
+
+    def test_no_such_exe(self):
+        exe = _cephadm.find_executable("foo_bar-baz.noway")
+        assert exe is None
+
+
+def test_find_program():
+    exe = _cephadm.find_program("true")
+    assert exe.endswith("true")
+
+    with pytest.raises(ValueError):
+        _cephadm.find_program("foo_bar-baz.noway")
+
+
+def _mk_fake_call(enabled, active):
+    def _fake_call(ctx, cmd, **kwargs):
+        if "is-enabled" in cmd:
+            if isinstance(enabled, Exception):
+                raise enabled
+            return enabled
+        if "is-active" in cmd:
+            if isinstance(active, Exception):
+                raise active
+            return active
+        raise ValueError("should not get here")
+
+    return _fake_call
+
+
+@pytest.mark.parametrize(
+    "enabled_out, active_out, expected",
+    [
+        (
+            # ok, all is well
+            ("", "", 0),
+            ("active", "", 0),
+            (True, "running", True),
+        ),
+        (
+            # disabled, unknown if active
+            ("disabled", "", 1),
+            ("", "", 0),
+            (False, "unknown", True),
+        ),
+        (
+            # is-enabled error (not disabled, unknown if active
+            ("bleh", "", 1),
+            ("", "", 0),
+            (False, "unknown", False),
+        ),
+        (
+            # is-enabled ok, inactive is stopped
+            ("", "", 0),
+            ("inactive", "", 0),
+            (True, "stopped", True),
+        ),
+        (
+            # is-enabled ok, failed is error
+            ("", "", 0),
+            ("failed", "", 0),
+            (True, "error", True),
+        ),
+        (
+            # is-enabled ok, auto-restart is error
+            ("", "", 0),
+            ("auto-restart", "", 0),
+            (True, "error", True),
+        ),
+        (
+            # error exec'ing is-enabled cmd
+            ValueError("bonk"),
+            ("active", "", 0),
+            (False, "running", False),
+        ),
+        (
+            # error exec'ing is-enabled cmd
+            ("", "", 0),
+            ValueError("blat"),
+            (True, "unknown", True),
+        ),
+    ],
+)
+def test_check_unit(enabled_out, active_out, expected):
+    with with_cephadm_ctx([]) as ctx:
+        _cephadm.call.side_effect = _mk_fake_call(
+            enabled=enabled_out,
+            active=active_out,
+        )
+        enabled, state, installed = _cephadm.check_unit(ctx, "foobar")
+    assert (enabled, state, installed) == expected
+
+
+class FakeEnabler:
+    def __init__(self, should_be_called):
+        self._should_be_called = should_be_called
+        self._services = []
+
+    def enable_service(self, service):
+        self._services.append(service)
+
+    def check_expected(self):
+        if not self._should_be_called:
+            assert not self._services
+            return
+        # there are currently seven chron/chrony type services that
+        # cephadm looks for. Make sure it probed for each of them
+        # or more in case someone adds to the list.
+        assert len(self._services) >= 7
+        assert "chrony.service" in self._services
+        assert "ntp.service" in self._services
+
+
+@pytest.mark.parametrize(
+    "call_fn, enabler, expected",
+    [
+        # Test that time sync services are not enabled
+        (
+            _mk_fake_call(
+                enabled=("", "", 1),
+                active=("", "", 1),
+            ),
+            None,
+            False,
+        ),
+        # Test that time sync service is enabled
+        (
+            _mk_fake_call(
+                enabled=("", "", 0),
+                active=("active", "", 0),
+            ),
+            None,
+            True,
+        ),
+        # Test that time sync is not enabled, and try to enable them.
+        # This one needs to be not running, but installed in order to
+        # call the enabler. It should call the enabler with every known
+        # service name.
+        (
+            _mk_fake_call(
+                enabled=("disabled", "", 1),
+                active=("", "", 1),
+            ),
+            FakeEnabler(True),
+            False,
+        ),
+        # Test that time sync is enabled, with an enabler passed which
+        # will check that the enabler was never called.
+        (
+            _mk_fake_call(
+                enabled=("", "", 0),
+                active=("active", "", 0),
+            ),
+            FakeEnabler(False),
+            True,
+        ),
+    ],
+)
+def test_check_time_sync(call_fn, enabler, expected):
+    """The check_time_sync call actually checks if a time synchronization service
+    is enabled. It is also the only consumer of check_units.
+    """
+    with with_cephadm_ctx([]) as ctx:
+        _cephadm.call.side_effect = call_fn
+        result = _cephadm.check_time_sync(ctx, enabler=enabler)
+        assert result == expected
+        if enabler is not None:
+            enabler.check_expected()
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        (
+            """#JUNK
+            FOO=1
+            """,
+            (None, None, None),
+        ),
+        (
+            """# A sample from a real centos system
+NAME="CentOS Stream"
+VERSION="8"
+ID="centos"
+ID_LIKE="rhel fedora"
+VERSION_ID="8"
+PLATFORM_ID="platform:el8"
+PRETTY_NAME="CentOS Stream 8"
+ANSI_COLOR="0;31"
+CPE_NAME="cpe:/o:centos:centos:8"
+HOME_URL="https://centos.org/"
+BUG_REPORT_URL="https://bugzilla.redhat.com/"
+REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 8"
+REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream"
+            """,
+            ("centos", "8", None),
+        ),
+        (
+            """# Minimal but complete, made up vals
+ID="hpec"
+VERSION_ID="33"
+VERSION_CODENAME="hpec nimda"
+            """,
+            ("hpec", "33", "hpec nimda"),
+        ),
+        (
+            """# Minimal but complete, no quotes
+ID=hpec
+VERSION_ID=33
+VERSION_CODENAME=hpec nimda
+            """,
+            ("hpec", "33", "hpec nimda"),
+        ),
+    ],
+)
+def test_get_distro(monkeypatch, content, expected):
+    def _fake_open(*args, **kwargs):
+        return io.StringIO(content)
+
+    monkeypatch.setattr("builtins.open", _fake_open)
+    assert _cephadm.get_distro() == expected
+
+
+class FakeContext:
+    """FakeContext is a minimal type for passing as a ctx, when
+    with_cephadm_ctx is not appropriate (it enables too many mocks, etc).
+    """
+
+    timeout = 30
+
+
+def _has_non_zero_exit(clog):
+    assert any("Non-zero exit" in ll for _, _, ll in clog.record_tuples)
+
+
+def _has_values_somewhere(clog, values, non_zero=True):
+    if non_zero:
+        _has_non_zero_exit(clog)
+    for value in values:
+        assert any(value in ll for _, _, ll in clog.record_tuples)
+
+
+@pytest.mark.parametrize(
+    "pyline, expected, call_kwargs, log_check",
+    [
+        pytest.param(
+            "import time; time.sleep(0.1)",
+            ("", "", 0),
+            {},
+            None,
+            id="brief-sleep",
+        ),
+        pytest.param(
+            "import sys; sys.exit(2)",
+            ("", "", 2),
+            {},
+            _has_non_zero_exit,
+            id="exit-non-zero",
+        ),
+        pytest.param(
+            "import sys; sys.exit(0)",
+            ("", "", 0),
+            {"desc": "success"},
+            None,
+            id="success-with-desc",
+        ),
+        pytest.param(
+            "print('foo'); print('bar')",
+            ("foo\nbar\n", "", 0),
+            {"desc": "stdout"},
+            None,
+            id="stdout-print",
+        ),
+        pytest.param(
+            "import sys; sys.stderr.write('la\\nla\\nla\\n')",
+            ("", "la\nla\nla\n", 0),
+            {"desc": "stderr"},
+            None,
+            id="stderr-print",
+        ),
+        pytest.param(
+            "for i in range(501): print(i, flush=True)",
+            lambda r: r[2] == 0 and r[1] == "" and "500" in r[0].splitlines(),
+            {},
+            None,
+            id="stdout-long",
+        ),
+        pytest.param(
+            "for i in range(1000000): print(i, flush=True)",
+            lambda r: r[2] == 0
+            and r[1] == ""
+            and len(r[0].splitlines()) == 1000000,
+            {},
+            None,
+            id="stdout-very-long",
+        ),
+        pytest.param(
+            "import sys; sys.stderr.write('pow\\noof\\nouch\\n'); sys.exit(1)",
+            ("", "pow\noof\nouch\n", 1),
+            {"desc": "stderr"},
+            functools.partial(
+                _has_values_somewhere,
+                values=["pow", "oof", "ouch"],
+                non_zero=True,
+            ),
+            id="stderr-logged-non-zero",
+        ),
+        pytest.param(
+            "import time; time.sleep(4)",
+            ("", "", 124),
+            {"timeout": 1},
+            None,
+            id="long-sleep",
+        ),
+        pytest.param(
+            "import time\nfor i in range(100):\n\tprint(i, flush=True); time.sleep(0.01)",
+            ("", "", 124),
+            {"timeout": 0.5},
+            None,
+            id="slow-print-timeout",
+        ),
+        # Commands that time out collect no logs, return empty std{out,err} strings
+    ],
+)
+def test_call(caplog, monkeypatch, pyline, expected, call_kwargs, log_check):
+    import logging
+
+    caplog.set_level(logging.INFO)
+    monkeypatch.setattr("cephadm.logger", logging.getLogger())
+    ctx = FakeContext()
+    result = _cephadm.call(ctx, [sys.executable, "-c", pyline], **call_kwargs)
+    if callable(expected):
+        assert expected(result)
+    else:
+        assert result == expected
+    if callable(log_check):
+        log_check(caplog)
+
+
+class TestWriteNew:
+    def test_success(self, tmp_path):
+        "Test the simple basic feature of writing a file."
+        dest = tmp_path / "foo.txt"
+        with _cephadm.write_new(dest) as fh:
+            fh.write("something\n")
+            fh.write("something else\n")
+
+        with open(dest, "r") as fh:
+            assert fh.read() == "something\nsomething else\n"
+
+    def test_write_ower_mode(self, tmp_path):
+        "Test that the owner and perms options function."
+        dest = tmp_path / "foo.txt"
+
+        # if this is test run as non-root, we can't really change ownership
+        uid = os.getuid()
+        gid = os.getgid()
+
+        with _cephadm.write_new(dest, owner=(uid, gid), perms=0o600) as fh:
+            fh.write("xomething\n")
+            fh.write("xomething else\n")
+
+        with open(dest, "r") as fh:
+            assert fh.read() == "xomething\nxomething else\n"
+            sr = os.fstat(fh.fileno())
+            assert sr.st_uid == uid
+            assert sr.st_gid == gid
+            assert (sr.st_mode & 0o777) == 0o600
+
+    def test_encoding(self, tmp_path):
+        "Test that the encoding option functions."
+        dest = tmp_path / "foo.txt"
+        msg = "\u2603\u26C5\n"
+        with _cephadm.write_new(dest, encoding='utf-8') as fh:
+            fh.write(msg)
+        with open(dest, "rb") as fh:
+            b1 = fh.read()
+            assert b1.decode('utf-8') == msg
+
+        dest = tmp_path / "foo2.txt"
+        with _cephadm.write_new(dest, encoding='utf-16le') as fh:
+            fh.write(msg)
+        with open(dest, "rb") as fh:
+            b2 = fh.read()
+            assert b2.decode('utf-16le') == msg
+
+        # the binary data should differ due to the different encodings
+        assert b1 != b2
+
+    def test_cleanup(self, tmp_path):
+        "Test that an exception during write leaves no file behind."
+        dest = tmp_path / "foo.txt"
+        with pytest.raises(ValueError):
+            with _cephadm.write_new(dest) as fh:
+                fh.write("hello\n")
+                raise ValueError("foo")
+                fh.write("world\n")
+        assert not dest.exists()
+        assert not dest.with_name(dest.name+".new").exists()
+        assert list(dest.parent.iterdir()) == []
+
+
+class CompareContext1:
+    cfg_data = {
+        "name": "mane",
+        "fsid": "foobar",
+        "image": "fake.io/noway/nohow:gndn",
+        "meta": {
+            "fruit": "banana",
+            "vegetable": "carrot",
+        },
+        "params": {
+            "osd_fsid": "robble",
+            "tcp_ports": [404, 9999],
+        },
+        "config_blobs": {
+            "alpha": {"sloop": "John B"},
+            "beta": {"forest": "birch"},
+            "gamma": {"forest": "pine"},
+        },
+    }
+
+    def check(self, ctx):
+        assert ctx.name == 'mane'
+        assert ctx.fsid == 'foobar'
+        assert ctx.image == 'fake.io/noway/nohow:gndn'
+        assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"}
+        assert ctx.config_blobs == {
+            "alpha": {"sloop": "John B"},
+            "beta": {"forest": "birch"},
+            "gamma": {"forest": "pine"},
+        }
+        assert ctx.osd_fsid == "robble"
+        assert ctx.tcp_ports == [404, 9999]
+
+
+class CompareContext2:
+    cfg_data = {
+        "name": "cc2",
+        "fsid": "foobar",
+        "meta": {
+            "fruit": "banana",
+            "vegetable": "carrot",
+        },
+        "params": {},
+        "config_blobs": {
+            "alpha": {"sloop": "John B"},
+            "beta": {"forest": "birch"},
+            "gamma": {"forest": "pine"},
+        },
+    }
+
+    def check(self, ctx):
+        assert ctx.name == 'cc2'
+        assert ctx.fsid == 'foobar'
+        assert ctx.image == 'quay.io/ceph/ceph:v18'
+        assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"}
+        assert ctx.config_blobs == {
+            "alpha": {"sloop": "John B"},
+            "beta": {"forest": "birch"},
+            "gamma": {"forest": "pine"},
+        }
+        assert ctx.osd_fsid is None
+        assert ctx.tcp_ports is None
+
+
+@pytest.mark.parametrize(
+    "cc",
+    [
+        CompareContext1(),
+        CompareContext2(),
+    ],
+)
+def test_apply_deploy_config_to_ctx(cc, monkeypatch):
+    import logging
+
+    monkeypatch.setattr("cephadm.logger", logging.getLogger())
+    ctx = FakeContext()
+    _cephadm.apply_deploy_config_to_ctx(cc.cfg_data, ctx)
+    cc.check(ctx)
diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini
new file mode 100644
index 000000000..2cbfca70f
--- /dev/null
+++ b/src/cephadm/tox.ini
@@ -0,0 +1,77 @@
+[tox]
+envlist =
+    py3
+    mypy
+    fix
+    flake8
+skipsdist = true
+
+[flake8]
+max-line-length = 100
+inline-quotes = '
+ignore =
+    E501, \
+    W503,
+exclude =
+    .tox, \
+    .vagrant, \
+    __pycache__, \
+    *.pyc, \
+    templates, \
+    .eggs
+statistics = True
+
+[autopep8]
+addopts =
+    --max-line-length {[flake8]max-line-length} \
+    --ignore "{[flake8]ignore}" \
+    --exclude "{[flake8]exclude}" \
+    --in-place \
+    --recursive \
+    --ignore-local-config
+
+[testenv]
+skip_install=true
+deps =
+  pyfakefs == 4.5.6 ; python_version < "3.7"
+  pyfakefs >= 5, < 6 ; python_version >= "3.7"
+  mock
+  pytest
+commands=pytest {posargs}
+
+[testenv:mypy]
+basepython = python3
+deps =
+    mypy
+    -c{toxinidir}/../mypy-constrains.txt
+commands = mypy --config-file ../mypy.ini {posargs:cephadm.py}
+
+[testenv:fix]
+basepython = python3
+deps =
+    autopep8
+commands =
+    python --version
+    autopep8 {[autopep8]addopts} {posargs: cephadm.py}
+
+[testenv:flake8]
+basepython = python3
+allowlist_externals = bash
+deps =
+    flake8 == 5.0.4
+    flake8-quotes
+commands =
+    flake8 --config=tox.ini {posargs:cephadm.py}
+    bash -c "test $(grep -c 'docker.io' cephadm.py) == 11"
+# Downstream distributions may choose to alter this "docker.io" number,
+# to make sure no new references to docker.io are creeping in unnoticed.
+
+# coverage env is intentionally left out of the envlist. It is here for developers
+# to run locally to generate and review test coverage of cephadm.
+[testenv:coverage]
+skip_install=true
+deps =
+  {[testenv]deps}
+  pytest-cov
+commands =
+  pytest -v --cov=cephadm --cov-report=term-missing --cov-report=html {posargs}
diff --git a/src/cephadm/vstart-cleanup.sh b/src/cephadm/vstart-cleanup.sh
new file mode 100755
index 000000000..facbdd100
--- /dev/null
+++ b/src/cephadm/vstart-cleanup.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -ex
+
+bin/ceph mon rm `hostname`
+for f in `bin/ceph orch ls | grep -v NAME | awk '{print $1}'` ; do
+    bin/ceph orch rm $f --force
+done
diff --git a/src/cephadm/vstart-smoke.sh b/src/cephadm/vstart-smoke.sh
new file mode 100755
index 000000000..ecdb59d18
--- /dev/null
+++ b/src/cephadm/vstart-smoke.sh
@@ -0,0 +1,86 @@
+#!/bin/bash -ex
+
+# this is a smoke test, meant to be run against vstart.sh.
+
+host="$(hostname)"
+
+bin/init-ceph stop || true
+MON=1 OSD=1 MDS=0 MGR=1 ../src/vstart.sh -d -n -x -l --cephadm
+
+export CEPH_DEV=1
+
+bin/ceph orch ls
+bin/ceph orch apply mds foo 1
+bin/ceph orch ls | grep foo
+while ! bin/ceph orch ps | grep mds.foo ; do sleep 1 ; done
+bin/ceph orch ps
+
+bin/ceph orch host ls
+
+bin/ceph orch rm crash
+! bin/ceph orch ls | grep crash
+bin/ceph orch apply crash '*'
+bin/ceph orch ls | grep crash
+
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+bin/ceph orch ps | grep crash.$host | grep running
+bin/ceph orch ls | grep crash | grep 1/1
+bin/ceph orch daemon rm crash.$host
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+
+bin/ceph orch daemon stop crash.$host
+bin/ceph orch daemon start crash.$host
+bin/ceph orch daemon restart crash.$host
+bin/ceph orch daemon reconfig crash.$host
+bin/ceph orch daemon redeploy crash.$host
+
+bin/ceph orch host ls | grep $host
+bin/ceph orch host label add $host fooxyz
+bin/ceph orch host ls | grep $host | grep fooxyz
+bin/ceph orch host label rm $host fooxyz
+! bin/ceph orch host ls | grep $host | grep fooxyz
+bin/ceph orch host set-addr $host $host
+
+bin/ceph cephadm check-host $host
+#! bin/ceph cephadm check-host $host 1.2.3.4
+#bin/ceph orch host set-addr $host 1.2.3.4
+#! bin/ceph cephadm check-host $host
+bin/ceph orch host set-addr $host $host
+bin/ceph cephadm check-host $host
+
+bin/ceph orch apply mgr 1
+bin/ceph orch rm mgr --force     # we don't want a mgr to take over for ours
+
+bin/ceph orch daemon add mon $host:127.0.0.1
+
+while ! bin/ceph mon dump | grep 'epoch 2' ; do sleep 1 ; done
+
+bin/ceph orch apply rbd-mirror 1
+
+bin/ceph orch apply node-exporter '*'
+bin/ceph orch apply prometheus 1
+bin/ceph orch apply alertmanager 1
+bin/ceph orch apply grafana 1
+
+while ! bin/ceph dashboard get-grafana-api-url | grep $host ; do sleep 1 ; done
+
+bin/ceph orch apply rgw foo --placement=1
+
+bin/ceph orch ps
+bin/ceph orch ls
+
+# clean up
+bin/ceph orch rm mds.foo
+bin/ceph orch rm rgw.myrealm.myzone
+bin/ceph orch rm rbd-mirror
+bin/ceph orch rm node-exporter
+bin/ceph orch rm alertmanager
+bin/ceph orch rm grafana
+bin/ceph orch rm prometheus
+bin/ceph orch rm crash
+
+bin/ceph mon rm $host
+! bin/ceph orch daemon rm mon.$host
+bin/ceph orch daemon rm mon.$host --force
+
+echo OK