author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit    e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      64f88b554b444a49f656b6c656111a145cbbaa28 /src/cephadm
parent    Initial commit. (diff)
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  src/cephadm/.gitignore                              6
-rw-r--r--  src/cephadm/CMakeLists.txt                         28
-rw-r--r--  src/cephadm/box/DockerfileDocker                   33
-rw-r--r--  src/cephadm/box/DockerfilePodman                   64
-rw-r--r--  src/cephadm/box/__init__.py                         0
-rwxr-xr-x  src/cephadm/box/box.py                            414
-rw-r--r--  src/cephadm/box/daemon.json                         3
-rw-r--r--  src/cephadm/box/docker-compose-docker.yml          39
-rw-r--r--  src/cephadm/box/docker-compose.cgroup1.yml         10
-rw-r--r--  src/cephadm/box/docker/ceph/.bashrc                 0
-rw-r--r--  src/cephadm/box/docker/ceph/Dockerfile              3
-rw-r--r--  src/cephadm/box/docker/ceph/locale.conf             2
-rw-r--r--  src/cephadm/box/host.py                           120
-rw-r--r--  src/cephadm/box/osd.py                            157
-rw-r--r--  src/cephadm/box/util.py                           421
-rwxr-xr-x  src/cephadm/build.py                              204
-rwxr-xr-x  src/cephadm/build.sh                                5
-rwxr-xr-x  src/cephadm/cephadm.py                          10700
-rw-r--r--  src/cephadm/containers/keepalived/Dockerfile       24
-rw-r--r--  src/cephadm/containers/keepalived/LICENSE          21
-rw-r--r--  src/cephadm/containers/keepalived/README.md       233
-rwxr-xr-x  src/cephadm/containers/keepalived/skel/init.sh     22
-rw-r--r--  src/cephadm/samples/alertmanager.json              27
-rw-r--r--  src/cephadm/samples/custom_container.json          35
-rw-r--r--  src/cephadm/samples/grafana.json                   90
-rw-r--r--  src/cephadm/samples/nfs.json                       14
-rw-r--r--  src/cephadm/samples/prometheus.json                17
-rw-r--r--  src/cephadm/samples/rgw_ssl.json                  101
-rw-r--r--  src/cephadm/tests/__init__.py                       0
-rw-r--r--  src/cephadm/tests/fixtures.py                     162
-rw-r--r--  src/cephadm/tests/test_agent.py                   800
-rw-r--r--  src/cephadm/tests/test_cephadm.py                2708
-rw-r--r--  src/cephadm/tests/test_container_engine.py         54
-rw-r--r--  src/cephadm/tests/test_enclosure.py                72
-rw-r--r--  src/cephadm/tests/test_ingress.py                 350
-rw-r--r--  src/cephadm/tests/test_networks.py                233
-rw-r--r--  src/cephadm/tests/test_nfs.py                     239
-rw-r--r--  src/cephadm/tests/test_util_funcs.py              808
-rw-r--r--  src/cephadm/tox.ini                                77
-rwxr-xr-x  src/cephadm/vstart-cleanup.sh                       6
-rwxr-xr-x  src/cephadm/vstart-smoke.sh                        86
41 files changed, 18388 insertions, 0 deletions
diff --git a/src/cephadm/.gitignore b/src/cephadm/.gitignore
new file mode 100644
index 000000000..8d1529027
--- /dev/null
+++ b/src/cephadm/.gitignore
@@ -0,0 +1,6 @@
+# tox related
+.coverage*
+htmlcov
+.tox
+coverage.xml
+.mypy_cache
diff --git a/src/cephadm/CMakeLists.txt b/src/cephadm/CMakeLists.txt
new file mode 100644
index 000000000..8b969bc33
--- /dev/null
+++ b/src/cephadm/CMakeLists.txt
@@ -0,0 +1,28 @@
+if(WITH_TESTS)
+ include(AddCephTest)
+ add_tox_test(cephadm TOX_ENVS py3 mypy flake8)
+endif()
+
+set(bin_target_file ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cephadm)
+
+add_custom_command(
+ OUTPUT "${bin_target_file}"
+ DEPENDS
+ ${CMAKE_CURRENT_SOURCE_DIR}/cephadm.py
+ ${CMAKE_CURRENT_SOURCE_DIR}/build.py
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${Python3_EXECUTABLE} build.py
+ --set-version-var=CEPH_GIT_VER=${CEPH_GIT_VER}
+ --set-version-var=CEPH_GIT_NICE_VER=${CEPH_GIT_NICE_VER}
+ --set-version-var=CEPH_RELEASE=${CEPH_RELEASE}
+ --set-version-var=CEPH_RELEASE_NAME=${CEPH_RELEASE_NAME}
+ --set-version-var=CEPH_RELEASE_TYPE=${CEPH_RELEASE_TYPE}
+ ${bin_target_file}
+)
+
+add_custom_target(cephadm ALL
+ DEPENDS "${bin_target_file}")
+
+install(PROGRAMS
+ ${bin_target_file}
+ DESTINATION ${CMAKE_INSTALL_SBINDIR})
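
The custom command above shells out to build.py once per build, passing one
--set-version-var flag per version variable. A minimal sketch of the equivalent
manual invocation (the values below are placeholders; CMake substitutes the
real ones at configure time):

    import subprocess
    import sys

    version_vars = {
        'CEPH_GIT_VER': 'e691818',      # placeholder
        'CEPH_GIT_NICE_VER': '18.2.2',  # placeholder
        'CEPH_RELEASE': '18',
        'CEPH_RELEASE_NAME': 'reef',
        'CEPH_RELEASE_TYPE': 'stable',
    }
    cmd = [sys.executable, 'build.py']
    cmd += [f'--set-version-var={k}={v}' for k, v in version_vars.items()]
    cmd.append('/tmp/cephadm')  # stands in for ${bin_target_file}
    subprocess.check_call(cmd)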
diff --git a/src/cephadm/box/DockerfileDocker b/src/cephadm/box/DockerfileDocker
new file mode 100644
index 000000000..f64b48e4c
--- /dev/null
+++ b/src/cephadm/box/DockerfileDocker
@@ -0,0 +1,33 @@
+# https://developers.redhat.com/blog/2014/05/05/running-systemd-within-docker-container/
+FROM centos:8 as centos-systemd
+ENV container docker
+ENV CEPHADM_PATH=/usr/local/sbin/cephadm
+
+# CentOS 8 reached EOL and the contents of its repos have been moved to vault.centos.org
+RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-*
+RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=https://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*
+
+RUN dnf -y install chrony firewalld lvm2 \
+ openssh-server openssh-clients python3 \
+ yum-utils sudo which && dnf clean all
+
+RUN systemctl enable chronyd firewalld sshd
+
+
+FROM centos-systemd as centos-systemd-docker
+# To cache cephadm images
+RUN yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
+RUN dnf -y install docker-ce && \
+ dnf clean all && systemctl enable docker
+
+# ssh utilities
+RUN dnf install epel-release -y && dnf makecache && dnf install sshpass -y
+RUN touch /.box_container # empty file to check if inside a container
+
+EXPOSE 8443
+EXPOSE 22
+
+FROM centos-systemd-docker
+WORKDIR /root
+
+CMD [ "/usr/sbin/init" ]
diff --git a/src/cephadm/box/DockerfilePodman b/src/cephadm/box/DockerfilePodman
new file mode 100644
index 000000000..115c3c730
--- /dev/null
+++ b/src/cephadm/box/DockerfilePodman
@@ -0,0 +1,64 @@
+# stable/Dockerfile
+#
+# Build a Podman container image from the latest
+# stable version of Podman from the Fedora Updates System.
+# https://bodhi.fedoraproject.org/updates/?search=podman
+# This image can be used to create a secured container
+# that runs safely with privileges within the container.
+#
+FROM fedora:34
+
+ENV CEPHADM_PATH=/usr/local/sbin/cephadm
+RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume
+
+# Don't include container-selinux and remove
+# directories used by yum that are just taking
+# up space.
+RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \
+yum -y install podman fuse-overlayfs --exclude container-selinux; \
+rm -rf /var/cache /var/log/dnf* /var/log/yum.*
+
+RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y
+
+ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
+ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/podman-containers.conf /root/.config/containers/containers.conf
+
+RUN mkdir -p /root/.local/share/containers; # chown podman:podman -R /home/podman
+
+# Note VOLUME options must always happen after the chown call above
+# RUN commands cannot modify existing volumes
+VOLUME /var/lib/containers
+VOLUME /root/.local/share/containers
+
+# chmod containers.conf and adjust storage.conf to enable Fuse storage.
+RUN chmod 644 /etc/containers/containers.conf; sed -i -e 's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var/lib/shared",' -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' /etc/containers/storage.conf
+RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers /var/lib/shared/vfs-images /var/lib/shared/vfs-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock; touch /var/lib/shared/vfs-images/images.lock; touch /var/lib/shared/vfs-layers/layers.lock
+
+RUN echo 'root:root' | chpasswd
+
+RUN dnf install -y adjtimex # the adjtimex syscall doesn't exist in Fedora 35+, so we have to install it manually
+ # so that chronyd works
+RUN dnf install -y strace sysstat # debugging tools
+RUN dnf -y install hostname iproute udev
+ENV _CONTAINERS_USERNS_CONFIGURED=""
+
+RUN useradd podman; \
+echo podman:0:5000 > /etc/subuid; \
+echo podman:0:5000 > /etc/subgid; \
+echo root:0:65535 > /etc/subuid; \
+echo root:0:65535 > /etc/subgid;
+
+VOLUME /home/podman/.local/share/containers
+
+ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
+ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/podman-containers.conf /home/podman/.config/containers/containers.conf
+
+RUN chown podman:podman -R /home/podman
+
+RUN echo 'podman:podman' | chpasswd
+RUN touch /.box_container # empty file to check if inside a container
+
+EXPOSE 8443
+EXPOSE 22
+
+ENTRYPOINT ["/usr/sbin/init"]
diff --git a/src/cephadm/box/__init__.py b/src/cephadm/box/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/box/__init__.py
diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py
new file mode 100755
index 000000000..db2f24233
--- /dev/null
+++ b/src/cephadm/box/box.py
@@ -0,0 +1,414 @@
+#!/bin/python3
+import argparse
+import os
+import stat
+import json
+import sys
+import host
+import osd
+from multiprocessing import Process, Pool
+from util import (
+ BoxType,
+ Config,
+ Target,
+ ensure_inside_container,
+ ensure_outside_container,
+ get_boxes_container_info,
+ run_cephadm_shell_command,
+ run_dc_shell_command,
+ run_dc_shell_commands,
+ get_container_engine,
+ run_shell_command,
+ run_shell_commands,
+ ContainerEngine,
+ DockerEngine,
+ PodmanEngine,
+ colored,
+ engine,
+ engine_compose,
+ Colors,
+ get_seed_name
+)
+
+CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
+BOX_IMAGE = 'cephadm-box:latest'
+
+# NOTE: this image tar is a trick so cephadm won't pull the image every time
+# we deploy a cluster. Keep in mind that you'll be responsible for pulling the
+# image yourself with `./box.py -v cluster setup`
+CEPH_IMAGE_TAR = 'docker/ceph/image/quay.ceph.image.tar'
+CEPH_ROOT = '../../../'
+DASHBOARD_PATH = '../../../src/pybind/mgr/dashboard/frontend/'
+
+root_error_msg = """
+WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+sudo with this script can kill your computer, try again without sudo
+if you value your time.
+"""
+
+def remove_ceph_image_tar():
+ if os.path.exists(CEPH_IMAGE_TAR):
+ os.remove(CEPH_IMAGE_TAR)
+
+
+def cleanup_box() -> None:
+ osd.cleanup_osds()
+ remove_ceph_image_tar()
+
+
+def image_exists(image_name: str):
+ # extract the tag
+ assert ':' in image_name
+ image_name, tag = image_name.split(':')
+ engine = get_container_engine()
+ images = engine.run('image ls').split('\n')
+ IMAGE_NAME = 0
+ TAG = 1
+ for image in images:
+ image = image.split()
+ print(image)
+ print(image_name, tag)
+ if image[IMAGE_NAME] == image_name and image[TAG] == tag:
+ return True
+ return False
+
+
+def get_ceph_image():
+ print('Getting ceph image')
+ engine = get_container_engine()
+ engine.run(f'pull {CEPH_IMAGE}')
+ # update
+ engine.run(f'build -t {CEPH_IMAGE} docker/ceph')
+ if not os.path.exists('docker/ceph/image'):
+ os.mkdir('docker/ceph/image')
+
+ remove_ceph_image_tar()
+
+ engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
+ run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
+ print('Ceph image added')
+
+
+def get_box_image():
+ print('Getting box image')
+ engine = get_container_engine()
+ engine.run(f'build -t cephadm-box -f {engine.dockerfile} .')
+ print('Box image added')
+
+def check_dashboard():
+ if not os.path.exists(os.path.join(CEPH_ROOT, 'dist')):
+ print(colored('Missing build in dashboard', Colors.WARNING))
+
+def check_cgroups():
+ if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
+ print(colored('cgroups v1 is not supported', Colors.FAIL))
+ print('Please enable cgroups v2')
+ sys.exit(666)
+
+def check_selinux():
+ selinux = run_shell_command('getenforce')
+ if 'Disabled' not in selinux:
+ print(colored('selinux should be disabled, please disable it if you '
+ 'don\'t want unexpected behaviour.', Colors.WARNING))
+def dashboard_setup():
+ command = f'cd {DASHBOARD_PATH} && npm install'
+ run_shell_command(command)
+ command = f'cd {DASHBOARD_PATH} && npm run build'
+ run_shell_command(command)
+
+class Cluster(Target):
+ _help = 'Manage docker cephadm boxes'
+ actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup']
+
+ def set_args(self):
+ self.parser.add_argument(
+ 'action', choices=Cluster.actions, help='Action to perform on the box'
+ )
+ self.parser.add_argument('--osds', type=int, default=3, help='Number of osds')
+
+ self.parser.add_argument('--hosts', type=int, default=1, help='Number of hosts')
+ self.parser.add_argument('--skip-deploy-osds', action='store_true', help='skip deploy osd')
+ self.parser.add_argument('--skip-create-loop', action='store_true', help='skip create loopback device')
+ self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack')
+ self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard')
+ self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds')
+ self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel')
+
+ @ensure_outside_container
+ def setup(self):
+ check_cgroups()
+ check_selinux()
+
+ targets = [
+ get_ceph_image,
+ get_box_image,
+ dashboard_setup
+ ]
+ results = []
+ jobs = Config.get('jobs')
+ if jobs:
+ jobs = int(jobs)
+ else:
+ jobs = None
+ pool = Pool(jobs)
+ for target in targets:
+ results.append(pool.apply_async(target))
+
+ for result in results:
+ result.wait()
+
+
+ @ensure_outside_container
+ def cleanup(self):
+ cleanup_box()
+
+ @ensure_inside_container
+ def bootstrap(self):
+ print('Running bootstrap on seed')
+ cephadm_path = str(os.environ.get('CEPHADM_PATH'))
+
+ engine = get_container_engine()
+ if isinstance(engine, DockerEngine):
+ engine.restart()
+ st = os.stat(cephadm_path)
+ os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)
+
+ engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
+ # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
+ # instead of main branch's tag
+ run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
+ run_shell_command('export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main')
+ run_shell_command(
+ 'echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main" >> ~/.bashrc'
+ )
+
+ extra_args = []
+
+ extra_args.append('--skip-pull')
+
+ # cephadm prints its output to stderr; redirect it to stdout so shell_command
+ # doesn't complain
+ extra_args.append('2>&1')
+
+ extra_args = ' '.join(extra_args)
+ skip_monitoring_stack = (
+ '--skip-monitoring-stack' if Config.get('skip-monitoring-stack') else ''
+ )
+ skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
+
+ fsid = Config.get('fsid')
+ config_folder = str(Config.get('config_folder'))
+ config = str(Config.get('config'))
+ keyring = str(Config.get('keyring'))
+ if not os.path.exists(config_folder):
+ os.mkdir(config_folder)
+
+ cephadm_bootstrap_command = (
+ '$CEPHADM_PATH --verbose bootstrap '
+ '--mon-ip "$(hostname -i)" '
+ '--allow-fqdn-hostname '
+ '--initial-dashboard-password admin '
+ '--dashboard-password-noupdate '
+ '--shared_ceph_folder /ceph '
+ '--allow-overwrite '
+ f'--output-config {config} '
+ f'--output-keyring {keyring} '
+ f'--fsid "{fsid}" '
+ '--log-to-file '
+ f'{skip_dashboard} '
+ f'{skip_monitoring_stack} '
+ f'{extra_args} '
+ )
+
+ print('Running cephadm bootstrap...')
+ run_shell_command(cephadm_bootstrap_command, expect_exit_code=120)
+ print('Cephadm bootstrap complete')
+
+ run_shell_command('sudo vgchange --refresh')
+ run_shell_command('cephadm ls')
+ run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')
+
+ run_cephadm_shell_command('ceph -s')
+
+ print('Bootstrap completed!')
+
+ @ensure_outside_container
+ def start(self):
+ check_cgroups()
+ check_selinux()
+ osds = int(Config.get('osds'))
+ hosts = int(Config.get('hosts'))
+ engine = get_container_engine()
+
+ # ensure boxes don't exist
+ self.down()
+
+ # podman is run without sudo
+ if isinstance(engine, PodmanEngine):
+ I_am = run_shell_command('whoami')
+ if 'root' in I_am:
+ print(root_error_msg)
+ sys.exit(1)
+
+ print('Checking docker images')
+ if not image_exists(CEPH_IMAGE):
+ get_ceph_image()
+ if not image_exists(BOX_IMAGE):
+ get_box_image()
+
+ used_loop = ""
+ if not Config.get('skip_create_loop'):
+ print('Creating OSD devices...')
+ used_loop = osd.create_loopback_devices(osds)
+ print(f'Added {osds} logical volumes in a loopback device')
+
+ print('Starting containers')
+
+ engine.up(hosts)
+
+ containers = engine.get_containers()
+ seed = engine.get_seed()
+ # Unmounting somehow brings back the contents of the host /sys/dev/block.
+ # On startup /sys/dev/block is empty; after the umount the symlinks reappear,
+ # so lsblk is able to run as expected.
+ run_dc_shell_command('umount /sys/dev/block', seed)
+
+ run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
+ run_shell_command('sudo iptables -P FORWARD ACCEPT')
+
+ # don't update clock with chronyd / setup chronyd on all boxes
+ chronyd_setup = """
+ sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i
+ systemctl daemon-reload
+ systemctl start chronyd
+ systemctl status --no-pager chronyd
+ """
+ for container in containers:
+ print(colored('Got container:', Colors.OKCYAN), str(container))
+ for container in containers:
+ run_dc_shell_commands(chronyd_setup, container)
+
+ print('Setting up host ssh servers')
+ for container in containers:
+ print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container))
+ host._setup_ssh(container)
+
+ verbose = '-v' if Config.get('verbose') else ''
+ skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else ''
+ skip_monitoring_stack = (
+ '--skip-monitoring-stack' if Config.get('skip-monitoring-stack') else ''
+ )
+ skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
+ box_bootstrap_command = (
+ f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap '
+ f'--osds {osds} '
+ f'--hosts {hosts} '
+ f'{skip_deploy} '
+ f'{skip_dashboard} '
+ f'{skip_monitoring_stack} '
+ )
+ print(box_bootstrap_command)
+ run_dc_shell_command(box_bootstrap_command, seed)
+
+ expanded = Config.get('expanded')
+ if expanded:
+ info = get_boxes_container_info()
+ ips = info['ips']
+ hostnames = info['hostnames']
+ print(ips)
+ if hosts > 0:
+ host._copy_cluster_ssh_key(ips)
+ host._add_hosts(ips, hostnames)
+ if not Config.get('skip-deploy-osds'):
+ print('Deploying osds... This could take a few minutes')
+ osd.deploy_osds(osds)
+ print('Osds deployed')
+
+
+ dashboard_ip = 'localhost'
+ info = get_boxes_container_info(with_seed=True)
+ if isinstance(engine, DockerEngine):
+ for i in range(info['size']):
+ if get_seed_name() in info['container_names'][i]:
+ dashboard_ip = info["ips"][i]
+ print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN))
+
+ print('Bootstrap finished successfully')
+
+ @ensure_outside_container
+ def down(self):
+ engine = get_container_engine()
+ if isinstance(engine, PodmanEngine):
+ containers = json.loads(engine.run('container ls --format json'))
+ for container in containers:
+ for name in container['Names']:
+ if name.startswith('box_hosts_'):
+ engine.run(f'container kill {name}')
+ engine.run(f'container rm {name}')
+ pods = json.loads(engine.run('pod ls --format json'))
+ for pod in pods:
+ if 'Name' in pod and pod['Name'].startswith('box_pod_host'):
+ name = pod['Name']
+ engine.run(f'pod kill {name}')
+ engine.run(f'pod rm {name}')
+ else:
+ run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down')
+ print('Successfully killed all boxes')
+
+ @ensure_outside_container
+ def list(self):
+ info = get_boxes_container_info(with_seed=True)
+ for i in range(info['size']):
+ ip = info['ips'][i]
+ name = info['container_names'][i]
+ hostname = info['hostnames'][i]
+ print(f'{name} \t{ip} \t{hostname}')
+
+ @ensure_outside_container
+ def bash(self):
+ # we need verbose to see the prompt after running shell command
+ Config.set('verbose', True)
+ print('Seed bash')
+ engine = get_container_engine()
+ engine.run(f'exec -it {engine.seed_name} bash')
+
+
+targets = {
+ 'cluster': Cluster,
+ 'osd': osd.Osd,
+ 'host': host.Host,
+}
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-v', action='store_true', dest='verbose', help='be more verbose'
+ )
+ parser.add_argument(
+ '--engine', type=str, default='podman',
+ dest='engine', help='choose engine between "docker" and "podman"'
+ )
+
+ subparsers = parser.add_subparsers()
+ target_instances = {}
+ for name, target in targets.items():
+ target_instances[name] = target(None, subparsers)
+
+ for count, arg in enumerate(sys.argv, 1):
+ if arg in targets:
+ instance = target_instances[arg]
+ if hasattr(instance, 'main'):
+ instance.argv = sys.argv[count:]
+ instance.set_args()
+ args = parser.parse_args()
+ Config.add_args(vars(args))
+ instance.main()
+ sys.exit(0)
+
+ parser.print_help()
+
+
+if __name__ == '__main__':
+ main()
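
Cluster.setup() above runs its three independent preparation steps (ceph image,
box image, dashboard build) through a multiprocessing Pool. A self-contained
sketch of that pattern, with trivial stand-in steps:

    from multiprocessing import Pool

    def pull_image() -> str:
        return 'image pulled'      # stand-in for get_ceph_image

    def build_dashboard() -> str:
        return 'dashboard built'   # stand-in for dashboard_setup

    if __name__ == '__main__':
        with Pool(None) as pool:   # None -> one worker per CPU, like jobs=None
            results = [pool.apply_async(step) for step in (pull_image, build_dashboard)]
            for result in results:
                result.wait()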
diff --git a/src/cephadm/box/daemon.json b/src/cephadm/box/daemon.json
new file mode 100644
index 000000000..5cfcaa87f
--- /dev/null
+++ b/src/cephadm/box/daemon.json
@@ -0,0 +1,3 @@
+{
+ "storage-driver": "fuse-overlayfs"
+}
diff --git a/src/cephadm/box/docker-compose-docker.yml b/src/cephadm/box/docker-compose-docker.yml
new file mode 100644
index 000000000..fdecf6677
--- /dev/null
+++ b/src/cephadm/box/docker-compose-docker.yml
@@ -0,0 +1,39 @@
+version: "2.4"
+services:
+ cephadm-host-base:
+ build:
+ context: .
+ environment:
+ - CEPH_BRANCH=master
+ image: cephadm-box
+ privileged: true
+ stop_signal: RTMIN+3
+ volumes:
+ - ../../../:/ceph
+ - ..:/cephadm
+ - ./daemon.json:/etc/docker/daemon.json
+ # dangerous, maybe just map the loopback
+ # https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container
+ - /dev:/dev
+ networks:
+ - public
+ mem_limit: "20g"
+ scale: -1
+ seed:
+ extends:
+ service: cephadm-host-base
+ ports:
+ - "3000:3000"
+ - "8443:8443"
+ - "9095:9095"
+ scale: 1
+ hosts:
+ extends:
+ service: cephadm-host-base
+ scale: 3
+
+
+volumes:
+ var-lib-docker:
+networks:
+ public:
diff --git a/src/cephadm/box/docker-compose.cgroup1.yml b/src/cephadm/box/docker-compose.cgroup1.yml
new file mode 100644
index 000000000..ea23dec1e
--- /dev/null
+++ b/src/cephadm/box/docker-compose.cgroup1.yml
@@ -0,0 +1,10 @@
+version: "2.4"
+
+# If cgroups v2 is disabled then add cgroup fs
+services:
+ seed:
+ volumes:
+ - "/sys/fs/cgroup:/sys/fs/cgroup:ro"
+ hosts:
+ volumes:
+ - "/sys/fs/cgroup:/sys/fs/cgroup:ro"
diff --git a/src/cephadm/box/docker/ceph/.bashrc b/src/cephadm/box/docker/ceph/.bashrc
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/.bashrc
diff --git a/src/cephadm/box/docker/ceph/Dockerfile b/src/cephadm/box/docker/ceph/Dockerfile
new file mode 100644
index 000000000..b950750e9
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/Dockerfile
@@ -0,0 +1,3 @@
+FROM quay.ceph.io/ceph-ci/ceph:main
+RUN pip3 install packaging
+EXPOSE 8443
diff --git a/src/cephadm/box/docker/ceph/locale.conf b/src/cephadm/box/docker/ceph/locale.conf
new file mode 100644
index 000000000..00d76c8cd
--- /dev/null
+++ b/src/cephadm/box/docker/ceph/locale.conf
@@ -0,0 +1,2 @@
+LANG="en_US.UTF-8"
+LC_ALL="en_US.UTF-8"
diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py
new file mode 100644
index 000000000..aae16d07f
--- /dev/null
+++ b/src/cephadm/box/host.py
@@ -0,0 +1,120 @@
+import os
+from typing import List, Union
+
+from util import (
+ Config,
+ HostContainer,
+ Target,
+ get_boxes_container_info,
+ get_container_engine,
+ inside_container,
+ run_cephadm_shell_command,
+ run_dc_shell_command,
+ run_shell_command,
+ engine,
+ BoxType
+)
+
+
+def _setup_ssh(container: HostContainer):
+ if inside_container():
+ if not os.path.exists('/root/.ssh/known_hosts'):
+ run_shell_command('echo "y" | ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""',
+ expect_error=True)
+
+ run_shell_command('echo "root:root" | chpasswd')
+ with open('/etc/ssh/sshd_config', 'a+') as f:
+ f.write('PermitRootLogin yes\n')
+ f.write('PasswordAuthentication yes\n')
+ f.flush()
+ run_shell_command('systemctl restart sshd')
+ else:
+ print('Redirecting _setup_ssh to container')
+ verbose = '-v' if Config.get('verbose') else ''
+ run_dc_shell_command(
+ f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container.name}',
+ container
+ )
+
+
+def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]):
+ if inside_container():
+ assert len(ips) == len(hostnames)
+ for i in range(len(ips)):
+ run_cephadm_shell_command(f'ceph orch host add {hostnames[i]} {ips[i]}')
+ else:
+ print('Redirecting _add_hosts to container')
+ verbose = '-v' if Config.get('verbose') else ''
+ print(ips)
+ ips = ' '.join(ips)
+ hostnames = ' '.join(hostnames)
+ seed = get_container_engine().get_seed()
+ run_dc_shell_command(
+ f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts {seed.name} --ips {ips} --hostnames {hostnames}',
+ seed
+ )
+
+
+def _copy_cluster_ssh_key(ips: Union[List[str], str]):
+ if inside_container():
+ local_ip = run_shell_command('hostname -i')
+ for ip in ips:
+ if ip != local_ip:
+ run_shell_command(
+ (
+ 'sshpass -p "root" ssh-copy-id -f '
+ f'-o StrictHostKeyChecking=no -i /etc/ceph/ceph.pub "root@{ip}"'
+ )
+ )
+
+ else:
+ print('Redirecting _copy_cluster_ssh_key to container')
+ verbose = '-v' if Config.get('verbose') else ''
+ print(ips)
+ ips = ' '.join(ips)
+ # assume we only have one seed
+ seed = get_container_engine().get_seed()
+ run_dc_shell_command(
+ f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {seed.name} --ips {ips}',
+ seed
+ )
+
+
+class Host(Target):
+ _help = 'Run seed/host related commands'
+ actions = ['setup_ssh', 'copy_cluster_ssh_key', 'add_hosts']
+
+ def set_args(self):
+ self.parser.add_argument('action', choices=Host.actions)
+ self.parser.add_argument(
+ 'container_name',
+ type=str,
+ help='box_{type}_{index}. In docker, type can be seed or hosts. In podman only hosts.'
+ )
+ self.parser.add_argument('--ips', nargs='*', help='List of host ips')
+ self.parser.add_argument(
+ '--hostnames', nargs='*', help='List of hostnames (parallel to the --ips list)'
+ )
+
+ def setup_ssh(self):
+ container_name = Config.get('container_name')
+ engine = get_container_engine()
+ _setup_ssh(engine.get_container(container_name))
+
+ def add_hosts(self):
+ ips = Config.get('ips')
+ if not ips:
+ ips = get_boxes_container_info()['ips']
+ hostnames = Config.get('hostnames')
+ if not hostnames:
+ hostnames = get_boxes_container_info()['hostnames']
+ _add_hosts(ips, hostnames)
+
+ def copy_cluster_ssh_key(self):
+ ips = Config.get('ips')
+ if not ips:
+ ips = get_boxes_container_info()['ips']
+ _copy_cluster_ssh_key(ips)
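
Each helper in host.py follows the same shape: do the work directly when
already inside a container, otherwise re-invoke box.py inside the seed
container so a single entry point serves both sides. A condensed sketch of
that pattern (do_work and the "host do_work" action are hypothetical
stand-ins for the real actions above):

    from util import get_container_engine, inside_container, run_dc_shell_command

    def do_work_anywhere(ips):
        if inside_container():
            do_work(ips)  # hypothetical direct implementation
        else:
            seed = get_container_engine().get_seed()
            run_dc_shell_command(
                f'/cephadm/box/box.py host do_work {seed.name} --ips {" ".join(ips)}',
                seed,
            )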
diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py
new file mode 100644
index 000000000..827a4de36
--- /dev/null
+++ b/src/cephadm/box/osd.py
@@ -0,0 +1,157 @@
+import json
+import os
+import time
+import re
+from typing import Dict
+
+from util import (
+ BoxType,
+ Config,
+ Target,
+ ensure_inside_container,
+ ensure_outside_container,
+ get_orch_hosts,
+ run_cephadm_shell_command,
+ run_dc_shell_command,
+ get_container_engine,
+ run_shell_command,
+)
+
+DEVICES_FILE = "./devices.json"
+
+def remove_loop_img() -> None:
+ loop_image = Config.get('loop_img')
+ if os.path.exists(loop_image):
+ os.remove(loop_image)
+
+def create_loopback_devices(osds: int) -> Dict[int, Dict[str, str]]:
+ assert osds
+ cleanup_osds()
+ osd_devs = dict()
+
+ for i in range(osds):
+ img_name = f'osd{i}'
+ loop_dev = create_loopback_device(img_name)
+ osd_devs[i] = dict(img_name=img_name, device=loop_dev)
+ with open(DEVICES_FILE, 'w') as dev_file:
+ dev_file.write(json.dumps(osd_devs))
+ return osd_devs
+
+def create_loopback_device(img_name, size_gb=5):
+ loop_img_dir = Config.get('loop_img_dir')
+ run_shell_command(f'mkdir -p {loop_img_dir}')
+ loop_img = os.path.join(loop_img_dir, img_name)
+ run_shell_command(f'rm -f {loop_img}')
+ run_shell_command(f'dd if=/dev/zero of={loop_img} bs=1 count=0 seek={size_gb}G')
+ loop_dev = run_shell_command('sudo losetup -f')
+ if not os.path.exists(loop_dev):
+ dev_minor = re.match(r'\/dev\/[^\d]+(\d+)', loop_dev).groups()[0]
+ run_shell_command(f'sudo mknod -m777 {loop_dev} b 7 {dev_minor}')
+ run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
+ if os.path.ismount(loop_dev):
+ os.umount(loop_dev)
+ run_shell_command(f'sudo losetup {loop_dev} {loop_img}')
+ run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
+ return loop_dev
+
+
+def get_lvm_osd_data(data: str) -> Dict[str, str]:
+ osd_lvm_info = run_cephadm_shell_command(f'ceph-volume lvm list {data}')
+ osd_data = {}
+ for line in osd_lvm_info.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+ line = line.split()
+ if line[0].startswith('===') or line[0].startswith('[block]'):
+ continue
+ # "block device" key -> "block_device"
+ key = '_'.join(line[:-1])
+ osd_data[key] = line[-1]
+ return osd_data
+
+def load_osd_devices():
+ if not os.path.exists(DEVICES_FILE):
+ return dict()
+ with open(DEVICES_FILE) as dev_file:
+ devs = json.loads(dev_file.read())
+ return devs
+
+
+@ensure_inside_container
+def deploy_osd(data: str, hostname: str) -> bool:
+ out = run_cephadm_shell_command(f'ceph orch daemon add osd {hostname}:{data} raw')
+ return 'Created osd(s)' in out
+
+
+def cleanup_osds() -> None:
+ loop_img_dir = Config.get('loop_img_dir')
+ osd_devs = load_osd_devices()
+ for osd in osd_devs.values():
+ device = osd['device']
+ if 'loop' in device:
+ loop_img = os.path.join(loop_img_dir, osd['img_name'])
+ run_shell_command(f'sudo losetup -d {device}', expect_error=True)
+ if os.path.exists(loop_img):
+ os.remove(loop_img)
+ run_shell_command(f'rm -rf {loop_img_dir}')
+
+
+def deploy_osds(count: int):
+ osd_devs = load_osd_devices()
+ hosts = get_orch_hosts()
+ host_index = 0
+ seed = get_container_engine().get_seed()
+ v = '-v' if Config.get('verbose') else ''
+ for osd in osd_devs.values():
+ deployed = False
+ while not deployed:
+ print(hosts)
+ hostname = hosts[host_index]['hostname']
+ deployed = run_dc_shell_command(
+ f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}',
+ seed
+ )
+ deployed = 'created osd' in deployed.lower() or 'already created?' in deployed.lower()
+ print('Waiting 5 seconds to re-run deploy osd...')
+ time.sleep(5)
+ host_index = (host_index + 1) % len(hosts)
+
+
+class Osd(Target):
+ _help = """
+ Deploy osds and create needed block devices with loopback devices:
+ Actions:
+ - deploy: Deploy an osd given a block device
+ - create_loop: Create needed loopback devices and block devices in logical volumes
+ for a number of osds.
+ - destroy: Remove all osds and the underlying loopback devices.
+ """
+ actions = ['deploy', 'create_loop', 'destroy']
+
+ def set_args(self):
+ self.parser.add_argument('action', choices=Osd.actions)
+ self.parser.add_argument('--data', type=str, help='path to a block device')
+ self.parser.add_argument('--hostname', type=str, help='host to deploy osd')
+ self.parser.add_argument('--osds', type=int, default=0, help='number of osds')
+
+ def deploy(self):
+ data = Config.get('data')
+ hostname = Config.get('hostname')
+ if not hostname:
+ # assume this host
+ hostname = run_shell_command('hostname')
+ if not data:
+ deploy_osds(Config.get('osds'))
+ else:
+ deploy_osd(data, hostname)
+
+ @ensure_outside_container
+ def create_loop(self):
+ osds = Config.get('osds')
+ create_loopback_devices(int(osds))
+ print('Successfully created loopback devices')
+
+ @ensure_outside_container
+ def destroy(self):
+ cleanup_osds()
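
For reference, the JSON that create_loopback_devices() writes to DEVICES_FILE
and load_osd_devices() reads back is a flat index-to-device map (the device
paths below are illustrative):

    import json

    example_devices = {
        "0": {"img_name": "osd0", "device": "/dev/loop0"},
        "1": {"img_name": "osd1", "device": "/dev/loop1"},
    }
    with open('devices.json', 'w') as dev_file:
        dev_file.write(json.dumps(example_devices))
    with open('devices.json') as dev_file:
        print(json.load(dev_file)["0"]["device"])  # /dev/loop0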
diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py
new file mode 100644
index 000000000..7dcf883f8
--- /dev/null
+++ b/src/cephadm/box/util.py
@@ -0,0 +1,421 @@
+import json
+import os
+import subprocess
+import sys
+import copy
+from abc import ABCMeta, abstractmethod
+from enum import Enum
+from typing import Any, Callable, Dict, List
+
+class Colors:
+ HEADER = '\033[95m'
+ OKBLUE = '\033[94m'
+ OKCYAN = '\033[96m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+ UNDERLINE = '\033[4m'
+
+class Config:
+ args = {
+ 'fsid': '00000000-0000-0000-0000-0000deadbeef',
+ 'config_folder': '/etc/ceph/',
+ 'config': '/etc/ceph/ceph.conf',
+ 'keyring': '/etc/ceph/ceph.keyring',
+ 'loop_img': 'loop-images/loop.img',
+ 'engine': 'podman',
+ 'docker_yaml': 'docker-compose-docker.yml',
+ 'docker_v1_yaml': 'docker-compose.cgroup1.yml',
+ 'podman_yaml': 'docker-compose-podman.yml',
+ 'loop_img_dir': 'loop-images',
+ }
+
+ @staticmethod
+ def set(key, value):
+ Config.args[key] = value
+
+ @staticmethod
+ def get(key):
+ if key in Config.args:
+ return Config.args[key]
+ return None
+
+ @staticmethod
+ def add_args(args: Dict[str, str]) -> None:
+ Config.args.update(args)
+
+class Target:
+ def __init__(self, argv, subparsers):
+ self.argv = argv
+ self.parser = subparsers.add_parser(
+ self.__class__.__name__.lower(), help=self.__class__._help
+ )
+
+ def set_args(self):
+ """
+ Adding the required arguments of the target goes here, for example:
+ self.parser.add_argument(..)
+ """
+ raise NotImplementedError()
+
+ def main(self):
+ """
+ A target will be set up by first calling this main function
+ where the parser is initialized.
+ """
+ args = self.parser.parse_args(self.argv)
+ Config.add_args(vars(args))
+ function = getattr(self, args.action)
+ function()
+
+
+def ensure_outside_container(func) -> Callable:
+ def wrapper(*args, **kwargs):
+ if not inside_container():
+ return func(*args, **kwargs)
+ else:
+ raise RuntimeError('This command should be run outside a container')
+
+ return wrapper
+
+
+def ensure_inside_container(func) -> Callable:
+ def wrapper(*args, **kwargs):
+ if inside_container():
+ return func(*args, **kwargs)
+ else:
+ raise RuntimeError('This command should be run inside a container')
+
+ return wrapper
+
+
+def colored(msg, color: Colors):
+ return color + msg + Colors.ENDC
+
+class BoxType(str, Enum):
+ SEED = 'seed'
+ HOST = 'host'
+
+class HostContainer:
+ def __init__(self, _name, _type) -> None:
+ self._name: str = _name
+ self._type: BoxType = _type
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def type(self) -> BoxType:
+ return self._type
+ def __str__(self) -> str:
+ return f'{self.name} {self.type}'
+
+def run_shell_command(command: str, expect_error=False, verbose=True, expect_exit_code=0) -> str:
+ if Config.get('verbose'):
+ print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}')
+
+ process = subprocess.Popen(
+ command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+ )
+
+ out = ''
+ err = ''
+ # read the output as it arrives so it can be shown in real time
+ while True:
+ # TODO: improve performance; reading one byte at a time is a bottleneck
+ pout = process.stdout.read(1).decode('latin1')
+ if pout == '' and process.poll() is not None:
+ break
+ if pout:
+ if Config.get('verbose') and verbose:
+ sys.stdout.write(pout)
+ sys.stdout.flush()
+ out += pout
+
+ process.wait()
+
+ err += process.stderr.read().decode('latin1').strip()
+ out = out.strip()
+
+ if process.returncode != 0 and not expect_error and process.returncode != expect_exit_code:
+ err = colored(err, Colors.FAIL)
+ raise RuntimeError(f'Failed command: {command}\n{err}\nexit code: {process.returncode}')
+ return out
+
+
+def run_dc_shell_commands(commands: str, container: HostContainer, expect_error=False) -> None:
+ for command in commands.split('\n'):
+ command = command.strip()
+ if not command:
+ continue
+ run_dc_shell_command(command.strip(), container, expect_error=expect_error)
+
+def run_shell_commands(commands: str, expect_error=False) -> None:
+ for command in commands.split('\n'):
+ command = command.strip()
+ if not command:
+ continue
+ run_shell_command(command, expect_error=expect_error)
+
+@ensure_inside_container
+def run_cephadm_shell_command(command: str, expect_error=False) -> str:
+ config = Config.get('config')
+ keyring = Config.get('keyring')
+ fsid = Config.get('fsid')
+
+ with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main'
+ out = run_shell_command(
+ f'{with_cephadm_image} cephadm --verbose shell --fsid {fsid} --config {config} --keyring {keyring} -- {command}',
+ expect_error,
+ )
+ return out
+
+
+def run_dc_shell_command(
+ command: str, container: HostContainer, expect_error=False
+) -> str:
+ out = get_container_engine().run_exec(container, command, expect_error=expect_error)
+ return out
+
+def inside_container() -> bool:
+ return os.path.exists('/.box_container')
+
+def get_container_id(container_name: str):
+ return run_shell_command(f"{engine()} ps | \grep " + container_name + " | awk '{ print $1 }'")
+
+def engine():
+ return Config.get('engine')
+
+def engine_compose():
+ return f'{engine()}-compose'
+
+def get_seed_name():
+ if engine() == 'docker':
+ return 'seed'
+ elif engine() == 'podman':
+ return 'box_hosts_0'
+ else:
+ print(f'unknown engine {engine()}')
+ sys.exit(1)
+
+
+@ensure_outside_container
+def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]:
+ # NOTE: this could be cached
+ ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) --format json"
+ containers = json.loads(run_shell_command(ips_query, verbose=False))
+ # FIXME: if things get more complex a class representing a container info might be useful,
+ # for now representing data this way is faster.
+ info = {'size': 0, 'ips': [], 'container_names': [], 'hostnames': []}
+ for container in containers:
+ # Most commands use hosts only
+ name = container['Name']
+ if name.startswith('box_hosts'):
+ if not with_seed and name == get_seed_name():
+ continue
+ info['size'] += 1
+ print(container['NetworkSettings'])
+ if 'Networks' in container['NetworkSettings']:
+ info['ips'].append(container['NetworkSettings']['Networks']['box_network']['IPAddress'])
+ else:
+ info['ips'].append('n/a')
+ info['container_names'].append(name)
+ info['hostnames'].append(container['Config']['Hostname'])
+ return info
+
+
+def get_orch_hosts():
+ if inside_container():
+ orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json')
+ else:
+ orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json',
+ get_container_engine().get_seed())
+ sp = orch_host_ls_out.split('\n')
+ orch_host_ls_out = sp[-1]
+ hosts = json.loads(orch_host_ls_out)
+ return hosts
+
+
+class ContainerEngine(metaclass=ABCMeta):
+ @property
+ @abstractmethod
+ def command(self) -> str: pass
+
+ @property
+ @abstractmethod
+ def seed_name(self) -> str: pass
+
+ @property
+ @abstractmethod
+ def dockerfile(self) -> str: pass
+
+ @property
+ def host_name_prefix(self) -> str:
+ return 'box_hosts_'
+
+ @abstractmethod
+ def up(self, hosts: int): pass
+
+ def run_exec(self, container: HostContainer, command: str, expect_error: bool = False):
+ return run_shell_command(' '.join([self.command, 'exec', container.name, command]),
+ expect_error=expect_error)
+
+ def run(self, engine_command: str, expect_error: bool = False):
+ return run_shell_command(' '.join([self.command, engine_command]), expect_error=expect_error)
+
+ def get_containers(self) -> List[HostContainer]:
+ ps_out = json.loads(run_shell_command('podman ps --format json'))
+ containers = []
+ for container in ps_out:
+ if not container['Names']:
+ raise RuntimeError(f'Container {container} missing name')
+ name = container['Names'][0]
+ if name == self.seed_name:
+ containers.append(HostContainer(name, BoxType.SEED))
+ elif name.startswith(self.host_name_prefix):
+ containers.append(HostContainer(name, BoxType.HOST))
+ return containers
+
+ def get_seed(self) -> HostContainer:
+ for container in self.get_containers():
+ if container.type == BoxType.SEED:
+ return container
+ raise RuntimeError('Missing seed container')
+
+ def get_container(self, container_name: str):
+ containers = self.get_containers()
+ for container in containers:
+ if container.name == container_name:
+ return container
+ return None
+
+
+ def restart(self):
+ pass
+
+
+class DockerEngine(ContainerEngine):
+ command = 'docker'
+ seed_name = 'seed'
+ dockerfile = 'DockerfileDocker'
+
+ def restart(self):
+ run_shell_command('systemctl restart docker')
+
+ def up(self, hosts: int):
+ dcflags = f'-f {Config.get("docker_yaml")}'
+ if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
+ dcflags += f' -f {Config.get("docker_v1_yaml")}'
+ run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d')
+
+class PodmanEngine(ContainerEngine):
+ command = 'podman'
+ seed_name = 'box_hosts_0'
+ dockerfile = 'DockerfilePodman'
+
+ CAPS = [
+ "SYS_ADMIN",
+ "NET_ADMIN",
+ "SYS_TIME",
+ "SYS_RAWIO",
+ "MKNOD",
+ "NET_RAW",
+ "SETUID",
+ "SETGID",
+ "CHOWN",
+ "SYS_PTRACE",
+ "SYS_TTY_CONFIG",
+ "CAP_AUDIT_WRITE",
+ "CAP_AUDIT_CONTROL",
+ ]
+
+ VOLUMES = [
+ '../../../:/ceph:z',
+ '../:/cephadm:z',
+ '/run/udev:/run/udev',
+ '/sys/dev/block:/sys/dev/block',
+ '/sys/fs/cgroup:/sys/fs/cgroup:ro',
+ '/dev/fuse:/dev/fuse',
+ '/dev/disk:/dev/disk',
+ '/sys/devices/virtual/block:/sys/devices/virtual/block',
+ '/sys/block:/dev/block',
+ '/dev/mapper:/dev/mapper',
+ '/dev/mapper/control:/dev/mapper/control',
+ ]
+
+ TMPFS = ['/run', '/tmp']
+
+ # FIXME: right now we are assuming every service will be exposed through the seed, but this is far
+ # from the truth. Services can be deployed on different hosts so we need a system to manage this.
+ SEED_PORTS = [
+ 8443, # dashboard
+ 3000, # grafana
+ 9093, # alertmanager
+ 9095 # prometheus
+ ]
+
+
+ def setup_podman_env(self, hosts: int = 1, osd_devs={}):
+ network_name = 'box_network'
+ networks = run_shell_command('podman network ls')
+ if network_name not in networks:
+ run_shell_command(f'podman network create -d bridge {network_name}')
+
+ args = [
+ '--group-add', 'keep-groups',
+ '--device', '/dev/fuse',
+ '-it',
+ '-d',
+ '-e', 'CEPH_BRANCH=main',
+ '--stop-signal', 'RTMIN+3'
+ ]
+
+ for cap in self.CAPS:
+ args.append('--cap-add')
+ args.append(cap)
+
+ for volume in self.VOLUMES:
+ args.append('-v')
+ args.append(volume)
+
+ for tmp in self.TMPFS:
+ args.append('--tmpfs')
+ args.append(tmp)
+
+
+ for osd_dev in osd_devs.values():
+ device = osd_dev["device"]
+ args.append('--device')
+ args.append(f'{device}:{device}')
+
+
+ for host in range(hosts+1): # 0 will be the seed
+ options = copy.copy(args)
+ options.append('--name')
+ options.append(f'box_hosts_{host}')
+ options.append('--network')
+ options.append(f'{network_name}')
+ if host == 0:
+ for port in self.SEED_PORTS:
+ options.append('-p')
+ options.append(f'{port}:{port}')
+
+ options.append('cephadm-box')
+ options = ' '.join(options)
+
+ run_shell_command(f'podman run {options}')
+
+ def up(self, hosts: int):
+ import osd
+ self.setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices())
+
+def get_container_engine() -> ContainerEngine:
+ if engine() == 'docker':
+ return DockerEngine()
+ else:
+ return PodmanEngine()
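
A short usage sketch for the helpers above: Config is a process-wide key/value
store, so selecting the engine is a single assignment and get_container_engine()
returns the matching class:

    from util import Config, get_container_engine

    Config.set('engine', 'docker')
    engine = get_container_engine()
    print(type(engine).__name__, engine.seed_name)   # DockerEngine seed

    Config.set('engine', 'podman')
    print(get_container_engine().seed_name)          # box_hosts_0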
diff --git a/src/cephadm/build.py b/src/cephadm/build.py
new file mode 100755
index 000000000..4264b814f
--- /dev/null
+++ b/src/cephadm/build.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python3
+"""Build cephadm from one or more files into a standalone executable.
+"""
+# TODO: If cephadm is being built and packaged within a format such as RPM
+# do we have to do anything special wrt passing in the version
+# of python to build with? Even with the intermediate cmake layer?
+
+import argparse
+import compileall
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import tempfile
+import sys
+
+HAS_ZIPAPP = False
+try:
+ import zipapp
+
+ HAS_ZIPAPP = True
+except ImportError:
+ pass
+
+
+log = logging.getLogger(__name__)
+
+
+_VALID_VERS_VARS = [
+ "CEPH_GIT_VER",
+ "CEPH_GIT_NICE_VER",
+ "CEPH_RELEASE",
+ "CEPH_RELEASE_NAME",
+ "CEPH_RELEASE_TYPE",
+]
+
+
+def _reexec(python):
+ """Switch to the selected version of python by exec'ing into the desired
+ python path.
+ Sets the _BUILD_PYTHON_SET env variable as a sentinel to indicate exec has
+ been performed.
+ """
+ env = os.environ.copy()
+ env["_BUILD_PYTHON_SET"] = python
+ os.execvpe(python, [python, __file__] + sys.argv[1:], env)
+
+
+def _did_rexec():
+ """Returns true if the process has already exec'ed into the desired python
+ version.
+ """
+ return bool(os.environ.get("_BUILD_PYTHON_SET", ""))
+
+
+def _build(dest, src, versioning_vars=None):
+ """Build the binary."""
+ os.chdir(src)
+ tempdir = pathlib.Path(tempfile.mkdtemp(suffix=".cephadm.build"))
+ log.debug("working in %s", tempdir)
+ try:
+ if os.path.isfile("requirements.txt"):
+ _install_deps(tempdir)
+ log.info("Copying contents")
+ # TODO: currently the only file relevant to a compiled cephadm is the
+ # cephadm.py file. Once cephadm is broken up into multiple py files
+ # (and possibly other libs from python-common, etc) we'll want some
+ # sort organized structure to track what gets copied into the
+ # dir to be zipped. For now we just have a simple call to copy
+ # (and rename) the one file we care about.
+ shutil.copy("cephadm.py", tempdir / "__main__.py")
+ if versioning_vars:
+ generate_version_file(versioning_vars, tempdir / "_version.py")
+ _compile(dest, tempdir)
+ finally:
+ shutil.rmtree(tempdir)
+
+
+def _compile(dest, tempdir):
+ """Compile the zipapp."""
+ log.info("Byte-compiling py to pyc")
+ compileall.compile_dir(
+ tempdir,
+ maxlevels=16,
+ legacy=True,
+ quiet=1,
+ workers=0,
+ )
+ # TODO we could explicitly pass a python version here
+ log.info("Constructing the zipapp file")
+ try:
+ zipapp.create_archive(
+ source=tempdir,
+ target=dest,
+ interpreter=sys.executable,
+ compressed=True,
+ )
+ log.info("Zipapp created with compression")
+ except TypeError:
+ # automatically fall back to uncompressed
+ zipapp.create_archive(
+ source=tempdir,
+ target=dest,
+ interpreter=sys.executable,
+ )
+ log.info("Zipapp created without compression")
+
+
+def _install_deps(tempdir):
+ """Install dependencies with pip."""
+ # TODO we could explicitly pass a python version here
+ log.info("Installing dependencies")
+ # apparently pip doesn't have an API, just a cli.
+ subprocess.check_call(
+ [
+ sys.executable,
+ "-m",
+ "pip",
+ "install",
+ "--requirement",
+ "requirements.txt",
+ "--target",
+ tempdir,
+ ]
+ )
+
+
+def generate_version_file(versioning_vars, dest):
+ log.info("Generating version file")
+ log.debug("versioning_vars=%r", versioning_vars)
+ with open(dest, "w") as fh:
+ print("# GENERATED FILE -- do not edit", file=fh)
+ for key, value in versioning_vars:
+ print(f"{key} = {value!r}", file=fh)
+
+
+def version_kv_pair(value):
+ if "=" not in value:
+ raise argparse.ArgumentTypeError(f"not a key=value pair: {value!r}")
+ key, value = value.split("=", 1)
+ if key not in _VALID_VERS_VARS:
+ raise argparse.ArgumentTypeError(f"Unexpected key: {key!r}")
+ return key, value
+
+
+def main():
+ handler = logging.StreamHandler(sys.stdout)
+ handler.setFormatter(logging.Formatter("cephadm/build.py: %(message)s"))
+ log.addHandler(handler)
+ log.setLevel(logging.INFO)
+
+ log.debug("argv: %r", sys.argv)
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "dest", help="Destination path name for new cephadm binary"
+ )
+ parser.add_argument(
+ "--source", help="Directory containing cephadm sources"
+ )
+ parser.add_argument(
+ "--python", help="The path to the desired version of python"
+ )
+ parser.add_argument(
+ "--set-version-var",
+ "-S",
+ type=version_kv_pair,
+ dest="version_vars",
+ action="append",
+ help="Set a key=value pair in the generated version info file",
+ )
+ args = parser.parse_args()
+
+ if not _did_rexec() and args.python:
+ _reexec(args.python)
+
+ log.info(
+ "Python Version: {v.major}.{v.minor}.{v.micro}".format(
+ v=sys.version_info
+ )
+ )
+ log.info("Args: %s", vars(args))
+ if not HAS_ZIPAPP:
+ # Unconditionally display an error that the version of python
+ # lacks zipapp (probably too old).
+ print("error: zipapp module not found", file=sys.stderr)
+ print(
+ "(zipapp is available in Python 3.5 or later."
+ " are you using a new enough version?)",
+ " Are you using a new enough version?)",
+ )
+ sys.exit(2)
+ if args.source:
+ source = pathlib.Path(args.source).absolute()
+ else:
+ source = pathlib.Path(__file__).absolute().parent
+ dest = pathlib.Path(args.dest).absolute()
+ log.info("Source Dir: %s", source)
+ log.info("Destination Path: %s", dest)
+ _build(dest, source, versioning_vars=args.version_vars)
+
+
+if __name__ == "__main__":
+ main()
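
_compile() ultimately relies on the standard-library zipapp module. A
self-contained sketch of what it produces: a self-executing archive whose
entry point is __main__.py, mirroring how _build() renames cephadm.py (the
paths here are temporary and illustrative):

    import pathlib
    import subprocess
    import sys
    import tempfile
    import zipapp

    src = pathlib.Path(tempfile.mkdtemp(suffix='.demo'))
    (src / '__main__.py').write_text('print("hello from a zipapp")\n')
    dest = src.with_suffix('.pyz')
    zipapp.create_archive(
        source=src, target=dest, interpreter=sys.executable, compressed=True
    )
    subprocess.check_call([sys.executable, str(dest)])  # hello from a zipapp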
diff --git a/src/cephadm/build.sh b/src/cephadm/build.sh
new file mode 100755
index 000000000..84b58f14f
--- /dev/null
+++ b/src/cephadm/build.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+exec python3 $SCRIPT_DIR/build.py "$@"
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
new file mode 100755
index 000000000..bcb82c4c4
--- /dev/null
+++ b/src/cephadm/cephadm.py
@@ -0,0 +1,10700 @@
+#!/usr/bin/python3
+
+import asyncio
+import asyncio.subprocess
+import argparse
+import datetime
+import fcntl
+import ipaddress
+import io
+import json
+import logging
+from logging.config import dictConfig
+import os
+import platform
+import pwd
+import random
+import shlex
+import shutil
+import socket
+import string
+import subprocess
+import sys
+import tempfile
+import time
+import errno
+import struct
+import ssl
+from enum import Enum
+from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable, TextIO, Generator
+
+import re
+import uuid
+
+from configparser import ConfigParser
+from contextlib import redirect_stdout, contextmanager
+from functools import wraps
+from glob import glob
+from io import StringIO
+from threading import Thread, Event
+from urllib.error import HTTPError, URLError
+from urllib.request import urlopen, Request
+from pathlib import Path
+
+FuncT = TypeVar('FuncT', bound=Callable)
+
+# Default container images -----------------------------------------------------
+DEFAULT_IMAGE = 'quay.io/ceph/ceph:v18'
+DEFAULT_IMAGE_IS_MAIN = False
+DEFAULT_IMAGE_RELEASE = 'reef'
+DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
+DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
+DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
+DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
+DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
+DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7'
+DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
+DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1'
+DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
+DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
+DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
+DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
+DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
+DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this
+# ------------------------------------------------------------------------------
+
+LATEST_STABLE_RELEASE = 'reef'
+DATA_DIR = '/var/lib/ceph'
+LOG_DIR = '/var/log/ceph'
+LOCK_DIR = '/run/cephadm'
+LOGROTATE_DIR = '/etc/logrotate.d'
+SYSCTL_DIR = '/etc/sysctl.d'
+UNIT_DIR = '/etc/systemd/system'
+CEPH_CONF_DIR = 'config'
+CEPH_CONF = 'ceph.conf'
+CEPH_PUBKEY = 'ceph.pub'
+CEPH_KEYRING = 'ceph.client.admin.keyring'
+CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}'
+CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}'
+CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}'
+LOG_DIR_MODE = 0o770
+DATA_DIR_MODE = 0o700
+DEFAULT_MODE = 0o600
+CONTAINER_INIT = True
+MIN_PODMAN_VERSION = (2, 0, 2)
+CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
+PIDS_LIMIT_UNLIMITED_PODMAN_VERSION = (3, 4, 1)
+CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
+DEFAULT_TIMEOUT = None # in seconds
+DEFAULT_RETRY = 15
+DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
+QUIET_LOG_LEVEL = 9 # DEBUG is 10, so using 9 to be lower level than DEBUG
+NO_DEPRECATED = False
+
+logger: logging.Logger = None # type: ignore
+
+"""
+You can invoke cephadm in two ways:
+
+1. The normal way, at the command line.
+
+2. By piping the script to the python3 binary. In this latter case, you should
+ prepend one or more lines to the beginning of the script.
+
+ For arguments,
+
+ injected_argv = [...]
+
+ e.g.,
+
+ injected_argv = ['ls']
+
+ For reading stdin from the '--config-json -' argument,
+
+ injected_stdin = '...'
+"""
+cached_stdin = None
+
+
+##################################
+
+
+async def run_func(func: Callable, cmd: str) -> subprocess.CompletedProcess:
+ logger.debug(f'running function {func.__name__}, with params: {cmd}')
+ response = func(cmd)
+ return response
+
+
+async def concurrent_tasks(func: Callable, cmd_list: List[str]) -> List[Any]:
+ tasks = []
+ for cmd in cmd_list:
+ tasks.append(run_func(func, cmd))
+
+ data = await asyncio.gather(*tasks)
+
+ return data
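# A self-contained sketch of the fan-out pattern that run_func() and
# concurrent_tasks() implement above: wrap each synchronous call in a
# coroutine, then gather; asyncio.gather returns results in input order.
# The names below are illustrative, not part of cephadm's API.
async def _gather_example(func: Callable, cmds: List[str]) -> List[Any]:
    async def one(cmd: str) -> Any:
        return func(cmd)
    return await asyncio.gather(*(one(c) for c in cmds))

# asyncio.run(_gather_example(str.upper, ['mon', 'mgr'])) -> ['MON', 'MGR']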
+
+
+class EndPoint:
+ """EndPoint representing an ip:port format"""
+
+ def __init__(self, ip: str, port: int) -> None:
+ self.ip = ip
+ self.port = port
+
+ def __str__(self) -> str:
+ return f'{self.ip}:{self.port}'
+
+ def __repr__(self) -> str:
+ return f'{self.ip}:{self.port}'
+
+
+class ContainerInfo:
+ def __init__(self, container_id: str,
+ image_name: str,
+ image_id: str,
+ start: str,
+ version: str) -> None:
+ self.container_id = container_id
+ self.image_name = image_name
+ self.image_id = image_id
+ self.start = start
+ self.version = version
+
+ def __eq__(self, other: Any) -> bool:
+ if not isinstance(other, ContainerInfo):
+ return NotImplemented
+ return (self.container_id == other.container_id
+ and self.image_name == other.image_name
+ and self.image_id == other.image_id
+ and self.start == other.start
+ and self.version == other.version)
+
+
+class DeploymentType(Enum):
+ # Fresh deployment of a daemon.
+ DEFAULT = 'Deploy'
+ # Redeploying a daemon. Works the same as fresh
+ # deployment minus port checking.
+ REDEPLOY = 'Redeploy'
+ # Reconfiguring a daemon. Rewrites config
+ # files and potentially restarts daemon.
+ RECONFIG = 'Reconfig'
+
+
+class BaseConfig:
+
+ def __init__(self) -> None:
+ self.image: str = ''
+ self.docker: bool = False
+ self.data_dir: str = DATA_DIR
+ self.log_dir: str = LOG_DIR
+ self.logrotate_dir: str = LOGROTATE_DIR
+ self.sysctl_dir: str = SYSCTL_DIR
+ self.unit_dir: str = UNIT_DIR
+ self.verbose: bool = False
+ self.timeout: Optional[int] = DEFAULT_TIMEOUT
+ self.retry: int = DEFAULT_RETRY
+ self.env: List[str] = []
+ self.memory_request: Optional[int] = None
+ self.memory_limit: Optional[int] = None
+ self.log_to_journald: Optional[bool] = None
+
+ self.container_init: bool = CONTAINER_INIT
+ self.container_engine: Optional[ContainerEngine] = None
+
+ def set_from_args(self, args: argparse.Namespace) -> None:
+ argdict: Dict[str, Any] = vars(args)
+ for k, v in argdict.items():
+ if hasattr(self, k):
+ setattr(self, k, v)
+
+
+class CephadmContext:
+
+ def __init__(self) -> None:
+ self.__dict__['_args'] = None
+ self.__dict__['_conf'] = BaseConfig()
+
+ def set_args(self, args: argparse.Namespace) -> None:
+ self._conf.set_from_args(args)
+ self._args = args
+
+ def has_function(self) -> bool:
+ return 'func' in self._args
+
+ def __contains__(self, name: str) -> bool:
+ return hasattr(self, name)
+
+ def __getattr__(self, name: str) -> Any:
+ if '_conf' in self.__dict__ and hasattr(self._conf, name):
+ return getattr(self._conf, name)
+ elif '_args' in self.__dict__ and hasattr(self._args, name):
+ return getattr(self._args, name)
+ else:
+ return super().__getattribute__(name)
+
+ def __setattr__(self, name: str, value: Any) -> None:
+ if hasattr(self._conf, name):
+ setattr(self._conf, name, value)
+ elif hasattr(self._args, name):
+ setattr(self._args, name, value)
+ else:
+ super().__setattr__(name, value)
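# A minimal usage sketch of the delegation above, assuming no CLI args have
# been parsed yet: reads and writes on a CephadmContext fall through to its
# BaseConfig first and the argparse namespace second.
def _context_example() -> None:
    ctx = CephadmContext()
    ctx.image = 'quay.io/ceph/ceph:v18'    # routed to ctx._conf by __setattr__
    assert ctx.image == ctx._conf.image    # read back through __getattr__
    assert 'image' in ctx                  # __contains__ delegates to hasattr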
+
+
+class ContainerEngine:
+ def __init__(self) -> None:
+ self.path = find_program(self.EXE)
+
+ @property
+ def EXE(self) -> str:
+ raise NotImplementedError()
+
+ def __str__(self) -> str:
+ return f'{self.EXE} ({self.path})'
+
+
+class Podman(ContainerEngine):
+ EXE = 'podman'
+
+ def __init__(self) -> None:
+ super().__init__()
+ self._version: Optional[Tuple[int, ...]] = None
+
+ @property
+ def version(self) -> Tuple[int, ...]:
+ if self._version is None:
+ raise RuntimeError('Please call `get_version` first')
+ return self._version
+
+ def get_version(self, ctx: CephadmContext) -> None:
+ out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'], verbosity=CallVerbosity.QUIET)
+ self._version = _parse_podman_version(out)
+
+ def __str__(self) -> str:
+ version = '.'.join(map(str, self.version))
+ return f'{self.EXE} ({self.path}) version {version}'
+
+
+class Docker(ContainerEngine):
+ EXE = 'docker'
+
+
+CONTAINER_PREFERENCE = (Podman, Docker) # prefer podman to docker
+
+
+# During normal cephadm operations (cephadm ls, gather-facts, etc.) we use:
+# stdout: for JSON output only
+# stderr: for error, debug, info, etc
+logging_config = {
+ 'version': 1,
+ 'disable_existing_loggers': True,
+ 'formatters': {
+ 'cephadm': {
+ 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
+ },
+ },
+ 'handlers': {
+ 'console': {
+ 'level': 'INFO',
+ 'class': 'logging.StreamHandler',
+ },
+ 'log_file': {
+ 'level': 'DEBUG',
+ 'class': 'logging.handlers.WatchedFileHandler',
+ 'formatter': 'cephadm',
+ 'filename': '%s/cephadm.log' % LOG_DIR,
+ }
+ },
+ 'loggers': {
+ '': {
+ 'level': 'DEBUG',
+ 'handlers': ['console', 'log_file'],
+ }
+ }
+}
+
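+# A minimal sketch of how this dict is consumed (assumes LOG_DIR already
+# exists and is writable):
+#
+#     import logging.config
+#     logging.config.dictConfig(logging_config)
+#     logging.getLogger(__name__).info('lands on stderr and in cephadm.log')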
+
+class ExcludeErrorsFilter(logging.Filter):
+ def filter(self, record: logging.LogRecord) -> bool:
+        """Only lets through log messages with a log level below WARNING."""
+ return record.levelno < logging.WARNING
+
+
+# When cephadm is used as a standard binary (bootstrap, rm-cluster, etc.) we use:
+# stdout: for debug and info
+# stderr: for errors and warnings
+interactive_logging_config = {
+ 'version': 1,
+ 'filters': {
+ 'exclude_errors': {
+ '()': ExcludeErrorsFilter
+ }
+ },
+ 'disable_existing_loggers': True,
+ 'formatters': {
+ 'cephadm': {
+ 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
+ },
+ },
+ 'handlers': {
+ 'console_stdout': {
+ 'level': 'INFO',
+ 'class': 'logging.StreamHandler',
+ 'filters': ['exclude_errors'],
+ 'stream': sys.stdout
+ },
+ 'console_stderr': {
+ 'level': 'WARNING',
+ 'class': 'logging.StreamHandler',
+ 'stream': sys.stderr
+ },
+ 'log_file': {
+ 'level': 'DEBUG',
+ 'class': 'logging.handlers.WatchedFileHandler',
+ 'formatter': 'cephadm',
+ 'filename': '%s/cephadm.log' % LOG_DIR,
+ }
+ },
+ 'loggers': {
+ '': {
+ 'level': 'DEBUG',
+ 'handlers': ['console_stdout', 'console_stderr', 'log_file'],
+ }
+ }
+}
+
+
+class termcolor:
+ yellow = '\033[93m'
+ red = '\033[31m'
+ end = '\033[0m'
+
+
+class Error(Exception):
+ pass
+
+
+class ClusterAlreadyExists(Exception):
+ pass
+
+
+class TimeoutExpired(Error):
+ pass
+
+
+class UnauthorizedRegistryError(Error):
+ pass
+
+##################################
+
+
+class Ceph(object):
+ daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
+ 'crash', 'cephfs-mirror', 'ceph-exporter')
+ gateways = ('iscsi', 'nfs', 'nvmeof')
+
+##################################
+
+
+class OSD(object):
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# allow a large number of OSDs',
+ 'fs.aio-max-nr = 1048576',
+ 'kernel.pid_max = 4194304',
+ ]
+
+
+##################################
+
+
+class SNMPGateway:
+    """Defines an SNMP gateway between Prometheus and SNMP monitoring frameworks"""
+ daemon_type = 'snmp-gateway'
+ SUPPORTED_VERSIONS = ['V2c', 'V3']
+ default_image = DEFAULT_SNMP_GATEWAY_IMAGE
+ DEFAULT_PORT = 9464
+ env_filename = 'snmp-gateway.conf'
+
+ def __init__(self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict[str, Any],
+ image: Optional[str] = None) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image or SNMPGateway.default_image
+
+ self.uid = config_json.get('uid', 0)
+ self.gid = config_json.get('gid', 0)
+
+ self.destination = config_json.get('destination', '')
+ self.snmp_version = config_json.get('snmp_version', 'V2c')
+ self.snmp_community = config_json.get('snmp_community', 'public')
+ self.log_level = config_json.get('log_level', 'info')
+ self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
+ self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
+ self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
+ self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
+ self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
+ self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')
+
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx: CephadmContext, fsid: str,
+ daemon_id: Union[int, str]) -> 'SNMPGateway':
+ cfgs = fetch_configs(ctx)
+ assert cfgs # assert some config data was found
+ return cls(ctx, fsid, daemon_id, cfgs, ctx.image)
+
+ @staticmethod
+ def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
+        """Return the version of the notifier from its HTTP endpoint"""
+ path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
+ try:
+ with open(path, 'r') as env:
+ metadata = json.loads(env.read())
+ except (OSError, json.JSONDecodeError):
+ return None
+
+ ports = metadata.get('ports', [])
+ if not ports:
+ return None
+
+ try:
+ with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
+ html = r.read().decode('utf-8').split('\n')
+ except (HTTPError, URLError):
+ return None
+
+ for h in html:
+ stripped = h.strip()
+ if stripped.startswith(('<pre>', '<PRE>')) and \
+ stripped.endswith(('</pre>', '</PRE>')):
+ # <pre>(version=1.2.1, branch=HEAD, revision=7...
+ return stripped.split(',')[0].split('version=')[1]
+
+ return None
+
+ @property
+ def port(self) -> int:
+ endpoints = fetch_tcp_ports(self.ctx)
+ if not endpoints:
+ return self.DEFAULT_PORT
+ return endpoints[0].port
+
+ def get_daemon_args(self) -> List[str]:
+ v3_args = []
+ base_args = [
+ f'--web.listen-address=:{self.port}',
+ f'--snmp.destination={self.destination}',
+ f'--snmp.version={self.snmp_version}',
+ f'--log.level={self.log_level}',
+ '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
+ ]
+
+ if self.snmp_version == 'V3':
+ # common auth settings
+ v3_args.extend([
+ '--snmp.authentication-enabled',
+ f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
+ f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
+ ])
+ # authPriv setting is applied if we have a privacy protocol setting
+ if self.snmp_v3_priv_protocol:
+ v3_args.extend([
+ '--snmp.private-enabled',
+ f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
+ ])
+
+ return base_args + v3_args
+
+ @property
+ def data_dir(self) -> str:
+ return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')
+
+ @property
+ def conf_file_path(self) -> str:
+ return os.path.join(self.data_dir, self.env_filename)
+
+ def create_daemon_conf(self) -> None:
+ """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
+ with write_new(self.conf_file_path) as f:
+ if self.snmp_version == 'V2c':
+ f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
+ else:
+ f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
+ f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
+ if self.snmp_v3_priv_password:
+ f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
+
+ def validate(self) -> None:
+ """Validate the settings
+
+ Raises:
+ Error: if the fsid doesn't look like an fsid
+ Error: if the snmp version is not supported
+            Error: if the destination (<ip>:<port>) is missing
+ """
+ if not is_fsid(self.fsid):
+ raise Error(f'not a valid fsid: {self.fsid}')
+
+ if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
+ raise Error(f'not a valid snmp version: {self.snmp_version}')
+
+ if not self.destination:
+            raise Error('config is missing the destination attribute (<ip>:<port>) of the target SNMP listener')
+
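+# An illustrative config-json for a V2c gateway (hypothetical values); a V3
+# deployment would instead carry the snmp_v3_* fields validated above:
+#
+#     {
+#         "destination": "192.168.1.10:162",
+#         "snmp_version": "V2c",
+#         "snmp_community": "public"
+#     }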
+
+##################################
+class Monitoring(object):
+ """Define the configs for the monitoring containers"""
+
+ port_map = {
+ 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI
+ 'node-exporter': [9100],
+ 'grafana': [3000],
+ 'alertmanager': [9093, 9094],
+ 'loki': [3100],
+ 'promtail': [9080]
+ }
+
+ components = {
+ 'prometheus': {
+ 'image': DEFAULT_PROMETHEUS_IMAGE,
+ 'cpus': '2',
+ 'memory': '4GB',
+ 'args': [
+ '--config.file=/etc/prometheus/prometheus.yml',
+ '--storage.tsdb.path=/prometheus',
+ ],
+ 'config-json-files': [
+ 'prometheus.yml',
+ ],
+ },
+ 'loki': {
+ 'image': DEFAULT_LOKI_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ '--config.file=/etc/loki/loki.yml',
+ ],
+ 'config-json-files': [
+ 'loki.yml'
+ ],
+ },
+ 'promtail': {
+ 'image': DEFAULT_PROMTAIL_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ '--config.file=/etc/promtail/promtail.yml',
+ ],
+ 'config-json-files': [
+ 'promtail.yml',
+ ],
+ },
+ 'node-exporter': {
+ 'image': DEFAULT_NODE_EXPORTER_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ '--no-collector.timex'
+ ],
+ },
+ 'grafana': {
+ 'image': DEFAULT_GRAFANA_IMAGE,
+ 'cpus': '2',
+ 'memory': '4GB',
+ 'args': [],
+ 'config-json-files': [
+ 'grafana.ini',
+ 'provisioning/datasources/ceph-dashboard.yml',
+ 'certs/cert_file',
+ 'certs/cert_key',
+ ],
+ },
+ 'alertmanager': {
+ 'image': DEFAULT_ALERT_MANAGER_IMAGE,
+ 'cpus': '2',
+ 'memory': '2GB',
+ 'args': [
+ '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
+ ],
+ 'config-json-files': [
+ 'alertmanager.yml',
+ ],
+ 'config-json-args': [
+ 'peers',
+ ],
+ },
+ } # type: ignore
+
+ @staticmethod
+ def get_version(ctx, container_id, daemon_type):
+ # type: (CephadmContext, str, str) -> str
+        """
+        :param daemon_type: either 'prometheus', 'alertmanager', 'loki', 'promtail' or 'node-exporter'
+        """
+ assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
+ cmd = daemon_type.replace('-', '_')
+ code = -1
+ err = ''
+ out = ''
+ version = ''
+ if daemon_type == 'alertmanager':
+ for cmd in ['alertmanager', 'prometheus-alertmanager']:
+ out, err, code = call(ctx, [
+ ctx.container_engine.path, 'exec', container_id, cmd,
+ '--version'
+ ], verbosity=CallVerbosity.QUIET)
+ if code == 0:
+ break
+ cmd = 'alertmanager' # reset cmd for version extraction
+ else:
+ out, err, code = call(ctx, [
+ ctx.container_engine.path, 'exec', container_id, cmd, '--version'
+ ], verbosity=CallVerbosity.QUIET)
+ if code == 0:
+ if err.startswith('%s, version ' % cmd):
+ version = err.split(' ')[2]
+ elif out.startswith('%s, version ' % cmd):
+ version = out.split(' ')[2]
+ return version
+
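+# Sketch of the extraction above on illustrative output (not captured from a
+# live container): `prometheus --version` prints something like
+# 'prometheus, version 2.33.4 (branch: HEAD, ...)', so splitting on spaces
+# and taking index 2 yields '2.33.4'.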
+##################################
+
+
+@contextmanager
+def write_new(
+ destination: Union[str, Path],
+ *,
+ owner: Optional[Tuple[int, int]] = None,
+ perms: Optional[int] = DEFAULT_MODE,
+ encoding: Optional[str] = None,
+) -> Generator[IO, None, None]:
+    """Write a new file in a robust manner, optionally specifying the owner,
+    permissions, or encoding. This function takes care never to leave a file
+    in a partially-written state due to a crash or power outage, by writing
+    to a temporary file and then renaming that temp file over to the final
+    destination once all data is written. Note that a temporary file can be
+    leaked, but only by a "crash" or power outage - regular exceptions will
+    clean up the temporary file.
+    """
+ destination = os.path.abspath(destination)
+ tempname = f'{destination}.new'
+ open_kwargs: Dict[str, Any] = {}
+ if encoding:
+ open_kwargs['encoding'] = encoding
+ try:
+ with open(tempname, 'w', **open_kwargs) as fh:
+ yield fh
+ fh.flush()
+ os.fsync(fh.fileno())
+ if owner is not None:
+ os.fchown(fh.fileno(), *owner)
+ if perms is not None:
+ os.fchmod(fh.fileno(), perms)
+ except Exception:
+ os.unlink(tempname)
+ raise
+ os.rename(tempname, destination)
+
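+# Usage sketch (hypothetical path and mode): the file only appears at its
+# destination once fully written, fsync'ed and renamed.
+#
+#     with write_new('/etc/example.conf', owner=(0, 0), perms=0o600) as f:
+#         f.write('key = value\n')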
+
+def populate_files(config_dir, config_files, uid, gid):
+ # type: (str, Dict, int, int) -> None
+ """create config files for different services"""
+ for fname in config_files:
+ config_file = os.path.join(config_dir, fname)
+ config_content = dict_get_join(config_files, fname)
+ logger.info('Write file: %s' % (config_file))
+ with write_new(config_file, owner=(uid, gid), encoding='utf-8') as f:
+ f.write(config_content)
+
+
+class NFSGanesha(object):
+    """Defines an NFS-Ganesha container"""
+
+ daemon_type = 'nfs'
+ entrypoint = '/usr/bin/ganesha.nfsd'
+ daemon_args = ['-F', '-L', 'STDERR']
+
+ required_files = ['ganesha.conf']
+
+ port_map = {
+ 'nfs': 2049,
+ }
+
+ def __init__(self,
+ ctx,
+ fsid,
+ daemon_id,
+ config_json,
+ image=DEFAULT_IMAGE):
+ # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.pool = dict_get(config_json, 'pool', require=True)
+ self.namespace = dict_get(config_json, 'namespace')
+ self.userid = dict_get(config_json, 'userid')
+ self.extra_args = dict_get(config_json, 'extra_args', [])
+ self.files = dict_get(config_json, 'files', {})
+ self.rgw = dict_get(config_json, 'rgw', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx, fsid, daemon_id):
+ # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ def get_container_mounts(self, data_dir):
+ # type: (str) -> Dict[str, str]
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
+ if self.rgw:
+ cluster = self.rgw.get('cluster', 'ceph')
+ rgw_user = self.rgw.get('user', 'admin')
+ mounts[os.path.join(data_dir, 'keyring.rgw')] = \
+ '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
+ return mounts
+
+ @staticmethod
+ def get_container_envs():
+ # type: () -> List[str]
+ envs = [
+ 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
+ ]
+ return envs
+
+ @staticmethod
+ def get_version(ctx, container_id):
+ # type: (CephadmContext, str) -> Optional[str]
+ version = None
+ out, err, code = call(ctx,
+ [ctx.container_engine.path, 'exec', container_id,
+ NFSGanesha.entrypoint, '-v'],
+ verbosity=CallVerbosity.QUIET)
+ if code == 0:
+ match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
+ if match:
+ version = match.group(1)
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error('required file missing from config-json: %s' % fname)
+
+ # check for an RGW config
+ if self.rgw:
+ if not self.rgw.get('keyring'):
+ raise Error('RGW keyring is missing')
+ if not self.rgw.get('user'):
+ raise Error('RGW user is missing')
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def get_daemon_args(self):
+ # type: () -> List[str]
+ return self.daemon_args + self.extra_args
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ganesha config...')
+
+ # create the ganesha conf dir
+ config_dir = os.path.join(data_dir, 'etc/ganesha')
+ makedirs(config_dir, uid, gid, 0o755)
+
+ # populate files from the config-json
+ populate_files(config_dir, self.files, uid, gid)
+
+ # write the RGW keyring
+ if self.rgw:
+ keyring_path = os.path.join(data_dir, 'keyring.rgw')
+ with write_new(keyring_path, owner=(uid, gid)) as f:
+ f.write(self.rgw.get('keyring', ''))
+
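+# An illustrative config-json (hypothetical values): 'pool' is required, and
+# 'files' must at least carry ganesha.conf per required_files:
+#
+#     {
+#         "pool": "nfs-ganesha",
+#         "namespace": "ns1",
+#         "files": {"ganesha.conf": "..."}
+#     }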
+##################################
+
+
+class CephIscsi(object):
+ """Defines a Ceph-Iscsi container"""
+
+ daemon_type = 'iscsi'
+ entrypoint = '/usr/bin/rbd-target-api'
+
+ required_files = ['iscsi-gateway.cfg']
+
+ def __init__(self,
+ ctx,
+ fsid,
+ daemon_id,
+ config_json,
+ image=DEFAULT_IMAGE):
+ # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx, fsid, daemon_id):
+ # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
+ return cls(ctx, fsid, daemon_id,
+ fetch_configs(ctx), ctx.image)
+
+ @staticmethod
+ def get_container_mounts(data_dir, log_dir):
+ # type: (str, str) -> Dict[str, str]
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
+ mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+ mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+ mounts[log_dir] = '/var/log:z'
+ mounts['/dev'] = '/dev'
+ return mounts
+
+ @staticmethod
+ def get_container_binds():
+ # type: () -> List[List[str]]
+ binds = []
+ lib_modules = ['type=bind',
+ 'source=/lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true']
+ binds.append(lib_modules)
+ return binds
+
+ @staticmethod
+ def get_version(ctx, container_id):
+ # type: (CephadmContext, str) -> Optional[str]
+ version = None
+ out, err, code = call(ctx,
+ [ctx.container_engine.path, 'exec', container_id,
+ '/usr/bin/python3', '-c',
+ "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
+ verbosity=CallVerbosity.QUIET)
+ if code == 0:
+ version = out.strip()
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error('required file missing from config-json: %s' % fname)
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ceph-iscsi config...')
+ configfs_dir = os.path.join(data_dir, 'configfs')
+ makedirs(configfs_dir, uid, gid, 0o755)
+
+ # set up the tcmu-runner entrypoint script
+ # to be mounted into the container. For more info
+ # on why we need this script, see the
+ # tcmu_runner_entrypoint_script function
+ self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script()
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ # we want the tcmu runner entrypoint script to be executable
+ # populate_files will give it 0o600 by default
+ os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
+
+ @staticmethod
+ def configfs_mount_umount(data_dir, mount=True):
+ # type: (str, bool) -> List[str]
+ mount_path = os.path.join(data_dir, 'configfs')
+ if mount:
+ cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
+ 'mount -t configfs none {0}; fi'.format(mount_path)
+ else:
+ cmd = 'if grep -qs {0} /proc/mounts; then ' \
+ 'umount {0}; fi'.format(mount_path)
+ return cmd.split()
+
+ @staticmethod
+ def tcmu_runner_entrypoint_script() -> str:
+        # Since tcmu-runner runs as a background process in its systemd
+        # unit (rbd-target-api being the main process), systemd will not
+        # restart it when it fails. In order to try and get around that
+        # for now, we can have a script mounted in the container that
+        # attempts to do the restarting for us. This script can then
+        # become the entrypoint for the tcmu-runner container.
+
+        # This is intended to be dropped for a better solution
+        # for at least the squid release onward
+ return """#!/bin/bash
+RUN_DIR=/var/run/tcmu-runner
+
+if [ ! -d "${RUN_DIR}" ] ; then
+ mkdir -p "${RUN_DIR}"
+fi
+
+rm -rf "${RUN_DIR}"/*
+
+while true
+do
+ touch "${RUN_DIR}"/start-up-$(date -Ins)
+ /usr/bin/tcmu-runner
+
+ # If we got around 3 kills/segfaults in the last minute,
+ # don't start anymore
+ if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
+ exit 0
+ fi
+
+ sleep 1
+done
+"""
+
+ def get_tcmu_runner_container(self):
+ # type: () -> CephContainer
+        # daemon_id is used to generate the cid and pid files used by podman, but as both tcmu-runner
+        # and rbd-target-api have the same daemon_id, they conflict and prevent the second container from
+        # starting. '.tcmu' is appended to the daemon_id to fix that.
+ tcmu_container = get_deployment_container(self.ctx, self.fsid, self.daemon_type, str(self.daemon_id) + '.tcmu')
+ # TODO: Eventually we don't want to run tcmu-runner through this script.
+ # This is intended to be a workaround backported to older releases
+ # and should eventually be removed in at least squid onward
+ tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+ tcmu_container.cname = self.get_container_name(desc='tcmu')
+ return tcmu_container
+
+
+##################################
+
+
+class CephNvmeof(object):
+ """Defines a Ceph-Nvmeof container"""
+
+ daemon_type = 'nvmeof'
+ required_files = ['ceph-nvmeof.conf']
+ default_image = DEFAULT_NVMEOF_IMAGE
+
+ def __init__(self,
+ ctx,
+ fsid,
+ daemon_id,
+ config_json,
+ image=DEFAULT_NVMEOF_IMAGE):
+ # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx, fsid, daemon_id):
+ # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
+ return cls(ctx, fsid, daemon_id,
+ fetch_configs(ctx), ctx.image)
+
+ @staticmethod
+ def get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
+ mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+ mounts['/dev/hugepages'] = '/dev/hugepages'
+ mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
+ return mounts
+
+ @staticmethod
+ def get_container_binds():
+ # type: () -> List[List[str]]
+ binds = []
+ lib_modules = ['type=bind',
+ 'source=/lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true']
+ binds.append(lib_modules)
+ return binds
+
+ @staticmethod
+ def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
+ out, err, ret = call(ctx,
+ [ctx.container_engine.path, 'inspect',
+ '--format', '{{index .Config.Labels "io.ceph.version"}}',
+ ctx.image])
+ version = None
+ if ret == 0:
+ version = out.strip()
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error('required file missing from config-json: %s' % fname)
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = '%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ceph-nvmeof config...')
+ configfs_dir = os.path.join(data_dir, 'configfs')
+ makedirs(configfs_dir, uid, gid, 0o755)
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ @staticmethod
+ def configfs_mount_umount(data_dir, mount=True):
+ # type: (str, bool) -> List[str]
+ mount_path = os.path.join(data_dir, 'configfs')
+ if mount:
+ cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
+ 'mount -t configfs none {0}; fi'.format(mount_path)
+ else:
+ cmd = 'if grep -qs {0} /proc/mounts; then ' \
+ 'umount {0}; fi'.format(mount_path)
+ return cmd.split()
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ 'vm.nr_hugepages = 4096',
+ ]
+
+
+##################################
+
+
+class CephExporter(object):
+ """Defines a Ceph exporter container"""
+
+ daemon_type = 'ceph-exporter'
+ entrypoint = '/usr/bin/ceph-exporter'
+ DEFAULT_PORT = 9926
+ port_map = {
+ 'ceph-exporter': DEFAULT_PORT,
+ }
+
+ def __init__(self,
+ ctx: CephadmContext,
+ fsid: str, daemon_id: Union[int, str],
+ config_json: Dict[str, Any],
+ image: str = DEFAULT_IMAGE) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
+ ipv4_addrs, _ = get_ip_addresses(get_hostname())
+ addrs = '0.0.0.0' if ipv4_addrs else '::'
+ self.addrs = config_json.get('addrs', addrs)
+ self.port = config_json.get('port', self.DEFAULT_PORT)
+ self.prio_limit = config_json.get('prio-limit', 5)
+ self.stats_period = config_json.get('stats-period', 5)
+
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx: CephadmContext, fsid: str,
+ daemon_id: Union[int, str]) -> 'CephExporter':
+ return cls(ctx, fsid, daemon_id,
+ fetch_configs(ctx), ctx.image)
+
+ @staticmethod
+ def get_container_mounts() -> Dict[str, str]:
+ mounts = dict()
+ mounts['/var/run/ceph'] = '/var/run/ceph:z'
+ return mounts
+
+ def get_daemon_args(self) -> List[str]:
+ args = [
+ f'--sock-dir={self.sock_dir}',
+ f'--addrs={self.addrs}',
+ f'--port={self.port}',
+ f'--prio-limit={self.prio_limit}',
+ f'--stats-period={self.stats_period}',
+ ]
+ return args
+
+ def validate(self) -> None:
+ if not os.path.isdir(self.sock_dir):
+ raise Error(f'Directory does not exist. Got: {self.sock_dir}')
+
+
+##################################
+
+
+class HAproxy(object):
+ """Defines an HAproxy container"""
+ daemon_type = 'haproxy'
+ required_files = ['haproxy.cfg']
+ default_image = DEFAULT_HAPROXY_IMAGE
+
+ def __init__(self,
+ ctx: CephadmContext,
+ fsid: str, daemon_id: Union[int, str],
+ config_json: Dict, image: str) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx: CephadmContext,
+ fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx),
+ ctx.image)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ # create additional directories in data dir for HAproxy to use
+ if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
+ makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)
+
+ data_dir = os.path.join(data_dir, 'haproxy')
+ populate_files(data_dir, self.files, uid, gid)
+
+ def get_daemon_args(self) -> List[str]:
+ return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error('required file missing from config-json: %s' % fname)
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
+ # better directory for this?
+ return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+ @staticmethod
+ def get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
+ return mounts
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# IP forwarding and non-local bind',
+ 'net.ipv4.ip_forward = 1',
+ 'net.ipv4.ip_nonlocal_bind = 1',
+ ]
+
+##################################
+
+
+class Keepalived(object):
+    """Defines a Keepalived container"""
+ daemon_type = 'keepalived'
+ required_files = ['keepalived.conf']
+ default_image = DEFAULT_KEEPALIVED_IMAGE
+
+ def __init__(self,
+ ctx: CephadmContext,
+ fsid: str, daemon_id: Union[int, str],
+ config_json: Dict, image: str) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx: CephadmContext, fsid: str,
+ daemon_id: Union[int, str]) -> 'Keepalived':
+ return cls(ctx, fsid, daemon_id,
+ fetch_configs(ctx), ctx.image)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ # create additional directories in data dir for keepalived to use
+ if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
+ makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error('required file missing from config-json: %s' % fname)
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ @staticmethod
+ def get_container_envs():
+ # type: () -> List[str]
+ envs = [
+ 'KEEPALIVED_AUTOCONF=false',
+ 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
+ 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
+ 'KEEPALIVED_DEBUG=false'
+ ]
+ return envs
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# IP forwarding and non-local bind',
+ 'net.ipv4.ip_forward = 1',
+ 'net.ipv4.ip_nonlocal_bind = 1',
+ ]
+
+ def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
+ # better directory for this?
+ return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+ @staticmethod
+ def get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
+ return mounts
+
+##################################
+
+
+class Tracing(object):
+ """Define the configs for the jaeger tracing containers"""
+
+ components: Dict[str, Dict[str, Any]] = {
+ 'elasticsearch': {
+ 'image': DEFAULT_ELASTICSEARCH_IMAGE,
+ 'envs': ['discovery.type=single-node']
+ },
+ 'jaeger-agent': {
+ 'image': DEFAULT_JAEGER_AGENT_IMAGE,
+ },
+ 'jaeger-collector': {
+ 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
+ },
+ 'jaeger-query': {
+ 'image': DEFAULT_JAEGER_QUERY_IMAGE,
+ },
+ } # type: ignore
+
+ @staticmethod
+ def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
+ if daemon_type in ['jaeger-collector', 'jaeger-query']:
+ assert 'elasticsearch_nodes' in config
+ Tracing.components[daemon_type]['envs'] = [
+ 'SPAN_STORAGE_TYPE=elasticsearch',
+ f'ES_SERVER_URLS={config["elasticsearch_nodes"]}']
+ if daemon_type == 'jaeger-agent':
+ assert 'collector_nodes' in config
+ Tracing.components[daemon_type]['daemon_args'] = [
+ f'--reporter.grpc.host-port={config["collector_nodes"]}',
+ '--processor.jaeger-compact.server-host-port=6799'
+ ]
+
+##################################
+
+
+class CustomContainer(object):
+ """Defines a custom container"""
+ daemon_type = 'container'
+
+ def __init__(self,
+ fsid: str, daemon_id: Union[int, str],
+ config_json: Dict, image: str) -> None:
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.entrypoint = dict_get(config_json, 'entrypoint')
+ self.uid = dict_get(config_json, 'uid', 65534) # nobody
+ self.gid = dict_get(config_json, 'gid', 65534) # nobody
+ self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
+ self.args = dict_get(config_json, 'args', [])
+ self.envs = dict_get(config_json, 'envs', [])
+ self.privileged = dict_get(config_json, 'privileged', False)
+ self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
+ self.ports = dict_get(config_json, 'ports', [])
+ self.dirs = dict_get(config_json, 'dirs', [])
+ self.files = dict_get(config_json, 'files', {})
+
+ @classmethod
+ def init(cls, ctx: CephadmContext,
+ fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
+ return cls(fsid, daemon_id,
+ fetch_configs(ctx), ctx.image)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """
+ Create dirs/files below the container data directory.
+ """
+ logger.info('Creating custom container configuration '
+ 'dirs/files in {} ...'.format(data_dir))
+
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % data_dir)
+
+ for dir_path in self.dirs:
+ logger.info('Creating directory: {}'.format(dir_path))
+ dir_path = os.path.join(data_dir, dir_path.strip('/'))
+ makedirs(dir_path, uid, gid, 0o755)
+
+ for file_path in self.files:
+ logger.info('Creating file: {}'.format(file_path))
+ content = dict_get_join(self.files, file_path)
+ file_path = os.path.join(data_dir, file_path.strip('/'))
+ with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
+ f.write(content)
+
+ def get_daemon_args(self) -> List[str]:
+ return []
+
+ def get_container_args(self) -> List[str]:
+ return self.args
+
+ def get_container_envs(self) -> List[str]:
+ return self.envs
+
+ def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+ """
+ Get the volume mounts. Relative source paths will be located below
+ `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+ Example:
+ {
+ /foo/conf: /conf
+ foo/conf: /conf
+ }
+ becomes
+ {
+ /foo/conf: /conf
+ /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
+ }
+ """
+ mounts = {}
+ for source, destination in self.volume_mounts.items():
+ source = os.path.join(data_dir, source)
+ mounts[source] = destination
+ return mounts
+
+ def get_container_binds(self, data_dir: str) -> List[List[str]]:
+ """
+ Get the bind mounts. Relative `source=...` paths will be located below
+ `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+ Example:
+ [
+ 'type=bind',
+ 'source=lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true'
+ ]
+ becomes
+ [
+ ...
+ 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
+ ...
+ ]
+ """
+ binds = self.bind_mounts.copy()
+ for bind in binds:
+ for index, value in enumerate(bind):
+ match = re.match(r'^source=(.+)$', value)
+ if match:
+ bind[index] = 'source={}'.format(os.path.join(
+ data_dir, match.group(1)))
+ return binds
+
+##################################
+
+
+def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
+ Path(file_path).touch()
+ if uid and gid:
+ os.chown(file_path, uid, gid)
+
+
+##################################
+
+
+def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
+ """
+ Helper function to get a key from a dictionary.
+ :param d: The dictionary to process.
+ :param key: The name of the key to get.
+ :param default: The default value in case the key does not
+ exist. Default is `None`.
+ :param require: Set to `True` if the key is required. An
+ exception will be raised if the key does not exist in
+ the given dictionary.
+ :return: Returns the value of the given key.
+    :raises: :exc:`Error` if the given key does not exist
+        and `require` is set to `True`.
+ """
+ if require and key not in d.keys():
+ raise Error('{} missing from dict'.format(key))
+ return d.get(key, default) # type: ignore
+
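+# Examples (illustrative): dict_get({'a': 1}, 'a') returns 1;
+# dict_get({}, 'a', default=2) returns 2; and
+# dict_get({}, 'a', require=True) raises Error.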
+##################################
+
+
+def dict_get_join(d: Dict[str, Any], key: str) -> Any:
+ """
+ Helper function to get the value of a given key from a dictionary.
+ `List` values will be converted to a string by joining them with a
+ line break.
+ :param d: The dictionary to process.
+ :param key: The name of the key to get.
+    :return: Returns the value of the given key. If it was a `list`, it
+        will be joined with a line break.
+ """
+ value = d.get(key)
+ if isinstance(value, list):
+ value = '\n'.join(map(str, value))
+ return value
+
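+# Example (illustrative): dict_get_join({'f': ['a', 'b']}, 'f') returns
+# 'a\nb', while a plain string value is returned unchanged.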
+##################################
+
+
+def get_supported_daemons():
+ # type: () -> List[str]
+ supported_daemons = list(Ceph.daemons)
+ supported_daemons.extend(Monitoring.components)
+ supported_daemons.append(NFSGanesha.daemon_type)
+ supported_daemons.append(CephIscsi.daemon_type)
+ supported_daemons.append(CephNvmeof.daemon_type)
+ supported_daemons.append(CustomContainer.daemon_type)
+ supported_daemons.append(HAproxy.daemon_type)
+ supported_daemons.append(Keepalived.daemon_type)
+ supported_daemons.append(CephadmAgent.daemon_type)
+ supported_daemons.append(SNMPGateway.daemon_type)
+ supported_daemons.extend(Tracing.components)
+ assert len(supported_daemons) == len(set(supported_daemons))
+ return supported_daemons
+
+##################################
+
+
+class PortOccupiedError(Error):
+ pass
+
+
+def attempt_bind(ctx, s, address, port):
+ # type: (CephadmContext, socket.socket, str, int) -> None
+ try:
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ s.bind((address, port))
+ except OSError as e:
+ if e.errno == errno.EADDRINUSE:
+ msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
+ logger.warning(msg)
+ raise PortOccupiedError(msg)
+ else:
+ raise e
+ except Exception as e:
+ raise Error(e)
+ finally:
+ s.close()
+
+
+def port_in_use(ctx: CephadmContext, endpoint: EndPoint) -> bool:
+ """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
+ logger.info('Verifying port %s ...' % str(endpoint))
+
+ def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
+ try:
+ s = socket.socket(af, socket.SOCK_STREAM)
+ attempt_bind(ctx, s, address, endpoint.port)
+ except PortOccupiedError:
+ return True
+ except OSError as e:
+ if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
+                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
+                # being tested here and one might intentionally be disabled.
+                # In that case no error should be raised.
+ return False
+ else:
+ raise e
+ return False
+
+ if endpoint.ip != '0.0.0.0' and endpoint.ip != '::':
+ if is_ipv6(endpoint.ip):
+ return _port_in_use(socket.AF_INET6, endpoint.ip)
+ else:
+ return _port_in_use(socket.AF_INET, endpoint.ip)
+
+ return any(_port_in_use(af, address) for af, address in (
+ (socket.AF_INET, '0.0.0.0'),
+ (socket.AF_INET6, '::')
+ ))
+
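+# Usage sketch (hypothetical endpoint): wildcard addresses are probed on
+# both address families, any other address only on its own family.
+#
+#     if port_in_use(ctx, EndPoint('0.0.0.0', 9100)):
+#         raise PortOccupiedError('port 9100 is already in use')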
+
+def check_ip_port(ctx, ep):
+ # type: (CephadmContext, EndPoint) -> None
+ if not ctx.skip_ping_check:
+ logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
+ if is_ipv6(ep.ip):
+ s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ ip = unwrap_ipv6(ep.ip)
+ else:
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ ip = ep.ip
+ attempt_bind(ctx, s, ip, ep.port)
+
+##################################
+
+
+# this is an abbreviated version of
+# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
+# that drops all of the compatibility (this is Unix/Linux only).
+
+class Timeout(TimeoutError):
+ """
+ Raised when the lock could not be acquired in *timeout*
+ seconds.
+ """
+
+ def __init__(self, lock_file: str) -> None:
+ #: The path of the file lock.
+ self.lock_file = lock_file
+ return None
+
+ def __str__(self) -> str:
+ temp = "The file lock '{}' could not be acquired."\
+ .format(self.lock_file)
+ return temp
+
+
+class _Acquire_ReturnProxy(object):
+ def __init__(self, lock: 'FileLock') -> None:
+ self.lock = lock
+ return None
+
+ def __enter__(self) -> 'FileLock':
+ return self.lock
+
+ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+ self.lock.release()
+ return None
+
+
+class FileLock(object):
+ def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
+ if not os.path.exists(LOCK_DIR):
+ os.mkdir(LOCK_DIR, 0o700)
+ self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
+ self.ctx = ctx
+
+ # The file descriptor for the *_lock_file* as it is returned by the
+ # os.open() function.
+ # This file lock is only NOT None, if the object currently holds the
+ # lock.
+ self._lock_file_fd: Optional[int] = None
+ self.timeout = timeout
+ # The lock counter is used for implementing the nested locking
+ # mechanism. Whenever the lock is acquired, the counter is increased and
+ # the lock is only released, when this value is 0 again.
+ self._lock_counter = 0
+ return None
+
+ @property
+ def is_locked(self) -> bool:
+ return self._lock_file_fd is not None
+
+ def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
+        """
+        Acquires the file lock or fails with a :exc:`Timeout` error.
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        :arg float timeout:
+            The maximum time waited for the file lock.
+            If ``timeout < 0``, there is no timeout and this method will
+            block until the lock can be acquired.
+            If ``timeout`` is None, the default :attr:`~timeout` is used.
+        :arg float poll_intervall:
+            We check once every *poll_intervall* seconds whether the file
+            lock can be acquired.
+        :raises Timeout:
+            if the lock could not be acquired in *timeout* seconds.
+
+        .. versionchanged:: 2.0.0
+            This method now returns a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+        """
+
+ # Use the default timeout, if no timeout is provided.
+ if timeout is None:
+ timeout = self.timeout
+
+ # Increment the number right at the beginning.
+ # We can still undo it, if something fails.
+ self._lock_counter += 1
+
+ lock_id = id(self)
+ lock_filename = self._lock_file
+ start_time = time.time()
+ try:
+ while True:
+ if not self.is_locked:
+ logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
+ lock_filename)
+ self._acquire()
+
+ if self.is_locked:
+ logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
+ lock_filename)
+ break
+ elif timeout >= 0 and time.time() - start_time > timeout:
+ logger.warning('Timeout acquiring lock %s on %s', lock_id,
+ lock_filename)
+ raise Timeout(self._lock_file)
+ else:
+ logger.log(
+ QUIET_LOG_LEVEL,
+ 'Lock %s not acquired on %s, waiting %s seconds ...',
+ lock_id, lock_filename, poll_intervall
+ )
+ time.sleep(poll_intervall)
+ except Exception:
+ # Something did go wrong, so decrement the counter.
+ self._lock_counter = max(0, self._lock_counter - 1)
+
+ raise
+ return _Acquire_ReturnProxy(lock=self)
+
+ def release(self, force: bool = False) -> None:
+ """
+ Releases the file lock.
+ Please note, that the lock is only completely released, if the lock
+ counter is 0.
+ Also note, that the lock file itself is not automatically deleted.
+ :arg bool force:
+ If true, the lock counter is ignored and the lock is released in
+ every case.
+ """
+ if self.is_locked:
+ self._lock_counter -= 1
+
+ if self._lock_counter == 0 or force:
+ # lock_id = id(self)
+ # lock_filename = self._lock_file
+
+ # Can't log in shutdown:
+ # File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
+ # NameError: name 'open' is not defined
+ # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
+ self._release()
+ self._lock_counter = 0
+ # logger.debug('Lock %s released on %s', lock_id, lock_filename)
+
+ return None
+
+ def __enter__(self) -> 'FileLock':
+ self.acquire()
+ return self
+
+ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+ self.release()
+ return None
+
+ def __del__(self) -> None:
+ self.release(force=True)
+ return None
+
+ def _acquire(self) -> None:
+ open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+ fd = os.open(self._lock_file, open_mode)
+
+ try:
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except (IOError, OSError):
+ os.close(fd)
+ else:
+ self._lock_file_fd = fd
+ return None
+
+ def _release(self) -> None:
+ # Do not remove the lockfile:
+ #
+ # https://github.com/benediktschmitt/py-filelock/issues/31
+ # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+ fd = self._lock_file_fd
+ self._lock_file_fd = None
+ fcntl.flock(fd, fcntl.LOCK_UN) # type: ignore
+ os.close(fd) # type: ignore
+ return None
+
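+# Usage sketch (hypothetical lock name):
+#
+#     lock = FileLock(ctx, 'my-cluster-fsid', timeout=30)
+#     with lock.acquire():
+#         ...  # critical section; the lock is released on exit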
+
+##################################
+# Popen wrappers, lifted from ceph-volume
+
+class CallVerbosity(Enum):
+ #####
+ # Format:
+ # Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
+ #
+    # NOTE: QUIET is a custom log level, only used when --verbose is passed
+ #####
+
+ # Normal Operation: None, Errors: None
+ SILENT = 0
+ # Normal Operation: QUIET, Error: QUIET
+ QUIET = 1
+ # Normal Operation: DEBUG, Error: DEBUG
+ DEBUG = 2
+ # Normal Operation: QUIET, Error: INFO
+ QUIET_UNLESS_ERROR = 3
+ # Normal Operation: DEBUG, Error: INFO
+ VERBOSE_ON_FAILURE = 4
+ # Normal Operation: INFO, Error: INFO
+ VERBOSE = 5
+
+ def success_log_level(self) -> int:
+ _verbosity_level_to_log_level = {
+ self.SILENT: 0,
+ self.QUIET: QUIET_LOG_LEVEL,
+ self.DEBUG: logging.DEBUG,
+ self.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL,
+ self.VERBOSE_ON_FAILURE: logging.DEBUG,
+ self.VERBOSE: logging.INFO
+ }
+ return _verbosity_level_to_log_level[self] # type: ignore
+
+ def error_log_level(self) -> int:
+ _verbosity_level_to_log_level = {
+ self.SILENT: 0,
+ self.QUIET: QUIET_LOG_LEVEL,
+ self.DEBUG: logging.DEBUG,
+ self.QUIET_UNLESS_ERROR: logging.INFO,
+ self.VERBOSE_ON_FAILURE: logging.INFO,
+ self.VERBOSE: logging.INFO
+ }
+ return _verbosity_level_to_log_level[self] # type: ignore
+
+
+# disable coverage for the next block. this is copy-n-paste
+# from other code for compatibility on older python versions
+if sys.version_info < (3, 8): # pragma: no cover
+ import itertools
+ import threading
+ import warnings
+ from asyncio import events
+
+ class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
+        """Threaded child watcher implementation.
+
+        The watcher uses a thread per process to wait for the process
+        to finish. It doesn't require a subscription to the POSIX
+        SIGCHLD signal, but creating a thread is not free.
+        The watcher has O(1) complexity; its performance doesn't depend
+        on the number of spawned processes.
+        """
+
+ def __init__(self) -> None:
+ self._pid_counter = itertools.count(0)
+ self._threads: Dict[Any, Any] = {}
+
+ def is_active(self) -> bool:
+ return True
+
+ def close(self) -> None:
+ self._join_threads()
+
+ def _join_threads(self) -> None:
+ """Internal: Join all non-daemon threads"""
+ threads = [thread for thread in list(self._threads.values())
+ if thread.is_alive() and not thread.daemon]
+ for thread in threads:
+ thread.join()
+
+ def __enter__(self) -> Any:
+ return self
+
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+ pass
+
+ def __del__(self, _warn: Any = warnings.warn) -> None:
+ threads = [thread for thread in list(self._threads.values())
+ if thread.is_alive()]
+ if threads:
+ _warn(f'{self.__class__} has registered but not finished child processes',
+ ResourceWarning,
+ source=self)
+
+ def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
+ loop = events.get_event_loop()
+ thread = threading.Thread(target=self._do_waitpid,
+ name=f'waitpid-{next(self._pid_counter)}',
+ args=(loop, pid, callback, args),
+ daemon=True)
+ self._threads[pid] = thread
+ thread.start()
+
+ def remove_child_handler(self, pid: Any) -> bool:
+            # asyncio never calls remove_child_handler() !!!
+            # The method is a no-op but is implemented because the
+            # abstract base class requires it
+ return True
+
+ def attach_loop(self, loop: Any) -> None:
+ pass
+
+ def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
+ assert expected_pid > 0
+
+ try:
+ pid, status = os.waitpid(expected_pid, 0)
+ except ChildProcessError:
+ # The child process is already reaped
+ # (may happen if waitpid() is called elsewhere).
+ pid = expected_pid
+ returncode = 255
+ logger.warning(
+ 'Unknown child process pid %d, will report returncode 255',
+ pid)
+ else:
+ if os.WIFEXITED(status):
+ returncode = os.WEXITSTATUS(status)
+ elif os.WIFSIGNALED(status):
+ returncode = -os.WTERMSIG(status)
+ else:
+ raise ValueError(f'unknown wait status {status}')
+ if loop.get_debug():
+ logger.debug('process %s exited with returncode %s',
+ expected_pid, returncode)
+
+ if loop.is_closed():
+ logger.warning('Loop %r that handles pid %r is closed', loop, pid)
+ else:
+ loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+ self._threads.pop(expected_pid)
+
+    # unlike SafeChildWatcher, which handles SIGCHLD in the main thread,
+    # ThreadedChildWatcher runs in a separate thread, which allows us to
+    # run create_subprocess_exec() in a non-main thread, see
+    # https://bugs.python.org/issue35621
+ asyncio.set_child_watcher(ThreadedChildWatcher())
+
+
+try:
+ from asyncio import run as async_run # type: ignore[attr-defined]
+except ImportError: # pragma: no cover
+    # disable coverage for this block. it should be a copy-n-paste
+    # from newer libs for compatibility on older python versions
+ def async_run(coro): # type: ignore
+ loop = asyncio.new_event_loop()
+ try:
+ asyncio.set_event_loop(loop)
+ return loop.run_until_complete(coro)
+ finally:
+ try:
+ loop.run_until_complete(loop.shutdown_asyncgens())
+ finally:
+ asyncio.set_event_loop(None)
+ loop.close()
+
+
+def call(ctx: CephadmContext,
+ command: List[str],
+ desc: Optional[str] = None,
+ verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
+ timeout: Optional[int] = DEFAULT_TIMEOUT,
+ **kwargs: Any) -> Tuple[str, str, int]:
+ """
+ Wrap subprocess.Popen to
+
+ - log stdout/stderr to a logger,
+ - decode utf-8
+ - cleanly return out, err, returncode
+
+ :param timeout: timeout in seconds
+ """
+
+ prefix = command[0] if desc is None else desc
+ if prefix:
+ prefix += ': '
+ timeout = timeout or ctx.timeout
+
+ async def run_with_timeout() -> Tuple[str, str, int]:
+ process = await asyncio.create_subprocess_exec(
+ *command,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ env=os.environ.copy())
+ assert process.stdout
+ assert process.stderr
+ try:
+ stdout, stderr = await asyncio.wait_for(
+ process.communicate(),
+ timeout,
+ )
+ except asyncio.TimeoutError:
+ # try to terminate the process assuming it is still running. It's
+ # possible that even after killing the process it will not
+ # complete, particularly if it is D-state. If that happens the
+ # process.wait call will block, but we're no worse off than before
+ # when the timeout did not work. Additionally, there are other
+ # corner-cases we could try and handle here but we decided to start
+ # simple.
+ process.kill()
+ await process.wait()
+ logger.info(prefix + f'timeout after {timeout} seconds')
+ return '', '', 124
+ else:
+ assert process.returncode is not None
+ return (
+ stdout.decode('utf-8'),
+ stderr.decode('utf-8'),
+ process.returncode,
+ )
+
+ stdout, stderr, returncode = async_run(run_with_timeout())
+ log_level = verbosity.success_log_level()
+ if returncode != 0:
+ log_level = verbosity.error_log_level()
+ logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
+ for line in stdout.splitlines():
+ logger.log(log_level, prefix + 'stdout ' + line)
+ for line in stderr.splitlines():
+ logger.log(log_level, prefix + 'stderr ' + line)
+ return stdout, stderr, returncode
+
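+# Usage sketch (hypothetical command):
+#
+#     out, err, code = call(ctx, ['ls', '/var/lib/ceph'],
+#                           verbosity=CallVerbosity.QUIET)
+#     if code != 0:
+#         ...  # output was already logged at the error log level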
+
+def call_throws(
+ ctx: CephadmContext,
+ command: List[str],
+ desc: Optional[str] = None,
+ verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
+ timeout: Optional[int] = DEFAULT_TIMEOUT,
+ **kwargs: Any) -> Tuple[str, str, int]:
+ out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
+ if ret:
+ for s in (out, err):
+ if s.strip() and len(s.splitlines()) <= 2: # readable message?
+ raise RuntimeError(f'Failed command: {" ".join(command)}: {s}')
+ raise RuntimeError('Failed command: %s' % ' '.join(command))
+ return out, err, ret
+
+
+def call_timeout(ctx, command, timeout):
+ # type: (CephadmContext, List[str], int) -> int
+ logger.debug('Running command (timeout=%s): %s'
+ % (timeout, ' '.join(command)))
+
+ def raise_timeout(command, timeout):
+ # type: (List[str], int) -> NoReturn
+ msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
+ logger.debug(msg)
+ raise TimeoutExpired(msg)
+
+ try:
+ return subprocess.call(command, timeout=timeout, env=os.environ.copy())
+ except subprocess.TimeoutExpired:
+ raise_timeout(command, timeout)
+
+##################################
+
+
+def json_loads_retry(cli_func: Callable[[], str]) -> Any:
+ for sleep_secs in [1, 4, 4]:
+ try:
+ return json.loads(cli_func())
+ except json.JSONDecodeError:
+ logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
+ time.sleep(sleep_secs)
+ return json.loads(cli_func())
+
+
+def is_available(ctx, what, func):
+ # type: (CephadmContext, str, Callable[[], bool]) -> None
+ """
+ Wait for a service to become available
+
+ :param what: the name of the service
+ :param func: the callable object that determines availability
+ """
+ retry = ctx.retry
+ logger.info('Waiting for %s...' % what)
+ num = 1
+ while True:
+ if func():
+ logger.info('%s is available'
+ % what)
+ break
+ elif num > retry:
+ raise Error('%s not available after %s tries'
+ % (what, retry))
+
+ logger.info('%s not available, waiting (%s/%s)...'
+ % (what, num, retry))
+
+ num += 1
+ time.sleep(2)
+
+
+def read_config(fn):
+ # type: (Optional[str]) -> ConfigParser
+ cp = ConfigParser()
+ if fn:
+ cp.read(fn)
+ return cp
+
+
+def pathify(p):
+ # type: (str) -> str
+ p = os.path.expanduser(p)
+ return os.path.abspath(p)
+
+
+def get_file_timestamp(fn):
+ # type: (str) -> Optional[str]
+ try:
+ mt = os.path.getmtime(fn)
+ return datetime.datetime.fromtimestamp(
+ mt, tz=datetime.timezone.utc
+ ).strftime(DATEFMT)
+ except Exception:
+ return None
+
+
+def try_convert_datetime(s):
+ # type: (str) -> Optional[str]
+ # This is super irritating because
+ # 1) podman and docker use different formats
+ # 2) python's strptime can't parse either one
+ #
+ # I've seen:
+ # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
+ # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
+ # 2020-03-03 15:52:30.136257504 -0600 CST
+ # (In the podman case, there is a different string format for
+ # 'inspect' and 'inspect --format {{.Created}}'!!)
+
+    # In *all* cases, the 9-digit sub-second precision is too much for
+    # python's strptime. Shorten it to 6 digits.
+ p = re.compile(r'(\.[\d]{6})[\d]*')
+ s = p.sub(r'\1', s)
+
+ # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
+ if s and s[-1] == 'Z':
+ s = s[:-1] + '-0000'
+
+ # cut off the redundant 'CST' part that strptime can't parse, if
+ # present.
+ v = s.split(' ')
+ s = ' '.join(v[0:3])
+
+ # try parsing with several format strings
+ fmts = [
+ '%Y-%m-%dT%H:%M:%S.%f%z',
+ '%Y-%m-%d %H:%M:%S.%f %z',
+ ]
+ for f in fmts:
+ try:
+ # return timestamp normalized to UTC, rendered as DATEFMT.
+ return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
+ except ValueError:
+ pass
+ return None
+
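+# Illustrative behavior: each supported input is normalized to UTC and
+# rendered with DATEFMT, e.g.
+#   try_convert_datetime('2020-03-03T09:21:43.636153304Z')       # docker
+#   try_convert_datetime('2020-03-03T15:52:30.136257504-06:00')  # podman
+# both return DATEFMT strings in UTC; unparseable input returns None.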
+
+def _parse_podman_version(version_str):
+ # type: (str) -> Tuple[int, ...]
+ def to_int(val: str, org_e: Optional[Exception] = None) -> int:
+ if not val and org_e:
+ raise org_e
+ try:
+ return int(val)
+ except ValueError as e:
+ return to_int(val[0:-1], org_e or e)
+
+ return tuple(map(to_int, version_str.split('.')))
+
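+# Illustrative: trailing non-numeric characters in a version component are
+# stripped one at a time, so a hypothetical '2.1.0-dev' parses as (2, 1, 0)
+# and '3.4.2' as (3, 4, 2); a fully non-numeric component re-raises the
+# original ValueError.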
+
+def get_hostname():
+ # type: () -> str
+ return socket.gethostname()
+
+
+def get_short_hostname():
+ # type: () -> str
+ return get_hostname().split('.', 1)[0]
+
+
+def get_fqdn():
+ # type: () -> str
+ return socket.getfqdn() or socket.gethostname()
+
+
+def get_ip_addresses(hostname: str) -> Tuple[List[str], List[str]]:
+ items = socket.getaddrinfo(hostname, None,
+ flags=socket.AI_CANONNAME,
+ type=socket.SOCK_STREAM)
+ ipv4_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET]
+ ipv6_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET6]
+ return ipv4_addresses, ipv6_addresses
+
+
+def get_arch():
+ # type: () -> str
+ return platform.uname().machine
+
+
+def generate_service_id():
+ # type: () -> str
+ return get_short_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
+ for _ in range(6))
+
+
+def generate_password():
+ # type: () -> str
+ return ''.join(random.choice(string.ascii_lowercase + string.digits)
+ for i in range(10))
+
+
+def normalize_container_id(i):
+ # type: (str) -> str
+    # docker adds the sha256: prefix, but AFAICS both
+    # docker (18.09.7 in bionic at least) and podman
+    # always use sha256, so leave off the prefix
+    # for consistency.
+ prefix = 'sha256:'
+ if i.startswith(prefix):
+ i = i[len(prefix):]
+ return i
+
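+# Illustrative: normalize_container_id('sha256:1a2b3c...') -> '1a2b3c...';
+# an id without the prefix is returned unchanged.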
+
+def make_fsid():
+ # type: () -> str
+ return str(uuid.uuid1())
+
+
+def is_fsid(s):
+ # type: (str) -> bool
+ try:
+ uuid.UUID(s)
+ except ValueError:
+ return False
+ return True
+
+
+def validate_fsid(func: FuncT) -> FuncT:
+ @wraps(func)
+ def _validate_fsid(ctx: CephadmContext) -> Any:
+ if 'fsid' in ctx and ctx.fsid:
+ if not is_fsid(ctx.fsid):
+ raise Error('not an fsid: %s' % ctx.fsid)
+ return func(ctx)
+ return cast(FuncT, _validate_fsid)
+
+
+def infer_fsid(func: FuncT) -> FuncT:
+ """
+ If we only find a single fsid in /var/lib/ceph/*, use that
+ """
+ @infer_config
+ @wraps(func)
+ def _infer_fsid(ctx: CephadmContext) -> Any:
+ if 'fsid' in ctx and ctx.fsid:
+ logger.debug('Using specified fsid: %s' % ctx.fsid)
+ return func(ctx)
+
+ fsids = set()
+
+ cp = read_config(ctx.config)
+ if cp.has_option('global', 'fsid'):
+ fsids.add(cp.get('global', 'fsid'))
+
+ daemon_list = list_daemons(ctx, detail=False)
+ for daemon in daemon_list:
+ if not is_fsid(daemon['fsid']):
+ # 'unknown' fsid
+ continue
+ elif 'name' not in ctx or not ctx.name:
+ # ctx.name not specified
+ fsids.add(daemon['fsid'])
+ elif daemon['name'] == ctx.name:
+ # ctx.name is a match
+ fsids.add(daemon['fsid'])
+ fsids = sorted(fsids)
+
+ if not fsids:
+ # some commands do not always require an fsid
+ pass
+ elif len(fsids) == 1:
+ logger.info('Inferring fsid %s' % fsids[0])
+ ctx.fsid = fsids[0]
+ else:
+ raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
+ return func(ctx)
+
+ return cast(FuncT, _infer_fsid)
+
+
+def infer_config(func: FuncT) -> FuncT:
+ """
+ Infer the cluster configuration using the following priority order:
+    1- if the user has provided a custom conf file (-c option), use it
+    2- otherwise, if a daemon --name has been provided, use that daemon's conf
+    3- otherwise, find the mon daemon conf file and use it (if v1)
+    4- otherwise, if the {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists, use it
+    5- finally: fall back to the default file /etc/ceph/ceph.conf
+ """
+ @wraps(func)
+ def _infer_config(ctx: CephadmContext) -> Any:
+
+ def config_path(daemon_type: str, daemon_name: str) -> str:
+ data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
+ return os.path.join(data_dir, 'config')
+
+ def get_mon_daemon_name(fsid: str) -> Optional[str]:
+ daemon_list = list_daemons(ctx, detail=False)
+ for daemon in daemon_list:
+ if (
+ daemon.get('name', '').startswith('mon.')
+ and daemon.get('fsid', '') == fsid
+ and daemon.get('style', '') == 'cephadm:v1'
+ and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
+ ):
+ return daemon['name']
+ return None
+
+ ctx.config = ctx.config if 'config' in ctx else None
+ # check if user has provided conf by using -c option
+ if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
+ logger.debug(f'Using specified config: {ctx.config}')
+ return func(ctx)
+
+ if 'fsid' in ctx and ctx.fsid:
+ name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
+ if name is not None:
+ # daemon name has been specified (or inferred from mon), let's use its conf
+ ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
+ else:
+ # no daemon, in case the cluster has a config dir then use it
+ ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
+ if os.path.exists(ceph_conf):
+ ctx.config = ceph_conf
+
+ if ctx.config:
+ logger.info(f'Inferring config {ctx.config}')
+ elif os.path.exists(CEPH_DEFAULT_CONF):
+ logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
+ ctx.config = CEPH_DEFAULT_CONF
+ return func(ctx)
+
+ return cast(FuncT, _infer_config)
+
+
+def _get_default_image(ctx: CephadmContext) -> str:
+ if DEFAULT_IMAGE_IS_MAIN:
+ warn = """This is a development version of cephadm.
+For information regarding the latest stable release:
+ https://docs.ceph.com/docs/{}/cephadm/install
+""".format(LATEST_STABLE_RELEASE)
+ for line in warn.splitlines():
+ logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
+ return DEFAULT_IMAGE
+
+
+def infer_image(func: FuncT) -> FuncT:
+ """
+ Use the most recent ceph image
+ """
+ @wraps(func)
+ def _infer_image(ctx: CephadmContext) -> Any:
+ if not ctx.image:
+ ctx.image = os.environ.get('CEPHADM_IMAGE')
+ if not ctx.image:
+ ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
+ if not ctx.image:
+ ctx.image = _get_default_image(ctx)
+ return func(ctx)
+
+ return cast(FuncT, _infer_image)
+
+
+def require_image(func: FuncT) -> FuncT:
+ """
+ Require the global --image flag to be set
+ """
+ @wraps(func)
+ def _require_image(ctx: CephadmContext) -> Any:
+ if not ctx.image:
+ raise Error('This command requires the global --image option to be set')
+ return func(ctx)
+
+ return cast(FuncT, _require_image)
+
+
+def default_image(func: FuncT) -> FuncT:
+ @wraps(func)
+ def _default_image(ctx: CephadmContext) -> Any:
+ update_default_image(ctx)
+ return func(ctx)
+
+ return cast(FuncT, _default_image)
+
+
+def update_default_image(ctx: CephadmContext) -> None:
+ if getattr(ctx, 'image', None):
+ return
+ ctx.image = None # ensure ctx.image exists to avoid repeated `getattr`s
+ name = getattr(ctx, 'name', None)
+ if name:
+ type_ = name.split('.', 1)[0]
+ if type_ in Monitoring.components:
+ ctx.image = Monitoring.components[type_]['image']
+ if type_ == 'haproxy':
+ ctx.image = HAproxy.default_image
+ if type_ == 'keepalived':
+ ctx.image = Keepalived.default_image
+ if type_ == SNMPGateway.daemon_type:
+ ctx.image = SNMPGateway.default_image
+ if type_ == CephNvmeof.daemon_type:
+ ctx.image = CephNvmeof.default_image
+ if type_ in Tracing.components:
+ ctx.image = Tracing.components[type_]['image']
+ if not ctx.image:
+ ctx.image = os.environ.get('CEPHADM_IMAGE')
+ if not ctx.image:
+ ctx.image = _get_default_image(ctx)
+
+
+def executes_early(func: FuncT) -> FuncT:
+ """Decorator that indicates the command function is meant to have no
+ dependencies and no environmental requirements and can therefore be
+ executed as non-root and with no logging, etc. Commands that have this
+ decorator applied must be simple and self-contained.
+ """
+ cast(Any, func)._execute_early = True
+ return func
+
+
+def deprecated_command(func: FuncT) -> FuncT:
+ @wraps(func)
+ def _deprecated_command(ctx: CephadmContext) -> Any:
+ logger.warning(f'Deprecated command used: {func}')
+ if NO_DEPRECATED:
+ raise Error('running deprecated commands disabled')
+ return func(ctx)
+
+ return cast(FuncT, _deprecated_command)
+
+
+def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
+ """
+ :param ctx: Cephadm context
+ :param daemon_filter: daemon name or type
+ :param by_name: must be set to True if daemon name is provided
+ :return: Container information or None
+ """
+ def daemon_name_or_type(daemon: Dict[str, str]) -> str:
+ return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]
+
+ if by_name and '.' not in daemon_filter:
+ logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
+ return None
+ daemons = list_daemons(ctx, detail=False)
+ matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
+ if matching_daemons:
+ d_type, d_id = matching_daemons[0]['name'].split('.', 1)
+ out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
+ if not code:
+ (container_id, image_name, image_id, start, version) = out.strip().split(',')
+ return ContainerInfo(container_id, image_name, image_id, start, version)
+ return None
+
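+# Example usage (illustrative): get_container_info(ctx, 'mon.host1', True)
+# returns the ContainerInfo for that daemon's container (matching ctx.fsid),
+# or None when no such container is found.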
+
+def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
+ """
+ Infer the local ceph image based on the following priority criteria:
+ 1- the image specified by --image arg (if provided).
+ 2- the same image as the daemon container specified by --name arg (if provided).
+ 3- image used by any ceph container running on the host. In this case we use daemon types.
+    4- if no container is found then we use the most recent ceph image on the host.
+
+    Note: any selected container must match the fsid inferred previously.
+
+ :return: The most recent local ceph image (already pulled)
+ """
+ # '|' special character is used to separate the output fields into:
+ # - Repository@digest
+ # - Image Id
+ # - Image Tag
+ # - Image creation date
+ out, _, _ = call_throws(ctx,
+ [container_path, 'images',
+ '--filter', 'label=ceph=True',
+ '--filter', 'dangling=false',
+ '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])
+
+ container_info = None
+ daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
+ daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons # daemon types: 'mon', 'mgr', etc
+ for daemon in daemons_ls:
+ container_info = get_container_info(ctx, daemon, daemon_name is not None)
+ if container_info is not None:
+ logger.debug(f"Using container info for daemon '{daemon}'")
+ break
+
+ for image in out.splitlines():
+ if image and not image.isspace():
+ (digest, image_id, tag, created_date) = image.lstrip().split('|')
+ if container_info is not None and image_id not in container_info.image_id:
+ continue
+ if digest and not digest.endswith('@'):
+ logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
+ return digest
+ return None
+
+
+def write_tmp(s, uid, gid):
+ # type: (str, int, int) -> IO[str]
+ tmp_f = tempfile.NamedTemporaryFile(mode='w',
+ prefix='ceph-tmp')
+ os.fchown(tmp_f.fileno(), uid, gid)
+ tmp_f.write(s)
+ tmp_f.flush()
+
+ return tmp_f
+
+
+def makedirs(dir, uid, gid, mode):
+ # type: (str, int, int, int) -> None
+ if not os.path.exists(dir):
+ os.makedirs(dir, mode=mode)
+ else:
+ os.chmod(dir, mode)
+ os.chown(dir, uid, gid)
+ os.chmod(dir, mode) # the above is masked by umask...
+
+
+def get_data_dir(fsid, data_dir, t, n):
+ # type: (str, str, str, Union[int, str]) -> str
+ return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
+
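+# Illustrative: get_data_dir('<fsid>', '/var/lib/ceph', 'mon', 'host1')
+#   -> '/var/lib/ceph/<fsid>/mon.host1'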
+
+def get_log_dir(fsid, log_dir):
+ # type: (str, str) -> str
+ return os.path.join(log_dir, fsid)
+
+
+def make_data_dir_base(fsid, data_dir, uid, gid):
+ # type: (str, str, int, int) -> str
+ data_dir_base = os.path.join(data_dir, fsid)
+ makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
+ makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
+ makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
+ DATA_DIR_MODE)
+ return data_dir_base
+
+
+def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
+ # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
+ if uid is None or gid is None:
+ uid, gid = extract_uid_gid(ctx)
+ make_data_dir_base(fsid, ctx.data_dir, uid, gid)
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ makedirs(data_dir, uid, gid, DATA_DIR_MODE)
+ return data_dir
+
+
+def make_log_dir(ctx, fsid, uid=None, gid=None):
+ # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
+ if uid is None or gid is None:
+ uid, gid = extract_uid_gid(ctx)
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ makedirs(log_dir, uid, gid, LOG_DIR_MODE)
+ return log_dir
+
+
+def make_var_run(ctx, fsid, uid, gid):
+ # type: (CephadmContext, str, int, int) -> None
+ call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
+ '/var/run/ceph/%s' % fsid])
+
+
+def copy_tree(ctx, src, dst, uid=None, gid=None):
+ # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+ """
+ Copy a directory tree from src to dst
+ """
+ if uid is None or gid is None:
+ (uid, gid) = extract_uid_gid(ctx)
+
+ for src_dir in src:
+ dst_dir = dst
+ if os.path.isdir(dst):
+ dst_dir = os.path.join(dst, os.path.basename(src_dir))
+
+ logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
+ shutil.rmtree(dst_dir, ignore_errors=True)
+ shutil.copytree(src_dir, dst_dir) # dirs_exist_ok needs python 3.8
+
+ for dirpath, dirnames, filenames in os.walk(dst_dir):
+ logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
+ os.chown(dirpath, uid, gid)
+ for filename in filenames:
+ logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
+ os.chown(os.path.join(dirpath, filename), uid, gid)
+
+
+def copy_files(ctx, src, dst, uid=None, gid=None):
+ # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+ """
+    Copy files from src to dst
+ """
+ if uid is None or gid is None:
+ (uid, gid) = extract_uid_gid(ctx)
+
+ for src_file in src:
+ dst_file = dst
+ if os.path.isdir(dst):
+ dst_file = os.path.join(dst, os.path.basename(src_file))
+
+ logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
+ shutil.copyfile(src_file, dst_file)
+
+ logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
+ os.chown(dst_file, uid, gid)
+
+
+def move_files(ctx, src, dst, uid=None, gid=None):
+ # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
+ """
+ Move files from src to dst
+ """
+ if uid is None or gid is None:
+ (uid, gid) = extract_uid_gid(ctx)
+
+ for src_file in src:
+ dst_file = dst
+ if os.path.isdir(dst):
+ dst_file = os.path.join(dst, os.path.basename(src_file))
+
+ if os.path.islink(src_file):
+ # shutil.move() in py2 does not handle symlinks correctly
+ src_rl = os.readlink(src_file)
+ logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
+ os.symlink(src_rl, dst_file)
+ os.unlink(src_file)
+ else:
+ logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
+ shutil.move(src_file, dst_file)
+ logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
+ os.chown(dst_file, uid, gid)
+
+
+def recursive_chown(path: str, uid: int, gid: int) -> None:
+ for dirpath, dirnames, filenames in os.walk(path):
+ os.chown(dirpath, uid, gid)
+ for filename in filenames:
+ os.chown(os.path.join(dirpath, filename), uid, gid)
+
+
+# copied from distutils
+def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
+ """Tries to find 'executable' in the directories listed in 'path'.
+ A string listing directories separated by 'os.pathsep'; defaults to
+ os.environ['PATH']. Returns the complete filename or None if not found.
+ """
+ _, ext = os.path.splitext(executable)
+ if (sys.platform == 'win32') and (ext != '.exe'):
+ executable = executable + '.exe' # pragma: no cover
+
+ if os.path.isfile(executable):
+ return executable
+
+ if path is None:
+ path = os.environ.get('PATH', None)
+ if path is None:
+ try:
+ path = os.confstr('CS_PATH')
+ except (AttributeError, ValueError):
+ # os.confstr() or CS_PATH is not available
+ path = os.defpath
+ # bpo-35755: Don't use os.defpath if the PATH environment variable is
+ # set to an empty string
+
+ # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+ if not path:
+ return None
+
+ paths = path.split(os.pathsep)
+ for p in paths:
+ f = os.path.join(p, executable)
+ if os.path.isfile(f):
+ # the file exists, we have a shot at spawn working
+ return f
+ return None
+
+
+def find_program(filename):
+ # type: (str) -> str
+ name = find_executable(filename)
+ if name is None:
+ raise ValueError('%s not found' % filename)
+ return name
+
+
+def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
+ if ctx.docker:
+ return Docker()
+ else:
+ for i in CONTAINER_PREFERENCE:
+ try:
+ return i()
+ except Exception:
+ pass
+ return None
+
+
+def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
+ engine = ctx.container_engine
+ if not isinstance(engine, CONTAINER_PREFERENCE):
+ # See https://github.com/python/mypy/issues/8993
+ exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE] # type: ignore
+ raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
+ elif isinstance(engine, Podman):
+ engine.get_version(ctx)
+ if engine.version < MIN_PODMAN_VERSION:
+ raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
+ return engine
+
+
+def get_unit_name(fsid, daemon_type, daemon_id=None):
+ # type: (str, str, Optional[Union[int, str]]) -> str
+ # accept either name or type + id
+ if daemon_id is not None:
+ return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+ else:
+ return 'ceph-%s@%s' % (fsid, daemon_type)
+
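+# Illustrative: get_unit_name('<fsid>', 'mon', 'host1') yields
+# 'ceph-<fsid>@mon.host1', matching the templated systemd unit naming
+# used throughout this file.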
+
+def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
+ daemon = get_daemon_description(ctx, fsid, name)
+ try:
+ return daemon['systemd_unit']
+ except KeyError:
+ raise Error('Failed to get unit name for {}'.format(daemon))
+
+
+def check_unit(ctx, unit_name):
+ # type: (CephadmContext, str) -> Tuple[bool, str, bool]
+ # NOTE: we ignore the exit code here because systemctl outputs
+ # various exit codes based on the state of the service, but the
+ # string result is more explicit (and sufficient).
+ enabled = False
+ installed = False
+ try:
+ out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
+ verbosity=CallVerbosity.QUIET)
+ if code == 0:
+ enabled = True
+ installed = True
+ elif 'disabled' in out:
+ installed = True
+ except Exception as e:
+ logger.warning('unable to run systemctl: %s' % e)
+ enabled = False
+ installed = False
+
+ state = 'unknown'
+ try:
+ out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
+ verbosity=CallVerbosity.QUIET)
+ out = out.strip()
+ if out in ['active']:
+ state = 'running'
+ elif out in ['inactive']:
+ state = 'stopped'
+ elif out in ['failed', 'auto-restart']:
+ state = 'error'
+ else:
+ state = 'unknown'
+ except Exception as e:
+ logger.warning('unable to run systemctl: %s' % e)
+ state = 'unknown'
+ return (enabled, state, installed)
+
+
+def check_units(ctx, units, enabler=None):
+ # type: (CephadmContext, List[str], Optional[Packager]) -> bool
+ for u in units:
+ (enabled, state, installed) = check_unit(ctx, u)
+ if enabled and state == 'running':
+ logger.info('Unit %s is enabled and running' % u)
+ return True
+ if enabler is not None:
+ if installed:
+ logger.info('Enabling unit %s' % u)
+ enabler.enable_service(u)
+ return False
+
+
+def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
+ if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
+ # these are non-containerized daemon types
+ return False
+ return bool(get_running_container_name(ctx, c))
+
+
+def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
+ for name in [c.cname, c.old_cname]:
+ out, err, ret = call(ctx, [
+ ctx.container_engine.path, 'container', 'inspect',
+ '--format', '{{.State.Status}}', name
+ ])
+ if out.strip() == 'running':
+ return name
+ return None
+
+
+def get_legacy_config_fsid(cluster, legacy_dir=None):
+ # type: (str, Optional[str]) -> Optional[str]
+ config_file = '/etc/ceph/%s.conf' % cluster
+ if legacy_dir is not None:
+ config_file = os.path.abspath(legacy_dir + config_file)
+
+ if os.path.exists(config_file):
+ config = read_config(config_file)
+ if config.has_section('global') and config.has_option('global', 'fsid'):
+ return config.get('global', 'fsid')
+ return None
+
+
+def get_legacy_daemon_fsid(ctx, cluster,
+ daemon_type, daemon_id, legacy_dir=None):
+ # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
+ fsid = None
+ if daemon_type == 'osd':
+ try:
+ fsid_file = os.path.join(ctx.data_dir,
+ daemon_type,
+ 'ceph-%s' % daemon_id,
+ 'ceph_fsid')
+ if legacy_dir is not None:
+ fsid_file = os.path.abspath(legacy_dir + fsid_file)
+ with open(fsid_file, 'r') as f:
+ fsid = f.read().strip()
+ except IOError:
+ pass
+ if not fsid:
+ fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
+ return fsid
+
+
+def should_log_to_journald(ctx: CephadmContext) -> bool:
+ if ctx.log_to_journald is not None:
+ return ctx.log_to_journald
+ return isinstance(ctx.container_engine, Podman) and \
+ ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
+
+
+def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
+ # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
+ r = list() # type: List[str]
+
+ if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
+ r += [
+ '--setuser', 'ceph',
+ '--setgroup', 'ceph',
+ '--default-log-to-file=false',
+ ]
+ log_to_journald = should_log_to_journald(ctx)
+ if log_to_journald:
+ r += [
+ '--default-log-to-journald=true',
+ '--default-log-to-stderr=false',
+ ]
+ else:
+ r += [
+ '--default-log-to-stderr=true',
+ '--default-log-stderr-prefix=debug ',
+ ]
+ if daemon_type == 'mon':
+ r += [
+ '--default-mon-cluster-log-to-file=false',
+ ]
+ if log_to_journald:
+ r += [
+ '--default-mon-cluster-log-to-journald=true',
+ '--default-mon-cluster-log-to-stderr=false',
+ ]
+ else:
+ r += ['--default-mon-cluster-log-to-stderr=true']
+ elif daemon_type in Monitoring.components:
+ metadata = Monitoring.components[daemon_type]
+ r += metadata.get('args', list())
+        # set the ip and port to bind to for node-exporter, alertmanager, prometheus
+ if daemon_type not in ['grafana', 'loki', 'promtail']:
+ ip = ''
+ port = Monitoring.port_map[daemon_type][0]
+ meta = fetch_meta(ctx)
+ if meta:
+ if 'ip' in meta and meta['ip']:
+ ip = meta['ip']
+ if 'ports' in meta and meta['ports']:
+ port = meta['ports'][0]
+ r += [f'--web.listen-address={ip}:{port}']
+ if daemon_type == 'prometheus':
+ config = fetch_configs(ctx)
+ retention_time = config.get('retention_time', '15d')
+ retention_size = config.get('retention_size', '0') # default to disabled
+ r += [f'--storage.tsdb.retention.time={retention_time}']
+ r += [f'--storage.tsdb.retention.size={retention_size}']
+ scheme = 'http'
+ host = get_fqdn()
+            # in case the host is not an fqdn, we use the IP to
+            # avoid producing a broken web.external-url link
+ if '.' not in host:
+ ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
+ # use the first ipv4 (if any) otherwise use the first ipv6
+ addr = next(iter(ipv4_addrs or ipv6_addrs), None)
+ host = wrap_ipv6(addr) if addr else host
+ r += [f'--web.external-url={scheme}://{host}:{port}']
+ if daemon_type == 'alertmanager':
+ config = fetch_configs(ctx)
+ peers = config.get('peers', list()) # type: ignore
+ for peer in peers:
+ r += ['--cluster.peer={}'.format(peer)]
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+            # some alertmanager versions, by default, look elsewhere for a config
+ r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+ if daemon_type == 'promtail':
+ r += ['--config.expand-env']
+ if daemon_type == 'prometheus':
+ config = fetch_configs(ctx)
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+ if daemon_type == 'node-exporter':
+ config = fetch_configs(ctx)
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+ r += ['--path.procfs=/host/proc',
+ '--path.sysfs=/host/sys',
+ '--path.rootfs=/rootfs']
+ elif daemon_type == 'jaeger-agent':
+ r.extend(Tracing.components[daemon_type]['daemon_args'])
+ elif daemon_type == NFSGanesha.daemon_type:
+ nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+ r += nfs_ganesha.get_daemon_args()
+ elif daemon_type == CephExporter.daemon_type:
+ ceph_exporter = CephExporter.init(ctx, fsid, daemon_id)
+ r.extend(ceph_exporter.get_daemon_args())
+ elif daemon_type == HAproxy.daemon_type:
+ haproxy = HAproxy.init(ctx, fsid, daemon_id)
+ r += haproxy.get_daemon_args()
+ elif daemon_type == CustomContainer.daemon_type:
+ cc = CustomContainer.init(ctx, fsid, daemon_id)
+ r.extend(cc.get_daemon_args())
+ elif daemon_type == SNMPGateway.daemon_type:
+ sc = SNMPGateway.init(ctx, fsid, daemon_id)
+ r.extend(sc.get_daemon_args())
+
+ return r
+
+
+def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
+ config=None, keyring=None):
+ # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
+ data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
+
+ if daemon_type in Ceph.daemons:
+ make_log_dir(ctx, fsid, uid=uid, gid=gid)
+
+ if config:
+ config_path = os.path.join(data_dir, 'config')
+ with write_new(config_path, owner=(uid, gid)) as f:
+ f.write(config)
+
+ if keyring:
+ keyring_path = os.path.join(data_dir, 'keyring')
+ with write_new(keyring_path, owner=(uid, gid)) as f:
+ f.write(keyring)
+
+ if daemon_type in Monitoring.components.keys():
+ config_json = fetch_configs(ctx)
+
+ # Set up directories specific to the monitoring component
+ config_dir = ''
+ data_dir_root = ''
+ if daemon_type == 'prometheus':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/prometheus'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+ recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
+ recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
+ elif daemon_type == 'grafana':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/grafana'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+ touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
+ elif daemon_type == 'alertmanager':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/alertmanager'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
+ elif daemon_type == 'promtail':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/promtail'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+ elif daemon_type == 'loki':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/loki'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+ elif daemon_type == 'node-exporter':
+ data_dir_root = get_data_dir(fsid, ctx.data_dir,
+ daemon_type, daemon_id)
+ config_dir = 'etc/node-exporter'
+ makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+ recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
+
+ # populate the config directory for the component from the config-json
+ if 'files' in config_json:
+ for fname in config_json['files']:
+                # work around mypy weirdness where it thinks `str`s aren't Anys
+ # when used for dictionary values! feels like possibly a mypy bug?!
+ cfg = cast(Dict[str, Any], config_json['files'])
+ content = dict_get_join(cfg, fname)
+ if os.path.isabs(fname):
+ fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
+ else:
+ fpath = os.path.join(data_dir_root, config_dir, fname)
+ with write_new(fpath, owner=(uid, gid), encoding='utf-8') as f:
+ f.write(content)
+
+ elif daemon_type == NFSGanesha.daemon_type:
+ nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+ nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == CephIscsi.daemon_type:
+ ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+ ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == CephNvmeof.daemon_type:
+ ceph_nvmeof = CephNvmeof.init(ctx, fsid, daemon_id)
+ ceph_nvmeof.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == HAproxy.daemon_type:
+ haproxy = HAproxy.init(ctx, fsid, daemon_id)
+ haproxy.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == Keepalived.daemon_type:
+ keepalived = Keepalived.init(ctx, fsid, daemon_id)
+ keepalived.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == CustomContainer.daemon_type:
+ cc = CustomContainer.init(ctx, fsid, daemon_id)
+ cc.create_daemon_dirs(data_dir, uid, gid)
+
+ elif daemon_type == SNMPGateway.daemon_type:
+ sg = SNMPGateway.init(ctx, fsid, daemon_id)
+ sg.create_daemon_conf()
+
+ _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)
+
+
+def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
+ # mostly making this its own function to make unit testing easier
+ ccfiles = fetch_custom_config_files(ctx)
+ if not ccfiles:
+ return
+ custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
+ if not os.path.exists(custom_config_dir):
+ makedirs(custom_config_dir, uid, gid, 0o755)
+ mandatory_keys = ['mount_path', 'content']
+ for ccf in ccfiles:
+ if all(k in ccf for k in mandatory_keys):
+ file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
+ with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
+ f.write(ccf['content'])
+ # temporary workaround to make custom config files work for tcmu-runner
+ # container we deploy with iscsi until iscsi is refactored
+ if daemon_type == 'iscsi':
+ tcmu_config_dir = custom_config_dir + '.tcmu'
+ if not os.path.exists(tcmu_config_dir):
+ makedirs(tcmu_config_dir, uid, gid, 0o755)
+ tcmu_file_path = os.path.join(tcmu_config_dir, os.path.basename(ccf['mount_path']))
+ with write_new(tcmu_file_path, owner=(uid, gid), encoding='utf-8') as f:
+ f.write(ccf['content'])
+
+
+def get_parm(option: str) -> Dict[str, str]:
+ js = _get_config_json(option)
+ # custom_config_files is a special field that may be in the config
+    # dict. It is used for mounting custom config files into daemons'
+    # containers and should be accessed through the "fetch_custom_config_files"
+    # function. For get_parm we need to discard it.
+ js.pop('custom_config_files', None)
+ return js
+
+
+def _get_config_json(option: str) -> Dict[str, Any]:
+ if not option:
+ return dict()
+
+ global cached_stdin
+ if option == '-':
+ if cached_stdin is not None:
+ j = cached_stdin
+ else:
+ j = sys.stdin.read()
+ cached_stdin = j
+ else:
+ # inline json string
+ if option[0] == '{' and option[-1] == '}':
+ j = option
+ # json file
+ elif os.path.exists(option):
+ with open(option, 'r') as f:
+ j = f.read()
+ else:
+ raise Error('Config file {} not found'.format(option))
+
+ try:
+ js = json.loads(j)
+ except ValueError as e:
+ raise Error('Invalid JSON in {}: {}'.format(option, e))
+ else:
+ return js
+
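+# Illustrative accepted forms for `option`: '-' (read stdin once and cache it),
+# an inline JSON string such as '{"config": "...", "keyring": "..."}', or a
+# path to a file containing JSON; anything else raises Error.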
+
+def fetch_meta(ctx: CephadmContext) -> Dict[str, Any]:
+ """Return a dict containing metadata about a deployment.
+ """
+ meta = getattr(ctx, 'meta_properties', None)
+ if meta is not None:
+ return meta
+ mjson = getattr(ctx, 'meta_json', None)
+ if mjson is not None:
+ meta = json.loads(mjson) or {}
+ ctx.meta_properties = meta
+ return meta
+ return {}
+
+
+def fetch_configs(ctx: CephadmContext) -> Dict[str, str]:
+ """Return a dict containing arbitrary configuration parameters.
+ This function filters out the key 'custom_config_files' which
+ must not be part of a deployment's configuration key-value pairs.
+ To access custom configuration file data, use `fetch_custom_config_files`.
+ """
+ # ctx.config_blobs is *always* a dict. it is created once when
+ # a command is parsed/processed and stored "forever"
+ cfg_blobs = getattr(ctx, 'config_blobs', None)
+ if cfg_blobs:
+ cfg_blobs = dict(cfg_blobs)
+ cfg_blobs.pop('custom_config_files', None)
+ return cfg_blobs
+ # ctx.config_json is the legacy equivalent of config_blobs. it is a
+ # string that either contains json or refers to a file name where
+ # the file contains json.
+ cfg_json = getattr(ctx, 'config_json', None)
+ if cfg_json:
+ jdata = _get_config_json(cfg_json) or {}
+ jdata.pop('custom_config_files', None)
+ return jdata
+ return {}
+
+
+def fetch_custom_config_files(ctx: CephadmContext) -> List[Dict[str, Any]]:
+ """Return a list containing dicts that can be used to populate
+ custom configuration files for containers.
+ """
+ # NOTE: this function works like the opposite of fetch_configs.
+ # instead of filtering out custom_config_files, it returns only
+ # the content in that key.
+ cfg_blobs = getattr(ctx, 'config_blobs', None)
+ if cfg_blobs:
+ return cfg_blobs.get('custom_config_files', [])
+ cfg_json = getattr(ctx, 'config_json', None)
+ if cfg_json:
+ jdata = _get_config_json(cfg_json)
+ return jdata.get('custom_config_files', [])
+ return []
+
+
+def fetch_tcp_ports(ctx: CephadmContext) -> List[EndPoint]:
+ """Return a list of Endpoints, which have a port and ip attribute
+ """
+ ports = getattr(ctx, 'tcp_ports', None)
+ if ports is None:
+ ports = []
+ if isinstance(ports, str):
+ ports = list(map(int, ports.split()))
+ port_ips: Dict[str, str] = {}
+ port_ips_attr: Union[str, Dict[str, str], None] = getattr(ctx, 'port_ips', None)
+ if isinstance(port_ips_attr, str):
+ port_ips = json.loads(port_ips_attr)
+ elif port_ips_attr is not None:
+ # if it's not None or a str, assume it's already the dict we want
+ port_ips = port_ips_attr
+
+ endpoints: List[EndPoint] = []
+ for port in ports:
+ if str(port) in port_ips:
+ endpoints.append(EndPoint(port_ips[str(port)], port))
+ else:
+ endpoints.append(EndPoint('0.0.0.0', port))
+
+ return endpoints
+
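+# Illustrative inputs: ctx.tcp_ports may be a space-separated string such as
+# '9100 9283', and ctx.port_ips a JSON object string such as
+# '{"9100": "192.168.1.10"}'; ports with no entry bind to 0.0.0.0.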
+
+def get_config_and_keyring(ctx):
+ # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
+ config = None
+ keyring = None
+
+ d = fetch_configs(ctx)
+ if d:
+ config = d.get('config')
+ keyring = d.get('keyring')
+ if config and keyring:
+ return config, keyring
+
+ if 'config' in ctx and ctx.config:
+ try:
+ with open(ctx.config, 'r') as f:
+ config = f.read()
+ except FileNotFoundError as e:
+ raise Error(e)
+
+ if 'key' in ctx and ctx.key:
+ keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
+ elif 'keyring' in ctx and ctx.keyring:
+ try:
+ with open(ctx.keyring, 'r') as f:
+ keyring = f.read()
+ except FileNotFoundError as e:
+ raise Error(e)
+
+ return config, keyring
+
+
+def get_container_binds(ctx, fsid, daemon_type, daemon_id):
+ # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
+ binds = list()
+
+ if daemon_type == CephIscsi.daemon_type:
+ binds.extend(CephIscsi.get_container_binds())
+ if daemon_type == CephNvmeof.daemon_type:
+ binds.extend(CephNvmeof.get_container_binds())
+ elif daemon_type == CustomContainer.daemon_type:
+ assert daemon_id
+ cc = CustomContainer.init(ctx, fsid, daemon_id)
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ binds.extend(cc.get_container_binds(data_dir))
+
+ return binds
+
+
+def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
+ no_config=False):
+ # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
+ mounts = dict()
+
+ if daemon_type in Ceph.daemons:
+ if fsid:
+ run_path = os.path.join('/var/run/ceph', fsid)
+ if os.path.exists(run_path):
+ mounts[run_path] = '/var/run/ceph:z'
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ mounts[log_dir] = '/var/log/ceph:z'
+ crash_dir = '/var/lib/ceph/%s/crash' % fsid
+ if os.path.exists(crash_dir):
+ mounts[crash_dir] = '/var/lib/ceph/crash:z'
+ if daemon_type != 'crash' and should_log_to_journald(ctx):
+ journald_sock_dir = '/run/systemd/journal'
+ mounts[journald_sock_dir] = journald_sock_dir
+
+ if daemon_type in Ceph.daemons and daemon_id:
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ if daemon_type == 'rgw':
+ cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
+ else:
+ cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
+ if daemon_type != 'crash':
+ mounts[data_dir] = cdata_dir + ':z'
+ if not no_config:
+ mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
+ if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
+ # these do not search for their keyrings in a data directory
+ mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
+
+ if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
+ mounts['/dev'] = '/dev' # FIXME: narrow this down?
+ mounts['/run/udev'] = '/run/udev'
+ if daemon_type in ['osd', 'clusterless-ceph-volume']:
+ mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
+ mounts['/run/lvm'] = '/run/lvm'
+ mounts['/run/lock/lvm'] = '/run/lock/lvm'
+ if daemon_type == 'osd':
+ # selinux-policy in the container may not match the host.
+ if HostFacts(ctx).selinux_enabled:
+ cluster_dir = f'{ctx.data_dir}/{fsid}'
+ selinux_folder = f'{cluster_dir}/selinux'
+ if os.path.exists(cluster_dir):
+ if not os.path.exists(selinux_folder):
+ os.makedirs(selinux_folder, mode=0o755)
+ mounts[selinux_folder] = '/sys/fs/selinux:ro'
+ else:
+                logger.error(f'Cluster directory {cluster_dir} does not exist.')
+ mounts['/'] = '/rootfs'
+
+ try:
+ if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development
+ ceph_folder = pathify(ctx.shared_ceph_folder)
+ if os.path.exists(ceph_folder):
+ mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
+ mounts[ceph_folder + '/src/cephadm/cephadm.py'] = '/usr/sbin/cephadm'
+ mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
+ mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
+ mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
+ mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
+ else:
+ logger.error('{}{}{}'.format(termcolor.red,
+ 'Ceph shared source folder does not exist.',
+ termcolor.end))
+ except AttributeError:
+ pass
+
+ if daemon_type in Monitoring.components and daemon_id:
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ if daemon_type == 'prometheus':
+ mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
+ mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
+ elif daemon_type == 'loki':
+ mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
+ mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
+ elif daemon_type == 'promtail':
+ mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
+ mounts[log_dir] = '/var/log/ceph:z'
+ mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
+ elif daemon_type == 'node-exporter':
+ mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z'
+ mounts['/proc'] = '/host/proc:ro'
+ mounts['/sys'] = '/host/sys:ro'
+ mounts['/'] = '/rootfs:ro'
+ elif daemon_type == 'grafana':
+ mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
+ mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
+ mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
+ mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
+ elif daemon_type == 'alertmanager':
+ mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
+
+ if daemon_type == NFSGanesha.daemon_type:
+ assert daemon_id
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
+ mounts.update(nfs_ganesha.get_container_mounts(data_dir))
+
+ if daemon_type == HAproxy.daemon_type:
+ assert daemon_id
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ mounts.update(HAproxy.get_container_mounts(data_dir))
+
+ if daemon_type == CephNvmeof.daemon_type:
+ assert daemon_id
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ mounts.update(CephNvmeof.get_container_mounts(data_dir))
+
+ if daemon_type == CephIscsi.daemon_type:
+ assert daemon_id
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+        # Remove the trailing ".tcmu" from data_dir, as tcmu-runner uses the
+        # same data_dir as rbd-runner-api
+ if data_dir.endswith('.tcmu'):
+ data_dir = re.sub(r'\.tcmu$', '', data_dir)
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
+
+ if daemon_type == Keepalived.daemon_type:
+ assert daemon_id
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ mounts.update(Keepalived.get_container_mounts(data_dir))
+
+ if daemon_type == CustomContainer.daemon_type:
+ assert daemon_id
+ cc = CustomContainer.init(ctx, fsid, daemon_id)
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ mounts.update(cc.get_container_mounts(data_dir))
+
+    # Modifications podman makes to /etc/hosts cause issues with
+    # certain daemons (specifically, the "host.containers.internal" entry
+    # being added to /etc/hosts in this case). To avoid that, but still
+    # allow users to use /etc/hosts for hostname resolution, we can
+    # mount the host's /etc/hosts file.
+ # https://tracker.ceph.com/issues/58532
+ # https://tracker.ceph.com/issues/57018
+ if isinstance(ctx.container_engine, Podman):
+ if os.path.exists('/etc/hosts'):
+ if '/etc/hosts' not in mounts:
+ mounts['/etc/hosts'] = '/etc/hosts:ro'
+
+ return mounts
+
+
+def get_ceph_volume_container(ctx: CephadmContext,
+ privileged: bool = True,
+ cname: str = '',
+ volume_mounts: Dict[str, str] = {},
+ bind_mounts: Optional[List[List[str]]] = None,
+ args: List[str] = [],
+ envs: Optional[List[str]] = None) -> 'CephContainer':
+ if envs is None:
+ envs = []
+ envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
+ envs.append('CEPH_VOLUME_DEBUG=1')
+
+ return CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/sbin/ceph-volume',
+ args=args,
+ volume_mounts=volume_mounts,
+ bind_mounts=bind_mounts,
+ envs=envs,
+ privileged=privileged,
+ cname=cname,
+ memory_request=ctx.memory_request,
+ memory_limit=ctx.memory_limit,
+ )
+
+
+def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None:
+    # set the container's pids-limit to unlimited rather than the default
+    # (Docker 4096 / Podman 2048). Useful for daemons like iscsi, where the
+    # default pids-limit caps the number of luns per iscsi target, or rgw,
+    # where increasing the rgw_thread_pool_size to a value near the default
+    # pids-limit may cause the container to crash.
+ if (
+ isinstance(ctx.container_engine, Podman)
+ and ctx.container_engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION
+ ):
+ container_args.append('--pids-limit=-1')
+ else:
+ container_args.append('--pids-limit=0')
+
+
+def get_container(ctx: CephadmContext,
+ fsid: str, daemon_type: str, daemon_id: Union[int, str],
+ privileged: bool = False,
+ ptrace: bool = False,
+ container_args: Optional[List[str]] = None) -> 'CephContainer':
+ entrypoint: str = ''
+ name: str = ''
+ ceph_args: List[str] = []
+ envs: List[str] = []
+ host_network: bool = True
+
+ if daemon_type in Ceph.daemons:
+ envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
+ if container_args is None:
+ container_args = []
+ if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways:
+ set_pids_limit_unlimited(ctx, container_args)
+ if daemon_type in ['mon', 'osd']:
+ # mon and osd need privileged in order for libudev to query devices
+ privileged = True
+ if daemon_type == 'rgw':
+ entrypoint = '/usr/bin/radosgw'
+ name = 'client.rgw.%s' % daemon_id
+ elif daemon_type == 'rbd-mirror':
+ entrypoint = '/usr/bin/rbd-mirror'
+ name = 'client.rbd-mirror.%s' % daemon_id
+ elif daemon_type == 'cephfs-mirror':
+ entrypoint = '/usr/bin/cephfs-mirror'
+ name = 'client.cephfs-mirror.%s' % daemon_id
+ elif daemon_type == 'crash':
+ entrypoint = '/usr/bin/ceph-crash'
+ name = 'client.crash.%s' % daemon_id
+ elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
+ entrypoint = '/usr/bin/ceph-' + daemon_type
+ name = '%s.%s' % (daemon_type, daemon_id)
+ elif daemon_type in Monitoring.components:
+ entrypoint = ''
+ elif daemon_type in Tracing.components:
+ entrypoint = ''
+ name = '%s.%s' % (daemon_type, daemon_id)
+ config = fetch_configs(ctx)
+ Tracing.set_configuration(config, daemon_type)
+ envs.extend(Tracing.components[daemon_type].get('envs', []))
+ elif daemon_type == NFSGanesha.daemon_type:
+ entrypoint = NFSGanesha.entrypoint
+ name = '%s.%s' % (daemon_type, daemon_id)
+ envs.extend(NFSGanesha.get_container_envs())
+ elif daemon_type == CephExporter.daemon_type:
+ entrypoint = CephExporter.entrypoint
+ name = 'client.ceph-exporter.%s' % daemon_id
+ elif daemon_type == HAproxy.daemon_type:
+ name = '%s.%s' % (daemon_type, daemon_id)
+ container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user
+ elif daemon_type == Keepalived.daemon_type:
+ name = '%s.%s' % (daemon_type, daemon_id)
+ envs.extend(Keepalived.get_container_envs())
+ container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
+ elif daemon_type == CephNvmeof.daemon_type:
+ name = '%s.%s' % (daemon_type, daemon_id)
+ container_args.extend(['--ulimit', 'memlock=-1:-1'])
+ container_args.extend(['--ulimit', 'nofile=10240'])
+ container_args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
+ elif daemon_type == CephIscsi.daemon_type:
+ entrypoint = CephIscsi.entrypoint
+ name = '%s.%s' % (daemon_type, daemon_id)
+        # So that the container can modprobe iscsi_target_mod and have write
+        # perms to configfs, we need to make this a privileged container.
+ privileged = True
+ elif daemon_type == CustomContainer.daemon_type:
+ cc = CustomContainer.init(ctx, fsid, daemon_id)
+ entrypoint = cc.entrypoint
+ host_network = False
+ envs.extend(cc.get_container_envs())
+ container_args.extend(cc.get_container_args())
+
+ if daemon_type in Monitoring.components:
+ uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
+ monitoring_args = [
+ '--user',
+ str(uid),
+ # FIXME: disable cpu/memory limits for the time being (not supported
+ # by ubuntu 18.04 kernel!)
+ ]
+ container_args.extend(monitoring_args)
+ if daemon_type == 'node-exporter':
+ # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
+ # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
+ # between the node-exporter container and the host to avoid selinux denials
+ container_args.extend(['--security-opt', 'label=disable'])
+ elif daemon_type == 'crash':
+ ceph_args = ['-n', name]
+ elif daemon_type in Ceph.daemons:
+ ceph_args = ['-n', name, '-f']
+ elif daemon_type == SNMPGateway.daemon_type:
+ sg = SNMPGateway.init(ctx, fsid, daemon_id)
+ container_args.append(
+ f'--env-file={sg.conf_file_path}'
+ )
+
+ # if using podman, set -d, --conmon-pidfile & --cidfile flags
+ # so service can have Type=Forking
+ if isinstance(ctx.container_engine, Podman):
+ runtime_dir = '/run'
+ container_args.extend([
+ '-d', '--log-driver', 'journald',
+ '--conmon-pidfile',
+ runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
+ '--cidfile',
+ runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
+ ])
+ if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split:
+ container_args.append('--cgroups=split')
+ # if /etc/hosts doesn't exist, we can be confident
+ # users aren't using it for host name resolution
+ # and adding --no-hosts avoids bugs created in certain daemons
+ # by modifications podman makes to /etc/hosts
+ # https://tracker.ceph.com/issues/58532
+ # https://tracker.ceph.com/issues/57018
+ if not os.path.exists('/etc/hosts'):
+ container_args.extend(['--no-hosts'])
+
+ return CephContainer.for_daemon(
+ ctx,
+ fsid=fsid,
+ daemon_type=daemon_type,
+ daemon_id=str(daemon_id),
+ entrypoint=entrypoint,
+ args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
+ container_args=container_args,
+ volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+ bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+ envs=envs,
+ privileged=privileged,
+ ptrace=ptrace,
+ host_network=host_network,
+ )
+
+
+def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
+ # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
+
+ if not img:
+ img = ctx.image
+
+ if isinstance(file_path, str):
+ paths = [file_path]
+ else:
+ paths = file_path
+
+ ex: Optional[Tuple[str, RuntimeError]] = None
+
+ for fp in paths:
+ try:
+ out = CephContainer(
+ ctx,
+ image=img,
+ entrypoint='stat',
+ args=['-c', '%u %g', fp]
+ ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ uid, gid = out.split(' ')
+ return int(uid), int(gid)
+ except RuntimeError as e:
+ ex = (fp, e)
+ if ex:
+ raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
+
+ raise RuntimeError('uid/gid not found')
+
+
+def deploy_daemon(ctx: CephadmContext, fsid: str, daemon_type: str,
+ daemon_id: Union[int, str], c: Optional['CephContainer'],
+ uid: int, gid: int, config: Optional[str] = None,
+ keyring: Optional[str] = None, osd_fsid: Optional[str] = None,
+ deployment_type: DeploymentType = DeploymentType.DEFAULT,
+ endpoints: Optional[List[EndPoint]] = None) -> None:
+
+ endpoints = endpoints or []
+ # only check port in use if fresh deployment since service
+ # we are redeploying/reconfiguring will already be using the port
+ if deployment_type == DeploymentType.DEFAULT:
+ if any([port_in_use(ctx, e) for e in endpoints]):
+ if daemon_type == 'mgr':
+ # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
+ # tell whether that is the case here.
+ logger.warning(
+ f"ceph-mgr TCP port(s) {','.join(map(str, endpoints))} already in use"
+ )
+ else:
+ raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, endpoints)), daemon_type))
+
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ if deployment_type == DeploymentType.RECONFIG and not os.path.exists(data_dir):
+ raise Error('cannot reconfig, data path %s does not exist' % data_dir)
+ if daemon_type == 'mon' and not os.path.exists(data_dir):
+ assert config
+ assert keyring
+ # tmp keyring file
+ tmp_keyring = write_tmp(keyring, uid, gid)
+
+ # tmp config file
+ tmp_config = write_tmp(config, uid, gid)
+
+ # --mkfs
+ create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
+ mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/bin/ceph-mon',
+ args=[
+ '--mkfs',
+ '-i', str(daemon_id),
+ '--fsid', fsid,
+ '-c', '/tmp/config',
+ '--keyring', '/tmp/keyring',
+ ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
+ volume_mounts={
+ log_dir: '/var/log/ceph:z',
+ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
+ tmp_keyring.name: '/tmp/keyring:z',
+ tmp_config.name: '/tmp/config:z',
+ },
+ ).run()
+
+ # write conf
+ with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
+ f.write(config)
+ else:
+ # dirs, conf, keyring
+ create_daemon_dirs(
+ ctx,
+ fsid, daemon_type, daemon_id,
+ uid, gid,
+ config, keyring)
+
+ # only write out unit files and start daemon
+ # with systemd if this is not a reconfig
+ if deployment_type != DeploymentType.RECONFIG:
+ if daemon_type == CephadmAgent.daemon_type:
+ config_js = fetch_configs(ctx)
+ assert isinstance(config_js, dict)
+
+ cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
+ cephadm_agent.deploy_daemon_unit(config_js)
+ else:
+ if c:
+ deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
+ c, osd_fsid=osd_fsid, endpoints=endpoints)
+ else:
+ raise RuntimeError('attempting to deploy a daemon without a container image')
+
+ if not os.path.exists(data_dir + '/unit.created'):
+ with write_new(data_dir + '/unit.created', owner=(uid, gid)) as f:
+ f.write('mtime is time the daemon deployment was created\n')
+
+ with write_new(data_dir + '/unit.configured', owner=(uid, gid)) as f:
+ f.write('mtime is time we were last configured\n')
+
+ update_firewalld(ctx, daemon_type)
+
+ # Open ports explicitly required for the daemon
+ if endpoints:
+ fw = Firewalld(ctx)
+ fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, []))
+ fw.apply_rules()
+
+ # If this was a reconfig and the daemon is not a Ceph daemon, restart it
+ # so it can pick up potential changes to its configuration files
+ if deployment_type == DeploymentType.RECONFIG and daemon_type not in Ceph.daemons:
+ # ceph daemons do not need a restart; others (presumably) do to pick
+ # up the new config
+ call_throws(ctx, ['systemctl', 'reset-failed',
+ get_unit_name(fsid, daemon_type, daemon_id)])
+ call_throws(ctx, ['systemctl', 'restart',
+ get_unit_name(fsid, daemon_type, daemon_id)])
+
+
+def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
+ # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
+ if comment:
+ # Sometimes adding a comment, especially if there are multiple containers in one
+ # unit file, makes it easier to read and grok.
+ file_obj.write('# ' + comment + '\n')
+ # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
+ file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
+ file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
+ # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
+ if isinstance(ctx.container_engine, Podman):
+ file_obj.write(
+ '! '
+ + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
+ + ' 2> /dev/null\n')
+ file_obj.write(
+ '! '
+ + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
+ + ' 2> /dev/null\n')
+
+ # container run command
+ file_obj.write(
+ ' '.join([shlex.quote(a) for a in container.run_cmd()])
+ + (' &' if background else '') + '\n')
+
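+# Illustrative unit.run fragment produced above (flags and names elided):
+#   ! <engine> rm <old-cname> 2> /dev/null
+#   ! <engine> rm <cname> 2> /dev/null
+#   <engine> run ...   # with a trailing '&' only when background=True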
+
+def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
+    # systemd may fail to clean up cgroups from a previously stopped unit, which will cause the next "systemctl start" to fail.
+ # see https://tracker.ceph.com/issues/50998
+
+ CGROUPV2_PATH = Path('/sys/fs/cgroup')
+ if not (CGROUPV2_PATH / 'system.slice').exists():
+ # Only unified cgroup is affected, skip if not the case
+ return
+
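+    # systemd escapes '-' as '\x2d' in unit names, so a hypothetical fsid
+    # 'aaaa-bbbb' maps to the slice 'system-ceph\x2daaaa\x2dbbbb.slice'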
+ slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
+ cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
+ if not cg_path.exists():
+ return
+
+ def cg_trim(path: Path) -> None:
+ for p in path.iterdir():
+ if p.is_dir():
+ cg_trim(p)
+ path.rmdir()
+ try:
+ cg_trim(cg_path)
+ except OSError:
+ logger.warning(f'Failed to trim old cgroups {cg_path}')
+
+
+def deploy_daemon_units(
+ ctx: CephadmContext,
+ fsid: str,
+ uid: int,
+ gid: int,
+ daemon_type: str,
+ daemon_id: Union[int, str],
+ c: 'CephContainer',
+ enable: bool = True,
+ start: bool = True,
+ osd_fsid: Optional[str] = None,
+ endpoints: Optional[List[EndPoint]] = None,
+) -> None:
+ # build the unit.run, unit.meta, unit.stop and unit.poststop files
+
+ def add_stop_actions(f: TextIO, timeout: Optional[int]) -> None:
+ # the generated script checks whether the container exists before
+ # stopping it. The exit code is success either if the container does
+ # not exist or if it exists and is stopped successfully.
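+ # each generated line looks roughly like (podman, illustrative name):
+ #   ! podman inspect ceph-<fsid>-osd-0 &>/dev/null || podman stop -t 30 ceph-<fsid>-osd-0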
+ container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
+ f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True, timeout=timeout))} \n')
+ f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd(timeout=timeout))} \n')
+
+ data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
+ run_file_path = data_dir + '/unit.run'
+ meta_file_path = data_dir + '/unit.meta'
+ with write_new(run_file_path) as f, write_new(meta_file_path) as metaf:
+
+ f.write('set -e\n')
+
+ if daemon_type in Ceph.daemons:
+ install_path = find_program('install')
+ f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
+
+ # pre-start cmd(s)
+ if daemon_type == 'osd':
+ # osds have a pre-start step
+ assert osd_fsid
+ simple_fn = os.path.join('/etc/ceph/osd',
+ '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
+ if os.path.exists(simple_fn):
+ f.write('# Simple OSDs need chown on startup:\n')
+ for n in ['block', 'block.db', 'block.wal']:
+ p = os.path.join(data_dir, n)
+ f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
+ else:
+ # if ceph-volume does not support 'ceph-volume activate', we must
+ # do 'ceph-volume lvm activate'.
+ test_cv = get_ceph_volume_container(
+ ctx,
+ args=['activate', '--bad-option'],
+ volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+ bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+ cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
+ )
+ out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
+ # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
+ # good: ceph-volume: error: unrecognized arguments: --bad-option
+ if 'unrecognized arguments: activate' in err:
+ # older ceph-volume without top-level activate or --no-tmpfs
+ cmd = [
+ 'lvm', 'activate',
+ str(daemon_id), osd_fsid,
+ '--no-systemd',
+ ]
+ else:
+ cmd = [
+ 'activate',
+ '--osd-id', str(daemon_id),
+ '--osd-uuid', osd_fsid,
+ '--no-systemd',
+ '--no-tmpfs',
+ ]
+
+ prestart = get_ceph_volume_container(
+ ctx,
+ args=cmd,
+ volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+ bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+ cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
+ )
+ _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
+ elif daemon_type == CephIscsi.daemon_type:
+ f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
+ ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+ tcmu_container = ceph_iscsi.get_tcmu_runner_container()
+ _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
+
+ _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
+
+ # some metadata about the deploy
+ meta: Dict[str, Any] = fetch_meta(ctx)
+ meta.update({
+ 'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
+ 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
+ })
+ if not meta.get('ports'):
+ if endpoints:
+ meta['ports'] = [e.port for e in endpoints]
+ else:
+ meta['ports'] = []
+ metaf.write(json.dumps(meta, indent=4) + '\n')
+
+ timeout = 30 if daemon_type == 'osd' else None
+ # post-stop command(s)
+ with write_new(data_dir + '/unit.poststop') as f:
+ # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop,
+ # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242.
+ add_stop_actions(cast(TextIO, f), timeout)
+ if daemon_type == 'osd':
+ assert osd_fsid
+ poststop = get_ceph_volume_container(
+ ctx,
+ args=[
+ 'lvm', 'deactivate',
+ str(daemon_id), osd_fsid,
+ ],
+ volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
+ bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
+ cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
+ daemon_id),
+ )
+ _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
+ elif daemon_type == CephIscsi.daemon_type:
+ # make sure we also stop the tcmu container
+ runtime_dir = '/run'
+ ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
+ tcmu_container = ceph_iscsi.get_tcmu_runner_container()
+ f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
+ f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
+ f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n')
+ f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
+
+ # stop command(s)
+ with write_new(data_dir + '/unit.stop') as f:
+ add_stop_actions(cast(TextIO, f), timeout)
+
+ if c:
+ with write_new(data_dir + '/unit.image') as f:
+ f.write(c.image + '\n')
+
+ # sysctl
+ install_sysctl(ctx, fsid, daemon_type)
+
+ # systemd
+ install_base_units(ctx, fsid)
+ unit = get_unit_file(ctx, fsid)
+ unit_file = 'ceph-%s@.service' % (fsid)
+ with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
+ f.write(unit)
+ call_throws(ctx, ['systemctl', 'daemon-reload'])
+
+ unit_name = get_unit_name(fsid, daemon_type, daemon_id)
+ call(ctx, ['systemctl', 'stop', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ call(ctx, ['systemctl', 'reset-failed', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ if enable:
+ call_throws(ctx, ['systemctl', 'enable', unit_name])
+ if start:
+ clean_cgroup(ctx, fsid, unit_name)
+ call_throws(ctx, ['systemctl', 'start', unit_name])
+
+
+class Firewalld(object):
+
+ # for specifying ports we should always open when opening
+ # ports for a daemon of that type. Main use case is for ports
+ # that we should open when deploying the daemon type but that
+ # the daemon itself may not necessarily need to bind to the port.
+ # This needs to be handled differently as we don't want to fail
+ # deployment if the port cannot be bound to but we still want to
+ # open the port in the firewall.
+ external_ports: Dict[str, List[int]] = {
+ 'iscsi': [3260] # 3260 is the well known iSCSI port
+ }
+
+ def __init__(self, ctx):
+ # type: (CephadmContext) -> None
+ self.ctx = ctx
+ self.available = self.check()
+
+ def check(self):
+ # type: () -> bool
+ self.cmd = find_executable('firewall-cmd')
+ if not self.cmd:
+ logger.debug('firewalld does not appear to be present')
+ return False
+ (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
+ if not enabled:
+ logger.debug('firewalld.service is not enabled')
+ return False
+ if state != 'running':
+ logger.debug('firewalld.service is not running')
+ return False
+
+ logger.info('firewalld ready')
+ return True
+
+ def enable_service_for(self, daemon_type):
+ # type: (str) -> None
+ if not self.available:
+ logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
+ return
+
+ if daemon_type == 'mon':
+ svc = 'ceph-mon'
+ elif daemon_type in ['mgr', 'mds', 'osd']:
+ svc = 'ceph'
+ elif daemon_type == NFSGanesha.daemon_type:
+ svc = 'nfs'
+ else:
+ return
+
+ if not self.cmd:
+ raise RuntimeError('command not defined')
+
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
+ if ret:
+ logger.info('Enabling firewalld service %s in current zone...' % svc)
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
+ if ret:
+ raise RuntimeError(
+ 'unable to add service %s to current zone: %s' % (svc, err))
+ else:
+ logger.debug('firewalld service %s is enabled in current zone' % svc)
+
+ def open_ports(self, fw_ports):
+ # type: (List[int]) -> None
+ if not self.available:
+ logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
+ return
+
+ if not self.cmd:
+ raise RuntimeError('command not defined')
+
+ for port in fw_ports:
+ tcp_port = str(port) + '/tcp'
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
+ if ret:
+ logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
+ if ret:
+ raise RuntimeError('unable to add port %s to current zone: %s' %
+ (tcp_port, err))
+ else:
+ logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
+
+ def close_ports(self, fw_ports):
+ # type: (List[int]) -> None
+ if not self.available:
+ logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
+ return
+
+ if not self.cmd:
+ raise RuntimeError('command not defined')
+
+ for port in fw_ports:
+ tcp_port = str(port) + '/tcp'
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
+ if not ret:
+ logger.info('Disabling port %s in current zone...' % tcp_port)
+ out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
+ if ret:
+ raise RuntimeError('unable to remove port %s from current zone: %s' %
+ (tcp_port, err))
+ else:
+ logger.info(f'Port {tcp_port} disabled')
+ else:
+ logger.info(f'firewalld port {tcp_port} already closed')
+
+ def apply_rules(self):
+ # type: () -> None
+ if not self.available:
+ return
+
+ if not self.cmd:
+ raise RuntimeError('command not defined')
+
+ call_throws(self.ctx, [self.cmd, '--reload'])
+
+
+def update_firewalld(ctx, daemon_type):
+ # type: (CephadmContext, str) -> None
+ if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
+ firewall = Firewalld(ctx)
+ firewall.enable_service_for(daemon_type)
+ firewall.apply_rules()
+
+
+def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
+ """
+ Set up sysctl settings
+ """
+ def _write(conf: Path, lines: List[str]) -> None:
+ lines = [
+ '# created by cephadm',
+ '',
+ *lines,
+ '',
+ ]
+ with write_new(conf, owner=None, perms=None) as f:
+ f.write('\n'.join(lines))
+
+ conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
+ lines: List = []
+
+ if daemon_type == 'osd':
+ lines = OSD.get_sysctl_settings()
+ elif daemon_type == 'haproxy':
+ lines = HAproxy.get_sysctl_settings()
+ elif daemon_type == 'keepalived':
+ lines = Keepalived.get_sysctl_settings()
+ elif daemon_type == CephNvmeof.daemon_type:
+ lines = CephNvmeof.get_sysctl_settings()
+ lines = filter_sysctl_settings(ctx, lines)
+
+ # apply the sysctl settings
+ if lines:
+ Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
+ _write(conf, lines)
+ call_throws(ctx, ['sysctl', '--system'])
+
+
+def sysctl_get(ctx: CephadmContext, variable: str) -> Union[str, None]:
+ """
+ Read a sysctl setting by executing 'sysctl -b {variable}'
+ """
+ out, err, code = call(ctx, ['sysctl', '-b', variable])
+ return out or None
+
+
+def filter_sysctl_settings(ctx: CephadmContext, lines: List[str]) -> List[str]:
+ """
+ Given a list of sysctl settings, examine the system's current configuration
+ and return those which are not currently set as described.
+ """
+ def test_setting(desired_line: str) -> bool:
+ # Remove any comments
+ comment_start = desired_line.find('#')
+ if comment_start != -1:
+ desired_line = desired_line[:comment_start]
+ desired_line = desired_line.strip()
+ if not desired_line or desired_line.isspace():
+ return False
+ setting, desired_value = map(lambda s: s.strip(), desired_line.split('=', 1))
+ if not setting or not desired_value:
+ return False
+ actual_value = sysctl_get(ctx, setting)
+ return desired_value != actual_value
+ return list(filter(test_setting, lines))
+
+
+def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
+ """
+ Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
+ This moves it to '/etc/sysctl.d'.
+ """
+ deprecated_location: str = '/usr/lib/sysctl.d'
+ deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
+ if not deprecated_confs:
+ return
+
+ file_count: int = len(deprecated_confs)
+ logger.info(f'Found {file_count} sysctl config file(s) in the deprecated location {deprecated_location}. Starting migration.')
+ for conf in deprecated_confs:
+ try:
+ shutil.move(conf, ctx.sysctl_dir)
+ file_count -= 1
+ except shutil.Error as err:
+ if str(err).endswith('already exists'):
+ logger.warning(f'Destination file already exists. Deleting {conf}.')
+ try:
+ os.unlink(conf)
+ file_count -= 1
+ except OSError as del_err:
+ logger.warning(f'Could not remove {conf}: {del_err}.')
+ else:
+ logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')
+
+ # Log successful migration
+ if file_count == 0:
+ logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
+ return
+
+ # Log partially successful / unsuccessful migration
+ files_processed: int = len(deprecated_confs)
+ if file_count < files_processed:
+ status: str = f'partially successful (failed {file_count}/{files_processed})'
+ elif file_count == files_processed:
+ status = 'unsuccessful'
+ logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
+
+
+def install_base_units(ctx, fsid):
+ # type: (CephadmContext, str) -> None
+ """
+ Set up ceph.target and ceph-$fsid.target units.
+ """
+ # global unit
+ existed = os.path.exists(ctx.unit_dir + '/ceph.target')
+ with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
+ f.write('[Unit]\n'
+ 'Description=All Ceph clusters and services\n'
+ '\n'
+ '[Install]\n'
+ 'WantedBy=multi-user.target\n')
+ if not existed:
+ # we disable before enable in case a different ceph.target
+ # (from the traditional package) is present; while newer
+ # systemd is smart enough to disable the old
+ # (/lib/systemd/...) and enable the new (/etc/systemd/...),
+ # some older versions of systemd error out with EEXIST.
+ call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
+ call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
+ call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
+
+ # cluster unit
+ existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
+ with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
+ f.write(
+ '[Unit]\n'
+ 'Description=Ceph cluster {fsid}\n'
+ 'PartOf=ceph.target\n'
+ 'Before=ceph.target\n'
+ '\n'
+ '[Install]\n'
+ 'WantedBy=multi-user.target ceph.target\n'.format(
+ fsid=fsid)
+ )
+ if not existed:
+ call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
+ call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
+
+ # don't overwrite file in order to allow users to manipulate it
+ if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
+ return
+
+ # logrotate for the cluster
+ with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
+ """
+ This is a bit sloppy in that the killall/pkill will touch all ceph daemons
+ in all containers, but I don't see an elegant way to send SIGHUP *just* to
+ the daemons for this cluster. (1) systemd kill -s will get the signal to
+ podman, but podman will exit. (2) podman kill will get the signal to the
+ first child (bash), but that isn't the ceph daemon. This is simpler and
+ should be harmless.
+ """
+ targets: List[str] = [
+ 'ceph-mon',
+ 'ceph-mgr',
+ 'ceph-mds',
+ 'ceph-osd',
+ 'ceph-fuse',
+ 'radosgw',
+ 'rbd-mirror',
+ 'cephfs-mirror',
+ 'tcmu-runner'
+ ]
+
+ f.write("""# created by cephadm
+/var/log/ceph/%s/*.log {
+ rotate 7
+ daily
+ compress
+ sharedscripts
+ postrotate
+ killall -q -1 %s || pkill -1 -x '%s' || true
+ endscript
+ missingok
+ notifempty
+ su root root
+}
+""" % (fsid, ' '.join(targets), '|'.join(targets)))
+
+
+def get_unit_file(ctx, fsid):
+ # type: (CephadmContext, str) -> str
+ extra_args = ''
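+ # systemd specifiers used in the unit text below: %i expands to the instance
+ # name (the part after '@', e.g. 'mon.host1'), %t to the runtime directory
+ # (usually /run), and %n to the full unit name.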
+ if isinstance(ctx.container_engine, Podman):
+ extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
+ 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
+ 'Type=forking\n'
+ 'PIDFile=%t/%n-pid\n')
+ if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
+ extra_args += 'Delegate=yes\n'
+
+ docker = isinstance(ctx.container_engine, Docker)
+ u = """# generated by cephadm
+[Unit]
+Description=Ceph %i for {fsid}
+
+# According to:
+# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
+# these can be removed once ceph-mon will dynamically change network
+# configuration.
+After=network-online.target local-fs.target time-sync.target{docker_after}
+Wants=network-online.target local-fs.target time-sync.target
+{docker_requires}
+
+PartOf=ceph-{fsid}.target
+Before=ceph-{fsid}.target
+
+[Service]
+LimitNOFILE=1048576
+LimitNPROC=1048576
+EnvironmentFile=-/etc/environment
+ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
+ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
+ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
+KillMode=none
+Restart=on-failure
+RestartSec=10s
+TimeoutStartSec=200
+TimeoutStopSec=120
+StartLimitInterval=30min
+StartLimitBurst=5
+{extra_args}
+[Install]
+WantedBy=ceph-{fsid}.target
+""".format(fsid=fsid,
+ data_dir=ctx.data_dir,
+ extra_args=extra_args,
+ # if docker, we depend on docker.service
+ docker_after=' docker.service' if docker else '',
+ docker_requires='Requires=docker.service\n' if docker else '')
+
+ return u
+
+##################################
+
+
+class CephContainer:
+ def __init__(self,
+ ctx: CephadmContext,
+ image: str,
+ entrypoint: str,
+ args: List[str] = [],
+ volume_mounts: Dict[str, str] = {},
+ cname: str = '',
+ container_args: List[str] = [],
+ envs: Optional[List[str]] = None,
+ privileged: bool = False,
+ ptrace: bool = False,
+ bind_mounts: Optional[List[List[str]]] = None,
+ init: Optional[bool] = None,
+ host_network: bool = True,
+ memory_request: Optional[str] = None,
+ memory_limit: Optional[str] = None,
+ ) -> None:
+ self.ctx = ctx
+ self.image = image
+ self.entrypoint = entrypoint
+ self.args = args
+ self.volume_mounts = volume_mounts
+ self._cname = cname
+ self.container_args = container_args
+ self.envs = envs
+ self.privileged = privileged
+ self.ptrace = ptrace
+ self.bind_mounts = bind_mounts if bind_mounts else []
+ self.init = init if init else ctx.container_init
+ self.host_network = host_network
+ self.memory_request = memory_request
+ self.memory_limit = memory_limit
+
+ @classmethod
+ def for_daemon(cls,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_type: str,
+ daemon_id: str,
+ entrypoint: str,
+ args: List[str] = [],
+ volume_mounts: Dict[str, str] = {},
+ container_args: List[str] = [],
+ envs: Optional[List[str]] = None,
+ privileged: bool = False,
+ ptrace: bool = False,
+ bind_mounts: Optional[List[List[str]]] = None,
+ init: Optional[bool] = None,
+ host_network: bool = True,
+ memory_request: Optional[str] = None,
+ memory_limit: Optional[str] = None,
+ ) -> 'CephContainer':
+ return cls(
+ ctx,
+ image=ctx.image,
+ entrypoint=entrypoint,
+ args=args,
+ volume_mounts=volume_mounts,
+ cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
+ container_args=container_args,
+ envs=envs,
+ privileged=privileged,
+ ptrace=ptrace,
+ bind_mounts=bind_mounts,
+ init=init,
+ host_network=host_network,
+ memory_request=memory_request,
+ memory_limit=memory_limit,
+ )
+
+ @property
+ def cname(self) -> str:
+ """
+ podman adds the current container name to the /etc/hosts
+ file. It turns out that python's `socket.getfqdn()` differs from
+ `hostname -f` when the container name contains dots:
+
+ # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
+ [root@sebastians-laptop /]# cat /etc/hosts
+ 127.0.0.1 localhost
+ ::1 localhost
+ 127.0.1.1 sebastians-laptop foo.bar.baz.com
+ [root@sebastians-laptop /]# hostname -f
+ sebastians-laptop
+ [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
+ foo.bar.baz.com
+
+ Fascinatingly, this doesn't happen when using dashes.
+ """
+ return self._cname.replace('.', '-')
+
+ @cname.setter
+ def cname(self, val: str) -> None:
+ self._cname = val
+
+ @property
+ def old_cname(self) -> str:
+ return self._cname
+
+ def run_cmd(self) -> List[str]:
+ cmd_args: List[str] = [
+ str(self.ctx.container_engine.path),
+ 'run',
+ '--rm',
+ '--ipc=host',
+ # some containers (ahem, haproxy) override this, but we want a fast
+ # shutdown always (and, more importantly, a successful exit even if we
+ # fall back to SIGKILL).
+ '--stop-signal=SIGTERM',
+ ]
+
+ if isinstance(self.ctx.container_engine, Podman):
+ if os.path.exists('/etc/ceph/podman-auth.json'):
+ cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
+
+ if isinstance(self.ctx.container_engine, Docker):
+ cmd_args.extend(['--ulimit', 'nofile=1048576'])
+
+ envs: List[str] = [
+ '-e', 'CONTAINER_IMAGE=%s' % self.image,
+ '-e', 'NODE_NAME=%s' % get_hostname(),
+ ]
+ vols: List[str] = []
+ binds: List[str] = []
+
+ # pass the memory settings to the container as VAR=value pairs; a bare
+ # value after '-e VAR' would be misread by the engine as the image name
+ if self.memory_request:
+ cmd_args.extend(['-e', 'POD_MEMORY_REQUEST=%s' % self.memory_request])
+ if self.memory_limit:
+ cmd_args.extend(['-e', 'POD_MEMORY_LIMIT=%s' % self.memory_limit])
+ cmd_args.extend(['--memory', str(self.memory_limit)])
+
+ if self.host_network:
+ cmd_args.append('--net=host')
+ if self.entrypoint:
+ cmd_args.extend(['--entrypoint', self.entrypoint])
+ if self.privileged:
+ cmd_args.extend([
+ '--privileged',
+ # let OSD etc read block devs that haven't been chowned
+ '--group-add=disk'])
+ if self.ptrace and not self.privileged:
+ # if privileged, the SYS_PTRACE cap is already added
+ # in addition, --cap-add and --privileged are mutually
+ # exclusive since podman >= 2.0
+ cmd_args.append('--cap-add=SYS_PTRACE')
+ if self.init:
+ cmd_args.append('--init')
+ envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
+ if self.cname:
+ cmd_args.extend(['--name', self.cname])
+ if self.envs:
+ for env in self.envs:
+ envs.extend(['-e', env])
+
+ vols = sum(
+ [['-v', '%s:%s' % (host_dir, container_dir)]
+ for host_dir, container_dir in self.volume_mounts.items()], [])
+ binds = sum([['--mount', '{}'.format(','.join(bind))]
+ for bind in self.bind_mounts], [])
+
+ return \
+ cmd_args + self.container_args + \
+ envs + vols + binds + \
+ [self.image] + self.args # type: ignore
+
+ def shell_cmd(self, cmd: List[str]) -> List[str]:
+ cmd_args: List[str] = [
+ str(self.ctx.container_engine.path),
+ 'run',
+ '--rm',
+ '--ipc=host',
+ ]
+ envs: List[str] = [
+ '-e', 'CONTAINER_IMAGE=%s' % self.image,
+ '-e', 'NODE_NAME=%s' % get_hostname(),
+ ]
+ vols: List[str] = []
+ binds: List[str] = []
+
+ if self.host_network:
+ cmd_args.append('--net=host')
+ if self.ctx.no_hosts:
+ cmd_args.append('--no-hosts')
+ if self.privileged:
+ cmd_args.extend([
+ '--privileged',
+ # let OSD etc read block devs that haven't been chowned
+ '--group-add=disk',
+ ])
+ if self.init:
+ cmd_args.append('--init')
+ envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
+ if self.envs:
+ for env in self.envs:
+ envs.extend(['-e', env])
+
+ vols = sum(
+ [['-v', '%s:%s' % (host_dir, container_dir)]
+ for host_dir, container_dir in self.volume_mounts.items()], [])
+ binds = sum([['--mount', '{}'.format(','.join(bind))]
+ for bind in self.bind_mounts], [])
+
+ return cmd_args + self.container_args + envs + vols + binds + [
+ '--entrypoint', cmd[0],
+ self.image,
+ ] + cmd[1:]
+
+ def exec_cmd(self, cmd):
+ # type: (List[str]) -> List[str]
+ cname = get_running_container_name(self.ctx, self)
+ if not cname:
+ raise Error('unable to find container "{}"'.format(self.cname))
+ return [
+ str(self.ctx.container_engine.path),
+ 'exec',
+ ] + self.container_args + [
+ self.cname,
+ ] + cmd
+
+ def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
+ ret = [
+ str(self.ctx.container_engine.path),
+ 'rm', '-f',
+ ]
+ if storage:
+ ret.append('--storage')
+ if old_cname:
+ ret.append(self.old_cname)
+ else:
+ ret.append(self.cname)
+ return ret
+
+ def stop_cmd(self, old_cname: bool = False, timeout: Optional[int] = None) -> List[str]:
+ if timeout is None:
+ ret = [
+ str(self.ctx.container_engine.path),
+ 'stop', self.old_cname if old_cname else self.cname,
+ ]
+ else:
+ ret = [
+ str(self.ctx.container_engine.path),
+ 'stop', '-t', f'{timeout}',
+ self.old_cname if old_cname else self.cname,
+ ]
+ return ret
+
+ def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
+ # type: (Optional[int], CallVerbosity) -> str
+ out, _, _ = call_throws(self.ctx, self.run_cmd(),
+ desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
+ return out
+
+
+#####################################
+
+class MgrListener(Thread):
+ def __init__(self, agent: 'CephadmAgent') -> None:
+ self.agent = agent
+ self.stop = False
+ super(MgrListener, self).__init__(target=self.run)
+
+ def run(self) -> None:
+ listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
+ listenSocket.settimeout(60)
+ listenSocket.listen(1)
+ ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+ ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+ ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
+ ssl_ctx.load_verify_locations(self.agent.ca_path)
+ secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
+ while not self.stop:
+ try:
+ try:
+ conn, _ = secureListenSocket.accept()
+ except socket.timeout:
+ continue
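+ # message framing: a 10-byte ASCII length header, followed by a
+ # JSON payload of that many bytes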
+ try:
+ length: int = int(conn.recv(10).decode())
+ except Exception as e:
+ err_str = f'Failed to extract length of payload from message: {e}'
+ conn.send(err_str.encode())
+ logger.error(err_str)
+ continue
+ while True:
+ payload = conn.recv(length).decode()
+ if not payload:
+ break
+ try:
+ data: Dict[Any, Any] = json.loads(payload)
+ self.handle_json_payload(data)
+ except Exception as e:
+ err_str = f'Failed to extract json payload from message: {e}'
+ conn.send(err_str.encode())
+ logger.error(err_str)
+ else:
+ conn.send(b'ACK')
+ if 'config' in data:
+ self.agent.wakeup()
+ self.agent.ls_gatherer.wakeup()
+ self.agent.volume_gatherer.wakeup()
+ logger.debug(f'Got mgr message {data}')
+ except Exception as e:
+ logger.error(f'Mgr Listener encountered exception: {e}')
+
+ def shutdown(self) -> None:
+ self.stop = True
+
+ def handle_json_payload(self, data: Dict[Any, Any]) -> None:
+ self.agent.ack = int(data['counter'])
+ if 'config' in data:
+ logger.info('Received new config from mgr')
+ config = data['config']
+ for filename in config:
+ if filename in self.agent.required_files:
+ file_path = os.path.join(self.agent.daemon_dir, filename)
+ with write_new(file_path) as f:
+ f.write(config[filename])
+ self.agent.pull_conf_settings()
+ self.agent.wakeup()
+
+
+class CephadmAgent():
+
+ daemon_type = 'agent'
+ default_port = 8498
+ loop_interval = 30
+ stop = False
+
+ required_files = [
+ 'agent.json',
+ 'keyring',
+ 'root_cert.pem',
+ 'listener.crt',
+ 'listener.key',
+ ]
+
+ def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.starting_port = 14873
+ self.target_ip = ''
+ self.target_port = ''
+ self.host = ''
+ self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
+ self.config_path = os.path.join(self.daemon_dir, 'agent.json')
+ self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
+ self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
+ self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
+ self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
+ self.listener_port = ''
+ self.ack = 1
+ self.event = Event()
+ self.mgr_listener = MgrListener(self)
+ self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
+ self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
+ self.device_enhanced_scan = False
+ self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
+ self.recent_iteration_index: int = 0
+ self.cached_ls_values: Dict[str, Dict[str, str]] = {}
+
+ def validate(self, config: Dict[str, str] = {}) -> None:
+ # check for the required files
+ for fname in self.required_files:
+ if fname not in config:
+ raise Error('required file missing from config: %s' % fname)
+
+ def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
+ if not config:
+ raise Error('Agent needs a config')
+ assert isinstance(config, dict)
+ self.validate(config)
+
+ # Create the required config files in the daemons dir, with restricted permissions
+ for filename in config:
+ if filename in self.required_files:
+ file_path = os.path.join(self.daemon_dir, filename)
+ with write_new(file_path) as f:
+ f.write(config[filename])
+
+ unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
+ with write_new(unit_run_path) as f:
+ f.write(self.unit_run())
+
+ meta: Dict[str, Any] = fetch_meta(self.ctx)
+ meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
+ with write_new(meta_file_path) as f:
+ f.write(json.dumps(meta, indent=4) + '\n')
+
+ unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
+ with write_new(unit_file_path) as f:
+ f.write(self.unit_file())
+
+ call_throws(self.ctx, ['systemctl', 'daemon-reload'])
+ call(self.ctx, ['systemctl', 'stop', self.unit_name()],
+ verbosity=CallVerbosity.DEBUG)
+ call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
+ verbosity=CallVerbosity.DEBUG)
+ call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])
+
+ def unit_name(self) -> str:
+ return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))
+
+ def unit_run(self) -> str:
+ py3 = shutil.which('python3')
+ binary_path = os.path.realpath(sys.argv[0])
+ return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
+
+ def unit_file(self) -> str:
+ return """#generated by cephadm
+[Unit]
+Description=cephadm agent for cluster {fsid}
+
+PartOf=ceph-{fsid}.target
+Before=ceph-{fsid}.target
+
+[Service]
+Type=forking
+ExecStart=/bin/bash {data_dir}/unit.run
+Restart=on-failure
+RestartSec=10s
+
+[Install]
+WantedBy=ceph-{fsid}.target
+""".format(
+ fsid=self.fsid,
+ data_dir=self.daemon_dir
+ )
+
+ def shutdown(self) -> None:
+ self.stop = True
+ if self.mgr_listener.is_alive():
+ self.mgr_listener.shutdown()
+ if self.ls_gatherer.is_alive():
+ self.ls_gatherer.shutdown()
+ if self.volume_gatherer.is_alive():
+ self.volume_gatherer.shutdown()
+
+ def wakeup(self) -> None:
+ self.event.set()
+
+ def pull_conf_settings(self) -> None:
+ try:
+ with open(self.config_path, 'r') as f:
+ config = json.load(f)
+ self.target_ip = config['target_ip']
+ self.target_port = config['target_port']
+ self.loop_interval = int(config['refresh_period'])
+ self.starting_port = int(config['listener_port'])
+ self.host = config['host']
+ use_lsm = config['device_enhanced_scan']
+ except Exception as e:
+ self.shutdown()
+ raise Error(f'Failed to get agent target ip and port from config: {e}')
+
+ try:
+ with open(self.keyring_path, 'r') as f:
+ self.keyring = f.read()
+ except Exception as e:
+ self.shutdown()
+ raise Error(f'Failed to get agent keyring: {e}')
+
+ assert self.target_ip and self.target_port
+
+ self.device_enhanced_scan = False
+ if use_lsm.lower() == 'true':
+ self.device_enhanced_scan = True
+ self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))
+
+ def run(self) -> None:
+ self.pull_conf_settings()
+
+ try:
+ for _ in range(1000):
+ if not port_in_use(self.ctx, EndPoint('0.0.0.0', self.starting_port)):
+ self.listener_port = str(self.starting_port)
+ break
+ self.starting_port += 1
+ if not self.listener_port:
+ raise Error(f'All 1000 ports starting at {str(self.starting_port - 1000)} taken.')
+ except Exception as e:
+ raise Error(f'Failed to pick port for agent to listen on: {e}')
+
+ if not self.mgr_listener.is_alive():
+ self.mgr_listener.start()
+
+ if not self.ls_gatherer.is_alive():
+ self.ls_gatherer.start()
+
+ if not self.volume_gatherer.is_alive():
+ self.volume_gatherer.start()
+
+ ssl_ctx = ssl.create_default_context()
+ ssl_ctx.check_hostname = True
+ ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+ ssl_ctx.load_verify_locations(self.ca_path)
+
+ while not self.stop:
+ start_time = time.monotonic()
+ ack = self.ack
+
+ # part of the networks info is returned as a set which is not JSON
+ # serializable. The set must be converted to a list
+ networks = list_networks(self.ctx)
+ networks_list: Dict[str, Dict[str, List[str]]] = {}
+ for key in networks.keys():
+ networks_list[key] = {}
+ for k, v in networks[key].items():
+ networks_list[key][k] = list(v)
+
+ data = json.dumps({'host': self.host,
+ 'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
+ and self.ls_gatherer.data is not None else []),
+ 'networks': networks_list,
+ 'facts': HostFacts(self.ctx).dump(),
+ 'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
+ and self.volume_gatherer.data is not None else ''),
+ 'ack': str(ack),
+ 'keyring': self.keyring,
+ 'port': self.listener_port})
+ data = data.encode('ascii')
+
+ url = f'https://{self.target_ip}:{self.target_port}/data/'
+ try:
+ req = Request(url, data, {'Content-Type': 'application/json'})
+ send_time = time.monotonic()
+ with urlopen(req, context=ssl_ctx) as response:
+ response_str = response.read()
+ response_json = json.loads(response_str)
+ total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
+ logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
+ except Exception as e:
+ logger.error(f'Failed to send metadata to mgr: {e}')
+
+ end_time = time.monotonic()
+ run_time = datetime.timedelta(seconds=(end_time - start_time))
+ self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
+ self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
+ run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
+
+ self.event.wait(max(self.loop_interval - int(run_time_average), 0))
+ self.event.clear()
+
+ def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
+ self.ctx.command = 'inventory --format=json'.split()
+ if enhanced:
+ self.ctx.command.append('--with-lsm')
+ self.ctx.fsid = self.fsid
+
+ stream = io.StringIO()
+ with redirect_stdout(stream):
+ command_ceph_volume(self.ctx)
+
+ stdout = stream.getvalue()
+
+ if stdout:
+ return (stdout, False)
+ else:
+ raise Exception('ceph-volume returned empty value')
+
+ def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
+ # gets a subset of ls info quickly. The results of this will tell us if our
+ # cached info is still good or if we need to run the full ls again.
+ # for legacy containers, we just grab the full info. For cephadmv1 containers,
+ # we only grab enabled, state, mem_usage and container id. If container id has
+ # not changed for any daemon, we assume our cached info is good.
+ daemons: Dict[str, Dict[str, Any]] = {}
+ data_dir = self.ctx.data_dir
+ seen_memusage = {} # type: Dict[str, int]
+ out, err, code = call(
+ self.ctx,
+ [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
+ verbosity=CallVerbosity.DEBUG
+ )
+ seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
+ # we need a mapping from container names to ids. Later we will convert daemon
+ # names to container names to get daemons container id to see if it has changed
+ out, err, code = call(
+ self.ctx,
+ [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
+ verbosity=CallVerbosity.DEBUG
+ )
+ name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
+ for i in os.listdir(data_dir):
+ if i in ['mon', 'osd', 'mds', 'mgr']:
+ daemon_type = i
+ for j in os.listdir(os.path.join(data_dir, i)):
+ if '-' not in j:
+ continue
+ (cluster, daemon_id) = j.split('-', 1)
+ legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+ (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
+ daemons[f'{daemon_type}.{daemon_id}'] = {
+ 'style': 'legacy',
+ 'name': '%s.%s' % (daemon_type, daemon_id),
+ 'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
+ 'systemd_unit': legacy_unit_name,
+ 'enabled': 'true' if enabled else 'false',
+ 'state': state,
+ }
+ elif is_fsid(i):
+ fsid = str(i) # convince mypy that fsid is a str here
+ for j in os.listdir(os.path.join(data_dir, i)):
+ if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
+ (daemon_type, daemon_id) = j.split('.', 1)
+ unit_name = get_unit_name(fsid, daemon_type, daemon_id)
+ (enabled, state, _) = check_unit(self.ctx, unit_name)
+ daemons[j] = {
+ 'style': 'cephadm:v1',
+ 'systemd_unit': unit_name,
+ 'enabled': 'true' if enabled else 'false',
+ 'state': state,
+ }
+ c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
+ container_id: Optional[str] = None
+ for name in (c.cname, c.old_cname):
+ if name in name_id_mapping:
+ container_id = name_id_mapping[name]
+ break
+ daemons[j]['container_id'] = container_id
+ if container_id:
+ daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
+ return daemons
+
+ def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
+ # map container names to ids from ps output
+ name_id_mapping = {} # type: Dict[str, str]
+ if not code:
+ for line in out.splitlines():
+ container_id, name = line.split(',', 1)
+ name_id_mapping[name] = container_id
+ return name_id_mapping
+
+ def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
+ if not self.cached_ls_values:
+ logger.info('No cached ls output. Running full daemon ls')
+ ls = list_daemons(self.ctx)
+ for d in ls:
+ self.cached_ls_values[d['name']] = d
+ return (ls, True)
+ else:
+ ls_subset = self._daemon_ls_subset()
+ need_full_ls = False
+ state_change = False
+ if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
+ # case for a new daemon in ls or an old daemon no longer appearing.
+ # If that happens we need a full ls
+ logger.info('Change detected in state of daemons. Running full daemon ls')
+ self.cached_ls_values = {}
+ ls = list_daemons(self.ctx)
+ for d in ls:
+ self.cached_ls_values[d['name']] = d
+ return (ls, True)
+ for daemon, info in self.cached_ls_values.items():
+ if info['style'] == 'legacy':
+ # for legacy containers, ls_subset just grabs all the info
+ self.cached_ls_values[daemon] = ls_subset[daemon]
+ else:
+ if info['container_id'] != ls_subset[daemon]['container_id']:
+ # case for container id having changed. We need full ls as
+ # info we didn't grab like version and start time could have changed
+ need_full_ls = True
+ break
+
+ # we want to know if a daemon's state changed because in those
+ # cases we want to report back quicker
+ if (
+ self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
+ or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
+ ):
+ state_change = True
+ # if we reach here, container id matched. Update the few values we do track
+ # from ls subset: state, enabled, memory_usage.
+ self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
+ self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
+ if 'memory_usage' in ls_subset[daemon]:
+ self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
+ if need_full_ls:
+ logger.info('Change detected in state of daemons. Running full daemon ls')
+ ls = list_daemons(self.ctx)
+ self.cached_ls_values = {}
+ for d in ls:
+ self.cached_ls_values[d['name']] = d
+ return (ls, True)
+ else:
+ ls = [info for daemon, info in self.cached_ls_values.items()]
+ return (ls, state_change)
+
+
+class AgentGatherer(Thread):
+ def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
+ self.agent = agent
+ self.func = func
+ self.gatherer_type = gatherer_type
+ self.ack = initial_ack
+ self.event = Event()
+ self.data: Any = None
+ self.stop = False
+ self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
+ self.recent_iteration_index: int = 0
+ super(AgentGatherer, self).__init__(target=self.run)
+
+ def run(self) -> None:
+ while not self.stop:
+ try:
+ start_time = time.monotonic()
+
+ ack = self.agent.ack
+ change = False
+ try:
+ self.data, change = self.func()
+ except Exception as e:
+ logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
+ self.data = None
+ if ack != self.ack or change:
+ self.ack = ack
+ self.agent.wakeup()
+
+ end_time = time.monotonic()
+ run_time = datetime.timedelta(seconds=(end_time - start_time))
+ self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
+ self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
+ run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
+
+ self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
+ self.event.clear()
+ except Exception as e:
+ logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')
+
+ def shutdown(self) -> None:
+ self.stop = True
+
+ def wakeup(self) -> None:
+ self.event.set()
+
+ def update_func(self, func: Callable) -> None:
+ self.func = func
+
+
+def command_agent(ctx: CephadmContext) -> None:
+ agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)
+
+ if not os.path.isdir(agent.daemon_dir):
+ raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
+
+ agent.run()
+
+
+##################################
+
+@executes_early
+def command_version(ctx):
+ # type: (CephadmContext) -> int
+ import importlib
+
+ try:
+ vmod = importlib.import_module('_version')
+ except ImportError:
+ print('cephadm version UNKNOWN')
+ return 1
+ _unset = '<UNSET>'
+ print('cephadm version {0} ({1}) {2} ({3})'.format(
+ getattr(vmod, 'CEPH_GIT_NICE_VER', _unset),
+ getattr(vmod, 'CEPH_GIT_VER', _unset),
+ getattr(vmod, 'CEPH_RELEASE_NAME', _unset),
+ getattr(vmod, 'CEPH_RELEASE_TYPE', _unset),
+ ))
+ return 0
+
+##################################
+
+
+@default_image
+def command_pull(ctx):
+ # type: (CephadmContext) -> int
+
+ try:
+ _pull_image(ctx, ctx.image, ctx.insecure)
+ except UnauthorizedRegistryError:
+ err_str = 'Failed to pull container image. Check that host(s) are logged into the registry'
+ logger.debug(f'Pulling image for `command_pull` failed: {err_str}')
+ raise Error(err_str)
+ return command_inspect_image(ctx)
+
+
+def _pull_image(ctx, image, insecure=False):
+ # type: (CephadmContext, str, bool) -> None
+ logger.info('Pulling container image %s...' % image)
+
+ ignorelist = [
+ 'error creating read-write layer with ID',
+ 'net/http: TLS handshake timeout',
+ 'Digest did not match, expected',
+ ]
+
+ cmd = [ctx.container_engine.path, 'pull', image]
+ if isinstance(ctx.container_engine, Podman):
+ if insecure:
+ cmd.append('--tls-verify=false')
+
+ if os.path.exists('/etc/ceph/podman-auth.json'):
+ cmd.append('--authfile=/etc/ceph/podman-auth.json')
+ cmd_str = ' '.join(cmd)
+
+ for sleep_secs in [1, 4, 25]:
+ out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ if not ret:
+ return
+
+ if 'unauthorized' in err:
+ raise UnauthorizedRegistryError()
+
+ if not any(pattern in err for pattern in ignorelist):
+ raise Error('Failed command: %s' % cmd_str)
+
+ logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
+ time.sleep(sleep_secs)
+
+ raise Error('Failed command: %s: maximum retries reached' % cmd_str)
+
+##################################
+
+
+@require_image
+@infer_image
+def command_inspect_image(ctx):
+ # type: (CephadmContext) -> int
+ out, err, ret = call_throws(ctx, [
+ ctx.container_engine.path, 'inspect',
+ '--format', '{{.ID}},{{.RepoDigests}}',
+ ctx.image])
+ if ret:
+ return errno.ENOENT
+ info_from = get_image_info_from_inspect(out.strip(), ctx.image)
+
+ ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
+ info_from['ceph_version'] = ver
+
+ print(json.dumps(info_from, indent=4, sort_keys=True))
+ return 0
+
+
+def normalize_image_digest(digest: str) -> str:
+ """
+ Normal case:
+ >>> normalize_image_digest('ceph/ceph', 'docker.io')
+ 'docker.io/ceph/ceph'
+
+ No change:
+ >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io')
+ 'quay.ceph.io/ceph/ceph'
+
+ >>> normalize_image_digest('docker.io/ubuntu', 'docker.io')
+ 'docker.io/ubuntu'
+
+ >>> normalize_image_digest('localhost/ceph', 'docker.io')
+ 'localhost/ceph'
+ """
+ known_shortnames = [
+ 'ceph/ceph',
+ 'ceph/daemon',
+ 'ceph/daemon-base',
+ ]
+ for image in known_shortnames:
+ if digest.startswith(image):
+ return f'{DEFAULT_REGISTRY}/{digest}'
+ return digest
+
+
+def get_image_info_from_inspect(out, image):
+ # type: (str, str) -> Dict[str, Union[str,List[str]]]
+ if not out:
+ raise Error('inspect {}: empty result'.format(image))
+ image_id, digests = out.split(',', 1)
+ r = {
+ 'image_id': normalize_container_id(image_id)
+ } # type: Dict[str, Union[str,List[str]]]
+ if digests:
+ r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
+ return r
+
+##################################
+
+
+def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
+ """Determine whether the given string is a valid subnet
+
+ :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
+ :returns: return code, IP version list of the subnets, and a msg describing any validation errors
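+
+ Examples:
+ >>> check_subnet('192.168.0.0/24')
+ (0, [4], '')
+ >>> check_subnet('10.0.0.1')
+ (1, [], '10.0.0.1 is not in CIDR format (address/netmask)')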
+ """
+
+ rc = 0
+ versions = set()
+ errors = []
+ subnet_list = subnets.split(',')
+ for subnet in subnet_list:
+ # ensure the format of the string is as expected address/netmask
+ subnet = subnet.strip()
+ if not re.search(r'\/\d+$', subnet):
+ rc = 1
+ errors.append(f'{subnet} is not in CIDR format (address/netmask)')
+ continue
+ try:
+ v = ipaddress.ip_network(subnet).version
+ versions.add(v)
+ except ValueError as e:
+ rc = 1
+ errors.append(f'{subnet} invalid: {str(e)}')
+
+ return rc, list(versions), ', '.join(errors)
+
+
+def unwrap_ipv6(address):
+ # type: (str) -> str
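+ """
+ >>> unwrap_ipv6('[::1]')
+ '::1'
+ >>> unwrap_ipv6('10.0.0.1')
+ '10.0.0.1'
+ """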
+ if address.startswith('[') and address.endswith(']'):
+ return address[1: -1]
+ return address
+
+
+def wrap_ipv6(address):
+ # type: (str) -> str
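+ """
+ >>> wrap_ipv6('::1')
+ '[::1]'
+ >>> wrap_ipv6('10.0.0.1')
+ '10.0.0.1'
+ >>> wrap_ipv6('ceph-host-1')
+ 'ceph-host-1'
+ """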
+
+ # We cannot assume the input is an unwrapped IPv6 address: if it is
+ # already wrapped, or is a hostname, ip_address() raises ValueError
+ # and we return the value unchanged.
+ try:
+ if ipaddress.ip_address(address).version == 6:
+ return f'[{address}]'
+ except ValueError:
+ pass
+
+ return address
+
+
+def is_ipv6(address):
+ # type: (str) -> bool
+ address = unwrap_ipv6(address)
+ try:
+ return ipaddress.ip_address(address).version == 6
+ except ValueError:
+ logger.warning('Address: {} is not a valid IP address'.format(address))
+ return False
+
+
+def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
+ """Determine if the ip_addr belongs to any of the subnets list."""
+ subnet_list = [x.strip() for x in subnets.split(',')]
+ for subnet in subnet_list:
+ ip_address = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
+ if ipaddress.ip_address(ip_address) in ipaddress.ip_network(subnet):
+ return True
+ return False
+
+
+def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
+ """Parse mon-addrv param into a list of mon end points."""
+ r = re.compile(r':(\d+)$')
+ addrv_args = []
+ addr_arg = addrv_arg
+ if addr_arg[0] != '[' or addr_arg[-1] != ']':
+ raise Error(f'--mon-addrv value {addr_arg} must use square brackets')
+
+ for addr in addr_arg[1: -1].split(','):
+ hasport = r.findall(addr)
+ if not hasport:
+ raise Error(f'--mon-addrv value {addr_arg} must include port number')
+ port_str = hasport[0]
+ addr = re.sub(r'^v\d+:', '', addr) # strip off v1: or v2: prefix
+ base_ip = addr[0:-(len(port_str)) - 1]
+ addrv_args.append(EndPoint(base_ip, int(port_str)))
+
+ return addrv_args
+
+
+def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
+ """Parse mon-ip param into a list of mon end points."""
+ r = re.compile(r':(\d+)$')
+ addrv_args = []
+ hasport = r.findall(mon_ip)
+ if hasport:
+ port_str = hasport[0]
+ base_ip = mon_ip[0:-(len(port_str)) - 1]
+ addrv_args.append(EndPoint(base_ip, int(port_str)))
+ else:
+ # No port provided: use fixed ports for ceph monitor
+ addrv_args.append(EndPoint(mon_ip, 3300))
+ addrv_args.append(EndPoint(mon_ip, 6789))
+
+ return addrv_args
+
+
+def build_addrv_params(addrv: List[EndPoint]) -> str:
+ """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
+ if len(addrv) > 2:
+ raise Error('Detected a local mon-addrv list with more than 2 entries.')
+ port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
+ addr_arg_list: List[str] = []
+ for ep in addrv:
+ if ep.port in port_to_ver:
+ ver = port_to_ver[ep.port]
+ else:
+ ver = 'v2' # default mon protocol version if port is not provided
+ logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
+ addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')
+
+ addr_arg = '[{0}]'.format(','.join(addr_arg_list))
+ return addr_arg
+
+
+def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
+ """Get mon public network from configuration file."""
+ cp = read_config(ctx.config)
+ if not cp.has_option('global', 'public_network'):
+ return None
+
+ # Ensure all public CIDR networks are valid
+ public_network = cp.get('global', 'public_network').strip('"').strip("'")
+ rc, _, err_msg = check_subnet(public_network)
+ if rc:
+ raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')
+
+ # Ensure all public CIDR networks are configured locally
+ configured_subnets = set([x.strip() for x in public_network.split(',')])
+ local_subnets = set([x[0] for x in list_networks(ctx).items()])
+ valid_public_net = False
+ for net in configured_subnets:
+ if net in local_subnets:
+ valid_public_net = True
+ else:
+ logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
+ if not valid_public_net:
+ raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')
+
+ # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
+ if ctx.mon_ip:
+ if not ip_in_subnets(ctx.mon_ip, public_network):
+ raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
+ elif ctx.mon_addrv:
+ addrv_args = parse_mon_addrv(ctx.mon_addrv)
+ for addrv in addrv_args:
+ if not ip_in_subnets(addrv.ip, public_network):
+ raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')
+
+ logger.debug(f'Using mon public network from configuration file {public_network}')
+ return public_network
+
+
+def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
+ """Infer mon public network from local network."""
+ # Make sure IP is configured locally, and then figure out the CIDR network
+ mon_networks = []
+ for net, ifaces in list_networks(ctx).items():
+ # build local_ips list for the specified network
+ local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
+ for _, ls in ifaces.items():
+ local_ips.extend([ipaddress.ip_address(ip) for ip in ls])
+
+ # check if any of mon ips belong to this net
+ for mon_ep in mon_eps:
+ try:
+ if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
+ mon_networks.append(net)
+ logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
+ except ValueError as e:
+ logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')
+
+ if not mon_networks:
+ raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
+ else:
+ logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')
+
+ mon_networks = list(set(mon_networks)) # remove duplicates
+ return ','.join(mon_networks)
+
+
+def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
+ """Get mon public network configuration."""
+ ipv6 = False
+ addrv_args: List[EndPoint] = []
+ mon_addrv: str = '' # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]
+
+ if ctx.mon_ip:
+ ipv6 = is_ipv6(ctx.mon_ip)
+ if ipv6:
+ ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
+ addrv_args = parse_mon_ip(ctx.mon_ip)
+ mon_addrv = build_addrv_params(addrv_args)
+ elif ctx.mon_addrv:
+ ipv6 = ctx.mon_addrv.count('[') > 1
+ addrv_args = parse_mon_addrv(ctx.mon_addrv)
+ mon_addrv = ctx.mon_addrv
+ else:
+ raise Error('must specify --mon-ip or --mon-addrv')
+
+ if addrv_args:
+ for end_point in addrv_args:
+ check_ip_port(ctx, end_point)
+
+ logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
+ mon_network = None
+ if not ctx.skip_mon_network:
+ mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)
+
+ return (mon_addrv, ipv6, mon_network)
+
+
+def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
+ # the cluster network may not exist on this node, so all we can do is
+ # validate that the address given is a valid IPv4 or IPv6 subnet
+ ipv6_cluster_network = False
+ cp = read_config(ctx.config)
+ cluster_network = ctx.cluster_network
+ if cluster_network is None and cp.has_option('global', 'cluster_network'):
+ cluster_network = cp.get('global', 'cluster_network').strip('"').strip("'")
+
+ if cluster_network:
+ cluster_nets = set([x.strip() for x in cluster_network.split(',')])
+ local_subnets = set([x[0] for x in list_networks(ctx).items()])
+ for net in cluster_nets:
+ if net not in local_subnets:
+ logger.warning(f'The cluster CIDR network {net} is not configured locally.')
+
+ rc, versions, err_msg = check_subnet(cluster_network)
+ if rc:
+ raise Error(f'Invalid --cluster-network parameter: {err_msg}')
+ ipv6_cluster_network = True if 6 in versions else False
+ else:
+ logger.info('Internal network (--cluster-network) has not '
+ 'been provided, OSD replication will default to '
+ 'the public_network')
+
+ return cluster_network, ipv6_cluster_network
+
+
+def create_initial_keys(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ mgr_id: str
+) -> Tuple[str, str, str, Any, Any]: # type: ignore
+
+ _image = ctx.image
+
+ # create some initial keys
+ logger.info('Creating initial keys...')
+ mon_key = CephContainer(
+ ctx,
+ image=_image,
+ entrypoint='/usr/bin/ceph-authtool',
+ args=['--gen-print-key'],
+ ).run().strip()
+ admin_key = CephContainer(
+ ctx,
+ image=_image,
+ entrypoint='/usr/bin/ceph-authtool',
+ args=['--gen-print-key'],
+ ).run().strip()
+ mgr_key = CephContainer(
+ ctx,
+ image=_image,
+ entrypoint='/usr/bin/ceph-authtool',
+ args=['--gen-print-key'],
+ ).run().strip()
+
+ keyring = ('[mon.]\n'
+ '\tkey = %s\n'
+ '\tcaps mon = allow *\n'
+ '[client.admin]\n'
+ '\tkey = %s\n'
+ '\tcaps mon = allow *\n'
+ '\tcaps mds = allow *\n'
+ '\tcaps mgr = allow *\n'
+ '\tcaps osd = allow *\n'
+ '[mgr.%s]\n'
+ '\tkey = %s\n'
+ '\tcaps mon = profile mgr\n'
+ '\tcaps mds = allow *\n'
+ '\tcaps osd = allow *\n'
+ % (mon_key, admin_key, mgr_id, mgr_key))
+
+ admin_keyring = write_tmp('[client.admin]\n'
+ '\tkey = ' + admin_key + '\n',
+ uid, gid)
+
+ # tmp keyring file
+ bootstrap_keyring = write_tmp(keyring, uid, gid)
+ return (mon_key, mgr_key, admin_key,
+ bootstrap_keyring, admin_keyring)
+
+
+def create_initial_monmap(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ fsid: str,
+ mon_id: str, mon_addr: str
+) -> Any:
+ logger.info('Creating initial monmap...')
+ monmap = write_tmp('', 0, 0)
+ out = CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/bin/monmaptool',
+ args=[
+ '--create',
+ '--clobber',
+ '--fsid', fsid,
+ '--addv', mon_id, mon_addr,
+ '/tmp/monmap'
+ ],
+ volume_mounts={
+ monmap.name: '/tmp/monmap:z',
+ },
+ ).run()
+ logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')
+
+ # pass monmap file to ceph user for use by ceph-mon --mkfs below
+ os.fchown(monmap.fileno(), uid, gid)
+ return monmap
+
+
+def prepare_create_mon(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ fsid: str, mon_id: str,
+ bootstrap_keyring_path: str,
+ monmap_path: str
+) -> Tuple[str, str]:
+ logger.info('Creating mon...')
+ create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
+ mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
+ log_dir = get_log_dir(fsid, ctx.log_dir)
+ out = CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/bin/ceph-mon',
+ args=[
+ '--mkfs',
+ '-i', mon_id,
+ '--fsid', fsid,
+ '-c', '/dev/null',
+ '--monmap', '/tmp/monmap',
+ '--keyring', '/tmp/keyring',
+ ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
+ volume_mounts={
+ log_dir: '/var/log/ceph:z',
+ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
+ bootstrap_keyring_path: '/tmp/keyring:z',
+ monmap_path: '/tmp/monmap:z',
+ },
+ ).run()
+    logger.debug(f'create mon.{mon_id} output: {out}')
+ return (mon_dir, log_dir)
+
+
+def create_mon(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ fsid: str, mon_id: str
+) -> None:
+ mon_c = get_container(ctx, fsid, 'mon', mon_id)
+ ctx.meta_properties = {'service_name': 'mon'}
+ deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
+ config=None, keyring=None)
+
+
+def wait_for_mon(
+ ctx: CephadmContext,
+ mon_id: str, mon_dir: str,
+ admin_keyring_path: str, config_path: str
+) -> None:
+ logger.info('Waiting for mon to start...')
+ c = CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/bin/ceph',
+ args=[
+ 'status'],
+ volume_mounts={
+ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
+ admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
+ config_path: '/etc/ceph/ceph.conf:z',
+ },
+ )
+
+ # wait for the service to become available
+ def is_mon_available():
+ # type: () -> bool
+ timeout = ctx.timeout if ctx.timeout else 60 # seconds
+ out, err, ret = call(ctx, c.run_cmd(),
+ desc=c.entrypoint,
+ timeout=timeout,
+ verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ return ret == 0
+
+ is_available(ctx, 'mon', is_mon_available)
+
+
+def create_mgr(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ fsid: str, mgr_id: str, mgr_key: str,
+ config: str, clifunc: Callable
+) -> None:
+ logger.info('Creating mgr...')
+ mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
+ mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
+    # Note: the default port used by the mgr's Prometheus exporter is opened in the firewall
+ ctx.meta_properties = {'service_name': 'mgr'}
+ endpoints = [EndPoint('0.0.0.0', 9283), EndPoint('0.0.0.0', 8765)]
+ if not ctx.skip_monitoring_stack:
+ endpoints.append(EndPoint('0.0.0.0', 8443))
+ deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
+ config=config, keyring=mgr_keyring, endpoints=endpoints)
+
+ # wait for the service to become available
+ logger.info('Waiting for mgr to start...')
+
+ def is_mgr_available():
+ # type: () -> bool
+ timeout = ctx.timeout if ctx.timeout else 60 # seconds
+ try:
+ out = clifunc(['status', '-f', 'json-pretty'],
+ timeout=timeout,
+ verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ j = json.loads(out)
+ return j.get('mgrmap', {}).get('available', False)
+ except Exception as e:
+ logger.debug('status failed: %s' % e)
+ return False
+
+ is_available(ctx, 'mgr', is_mgr_available)
+
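+# For illustration, is_mgr_available() above keys off `ceph status -f
+# json-pretty` output such as (abridged, hypothetical):
+#
+#     {"mgrmap": {"available": true, ...}, ...}
+#
+# i.e. the mgr is considered up once mgrmap.available is true.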
+
+def prepare_ssh(
+ ctx: CephadmContext,
+ cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+ cli(['cephadm', 'set-user', ctx.ssh_user])
+
+ if ctx.ssh_config:
+ logger.info('Using provided ssh config...')
+ mounts = {
+ pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
+ }
+ cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
+
+ if ctx.ssh_private_key and ctx.ssh_public_key:
+ logger.info('Using provided ssh keys...')
+ mounts = {
+ pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
+ pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
+ }
+ cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
+ cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
+ ssh_pub = cli(['cephadm', 'get-pub-key'])
+ authorize_ssh_key(ssh_pub, ctx.ssh_user)
+ elif ctx.ssh_private_key and ctx.ssh_signed_cert:
+ logger.info('Using provided ssh private key and signed cert ...')
+ mounts = {
+ pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
+ pathify(ctx.ssh_signed_cert.name): '/tmp/cephadm-ssh-key-cert.pub:z'
+ }
+ cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
+ cli(['cephadm', 'set-signed-cert', '-i', '/tmp/cephadm-ssh-key-cert.pub'], extra_mounts=mounts)
+ else:
+ logger.info('Generating ssh key...')
+ cli(['cephadm', 'generate-key'])
+ ssh_pub = cli(['cephadm', 'get-pub-key'])
+ with open(ctx.output_pub_ssh_key, 'w') as f:
+ f.write(ssh_pub)
+ logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)
+ authorize_ssh_key(ssh_pub, ctx.ssh_user)
+
+ host = get_hostname()
+ logger.info('Adding host %s...' % host)
+ try:
+ args = ['orch', 'host', 'add', host]
+ if ctx.mon_ip:
+ args.append(unwrap_ipv6(ctx.mon_ip))
+ elif ctx.mon_addrv:
+ addrv_args = parse_mon_addrv(ctx.mon_addrv)
+ args.append(unwrap_ipv6(addrv_args[0].ip))
+ cli(args)
+ except RuntimeError as e:
+ raise Error('Failed to add host <%s>: %s' % (host, e))
+
+ for t in ['mon', 'mgr']:
+ if not ctx.orphan_initial_daemons:
+ logger.info('Deploying %s service with default placement...' % t)
+ cli(['orch', 'apply', t])
+ else:
+ logger.info('Deploying unmanaged %s service...' % t)
+ cli(['orch', 'apply', t, '--unmanaged'])
+
+ if not ctx.orphan_initial_daemons:
+ logger.info('Deploying crash service with default placement...')
+ cli(['orch', 'apply', 'crash'])
+
+ if not ctx.skip_monitoring_stack:
+ for t in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']:
+ logger.info('Deploying %s service with default placement...' % t)
+ try:
+ cli(['orch', 'apply', t])
+ except RuntimeError:
+ ctx.error_code = -errno.EINVAL
+ logger.error(f'Failed to apply service type {t}. '
+ 'Perhaps the ceph version being bootstrapped does not support it')
+
+ if ctx.with_centralized_logging:
+ for t in ['loki', 'promtail']:
+ logger.info('Deploying %s service with default placement...' % t)
+ try:
+ cli(['orch', 'apply', t])
+ except RuntimeError:
+ ctx.error_code = -errno.EINVAL
+ logger.error(f'Failed to apply service type {t}. '
+ 'Perhaps the ceph version being bootstrapped does not support it')
+
+
+def enable_cephadm_mgr_module(
+ cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+ logger.info('Enabling cephadm module...')
+ cli(['mgr', 'module', 'enable', 'cephadm'])
+ wait_for_mgr_restart()
+ logger.info('Setting orchestrator backend to cephadm...')
+ cli(['orch', 'set', 'backend', 'cephadm'])
+
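+# The two calls above amount to what an administrator would run by hand:
+#
+#     ceph mgr module enable cephadm
+#     ceph orch set backend cephadm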
+
+def prepare_dashboard(
+ ctx: CephadmContext,
+ uid: int, gid: int,
+ cli: Callable, wait_for_mgr_restart: Callable
+) -> None:
+
+    # Configure SSL port (cephadm only allows configuring the dashboard SSL port);
+    # if the user does not want to use SSL they can change this setting once the cluster is up
+ cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])
+
+ # configuring dashboard parameters
+ logger.info('Enabling the dashboard module...')
+ cli(['mgr', 'module', 'enable', 'dashboard'])
+ wait_for_mgr_restart()
+
+ # dashboard crt and key
+ if ctx.dashboard_key and ctx.dashboard_crt:
+ logger.info('Using provided dashboard certificate...')
+ mounts = {
+ pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
+ pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
+ }
+ cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
+ cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
+ else:
+ logger.info('Generating a dashboard self-signed certificate...')
+ cli(['dashboard', 'create-self-signed-cert'])
+
+ logger.info('Creating initial admin user...')
+ password = ctx.initial_dashboard_password or generate_password()
+ tmp_password_file = write_tmp(password, uid, gid)
+ cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
+ if not ctx.dashboard_password_noupdate:
+ cmd.append('--pwd-update-required')
+ cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
+ logger.info('Fetching dashboard port number...')
+ out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
+ port = int(out)
+
+ # Open dashboard port
+ if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
+ fw = Firewalld(ctx)
+ fw.open_ports([port])
+ fw.apply_rules()
+
+ logger.info('Ceph Dashboard is now available at:\n\n'
+ '\t URL: https://%s:%s/\n'
+ '\t User: %s\n'
+ '\tPassword: %s\n' % (
+ get_fqdn(), port,
+ ctx.initial_dashboard_user,
+ password))
+
+
+def prepare_bootstrap_config(
+ ctx: CephadmContext,
+ fsid: str, mon_addr: str, image: str
+) -> str:
+
+ cp = read_config(ctx.config)
+ if not cp.has_section('global'):
+ cp.add_section('global')
+ cp.set('global', 'fsid', fsid)
+ cp.set('global', 'mon_host', mon_addr)
+ cp.set('global', 'container_image', image)
+
+ if not cp.has_section('mon'):
+ cp.add_section('mon')
+ if (
+ not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
+ and not cp.has_option('mon', 'auth allow insecure global id reclaim')
+ ):
+ cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
+
+ if ctx.single_host_defaults:
+ logger.info('Adjusting default settings to suit single-host cluster...')
+ # replicate across osds, not hosts
+ if (
+ not cp.has_option('global', 'osd_crush_chooseleaf_type')
+ and not cp.has_option('global', 'osd crush chooseleaf type')
+ ):
+ cp.set('global', 'osd_crush_chooseleaf_type', '0')
+ # replica 2x
+ if (
+ not cp.has_option('global', 'osd_pool_default_size')
+ and not cp.has_option('global', 'osd pool default size')
+ ):
+ cp.set('global', 'osd_pool_default_size', '2')
+ # disable mgr standby modules (so we can colocate multiple mgrs on one host)
+ if not cp.has_section('mgr'):
+ cp.add_section('mgr')
+ if (
+ not cp.has_option('mgr', 'mgr_standby_modules')
+ and not cp.has_option('mgr', 'mgr standby modules')
+ ):
+ cp.set('mgr', 'mgr_standby_modules', 'false')
+ if ctx.log_to_file:
+ cp.set('global', 'log_to_file', 'true')
+ cp.set('global', 'log_to_stderr', 'false')
+ cp.set('global', 'log_to_journald', 'false')
+ cp.set('global', 'mon_cluster_log_to_file', 'true')
+ cp.set('global', 'mon_cluster_log_to_stderr', 'false')
+ cp.set('global', 'mon_cluster_log_to_journald', 'false')
+
+ cpf = StringIO()
+ cp.write(cpf)
+ config = cpf.getvalue()
+
+ if ctx.registry_json or ctx.registry_url:
+ command_registry_login(ctx)
+
+ return config
+
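+# For illustration, with hypothetical values the config rendered above looks
+# like:
+#
+#     [global]
+#     fsid = 2d2fd136-6df1-11ee-8c99-0242ac120002
+#     mon_host = [v2:10.0.0.1:3300,v1:10.0.0.1:6789]
+#     container_image = quay.io/ceph/ceph:v18
+#
+#     [mon]
+#     auth_allow_insecure_global_id_reclaim = false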
+
+def finish_bootstrap_config(
+ ctx: CephadmContext,
+ fsid: str,
+ config: str,
+ mon_id: str, mon_dir: str,
+ mon_network: Optional[str], ipv6: bool,
+ cli: Callable,
+ cluster_network: Optional[str], ipv6_cluster_network: bool
+) -> None:
+ if not ctx.no_minimize_config:
+ logger.info('Assimilating anything we can from ceph.conf...')
+ cli([
+ 'config', 'assimilate-conf',
+ '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
+ ], {
+ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
+ })
+ logger.info('Generating new minimal ceph.conf...')
+ cli([
+ 'config', 'generate-minimal-conf',
+ '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
+ ], {
+ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
+ })
+ # re-read our minimized config
+ with open(mon_dir + '/config', 'r') as f:
+ config = f.read()
+ logger.info('Restarting the monitor...')
+ call_throws(ctx, [
+ 'systemctl',
+ 'restart',
+ get_unit_name(fsid, 'mon', mon_id)
+ ])
+ elif 'image' in ctx and ctx.image:
+ # we still want to assimilate the given container image if provided
+ cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])
+
+ if mon_network:
+ cp = read_config(ctx.config)
+ cfg_section = 'global' if cp.has_option('global', 'public_network') else 'mon'
+ logger.info(f'Setting public_network to {mon_network} in {cfg_section} config section')
+ cli(['config', 'set', cfg_section, 'public_network', mon_network])
+
+ if cluster_network:
+ logger.info(f'Setting cluster_network to {cluster_network}')
+ cli(['config', 'set', 'global', 'cluster_network', cluster_network])
+
+ if ipv6 or ipv6_cluster_network:
+ logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
+ cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
+
+ with open(ctx.output_config, 'w') as f:
+ f.write(config)
+ logger.info('Wrote config to %s' % ctx.output_config)
+
+
+def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
+    # overall goal of this function is to go through an applied spec and find
+    # the hostname (and addr, if provided) for each host spec in the applied spec.
+ # Generally, we should be able to just pass the spec to the mgr module where
+ # proper yaml parsing can happen, but for host specs in particular we want to
+ # be able to distribute ssh keys, which requires finding the hostname (and addr
+ # if possible) for each potential host spec in the applied spec.
+
+ specs: List[List[str]] = []
+ current_spec: List[str] = []
+ for line in f:
+ if re.search(r'^---\s+', line):
+ if current_spec:
+ specs.append(current_spec)
+ current_spec = []
+ else:
+ line = line.strip()
+ if line:
+ current_spec.append(line)
+ if current_spec:
+ specs.append(current_spec)
+
+ host_specs: List[List[str]] = []
+ for spec in specs:
+        for line in spec:
+            if 'service_type' in line:
+                try:
+                    _, type = line.split(':')
+                    type = type.strip()
+                    if type == 'host':
+                        host_specs.append(spec)
+                except ValueError as e:
+                    spec_str = '\n'.join(spec)
+                    logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
+                break
+        else:
+            spec_str = '\n'.join(spec)
+            logger.error(f'Failed to find service_type within spec:\n{spec_str}')
+
+ host_dicts = []
+ for s in host_specs:
+ host_dict = _extract_host_info_from_spec(s)
+ # if host_dict is empty here, we failed to pull the hostname
+ # for the host from the spec. This should have already been logged
+ # so at this point we just don't want to include it in our output
+ if host_dict:
+ host_dicts.append(host_dict)
+
+ return host_dicts
+
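+# For illustration, feeding this function a spec such as:
+#
+#     service_type: host
+#     hostname: node1
+#     addr: 10.0.0.1
+#     ---
+#     service_type: osd
+#     ...
+#
+# returns [{'hostname': 'node1', 'addr': '10.0.0.1'}]; non-host documents
+# are skipped here and applied later through the mgr module.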
+
+def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
+    # note: for our purposes here, we only really want the hostname
+ # and address of the host from each of these specs in order to
+ # be able to distribute ssh keys. We will later apply the spec
+ # through the mgr module where proper yaml parsing can be done
+ # The returned dicts from this function should only contain
+ # one or two entries, one (required) for hostname, one (optional) for addr
+ # {
+ # hostname: <hostname>
+ # addr: <ip-addr>
+ # }
+ # if we fail to find the hostname, an empty dict is returned
+
+ host_dict = {} # type: Dict[str, str]
+ for line in host_spec:
+ for field in ['hostname', 'addr']:
+ if field in line:
+ try:
+ _, field_value = line.split(':')
+ field_value = field_value.strip()
+ host_dict[field] = field_value
+ except ValueError as e:
+ spec_str = '\n'.join(host_spec)
+ logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}')
+
+ if 'hostname' not in host_dict:
+ spec_str = '\n'.join(host_spec)
+ logger.error(f'Could not find hostname in host spec:\n{spec_str}')
+ return {}
+ return host_dict
+
+
+def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
+ # copy ssh key to hosts in host spec (used for apply spec)
+ ssh_key = CEPH_DEFAULT_PUBKEY
+ if ctx.ssh_public_key:
+ ssh_key = ctx.ssh_public_key.name
+
+ if bootstrap_hostname != host_info['hostname']:
+ if 'addr' in host_info:
+ addr = host_info['addr']
+ else:
+ addr = host_info['hostname']
+ out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
+ if code:
+ logger.error('\nCopying ssh key to host %s at address %s failed!\n' % (host_info['hostname'], addr))
+ return 1
+ else:
+ logger.info('Added ssh key to host %s at address %s' % (host_info['hostname'], addr))
+ return 0
+
+
+def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
+ """Save cluster configuration to the per fsid directory """
+ def copy_file(src: str, dst: str) -> None:
+ if src:
+ shutil.copyfile(src, dst)
+
+ conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
+ makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
+ if os.path.exists(conf_dir):
+ logger.info(f'Saving cluster configuration to {conf_dir} directory')
+ copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
+ copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
+ # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
+ if (os.path.exists(ctx.output_pub_ssh_key)):
+ copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
+ else:
+ logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
+
+
+def rollback(func: FuncT) -> FuncT:
+ """
+ """
+ @wraps(func)
+ def _rollback(ctx: CephadmContext) -> Any:
+ try:
+ return func(ctx)
+ except ClusterAlreadyExists:
+ # another cluster with the provided fsid already exists: don't remove.
+ raise
+ except (KeyboardInterrupt, Exception) as e:
+ logger.error(f'{type(e).__name__}: {e}')
+ if ctx.cleanup_on_failure:
+ logger.info('\n\n'
+ '\t***************\n'
+                        '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically;\n'
+                        '\tto disable this behaviour, do not pass the --cleanup-on-failure flag. In case of any previous\n'
+                        '\tbroken installation, use the following command to completely delete the broken cluster:\n\n'
+ '\t> cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
+ '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
+ '\t***************\n\n')
+ _rm_cluster(ctx, keep_logs=False, zap_osds=False)
+ else:
+ logger.info('\n\n'
+ '\t***************\n'
+                        '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically; to change\n'
+                        '\tthis behaviour, pass the --cleanup-on-failure flag. To remove this broken cluster manually, please run:\n\n'
+                        f'\t > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
+                        '\tin case of any previous broken installation, use the rm-cluster command to delete the broken cluster:\n\n'
+ '\t > cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
+ '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
+ '\t***************\n\n')
+ raise
+ return cast(FuncT, _rollback)
+
+
+@rollback
+@default_image
+def command_bootstrap(ctx):
+ # type: (CephadmContext) -> int
+
+ ctx.error_code = 0
+
+ if not ctx.output_config:
+ ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
+ if not ctx.output_keyring:
+ ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
+ if not ctx.output_pub_ssh_key:
+ ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)
+
+ if (
+ (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key))
+ and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))
+ ):
+        raise Error('--ssh-private-key must be passed with either --ssh-public-key in the case of standard pubkey '
+                    'authentication or with --ssh-signed-cert in the case of CA signed keys, or not provided at all.')
+
+ if (bool(ctx.ssh_public_key) and bool(ctx.ssh_signed_cert)):
+        raise Error('--ssh-public-key and --ssh-signed-cert are mutually exclusive. --ssh-public-key is intended '
+                    'for standard pubkey authentication where the public key is set as an authorized key on cluster hosts. '
+ '--ssh-signed-cert is intended for the CA signed keys use case where cluster hosts are configured to trust '
+ 'a CA pub key and authentication during SSH is done by authenticating the signed cert, requiring no '
+ 'public key to be installed on the cluster hosts.')
+
+ if ctx.fsid:
+ data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
+ if os.path.exists(data_dir_base):
+ raise ClusterAlreadyExists(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
+ else:
+ logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')
+
+ # initial vars
+ ctx.fsid = ctx.fsid or make_fsid()
+ fsid = ctx.fsid
+ if not is_fsid(fsid):
+ raise Error('not an fsid: %s' % fsid)
+
+ # verify output files
+ for f in [ctx.output_config, ctx.output_keyring, ctx.output_pub_ssh_key]:
+ if not ctx.allow_overwrite:
+ if os.path.exists(f):
+ raise ClusterAlreadyExists('%s already exists; delete or pass --allow-overwrite to overwrite' % f)
+ dirname = os.path.dirname(f)
+ if dirname and not os.path.exists(dirname):
+ fname = os.path.basename(f)
+ logger.info(f'Creating directory {dirname} for {fname}')
+ try:
+ # use makedirs to create intermediate missing dirs
+ os.makedirs(dirname, 0o755)
+ except PermissionError:
+                raise Error(f'Unable to create {dirname} due to permissions failure. Retry as root or with sudo, or preallocate the directory.')
+
+ (user_conf, _) = get_config_and_keyring(ctx)
+
+ if ctx.ssh_user != 'root':
+ check_ssh_connectivity(ctx)
+
+ if not ctx.skip_prepare_host:
+ command_prepare_host(ctx)
+ else:
+ logger.info('Skip prepare_host')
+
+ logger.info('Cluster fsid: %s' % fsid)
+ hostname = get_hostname()
+ if '.' in hostname and not ctx.allow_fqdn_hostname:
+ raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
+ mon_id = ctx.mon_id or get_short_hostname()
+ mgr_id = ctx.mgr_id or generate_service_id()
+
+ lock = FileLock(ctx, fsid)
+ lock.acquire()
+
+ (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
+ cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)
+
+ config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)
+
+ if not ctx.skip_pull:
+ try:
+ _pull_image(ctx, ctx.image)
+ except UnauthorizedRegistryError:
+ err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
+ logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
+ raise Error(err_str)
+
+ image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
+ logger.info(f'Ceph version: {image_ver}')
+
+ if not ctx.allow_mismatched_release:
+ image_release = image_ver.split()[4]
+ if image_release not in \
+ [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
+ raise Error(
+ f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
+ ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
+ )
+
+ logger.info('Extracting ceph user uid/gid from container image...')
+ (uid, gid) = extract_uid_gid(ctx)
+
+ # create some initial keys
+ (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)
+
+ monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
+ (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
+ bootstrap_keyring.name, monmap.name)
+
+ with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
+ f.write(config)
+
+ make_var_run(ctx, fsid, uid, gid)
+ create_mon(ctx, uid, gid, fsid, mon_id)
+
+ # config to issue various CLI commands
+ tmp_config = write_tmp(config, uid, gid)
+
+ # a CLI helper to reduce our typing
+ def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
+ # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
+ mounts = {
+ log_dir: '/var/log/ceph:z',
+ admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
+ tmp_config.name: '/etc/ceph/ceph.conf:z',
+ }
+ for k, v in extra_mounts.items():
+ mounts[k] = v
+ timeout = timeout or ctx.timeout
+ return CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='/usr/bin/ceph',
+ args=cmd,
+ volume_mounts=mounts,
+ ).run(timeout=timeout, verbosity=verbosity)
+
+ wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)
+
+ finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
+ mon_network, ipv6, cli,
+ cluster_network, ipv6_cluster_network)
+
+ # output files
+ with write_new(ctx.output_keyring) as f:
+ f.write('[client.admin]\n'
+ '\tkey = ' + admin_key + '\n')
+ logger.info('Wrote keyring to %s' % ctx.output_keyring)
+
+ # create mgr
+ create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
+
+ if user_conf:
+ # user given config settings were already assimilated earlier
+ # but if the given settings contained any attributes in
+ # the mgr (e.g. mgr/cephadm/container_image_prometheus)
+ # they don't seem to be stored if there isn't a mgr yet.
+ # Since re-assimilating the same conf settings should be
+ # idempotent we can just do it again here.
+ with tempfile.NamedTemporaryFile(buffering=0) as tmp:
+ tmp.write(user_conf.encode('utf-8'))
+ cli(['config', 'assimilate-conf',
+ '-i', '/var/lib/ceph/user.conf'],
+ {tmp.name: '/var/lib/ceph/user.conf:z'})
+
+ # wait for mgr to restart (after enabling a module)
+ def wait_for_mgr_restart() -> None:
+ # first get latest mgrmap epoch from the mon. try newer 'mgr
+ # stat' command first, then fall back to 'mgr dump' if
+ # necessary
+ try:
+ j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
+ except Exception:
+ j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
+ epoch = j['epoch']
+
+ # wait for mgr to have it
+ logger.info('Waiting for the mgr to restart...')
+
+ def mgr_has_latest_epoch():
+ # type: () -> bool
+ try:
+ out = cli(['tell', 'mgr', 'mgr_status'])
+ j = json.loads(out)
+ return j['mgrmap_epoch'] >= epoch
+ except Exception as e:
+ logger.debug('tell mgr mgr_status failed: %s' % e)
+ return False
+ is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)
+
+ enable_cephadm_mgr_module(cli, wait_for_mgr_restart)
+
+ # ssh
+ if not ctx.skip_ssh:
+ prepare_ssh(ctx, cli, wait_for_mgr_restart)
+
+ if ctx.registry_url and ctx.registry_username and ctx.registry_password:
+ registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
+ cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])
+
+ cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])
+
+ if not ctx.skip_dashboard:
+ prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
+
+ if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
+ logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
+ try:
+ cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
+ cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
+ except Exception:
+ logger.info('Unable to set up "admin" label; assuming older version of Ceph')
+
+ if ctx.apply_spec:
+ logger.info('Applying %s to cluster' % ctx.apply_spec)
+ # copy ssh key to hosts in spec file
+ with open(ctx.apply_spec) as f:
+ host_dicts = _extract_host_info_from_applied_spec(f)
+ for h in host_dicts:
+ if ctx.ssh_signed_cert:
+ logger.info('Key distribution is not supported for signed CA key setups. Skipping ...')
+ else:
+ _distribute_ssh_keys(ctx, h, hostname)
+
+ mounts = {}
+ mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
+ try:
+ out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
+ logger.info(out)
+ except Exception:
+ ctx.error_code = -errno.EINVAL
+ logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)
+
+ save_cluster_config(ctx, uid, gid, fsid)
+
+ # enable autotune for osd_memory_target
+ logger.info('Enabling autotune for osd_memory_target')
+ cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
+
+ # Notify the Dashboard to show the 'Expand cluster' page on first log in.
+ cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
+
+    logger.info('You can access the Ceph CLI as follows in case of a multi-cluster or non-default config:\n\n'
+ '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
+ sys.argv[0],
+ fsid,
+ ctx.output_config,
+ ctx.output_keyring))
+
+ logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))
+
+ logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
+ '\tceph telemetry on\n\n'
+ 'For more information see:\n\n'
+ '\thttps://docs.ceph.com/en/latest/mgr/telemetry/\n')
+ logger.info('Bootstrap complete.')
+ return ctx.error_code
+
+##################################
+
+
+def command_registry_login(ctx: CephadmContext) -> int:
+ if ctx.registry_json:
+ logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
+ d = get_parm(ctx.registry_json)
+ if d.get('url') and d.get('username') and d.get('password'):
+ ctx.registry_url = d.get('url')
+ ctx.registry_username = d.get('username')
+ ctx.registry_password = d.get('password')
+ registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
+ else:
+            raise Error('json provided for custom registry login did not include all necessary fields. '
+                        'Please set up the json file as\n'
+ '{\n'
+ ' "url": "REGISTRY_URL",\n'
+ ' "username": "REGISTRY_USERNAME",\n'
+ ' "password": "REGISTRY_PASSWORD"\n'
+ '}\n')
+ elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
+ registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
+ else:
+        raise Error('Invalid custom registry arguments received. To log in to a custom registry, include the '
+                    '--registry-url, --registry-username and --registry-password '
+                    'options, or the --registry-json option')
+ return 0
+
+
+def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
+ logger.info('Logging into custom registry.')
+ try:
+ engine = ctx.container_engine
+ cmd = [engine.path, 'login',
+ '-u', username, '-p', password,
+ url]
+ if isinstance(engine, Podman):
+ cmd.append('--authfile=/etc/ceph/podman-auth.json')
+ out, _, _ = call_throws(ctx, cmd)
+ if isinstance(engine, Podman):
+ os.chmod('/etc/ceph/podman-auth.json', DEFAULT_MODE)
+ except Exception:
+ raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
+
+##################################
+
+
+def extract_uid_gid_monitoring(ctx, daemon_type):
+ # type: (CephadmContext, str) -> Tuple[int, int]
+
+ if daemon_type == 'prometheus':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
+ elif daemon_type == 'node-exporter':
+ uid, gid = 65534, 65534
+ elif daemon_type == 'grafana':
+ uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
+ elif daemon_type == 'loki':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
+ elif daemon_type == 'promtail':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
+ elif daemon_type == 'alertmanager':
+ uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
+ else:
+ raise Error('{} not implemented yet'.format(daemon_type))
+ return uid, gid
+
+
+def get_deployment_container(ctx: CephadmContext,
+ fsid: str, daemon_type: str, daemon_id: Union[int, str],
+ privileged: bool = False,
+ ptrace: bool = False,
+ container_args: Optional[List[str]] = None) -> 'CephContainer':
+ # wrapper for get_container specifically for containers made during the `cephadm deploy`
+ # command. Adds some extra things such as extra container args and custom config files
+ c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
+ if 'extra_container_args' in ctx and ctx.extra_container_args:
+ c.container_args.extend(ctx.extra_container_args)
+ if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
+ c.args.extend(ctx.extra_entrypoint_args)
+ ccfiles = fetch_custom_config_files(ctx)
+ if ccfiles:
+ mandatory_keys = ['mount_path', 'content']
+ for conf in ccfiles:
+ if all(k in conf for k in mandatory_keys):
+ mount_path = conf['mount_path']
+ file_path = os.path.join(
+ ctx.data_dir,
+ fsid,
+ 'custom_config_files',
+ f'{daemon_type}.{daemon_id}',
+ os.path.basename(mount_path)
+ )
+ c.volume_mounts[file_path] = mount_path
+ return c
+
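+# For illustration, a custom config file entry of the form (hypothetical):
+#
+#     {'mount_path': '/etc/myapp.conf', 'content': '...'}
+#
+# makes the rendered file
+#     <data_dir>/<fsid>/custom_config_files/<daemon_type>.<daemon_id>/myapp.conf
+# (data_dir is typically /var/lib/ceph) appear at /etc/myapp.conf inside the
+# container.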
+
+def get_deployment_type(ctx: CephadmContext, daemon_type: str, daemon_id: str) -> DeploymentType:
+ deployment_type: DeploymentType = DeploymentType.DEFAULT
+ if ctx.reconfig:
+ deployment_type = DeploymentType.RECONFIG
+ unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
+ (_, state, _) = check_unit(ctx, unit_name)
+ if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
+ # if reconfig was set, that takes priority over redeploy. If
+ # this is considered a fresh deployment at this stage,
+ # mark it as a redeploy to avoid port checking
+ if deployment_type == DeploymentType.DEFAULT:
+ deployment_type = DeploymentType.REDEPLOY
+
+ logger.info(f'{deployment_type.value} daemon {ctx.name} ...')
+
+ return deployment_type
+
+
+@default_image
+@deprecated_command
+def command_deploy(ctx):
+ # type: (CephadmContext) -> None
+ _common_deploy(ctx)
+
+
+def read_configuration_source(ctx: CephadmContext) -> Dict[str, Any]:
+ """Read a JSON configuration based on the `ctx.source` value."""
+ source = '-'
+ if 'source' in ctx and ctx.source:
+ source = ctx.source
+ if source == '-':
+ config_data = json.load(sys.stdin)
+ else:
+ with open(source, 'rb') as fh:
+ config_data = json.load(fh)
+ logger.debug('Loaded deploy configuration: %r', config_data)
+ return config_data
+
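+# For illustration, a minimal configuration read from stdin might look like
+# (hypothetical values; see apply_deploy_config_to_ctx for the accepted keys):
+#
+#     {
+#         "name": "grafana.host1",
+#         "image": "quay.io/ceph/ceph:v18",
+#         "fsid": "2d2fd136-6df1-11ee-8c99-0242ac120002",
+#         "meta": {"service_name": "grafana"},
+#         "params": {}
+#     }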
+
+def apply_deploy_config_to_ctx(
+ config_data: Dict[str, Any],
+ ctx: CephadmContext,
+) -> None:
+ """Bind properties taken from the config_data dictionary to our ctx,
+ similar to how cli options on `deploy` are bound to the context.
+ """
+ ctx.name = config_data['name']
+ image = config_data.get('image', '')
+ if image:
+ ctx.image = image
+ if 'fsid' in config_data:
+ ctx.fsid = config_data['fsid']
+ if 'meta' in config_data:
+ ctx.meta_properties = config_data['meta']
+ if 'config_blobs' in config_data:
+ ctx.config_blobs = config_data['config_blobs']
+
+ # many functions don't check that an attribute is set on the ctx
+ # (with getattr or the '__contains__' func on ctx).
+ # This reuses the defaults from the CLI options so we don't
+ # have to repeat things and they can stay in sync.
+ facade = ArgumentFacade()
+ _add_deploy_parser_args(facade)
+ facade.apply(ctx)
+ for key, value in config_data.get('params', {}).items():
+ if key not in facade.defaults:
+ logger.warning('unexpected parameter: %r=%r', key, value)
+ setattr(ctx, key, value)
+ update_default_image(ctx)
+ logger.debug('Determined image: %r', ctx.image)
+
+
+def command_deploy_from(ctx: CephadmContext) -> None:
+ """The deploy-from command is similar to deploy but sources nearly all
+ configuration parameters from an input JSON configuration file.
+ """
+ config_data = read_configuration_source(ctx)
+ apply_deploy_config_to_ctx(config_data, ctx)
+ _common_deploy(ctx)
+
+
+def _common_deploy(ctx: CephadmContext) -> None:
+ daemon_type, daemon_id = ctx.name.split('.', 1)
+ if daemon_type not in get_supported_daemons():
+ raise Error('daemon type %s not recognized' % daemon_type)
+
+ lock = FileLock(ctx, ctx.fsid)
+ lock.acquire()
+
+ deployment_type = get_deployment_type(ctx, daemon_type, daemon_id)
+
+ # Migrate sysctl conf files from /usr/lib to /etc
+ migrate_sysctl_dir(ctx, ctx.fsid)
+
+ # Get and check ports explicitly required to be opened
+ endpoints = fetch_tcp_ports(ctx)
+ _dispatch_deploy(ctx, daemon_type, daemon_id, endpoints, deployment_type)
+
+
+def _dispatch_deploy(
+ ctx: CephadmContext,
+ daemon_type: str,
+ daemon_id: str,
+ daemon_endpoints: List[EndPoint],
+ deployment_type: DeploymentType,
+) -> None:
+ if daemon_type in Ceph.daemons:
+ config, keyring = get_config_and_keyring(ctx)
+ uid, gid = extract_uid_gid(ctx)
+ make_var_run(ctx, ctx.fsid, uid, gid)
+
+ config_json = fetch_configs(ctx)
+
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
+ ptrace=ctx.allow_ptrace)
+
+ if daemon_type == 'mon' and config_json is not None:
+ if 'crush_location' in config_json:
+ c_loc = config_json['crush_location']
+ # was originally "c.args.extend(['--set-crush-location', c_loc])"
+ # but that doesn't seem to persist in the object after it's passed
+ # in further function calls
+ c.args = c.args + ['--set-crush-location', c_loc]
+
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ config=config, keyring=keyring,
+ osd_fsid=ctx.osd_fsid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type in Monitoring.components:
+ # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
+ # Default Checks
+ # make sure provided config-json is sufficient
+ config = fetch_configs(ctx) # type: ignore
+ required_files = Monitoring.components[daemon_type].get('config-json-files', list())
+ required_args = Monitoring.components[daemon_type].get('config-json-args', list())
+ if required_files:
+ if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore
+ raise Error('{} deployment requires config-json which must '
+ 'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
+ if required_args:
+ if not config or not all(c in config.keys() for c in required_args): # type: ignore
+ raise Error('{} deployment requires config-json which must '
+ 'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))
+
+ uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == NFSGanesha.daemon_type:
+ # only check ports if this is a fresh deployment
+ if deployment_type == DeploymentType.DEFAULT and not daemon_endpoints:
+ nfs_ports = list(NFSGanesha.port_map.values())
+ daemon_endpoints = [EndPoint('0.0.0.0', p) for p in nfs_ports]
+
+ config, keyring = get_config_and_keyring(ctx)
+ # TODO: extract ganesha uid/gid (997, 994) ?
+ uid, gid = extract_uid_gid(ctx)
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ config=config, keyring=keyring,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == CephIscsi.daemon_type:
+ config, keyring = get_config_and_keyring(ctx)
+ uid, gid = extract_uid_gid(ctx)
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ config=config, keyring=keyring,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+ elif daemon_type == CephNvmeof.daemon_type:
+ config, keyring = get_config_and_keyring(ctx)
+        uid, gid = 167, 167  # TODO: need to get the uid/gid properly
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ config=config, keyring=keyring,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+ elif daemon_type in Tracing.components:
+ uid, gid = 65534, 65534
+ c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+ elif daemon_type == HAproxy.daemon_type:
+ haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
+ uid, gid = haproxy.extract_uid_gid_haproxy()
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == Keepalived.daemon_type:
+ keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
+ uid, gid = keepalived.extract_uid_gid_keepalived()
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == CustomContainer.daemon_type:
+ cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
+ # only check ports if this is a fresh deployment
+ if deployment_type == DeploymentType.DEFAULT:
+ daemon_endpoints.extend([EndPoint('0.0.0.0', p) for p in cc.ports])
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
+ privileged=cc.privileged,
+ ptrace=ctx.allow_ptrace)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
+ uid=cc.uid, gid=cc.gid, config=None,
+ keyring=None,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == CephadmAgent.daemon_type:
+ # get current user gid and uid
+ uid = os.getuid()
+ gid = os.getgid()
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
+ uid, gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ elif daemon_type == SNMPGateway.daemon_type:
+ sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
+ c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
+ sc.uid, sc.gid,
+ deployment_type=deployment_type,
+ endpoints=daemon_endpoints)
+
+ else:
+ raise Error('daemon type {} not implemented in command_deploy function'
+ .format(daemon_type))
+
+##################################
+
+
+@infer_image
+def command_run(ctx):
+ # type: (CephadmContext) -> int
+ (daemon_type, daemon_id) = ctx.name.split('.', 1)
+ c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
+ command = c.run_cmd()
+ return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+@infer_config
+@infer_image
+@validate_fsid
+def command_shell(ctx):
+ # type: (CephadmContext) -> int
+ cp = read_config(ctx.config)
+ if cp.has_option('global', 'fsid') and \
+ cp.get('global', 'fsid') != ctx.fsid:
+ raise Error('fsid does not match ceph.conf')
+
+ if ctx.name:
+ if '.' in ctx.name:
+ (daemon_type, daemon_id) = ctx.name.split('.', 1)
+ else:
+ daemon_type = ctx.name
+ daemon_id = None
+ else:
+ daemon_type = 'osd' # get the most mounts
+ daemon_id = None
+
+ if ctx.fsid and daemon_type in Ceph.daemons:
+ make_log_dir(ctx, ctx.fsid)
+
+ if daemon_id and not ctx.fsid:
+ raise Error('must pass --fsid to specify cluster')
+
+    # in case a dedicated keyring for the specified fsid is found we use it.
+ # Otherwise, use /etc/ceph files by default, if present. We do this instead of
+ # making these defaults in the arg parser because we don't want an error
+ # if they don't exist.
+ if not ctx.keyring:
+ keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
+ if os.path.exists(keyring_file):
+ ctx.keyring = keyring_file
+ elif os.path.exists(CEPH_DEFAULT_KEYRING):
+ ctx.keyring = CEPH_DEFAULT_KEYRING
+
+ container_args: List[str] = ['-i']
+ mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
+ no_config=True if ctx.config else False)
+ binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
+ if ctx.config:
+ mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
+ if ctx.keyring:
+ mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
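+    # --mount accepts SRC[:DST[:OPTIONS]]; for illustration:
+    #   --mount /src/foo            -> /mnt/foo in the container
+    #   --mount /src/foo:/opt/foo   -> /opt/foo
+    #   --mount /src/foo:/opt/foo:z -> /opt/foo with option z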
+ if ctx.mount:
+ for _mount in ctx.mount:
+ split_src_dst = _mount.split(':')
+ mount = pathify(split_src_dst[0])
+ filename = os.path.basename(split_src_dst[0])
+ if len(split_src_dst) > 1:
+ dst = split_src_dst[1]
+ if len(split_src_dst) == 3:
+ dst = '{}:{}'.format(dst, split_src_dst[2])
+ mounts[mount] = dst
+ else:
+ mounts[mount] = '/mnt/{}'.format(filename)
+ if ctx.command:
+ command = ctx.command
+ else:
+ command = ['bash']
+ container_args += [
+ '-t',
+ '-e', 'LANG=C',
+ '-e', 'PS1=%s' % CUSTOM_PS1,
+ ]
+ if ctx.fsid:
+ home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
+ if not os.path.exists(home):
+ logger.debug('Creating root home at %s' % home)
+ makedirs(home, 0, 0, 0o660)
+ if os.path.exists('/etc/skel'):
+ for f in os.listdir('/etc/skel'):
+ if f.startswith('.bash'):
+ shutil.copyfile(os.path.join('/etc/skel', f),
+ os.path.join(home, f))
+ mounts[home] = '/root'
+
+ for i in ctx.volume:
+ a, b = i.split(':', 1)
+ mounts[a] = b
+
+ c = CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='doesnotmatter',
+ args=[],
+ container_args=container_args,
+ volume_mounts=mounts,
+ bind_mounts=binds,
+ envs=ctx.env,
+ privileged=True)
+ command = c.shell_cmd(command)
+
+ if ctx.dry_run:
+ print(' '.join(shlex.quote(arg) for arg in command))
+ return 0
+
+ return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+def command_enter(ctx):
+ # type: (CephadmContext) -> int
+ if not ctx.fsid:
+ raise Error('must pass --fsid to specify cluster')
+ (daemon_type, daemon_id) = ctx.name.split('.', 1)
+ container_args = ['-i'] # type: List[str]
+ if ctx.command:
+ command = ctx.command
+ else:
+ command = ['sh']
+ container_args += [
+ '-t',
+ '-e', 'LANG=C',
+ '-e', 'PS1=%s' % CUSTOM_PS1,
+ ]
+ c = CephContainer(
+ ctx,
+ image=ctx.image,
+ entrypoint='doesnotmatter',
+ container_args=container_args,
+ cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
+ )
+ command = c.exec_cmd(command)
+ return call_timeout(ctx, command, ctx.timeout)
+
+##################################
+
+
+@infer_fsid
+@infer_image
+@validate_fsid
+def command_ceph_volume(ctx):
+ # type: (CephadmContext) -> None
+ cp = read_config(ctx.config)
+ if cp.has_option('global', 'fsid') and \
+ cp.get('global', 'fsid') != ctx.fsid:
+ raise Error('fsid does not match ceph.conf')
+
+ if ctx.fsid:
+ make_log_dir(ctx, ctx.fsid)
+
+ lock = FileLock(ctx, ctx.fsid)
+ lock.acquire()
+
+ (uid, gid) = (0, 0) # ceph-volume runs as root
+ mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)
+
+ tmp_config = None
+ tmp_keyring = None
+
+ (config, keyring) = get_config_and_keyring(ctx)
+
+ if config:
+ # tmp config file
+ tmp_config = write_tmp(config, uid, gid)
+ mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
+
+ if keyring:
+ # tmp keyring file
+ tmp_keyring = write_tmp(keyring, uid, gid)
+ mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
+
+ c = get_ceph_volume_container(
+ ctx,
+ envs=ctx.env,
+ args=ctx.command,
+ volume_mounts=mounts,
+ )
+
+ out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ if not code:
+ print(out)
+
+##################################
+
+
+@infer_fsid
+def command_unit(ctx):
+ # type: (CephadmContext) -> int
+ if not ctx.fsid:
+ raise Error('must pass --fsid to specify cluster')
+
+ unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+ _, _, code = call(
+ ctx,
+ ['systemctl', ctx.command, unit_name],
+ verbosity=CallVerbosity.VERBOSE,
+ desc=''
+ )
+ return code
+
+##################################
+
+
+@infer_fsid
+def command_logs(ctx):
+ # type: (CephadmContext) -> None
+ if not ctx.fsid:
+ raise Error('must pass --fsid to specify cluster')
+
+ unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+ cmd = [find_program('journalctl')]
+ cmd.extend(['-u', unit_name])
+ if ctx.command:
+ cmd.extend(ctx.command)
+
+    # call this directly, without our wrapper, so that we get unmolested
+    # stdout without logger prefixing.
+ logger.debug('Running command: %s' % ' '.join(cmd))
+ subprocess.call(cmd, env=os.environ.copy()) # type: ignore
+
+##################################
+
+
+def list_networks(ctx):
+ # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
+
+ # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
+ # so we'll need to use a regex to parse 'ip' command output.
+ #
+ # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
+ # j = json.loads(out)
+ # for x in j:
+ res = _list_ipv4_networks(ctx)
+ res.update(_list_ipv6_networks(ctx))
+ return res
+
+
+def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
+ execstr: Optional[str] = find_executable('ip')
+ if not execstr:
+ raise FileNotFoundError("unable to find 'ip' command")
+ out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ return _parse_ipv4_route(out)
+
+
+def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
+ r = {} # type: Dict[str, Dict[str, Set[str]]]
+ p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
+ for line in out.splitlines():
+ m = p.findall(line)
+ if not m:
+ continue
+ net = m[0][0]
+ if '/' not in net: # aggregate /32 mask for single host sub-networks
+ net += '/32'
+ iface = m[0][1]
+ ip = m[0][4]
+ if net not in r:
+ r[net] = {}
+ if iface not in r[net]:
+ r[net][iface] = set()
+ r[net][iface].add(ip)
+ return r
+
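+# For illustration (hypothetical route table entry):
+#
+#     >>> _parse_ipv4_route('10.1.2.0/24 dev eth0 proto kernel scope link src 10.1.2.3')
+#     {'10.1.2.0/24': {'eth0': {'10.1.2.3'}}}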
+
+def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
+ execstr: Optional[str] = find_executable('ip')
+ if not execstr:
+ raise FileNotFoundError("unable to find 'ip' command")
+ routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ return _parse_ipv6_route(routes, ips)
+
+
+def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
+ r = {} # type: Dict[str, Dict[str, Set[str]]]
+ route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
+ ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
+ iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
+ for line in routes.splitlines():
+ m = route_p.findall(line)
+ if not m or m[0][0].lower() == 'default':
+ continue
+ net = m[0][0]
+ if '/' not in net: # aggregate /128 mask for single host sub-networks
+ net += '/128'
+ iface = m[0][1]
+ if iface == 'lo': # skip loopback devices
+ continue
+ if net not in r:
+ r[net] = {}
+ if iface not in r[net]:
+ r[net][iface] = set()
+
+ iface = None
+ for line in ips.splitlines():
+ m = ip_p.findall(line)
+ if not m:
+ m = iface_p.findall(line)
+ if m:
+ # drop @... suffix, if present
+ iface = m[0][1].split('@')[0]
+ continue
+ ip = m[0][0]
+ # find the network it belongs to
+ net = [n for n in r.keys()
+ if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
+ if net and iface in r[net[0]]:
+ assert iface
+ r[net[0]][iface].add(ip)
+
+ return r
+
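+# For illustration (hypothetical `ip -6` output):
+#
+#     routes = 'fd00::/64 dev eth0 proto kernel metric 256 pref medium'
+#     ips = ('2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP>\n'
+#            '    inet6 fd00::1/64 scope global')
+#     _parse_ipv6_route(routes, ips)
+#     # -> {'fd00::/64': {'eth0': {'fd00::1'}}}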
+
+def command_list_networks(ctx):
+ # type: (CephadmContext) -> None
+ r = list_networks(ctx)
+
+ def serialize_sets(obj: Any) -> Any:
+ return list(obj) if isinstance(obj, set) else obj
+
+ print(json.dumps(r, indent=4, default=serialize_sets))
+
+##################################
+
+
+def command_ls(ctx):
+ # type: (CephadmContext) -> None
+ ls = list_daemons(ctx, detail=not ctx.no_detail,
+ legacy_dir=ctx.legacy_dir)
+ print(json.dumps(ls, indent=4))
+
+
+def with_units_to_int(v: str) -> int:
+ if v.endswith('iB'):
+ v = v[:-2]
+ elif v.endswith('B'):
+ v = v[:-1]
+ mult = 1
+ if v[-1].upper() == 'K':
+ mult = 1024
+ v = v[:-1]
+ elif v[-1].upper() == 'M':
+ mult = 1024 * 1024
+ v = v[:-1]
+ elif v[-1].upper() == 'G':
+ mult = 1024 * 1024 * 1024
+ v = v[:-1]
+ elif v[-1].upper() == 'T':
+ mult = 1024 * 1024 * 1024 * 1024
+ v = v[:-1]
+ return int(float(v) * mult)
+
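+# For illustration:
+#
+#     >>> with_units_to_int('1.5GiB')
+#     1610612736
+#     >>> with_units_to_int('512KiB')
+#     524288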
+
+def list_daemons(ctx, detail=True, legacy_dir=None):
+ # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
+ host_version: Optional[str] = None
+ ls = []
+ container_path = ctx.container_engine.path
+
+ data_dir = ctx.data_dir
+ if legacy_dir is not None:
+ data_dir = os.path.abspath(legacy_dir + data_dir)
+
+ # keep track of ceph versions we see
+ seen_versions = {} # type: Dict[str, Optional[str]]
+
+ # keep track of image digests
+ seen_digests = {} # type: Dict[str, List[str]]
+
+ # keep track of memory and cpu usage we've seen
+ seen_memusage = {} # type: Dict[str, int]
+ seen_cpuperc = {} # type: Dict[str, str]
+ out, err, code = call(
+ ctx,
+ [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
+ verbosity=CallVerbosity.QUIET
+ )
+ seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
+
+ out, err, code = call(
+ ctx,
+ [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
+ verbosity=CallVerbosity.QUIET
+ )
+ seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)
+
+ # /var/lib/ceph
+ if os.path.exists(data_dir):
+ for i in os.listdir(data_dir):
+ if i in ['mon', 'osd', 'mds', 'mgr']:
+ daemon_type = i
+ for j in os.listdir(os.path.join(data_dir, i)):
+ if '-' not in j:
+ continue
+ (cluster, daemon_id) = j.split('-', 1)
+ fsid = get_legacy_daemon_fsid(ctx,
+ cluster, daemon_type, daemon_id,
+ legacy_dir=legacy_dir)
+ legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+ val: Dict[str, Any] = {
+ 'style': 'legacy',
+ 'name': '%s.%s' % (daemon_type, daemon_id),
+ 'fsid': fsid if fsid is not None else 'unknown',
+ 'systemd_unit': legacy_unit_name,
+ }
+ if detail:
+ (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
+ if not host_version:
+ try:
+ out, err, code = call(ctx,
+ ['ceph', '-v'],
+ verbosity=CallVerbosity.QUIET)
+ if not code and out.startswith('ceph version '):
+ host_version = out.split(' ')[2]
+ except Exception:
+ pass
+ val['host_version'] = host_version
+ ls.append(val)
+ elif is_fsid(i):
+ fsid = str(i) # convince mypy that fsid is a str here
+ for j in os.listdir(os.path.join(data_dir, i)):
+ if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
+ name = j
+ (daemon_type, daemon_id) = j.split('.', 1)
+ unit_name = get_unit_name(fsid,
+ daemon_type,
+ daemon_id)
+ else:
+ continue
+ val = {
+ 'style': 'cephadm:v1',
+ 'name': name,
+ 'fsid': fsid,
+ 'systemd_unit': unit_name,
+ }
+ if detail:
+ # get container id
+ (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
+ container_id = None
+ image_name = None
+ image_id = None
+ image_digests = None
+ version = None
+ start_stamp = None
+
+ out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
+ if not code:
+ (container_id, image_name, image_id, start,
+ version) = out.strip().split(',')
+ image_id = normalize_container_id(image_id)
+ daemon_type = name.split('.', 1)[0]
+ start_stamp = try_convert_datetime(start)
+
+ # collect digests for this image id
+ image_digests = seen_digests.get(image_id)
+ if not image_digests:
+ out, err, code = call(
+ ctx,
+ [
+ container_path, 'image', 'inspect', image_id,
+ '--format', '{{.RepoDigests}}',
+ ],
+ verbosity=CallVerbosity.QUIET)
+ if not code:
+ image_digests = list(set(map(
+ normalize_image_digest,
+ out.strip()[1:-1].split(' '))))
+ seen_digests[image_id] = image_digests
+
+ # identify software version inside the container (if we can)
+ if not version or '.' not in version:
+ version = seen_versions.get(image_id, None)
+ if daemon_type == NFSGanesha.daemon_type:
+ version = NFSGanesha.get_version(ctx, container_id)
+ if daemon_type == CephIscsi.daemon_type:
+ version = CephIscsi.get_version(ctx, container_id)
+ if daemon_type == CephNvmeof.daemon_type:
+ version = CephNvmeof.get_version(ctx, container_id)
+ elif not version:
+ if daemon_type in Ceph.daemons:
+ out, err, code = call(ctx,
+ [container_path, 'exec', container_id,
+ 'ceph', '-v'],
+ verbosity=CallVerbosity.QUIET)
+ if not code and \
+ out.startswith('ceph version '):
+ version = out.split(' ')[2]
+ seen_versions[image_id] = version
+ elif daemon_type == 'grafana':
+ out, err, code = call(ctx,
+ [container_path, 'exec', container_id,
+ 'grafana-server', '-v'],
+ verbosity=CallVerbosity.QUIET)
+ if not code and \
+ out.startswith('Version '):
+ version = out.split(' ')[1]
+ seen_versions[image_id] = version
+ elif daemon_type in ['prometheus',
+ 'alertmanager',
+ 'node-exporter',
+ 'loki',
+ 'promtail']:
+ version = Monitoring.get_version(ctx, container_id, daemon_type)
+ seen_versions[image_id] = version
+ elif daemon_type == 'haproxy':
+ out, err, code = call(ctx,
+ [container_path, 'exec', container_id,
+ 'haproxy', '-v'],
+ verbosity=CallVerbosity.QUIET)
+                            if not code and \
+                                    (out.startswith('HA-Proxy version ')
+                                     or out.startswith('HAProxy version ')):
+ version = out.split(' ')[2]
+ seen_versions[image_id] = version
+ elif daemon_type == 'keepalived':
+ out, err, code = call(ctx,
+ [container_path, 'exec', container_id,
+ 'keepalived', '--version'],
+ verbosity=CallVerbosity.QUIET)
+ if not code and \
+ err.startswith('Keepalived '):
+ version = err.split(' ')[1]
+ if version[0] == 'v':
+ version = version[1:]
+ seen_versions[image_id] = version
+ elif daemon_type == CustomContainer.daemon_type:
+ # Because a custom container can contain
+ # everything, we do not know which command
+ # to execute to get the version.
+ pass
+ elif daemon_type == SNMPGateway.daemon_type:
+ version = SNMPGateway.get_version(ctx, fsid, daemon_id)
+ seen_versions[image_id] = version
+ else:
+ logger.warning('could not determine version for unknown daemon type %s' % daemon_type)
+ else:
+ vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore
+ try:
+ with open(vfile, 'r') as f:
+ image_name = f.read().strip() or None
+ except IOError:
+ pass
+
+ # merge in metadata from unit.meta, if present
+ mfile = os.path.join(data_dir, fsid, j, 'unit.meta') # type: ignore
+ try:
+ with open(mfile, 'r') as f:
+ meta = json.loads(f.read())
+ val.update(meta)
+ except IOError:
+ pass
+
+ val['container_id'] = container_id
+ val['container_image_name'] = image_name
+ val['container_image_id'] = image_id
+ val['container_image_digests'] = image_digests
+ if container_id:
+ val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
+ val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
+ val['version'] = version
+ val['started'] = start_stamp
+ val['created'] = get_file_timestamp(
+ os.path.join(data_dir, fsid, j, 'unit.created')
+ )
+ val['deployed'] = get_file_timestamp(
+ os.path.join(data_dir, fsid, j, 'unit.image'))
+ val['configured'] = get_file_timestamp(
+ os.path.join(data_dir, fsid, j, 'unit.configured'))
+ ls.append(val)
+
+ return ls
+
+
+def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
+ # keep track of memory usage we've seen
+ seen_memusage = {} # type: Dict[str, int]
+ seen_memusage_cid_len = 0
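+ # each line of 'out' pairs a container id with its memory usage,
+ # e.g. (values illustrative): '1234567890ab,1.13GiB / 31.41GiB'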
+ if not code:
+ for line in out.splitlines():
+ (cid, usage) = line.split(',')
+ (used, limit) = usage.split(' / ')
+ try:
+ seen_memusage[cid] = with_units_to_int(used)
+ if not seen_memusage_cid_len:
+ seen_memusage_cid_len = len(cid)
+ except ValueError:
+ logger.info('unable to parse memory usage line\n>{}'.format(line))
+ return seen_memusage_cid_len, seen_memusage
+
+
+def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]:
+ seen_cpuperc = {}
+ seen_cpuperc_cid_len = 0
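+ # each line pairs a container id with a cpu percentage,
+ # e.g. (illustrative): '1234567890ab,2.15%'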
+ if not code:
+ for line in out.splitlines():
+ (cid, cpuperc) = line.split(',')
+ try:
+ seen_cpuperc[cid] = cpuperc
+ if not seen_cpuperc_cid_len:
+ seen_cpuperc_cid_len = len(cid)
+ except ValueError:
+ logger.info('unable to parse cpu percentage line\n>{}'.format(line))
+ return seen_cpuperc_cid_len, seen_cpuperc
+
+
+def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
+ # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
+
+ for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
+ if d['fsid'] != fsid:
+ continue
+ if d['name'] != name:
+ continue
+ return d
+ raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
+
+
+def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
+ c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
+ out, err, code = '', '', -1
+ for name in (c.cname, c.old_cname):
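+ # a successful inspect emits one CSV line matching the --format
+ # template below, i.e. (illustrative):
+ # <container id>,<image name>,<image id>,<created timestamp>,<ceph version label>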
+ cmd = [
+ container_path, 'inspect',
+ '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
+ name
+ ]
+ out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
+ if not code:
+ break
+ return out, err, code
+
+##################################
+
+
+@default_image
+def command_adopt(ctx):
+ # type: (CephadmContext) -> None
+
+ if not ctx.skip_pull:
+ try:
+ _pull_image(ctx, ctx.image)
+ except UnauthorizedRegistryError:
+ err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
+ logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
+ raise Error(err_str)
+
+ (daemon_type, daemon_id) = ctx.name.split('.', 1)
+
+ # legacy check
+ if ctx.style != 'legacy':
+ raise Error('adoption of style %s not implemented' % ctx.style)
+
+ # lock
+ fsid = get_legacy_daemon_fsid(ctx,
+ ctx.cluster,
+ daemon_type,
+ daemon_id,
+ legacy_dir=ctx.legacy_dir)
+ if not fsid:
+ raise Error('could not detect legacy fsid; set fsid in ceph.conf')
+ lock = FileLock(ctx, fsid)
+ lock.acquire()
+
+ # call correct adoption
+ if daemon_type in Ceph.daemons:
+ command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
+ elif daemon_type == 'prometheus':
+ command_adopt_prometheus(ctx, daemon_id, fsid)
+ elif daemon_type == 'grafana':
+ command_adopt_grafana(ctx, daemon_id, fsid)
+ elif daemon_type == 'node-exporter':
+ raise Error('adoption of node-exporter not implemented')
+ elif daemon_type == 'alertmanager':
+ command_adopt_alertmanager(ctx, daemon_id, fsid)
+ else:
+ raise Error('daemon type %s not recognized' % daemon_type)
+
+
+class AdoptOsd(object):
+ def __init__(self, ctx, osd_data_dir, osd_id):
+ # type: (CephadmContext, str, str) -> None
+ self.ctx = ctx
+ self.osd_data_dir = osd_data_dir
+ self.osd_id = osd_id
+
+ def check_online_osd(self):
+ # type: () -> Tuple[Optional[str], Optional[str]]
+
+ osd_fsid, osd_type = None, None
+
+ path = os.path.join(self.osd_data_dir, 'fsid')
+ try:
+ with open(path, 'r') as f:
+ osd_fsid = f.read().strip()
+ logger.info('Found online OSD at %s' % path)
+ except IOError:
+ logger.info('Unable to read OSD fsid from %s' % path)
+ if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
+ with open(os.path.join(self.osd_data_dir, 'type')) as f:
+ osd_type = f.read().strip()
+ else:
+ logger.info('"type" file missing for OSD data dir')
+
+ return osd_fsid, osd_type
+
+ def check_offline_lvm_osd(self):
+ # type: () -> Tuple[Optional[str], Optional[str]]
+ osd_fsid, osd_type = None, None
+
+ c = get_ceph_volume_container(
+ self.ctx,
+ args=['lvm', 'list', '--format=json'],
+ )
+ out, err, code = call_throws(self.ctx, c.run_cmd())
+ if not code:
+ try:
+ js = json.loads(out)
+ if self.osd_id in js:
+ logger.info('Found offline LVM OSD {}'.format(self.osd_id))
+ osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
+ for device in js[self.osd_id]:
+ if device['tags']['ceph.type'] == 'block':
+ osd_type = 'bluestore'
+ break
+ if device['tags']['ceph.type'] == 'data':
+ osd_type = 'filestore'
+ break
+ except ValueError as e:
+ logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))
+
+ return osd_fsid, osd_type
+
+ def check_offline_simple_osd(self):
+ # type: () -> Tuple[Optional[str], Optional[str]]
+ osd_fsid, osd_type = None, None
+
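+ # ceph-volume 'simple' scan files are named '<osd-id>-<osd-fsid>.json',
+ # which is what the glob below matches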
+ osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
+ if len(osd_file) == 1:
+ with open(osd_file[0], 'r') as f:
+ try:
+ js = json.loads(f.read())
+ logger.info('Found offline simple OSD {}'.format(self.osd_id))
+ osd_fsid = js['fsid']
+ osd_type = js['type']
+ if osd_type != 'filestore':
+ # need this to be mounted for the adopt to work, as it
+ # needs to move files from this directory
+ call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
+ except ValueError as e:
+ logger.info('Invalid JSON in {}: {}'.format(osd_file, e))
+
+ return osd_fsid, osd_type
+
+ def change_cluster_name(self) -> None:
+ logger.info('Attempting to convert osd cluster name to ceph...')
+ c = get_ceph_volume_container(
+ self.ctx,
+ args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
+ )
+ out, err, code = call_throws(self.ctx, c.run_cmd())
+ if code:
+ raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
+ try:
+ js = json.loads(out)
+ if not js:
+ raise RuntimeError(f'Failed to find osd.{self.osd_id}')
+ device: Optional[Dict[Any, Any]] = None
+ for d in js[self.osd_id]:
+ if d['type'] == 'block':
+ device = d
+ break
+ if not device:
+ raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
+ vg = device['vg_name']
+ out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
+ if code:
+ raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
+ out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
+ if code:
+ raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
+ logger.info('Successfully converted osd cluster name')
+ except Exception as e:
+ logger.info(f'Failed to convert osd cluster name: {e}')
+
+
+def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
+ # type: (CephadmContext, str, str, str) -> None
+
+ (uid, gid) = extract_uid_gid(ctx)
+
+ data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
+ (daemon_type, ctx.cluster, daemon_id))
+ data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)
+
+ if not os.path.exists(data_dir_src):
+ raise Error("{}.{} data directory '{}' does not exist. "
+ 'Incorrect ID specified, or daemon already adopted?'.format(
+ daemon_type, daemon_id, data_dir_src))
+
+ osd_fsid = None
+ if daemon_type == 'osd':
+ adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
+ osd_fsid, osd_type = adopt_osd.check_online_osd()
+ if not osd_fsid:
+ osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
+ if not osd_fsid:
+ osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
+ if not osd_fsid:
+ raise Error('Unable to find OSD {}'.format(daemon_id))
+ elif ctx.cluster != 'ceph':
+ adopt_osd.change_cluster_name()
+ logger.info('objectstore_type is %s' % osd_type)
+ assert osd_type
+ if osd_type == 'filestore':
+ raise Error('FileStore is not supported by cephadm')
+
+ # NOTE: implicit assumption here that the units correspond to the
+ # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
+ # CLUSTER field.
+ unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
+ (enabled, state, _) = check_unit(ctx, unit_name)
+ if state == 'running':
+ logger.info('Stopping old systemd unit %s...' % unit_name)
+ call_throws(ctx, ['systemctl', 'stop', unit_name])
+ if enabled:
+ logger.info('Disabling old systemd unit %s...' % unit_name)
+ call_throws(ctx, ['systemctl', 'disable', unit_name])
+
+ # data
+ logger.info('Moving data...')
+ data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+ uid=uid, gid=gid)
+ move_files(ctx, glob(os.path.join(data_dir_src, '*')),
+ data_dir_dst,
+ uid=uid, gid=gid)
+ logger.debug('Remove dir `%s`' % (data_dir_src))
+ if os.path.ismount(data_dir_src):
+ call_throws(ctx, ['umount', data_dir_src])
+ os.rmdir(data_dir_src)
+
+ logger.info('Chowning content...')
+ call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
+
+ if daemon_type == 'mon':
+ # rename *.ldb -> *.sst, in case they are coming from ubuntu
+ store = os.path.join(data_dir_dst, 'store.db')
+ num_renamed = 0
+ if os.path.exists(store):
+ for oldf in os.listdir(store):
+ if oldf.endswith('.ldb'):
+ newf = oldf.replace('.ldb', '.sst')
+ oldp = os.path.join(store, oldf)
+ newp = os.path.join(store, newf)
+ logger.debug('Renaming %s -> %s' % (oldp, newp))
+ os.rename(oldp, newp)
+ num_renamed += 1
+ if num_renamed:
+ logger.info('Renamed %d leveldb *.ldb files to *.sst',
+ num_renamed)
+ if daemon_type == 'osd':
+ for n in ['block', 'block.db', 'block.wal']:
+ p = os.path.join(data_dir_dst, n)
+ if os.path.exists(p):
+ logger.info('Chowning %s...' % p)
+ os.chown(p, uid, gid)
+ # disable the ceph-volume 'simple' mode files on the host
+ simple_fn = os.path.join('/etc/ceph/osd',
+ '%s-%s.json' % (daemon_id, osd_fsid))
+ if os.path.exists(simple_fn):
+ new_fn = simple_fn + '.adopted-by-cephadm'
+ logger.info('Renaming %s -> %s', simple_fn, new_fn)
+ os.rename(simple_fn, new_fn)
+ logger.info('Disabling ceph-volume@simple host unit...')
+ call(ctx, ['systemctl', 'disable',
+ 'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
+ else:
+ # assume this is an 'lvm' c-v for now, but don't error
+ # out if it's not.
+ logger.info('Disabling ceph-volume@lvm host unit...')
+ call(ctx, ['systemctl', 'disable',
+ 'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
+
+ # config
+ config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
+ config_src = os.path.abspath(ctx.legacy_dir + config_src)
+ config_dst = os.path.join(data_dir_dst, 'config')
+ copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+ # logs
+ logger.info('Moving logs...')
+ log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
+ (ctx.cluster, daemon_type, daemon_id))
+ log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
+ log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
+ move_files(ctx, glob(log_dir_src),
+ log_dir_dst,
+ uid=uid, gid=gid)
+
+ logger.info('Creating new units...')
+ make_var_run(ctx, fsid, uid, gid)
+ c = get_container(ctx, fsid, daemon_type, daemon_id)
+ deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
+ enable=True, # unconditionally enable the new unit
+ start=(state == 'running' or ctx.force_start),
+ osd_fsid=osd_fsid)
+ update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_prometheus(ctx, daemon_id, fsid):
+ # type: (CephadmContext, str, str) -> None
+ daemon_type = 'prometheus'
+ (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+ # should try to set the ports we know cephadm defaults
+ # to for these services in the firewall.
+ ports = Monitoring.port_map['prometheus']
+ endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+ _stop_and_disable(ctx, 'prometheus')
+
+ data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+ uid=uid, gid=gid)
+
+ # config
+ config_src = '/etc/prometheus/prometheus.yml'
+ config_src = os.path.abspath(ctx.legacy_dir + config_src)
+ config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
+ makedirs(config_dst, uid, gid, 0o755)
+ copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+ # data
+ data_src = '/var/lib/prometheus/metrics/'
+ data_src = os.path.abspath(ctx.legacy_dir + data_src)
+ data_dst = os.path.join(data_dir_dst, 'data')
+ copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+ make_var_run(ctx, fsid, uid, gid)
+ c = get_container(ctx, fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+ update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_grafana(ctx, daemon_id, fsid):
+ # type: (CephadmContext, str, str) -> None
+
+ daemon_type = 'grafana'
+ (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+ # should try to set the ports we know cephadm defaults
+ # to for these services in the firewall.
+ ports = Monitoring.port_map['grafana']
+ endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+ _stop_and_disable(ctx, 'grafana-server')
+
+ data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+ uid=uid, gid=gid)
+
+ # config
+ config_src = '/etc/grafana/grafana.ini'
+ config_src = os.path.abspath(ctx.legacy_dir + config_src)
+ config_dst = os.path.join(data_dir_dst, 'etc/grafana')
+ makedirs(config_dst, uid, gid, 0o755)
+ copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+ prov_src = '/etc/grafana/provisioning/'
+ prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
+ prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
+ copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)
+
+ # cert
+ cert = '/etc/grafana/grafana.crt'
+ key = '/etc/grafana/grafana.key'
+ if os.path.exists(cert) and os.path.exists(key):
+ cert_src = '/etc/grafana/grafana.crt'
+ cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
+ makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
+ cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
+ copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)
+
+ key_src = '/etc/grafana/grafana.key'
+ key_src = os.path.abspath(ctx.legacy_dir + key_src)
+ key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
+ copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)
+
+ _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
+ else:
+ logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))
+
+ # data - possible custom dashboards/plugins
+ data_src = '/var/lib/grafana/'
+ data_src = os.path.abspath(ctx.legacy_dir + data_src)
+ data_dst = os.path.join(data_dir_dst, 'data')
+ copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+ make_var_run(ctx, fsid, uid, gid)
+ c = get_container(ctx, fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+ update_firewalld(ctx, daemon_type)
+
+
+def command_adopt_alertmanager(ctx, daemon_id, fsid):
+ # type: (CephadmContext, str, str) -> None
+
+ daemon_type = 'alertmanager'
+ (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+ # should try to set the ports we know cephadm defaults
+ # to for these services in the firewall.
+ ports = Monitoring.port_map['alertmanager']
+ endpoints = [EndPoint('0.0.0.0', p) for p in ports]
+
+ _stop_and_disable(ctx, 'prometheus-alertmanager')
+
+ data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+ uid=uid, gid=gid)
+
+ # config
+ config_src = '/etc/prometheus/alertmanager.yml'
+ config_src = os.path.abspath(ctx.legacy_dir + config_src)
+ config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
+ makedirs(config_dst, uid, gid, 0o755)
+ copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+ # data
+ data_src = '/var/lib/prometheus/alertmanager/'
+ data_src = os.path.abspath(ctx.legacy_dir + data_src)
+ data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
+ copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+ make_var_run(ctx, fsid, uid, gid)
+ c = get_container(ctx, fsid, daemon_type, daemon_id)
+ deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
+ deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
+ update_firewalld(ctx, daemon_type)
+
+
+def _adjust_grafana_ini(filename):
+ # type: (str) -> None
+
+ # Update cert_file, cert_key pathnames in server section
+ # ConfigParser does not preserve comments
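+ # e.g. a line like 'cert_file = /etc/grafana/grafana.crt' in the
+ # [server] section becomes 'cert_file = /etc/grafana/certs/cert_file'
+ # (source path illustrative)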
+ try:
+ with open(filename, 'r') as grafana_ini:
+ lines = grafana_ini.readlines()
+ with write_new(filename, perms=None) as grafana_ini:
+ server_section = False
+ for line in lines:
+ if line.startswith('['):
+ server_section = False
+ if line.startswith('[server]'):
+ server_section = True
+ if server_section:
+ line = re.sub(r'^cert_file.*',
+ 'cert_file = /etc/grafana/certs/cert_file', line)
+ line = re.sub(r'^cert_key.*',
+ 'cert_key = /etc/grafana/certs/cert_key', line)
+ grafana_ini.write(line)
+ except OSError as err:
+ raise Error('Cannot update {}: {}'.format(filename, err))
+
+
+def _stop_and_disable(ctx, unit_name):
+ # type: (CephadmContext, str) -> None
+
+ (enabled, state, _) = check_unit(ctx, unit_name)
+ if state == 'running':
+ logger.info('Stopping old systemd unit %s...' % unit_name)
+ call_throws(ctx, ['systemctl', 'stop', unit_name])
+ if enabled:
+ logger.info('Disabling old systemd unit %s...' % unit_name)
+ call_throws(ctx, ['systemctl', 'disable', unit_name])
+
+##################################
+
+
+def command_rm_daemon(ctx):
+ # type: (CephadmContext) -> None
+ lock = FileLock(ctx, ctx.fsid)
+ lock.acquire()
+
+ (daemon_type, daemon_id) = ctx.name.split('.', 1)
+ unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
+
+ if daemon_type in ['mon', 'osd'] and not ctx.force:
+ raise Error('must pass --force to proceed: '
+ 'this command may destroy precious data!')
+
+ call(ctx, ['systemctl', 'stop', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ call(ctx, ['systemctl', 'reset-failed', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ call(ctx, ['systemctl', 'disable', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+
+ # force remove rgw admin socket file if leftover
+ if daemon_type in ['rgw']:
+ rgw_asok_path = f'/var/run/ceph/{ctx.fsid}/ceph-client.{ctx.name}.*.asok'
+ # call() does not spawn a shell, so expand the glob ourselves
+ # instead of passing the pattern through to rm
+ for asok in glob(rgw_asok_path):
+ call(ctx, ['rm', '-rf', asok],
+ verbosity=CallVerbosity.DEBUG)
+
+ data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
+ if daemon_type in ['mon', 'osd', 'prometheus'] and \
+ not ctx.force_delete_data:
+ # rename it out of the way -- do not delete
+ backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
+ if not os.path.exists(backup_dir):
+ makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
+ dirname = '%s.%s_%s' % (daemon_type, daemon_id,
+ datetime.datetime.utcnow().strftime(DATEFMT))
+ os.rename(data_dir,
+ os.path.join(backup_dir, dirname))
+ else:
+ call_throws(ctx, ['rm', '-rf', data_dir])
+
+ endpoints = fetch_tcp_ports(ctx)
+ ports: List[int] = [e.port for e in endpoints]
+ if ports:
+ try:
+ fw = Firewalld(ctx)
+ fw.close_ports(ports)
+ fw.apply_rules()
+ except RuntimeError as e:
+ # in case we cannot close the ports we will remove
+ # the daemon but keep them open.
+ logger.warning(f'Error when trying to close ports: {e}')
+
+
+##################################
+
+
+def _zap(ctx: CephadmContext, what: str) -> None:
+ mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+ c = get_ceph_volume_container(ctx,
+ args=['lvm', 'zap', '--destroy', what],
+ volume_mounts=mounts,
+ envs=ctx.env)
+ logger.info(f'Zapping {what}...')
+ out, err, code = call_throws(ctx, c.run_cmd())
+
+
+@infer_image
+def _zap_osds(ctx: CephadmContext) -> None:
+ # assume fsid lock already held
+
+ # list
+ mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+ c = get_ceph_volume_container(ctx,
+ args=['inventory', '--format', 'json'],
+ volume_mounts=mounts,
+ envs=ctx.env)
+ out, err, code = call_throws(ctx, c.run_cmd())
+ if code:
+ raise Error('failed to list osd inventory')
+ try:
+ ls = json.loads(out)
+ except ValueError as e:
+ raise Error(f'Invalid JSON in ceph-volume inventory: {e}')
+
+ for i in ls:
+ matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
+ if any(matches) and all(matches):
+ _zap(ctx, i.get('path'))
+ elif any(matches):
+ lv_names = [lv['name'] for lv in i.get('lvs', [])]
+ # TODO: we need to map the lv_names back to device paths (the vg
+ # id isn't part of the output here!)
+ logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
+
+
+def command_zap_osds(ctx: CephadmContext) -> None:
+ if not ctx.force:
+ raise Error('must pass --force to proceed: '
+ 'this command may destroy precious data!')
+
+ lock = FileLock(ctx, ctx.fsid)
+ lock.acquire()
+
+ _zap_osds(ctx)
+
+##################################
+
+
+def get_ceph_cluster_count(ctx: CephadmContext) -> int:
+ return len([c for c in os.listdir(ctx.data_dir) if is_fsid(c)])
+
+
+def command_rm_cluster(ctx: CephadmContext) -> None:
+ if not ctx.force:
+ raise Error('must pass --force to proceed: '
+ 'this command may destroy precious data!')
+
+ lock = FileLock(ctx, ctx.fsid)
+ lock.acquire()
+ _rm_cluster(ctx, ctx.keep_logs, ctx.zap_osds)
+
+
+def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
+
+ if not ctx.fsid:
+ raise Error('must select the cluster to delete by passing --fsid to proceed')
+
+ def disable_systemd_service(unit_name: str) -> None:
+ call(ctx, ['systemctl', 'stop', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ call(ctx, ['systemctl', 'reset-failed', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+ call(ctx, ['systemctl', 'disable', unit_name],
+ verbosity=CallVerbosity.DEBUG)
+
+ logger.info(f'Deleting cluster with fsid: {ctx.fsid}')
+
+ # stop + disable individual daemon units
+ for d in list_daemons(ctx, detail=False):
+ if d['fsid'] != ctx.fsid:
+ continue
+ if d['style'] != 'cephadm:v1':
+ continue
+ disable_systemd_service(get_unit_name(ctx.fsid, d['name']))
+
+ # cluster units
+ for unit_name in ['ceph-%s.target' % ctx.fsid]:
+ disable_systemd_service(unit_name)
+
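+ # systemd escapes '-' in unit names as '\x2d', so apply the same
+ # substitution to the fsid when deriving the slice name below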
+ slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
+ call(ctx, ['systemctl', 'stop', slice_name],
+ verbosity=CallVerbosity.DEBUG)
+
+ # osds?
+ if zap_osds:
+ _zap_osds(ctx)
+
+ # rm units
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir
+ + '/ceph-%s@.service' % ctx.fsid])
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir
+ + '/ceph-%s.target' % ctx.fsid])
+ call_throws(ctx, ['rm', '-rf',
+ ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
+ # rm data
+ call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])
+
+ if not keep_logs:
+ # rm logs
+ call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
+ call_throws(ctx, ['rm', '-rf', ctx.log_dir
+ + '/*.wants/ceph-%s@*' % ctx.fsid])
+
+ # rm logrotate config
+ call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
+
+ # if last cluster on host remove shared files
+ if get_ceph_cluster_count(ctx) == 0:
+ disable_systemd_service('ceph.target')
+
+ # rm shared ceph target files
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])
+
+ # rm cephadm logrotate config
+ call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
+
+ if not keep_logs:
+ # remove all cephadm logs
+ for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
+ os.remove(fname)
+
+ # rm sysctl settings
+ sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
+
+ for sysctl_dir in sysctl_dirs:
+ for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
+ p.unlink()
+
+ # cleanup remaining ceph directories
+ ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/cephadm-{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
+ for dd in ceph_dirs:
+ shutil.rmtree(dd, ignore_errors=True)
+
+ # clean up config, keyring, and pub key files
+ files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
+ if os.path.exists(files[0]):
+ valid_fsid = False
+ with open(files[0]) as f:
+ if ctx.fsid in f.read():
+ valid_fsid = True
+ if valid_fsid:
+ # rm configuration files on /etc/ceph
+ for fname in files:
+ if os.path.exists(fname):
+ os.remove(fname)
+
+##################################
+
+
+def check_time_sync(ctx, enabler=None):
+ # type: (CephadmContext, Optional[Packager]) -> bool
+ units = [
+ 'chrony.service', # 18.04 (at least)
+ 'chronyd.service', # el / opensuse
+ 'systemd-timesyncd.service',
+ 'ntpd.service', # el7 (at least)
+ 'ntp.service', # 18.04 (at least)
+ 'ntpsec.service', # 20.04 (at least) / buster
+ 'openntpd.service', # ubuntu / debian
+ ]
+ if not check_units(ctx, units, enabler):
+ logger.warning('No time sync service is running; checked for %s' % units)
+ return False
+ return True
+
+
+def command_check_host(ctx: CephadmContext) -> None:
+ errors = []
+ commands = ['systemctl', 'lvcreate']
+
+ try:
+ engine = check_container_engine(ctx)
+ logger.info(f'{engine} is present')
+ except Error as e:
+ errors.append(str(e))
+
+ for command in commands:
+ try:
+ find_program(command)
+ logger.info('%s is present' % command)
+ except ValueError:
+ errors.append('%s binary does not appear to be installed' % command)
+
+ # check for configured+running chronyd or ntp
+ if not check_time_sync(ctx):
+ errors.append('No time synchronization is active')
+
+ if 'expect_hostname' in ctx and ctx.expect_hostname:
+ if get_hostname().lower() != ctx.expect_hostname.lower():
+ errors.append('hostname "%s" does not match expected hostname "%s"' % (
+ get_hostname(), ctx.expect_hostname))
+ else:
+ logger.info('Hostname "%s" matches what is expected.',
+ ctx.expect_hostname)
+
+ if errors:
+ raise Error('\nERROR: '.join(errors))
+
+ logger.info('Host looks OK')
+
+##################################
+
+
+def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
+ try:
+ s_pwd = pwd.getpwnam(ssh_user)
+ except KeyError:
+ raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))
+
+ ssh_uid = s_pwd.pw_uid
+ ssh_gid = s_pwd.pw_gid
+ ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
+ return ssh_uid, ssh_gid, ssh_dir
+
+
+def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
+ """Authorize the public key for the provided ssh user"""
+
+ def key_in_file(path: str, key: str) -> bool:
+ if not os.path.exists(path):
+ return False
+ with open(path) as f:
+ lines = f.readlines()
+ for line in lines:
+ if line.strip() == key.strip():
+ return True
+ return False
+
+ logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
+ if ssh_pub_key is None or ssh_pub_key.isspace():
+ raise Error('Trying to authorize an empty ssh key')
+
+ ssh_pub_key = ssh_pub_key.strip()
+ ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
+ if not os.path.exists(ssh_dir):
+ makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
+
+ auth_keys_file = '%s/authorized_keys' % ssh_dir
+ if key_in_file(auth_keys_file, ssh_pub_key):
+ logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
+ return False
+
+ add_newline = False
+ if os.path.exists(auth_keys_file):
+ with open(auth_keys_file, 'r') as f:
+ f.seek(0, os.SEEK_END)
+ if f.tell() > 0:
+ f.seek(f.tell() - 1, os.SEEK_SET) # go to last char
+ if f.read() != '\n':
+ add_newline = True
+
+ with open(auth_keys_file, 'a') as f:
+ os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
+ os.fchmod(f.fileno(), DEFAULT_MODE) # just in case we created it
+ if add_newline:
+ f.write('\n')
+ f.write(ssh_pub_key + '\n')
+
+ return True
+
+
+def revoke_ssh_key(key: str, ssh_user: str) -> None:
+ """Revoke the public key authorization for the ssh user"""
+ ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
+ auth_keys_file = '%s/authorized_keys' % ssh_dir
+ deleted = False
+ if os.path.exists(auth_keys_file):
+ with open(auth_keys_file, 'r') as f:
+ lines = f.readlines()
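+ # rewrite the keys into a temporary file and only move it into
+ # place if the key to revoke was actually found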
+ fd, filename = tempfile.mkstemp()
+ os.close(fd)  # the file is reopened below; don't leak the fd
+ with open(filename, 'w') as f:
+ os.fchown(f.fileno(), ssh_uid, ssh_gid)
+ os.fchmod(f.fileno(), DEFAULT_MODE) # secure access to the keys file
+ for line in lines:
+ if line.strip() == key.strip():
+ deleted = True
+ else:
+ f.write(line)
+
+ if deleted:
+ shutil.move(filename, auth_keys_file)
+ else:
+ logger.warning('Cannot find the ssh key to be deleted')
+
+
+def check_ssh_connectivity(ctx: CephadmContext) -> None:
+
+ def cmd_is_available(cmd: str) -> bool:
+ if shutil.which(cmd) is None:
+ logger.warning(f'Command not found: {cmd}')
+ return False
+ return True
+
+ if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
+ logger.warning('Cannot check ssh connectivity. Skipping...')
+ return
+
+ ssh_priv_key_path = ''
+ ssh_pub_key_path = ''
+ ssh_signed_cert_path = ''
+ if ctx.ssh_private_key and ctx.ssh_public_key:
+ # let's use the keys provided by the user
+ ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
+ ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
+ elif ctx.ssh_private_key and ctx.ssh_signed_cert:
+ # CA signed keys use case
+ ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
+ ssh_signed_cert_path = pathify(ctx.ssh_signed_cert.name)
+ else:
+ # no custom keys, let's generate some random keys just for this check
+ ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
+ ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
+ ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
+ _, _, code = call(ctx, ssh_key_gen_cmd)
+ if code != 0:
+ logger.warning('Cannot generate keys to check ssh connectivity.')
+ return
+
+ if ssh_signed_cert_path:
+ logger.info('Verification of CA-signed key authentication is not implemented. Skipping...')
+ elif ssh_pub_key_path:
+ logger.info('Verifying ssh connectivity using standard pubkey authentication ...')
+ with open(ssh_pub_key_path, 'r') as f:
+ key = f.read().strip()
+ new_key = authorize_ssh_key(key, ctx.ssh_user)
+ ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
+ _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
+ *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
+ '-o PasswordAuthentication=no',
+ f'{ctx.ssh_user}@{get_hostname()}',
+ 'sudo echo'])
+
+ # we only remove the key if it's a new one. In case the user has provided
+ # some already existing key then we don't alter authorized_keys file
+ if new_key:
+ revoke_ssh_key(key, ctx.ssh_user)
+
+ pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
+ prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
+ ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
+ err_msg = f"""
+** Please verify your user's ssh configuration and make sure:
+- User {ctx.ssh_user} must have passwordless sudo access
+{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
+"""
+ if code != 0:
+ raise Error(err_msg)
+
+
+def command_prepare_host(ctx: CephadmContext) -> None:
+ logger.info('Verifying podman|docker is present...')
+ pkg = None
+ try:
+ check_container_engine(ctx)
+ except Error as e:
+ logger.warning(str(e))
+ if not pkg:
+ pkg = create_packager(ctx)
+ pkg.install_podman()
+
+ logger.info('Verifying lvm2 is present...')
+ if not find_executable('lvcreate'):
+ if not pkg:
+ pkg = create_packager(ctx)
+ pkg.install(['lvm2'])
+
+ logger.info('Verifying time synchronization is in place...')
+ if not check_time_sync(ctx):
+ if not pkg:
+ pkg = create_packager(ctx)
+ pkg.install(['chrony'])
+ # check again, and this time try to enable
+ # the service
+ check_time_sync(ctx, enabler=pkg)
+
+ if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
+ logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
+ call_throws(ctx, ['hostname', ctx.expect_hostname])
+ with open('/etc/hostname', 'w') as f:
+ f.write(ctx.expect_hostname + '\n')
+
+ logger.info('Repeating the final host check...')
+ command_check_host(ctx)
+
+##################################
+
+
+class CustomValidation(argparse.Action):
+
+ def _check_name(self, values: str) -> None:
+ try:
+ (daemon_type, daemon_id) = values.split('.', 1)
+ except ValueError:
+ raise argparse.ArgumentError(self,
+ 'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')
+
+ daemons = get_supported_daemons()
+ if daemon_type not in daemons:
+ raise argparse.ArgumentError(self,
+ 'name must declare the type of daemon e.g. '
+ '{}'.format(', '.join(daemons)))
+
+ def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
+ option_string: Optional[str] = None) -> None:
+ assert isinstance(values, str)
+ if self.dest == 'name':
+ self._check_name(values)
+ setattr(namespace, self.dest, values)
+
+##################################
+
+
+def get_distro():
+ # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
+ distro = None
+ distro_version = None
+ distro_codename = None
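+ # /etc/os-release holds KEY=value lines, e.g. (illustrative):
+ # ID=ubuntu
+ # VERSION_ID="22.04"
+ # VERSION_CODENAME=jammy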
+ with open('/etc/os-release', 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ if '=' not in line or line.startswith('#'):
+ continue
+ (var, val) = line.split('=', 1)
+ if val[0] == '"' and val[-1] == '"':
+ val = val[1:-1]
+ if var == 'ID':
+ distro = val.lower()
+ elif var == 'VERSION_ID':
+ distro_version = val.lower()
+ elif var == 'VERSION_CODENAME':
+ distro_codename = val.lower()
+ return distro, distro_version, distro_codename
+
+
+class Packager(object):
+ def __init__(self, ctx: CephadmContext,
+ stable: Optional[str] = None, version: Optional[str] = None,
+ branch: Optional[str] = None, commit: Optional[str] = None):
+ assert \
+ (stable and not version and not branch and not commit) or \
+ (not stable and version and not branch and not commit) or \
+ (not stable and not version and branch) or \
+ (not stable and not version and not branch and not commit)
+ self.ctx = ctx
+ self.stable = stable
+ self.version = version
+ self.branch = branch
+ self.commit = commit
+
+ def validate(self) -> None:
+ """Validate parameters before writing any state to disk."""
+ pass
+
+ def add_repo(self) -> None:
+ raise NotImplementedError
+
+ def rm_repo(self) -> None:
+ raise NotImplementedError
+
+ def install(self, ls: List[str]) -> None:
+ raise NotImplementedError
+
+ def install_podman(self) -> None:
+ raise NotImplementedError
+
+ def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
+ # query shaman
+ logger.info('Fetching repo metadata from shaman and chacra...')
+ shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
+ distro=distro,
+ distro_version=distro_version,
+ branch=branch,
+ sha1=commit or 'latest',
+ arch=get_arch()
+ )
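+ # e.g. (illustrative):
+ # https://shaman.ceph.com/api/repos/ceph/main/latest/ubuntu/jammy/repo/?arch=x86_64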
+ try:
+ shaman_response = urlopen(shaman_url)
+ except HTTPError as err:
+ logger.error('repository not found in shaman (might not be available yet)')
+ raise Error('%s, failed to fetch %s' % (err, shaman_url))
+ chacra_url = ''
+ try:
+ chacra_url = shaman_response.geturl()
+ chacra_response = urlopen(chacra_url)
+ except HTTPError as err:
+ logger.error('repository not found in chacra (might not be available yet)')
+ raise Error('%s, failed to fetch %s' % (err, chacra_url))
+ return chacra_response.read().decode('utf-8')
+
+ def repo_gpgkey(self) -> Tuple[str, str]:
+ if self.ctx.gpg_url:
+ return self.ctx.gpg_url, 'manual'
+ if self.stable or self.version:
+ return 'https://download.ceph.com/keys/release.gpg', 'release'
+ else:
+ return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'
+
+ def enable_service(self, service: str) -> None:
+ """
+ Start and enable the service (typically using systemd).
+ """
+ call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
+
+
+class Apt(Packager):
+ DISTRO_NAMES = {
+ 'ubuntu': 'ubuntu',
+ 'debian': 'debian',
+ }
+
+ def __init__(self, ctx: CephadmContext,
+ stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+ distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
+ super(Apt, self).__init__(ctx, stable=stable, version=version,
+ branch=branch, commit=commit)
+ assert distro
+ self.ctx = ctx
+ self.distro = self.DISTRO_NAMES[distro]
+ self.distro_codename = distro_codename
+ self.distro_version = distro_version
+
+ def repo_path(self) -> str:
+ return '/etc/apt/sources.list.d/ceph.list'
+
+ def add_repo(self) -> None:
+
+ url, name = self.repo_gpgkey()
+ logger.info('Installing repo GPG key from %s...' % url)
+ try:
+ response = urlopen(url)
+ except HTTPError as err:
+ logger.error('failed to fetch GPG repo key from %s: %s' % (
+ url, err))
+ raise Error('failed to fetch GPG key')
+ key = response.read()
+ with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
+ f.write(key)
+
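+ # the stable/version branches below emit a single sources.list line,
+ # e.g. (illustrative): 'deb https://download.ceph.com/debian-reef/ jammy main'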
+ if self.version:
+ content = 'deb %s/debian-%s/ %s main\n' % (
+ self.ctx.repo_url, self.version, self.distro_codename)
+ elif self.stable:
+ content = 'deb %s/debian-%s/ %s main\n' % (
+ self.ctx.repo_url, self.stable, self.distro_codename)
+ else:
+ content = self.query_shaman(self.distro, self.distro_codename, self.branch,
+ self.commit)
+
+ logger.info('Installing repo file at %s...' % self.repo_path())
+ with open(self.repo_path(), 'w') as f:
+ f.write(content)
+
+ self.update()
+
+ def rm_repo(self) -> None:
+ for name in ['autobuild', 'release', 'manual']:
+ p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
+ if os.path.exists(p):
+ logger.info('Removing repo GPG key %s...' % p)
+ os.unlink(p)
+ if os.path.exists(self.repo_path()):
+ logger.info('Removing repo at %s...' % self.repo_path())
+ os.unlink(self.repo_path())
+
+ if self.distro == 'ubuntu':
+ self.rm_kubic_repo()
+
+ def install(self, ls: List[str]) -> None:
+ logger.info('Installing packages %s...' % ls)
+ call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)
+
+ def update(self) -> None:
+ logger.info('Updating package list...')
+ call_throws(self.ctx, ['apt-get', 'update'])
+
+ def install_podman(self) -> None:
+ if self.distro == 'ubuntu':
+ logger.info('Setting up repo for podman...')
+ self.add_kubic_repo()
+ self.update()
+
+ logger.info('Attempting podman install...')
+ try:
+ self.install(['podman'])
+ except Error:
+ logger.info('Podman did not work. Falling back to docker...')
+ self.install(['docker.io'])
+
+ def kubic_repo_url(self) -> str:
+ return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
+ 'libcontainers:/stable/xUbuntu_%s/' % self.distro_version
+
+ def kubic_repo_path(self) -> str:
+ return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
+
+ def kubic_repo_gpgkey_url(self) -> str:
+ return '%s/Release.key' % self.kubic_repo_url()
+
+ def kubic_repo_gpgkey_path(self) -> str:
+ return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
+
+ def add_kubic_repo(self) -> None:
+ url = self.kubic_repo_gpgkey_url()
+ logger.info('Installing repo GPG key from %s...' % url)
+ try:
+ response = urlopen(url)
+ except HTTPError as err:
+ logger.error('failed to fetch GPG repo key from %s: %s' % (
+ url, err))
+ raise Error('failed to fetch GPG key')
+ key = response.read().decode('utf-8')
+ tmp_key = write_tmp(key, 0, 0)
+ keyring = self.kubic_repo_gpgkey_path()
+ call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])
+
+ logger.info('Installing repo file at %s...' % self.kubic_repo_path())
+ content = 'deb %s /\n' % self.kubic_repo_url()
+ with open(self.kubic_repo_path(), 'w') as f:
+ f.write(content)
+
+ def rm_kubic_repo(self) -> None:
+ keyring = self.kubic_repo_gpgkey_path()
+ if os.path.exists(keyring):
+ logger.info('Removing repo GPG key %s...' % keyring)
+ os.unlink(keyring)
+
+ p = self.kubic_repo_path()
+ if os.path.exists(p):
+ logger.info('Removing repo at %s...' % p)
+ os.unlink(p)
+
+
+class YumDnf(Packager):
+ DISTRO_NAMES = {
+ 'centos': ('centos', 'el'),
+ 'rhel': ('centos', 'el'),
+ 'scientific': ('centos', 'el'),
+ 'rocky': ('centos', 'el'),
+ 'almalinux': ('centos', 'el'),
+ 'ol': ('centos', 'el'),
+ 'fedora': ('fedora', 'fc'),
+ 'mariner': ('mariner', 'cm'),
+ }
+
+ def __init__(self, ctx: CephadmContext,
+ stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+ distro: Optional[str], distro_version: Optional[str]) -> None:
+ super(YumDnf, self).__init__(ctx, stable=stable, version=version,
+ branch=branch, commit=commit)
+ assert distro
+ assert distro_version
+ self.ctx = ctx
+ self.major = int(distro_version.split('.')[0])
+ self.distro_normalized = self.DISTRO_NAMES[distro][0]
+ self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
+ if (self.distro_code == 'fc' and self.major >= 30) or \
+ (self.distro_code == 'el' and self.major >= 8):
+ self.tool = 'dnf'
+ elif (self.distro_code == 'cm'):
+ self.tool = 'tdnf'
+ else:
+ self.tool = 'yum'
+
+ def custom_repo(self, **kw: Any) -> str:
+ """
+ Repo files need special care in that a whole line should not be present
+ if there is no value for it. Because we were using `format()` we could
+ not conditionally add a line for a repo file. So the end result would
+ contain a key with a missing value (say if we were passing `None`).
+
+ For example, it could look like::
+
+ [ceph repo]
+ name= ceph repo
+ proxy=
+ gpgcheck=
+
+ Which breaks. This function allows us to conditionally add lines,
+ preserving an order and be more careful.
+
+ Previously, and for historical purposes, this is how the template used
+ to look::
+
+ custom_repo =
+ [{repo_name}]
+ name={name}
+ baseurl={baseurl}
+ enabled={enabled}
+ gpgcheck={gpgcheck}
+ type={_type}
+ gpgkey={gpgkey}
+ proxy={proxy}
+
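+ An illustrative call::
+
+ custom_repo(reponame='ceph', name='Ceph packages',
+ baseurl='https://example.com/rpm', enabled=1)
+
+ emits only those four lines, omitting proxy, gpgcheck, etc.
+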
+ """
+ lines = []
+
+ # by using tuples (vs a dict) we preserve the order of what we want to
+ # return, like starting with a [repo name]
+ tmpl = (
+ ('reponame', '[%s]'),
+ ('name', 'name=%s'),
+ ('baseurl', 'baseurl=%s'),
+ ('enabled', 'enabled=%s'),
+ ('gpgcheck', 'gpgcheck=%s'),
+ ('_type', 'type=%s'),
+ ('gpgkey', 'gpgkey=%s'),
+ ('proxy', 'proxy=%s'),
+ ('priority', 'priority=%s'),
+ )
+
+ for line in tmpl:
+ tmpl_key, tmpl_value = line # key values from tmpl
+
+ # ensure that there is an actual value (not None nor empty string)
+ if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
+ lines.append(tmpl_value % kw.get(tmpl_key))
+
+ return '\n'.join(lines)
+
+ def repo_path(self) -> str:
+ return '/etc/yum.repos.d/ceph.repo'
+
+ def repo_baseurl(self) -> str:
+ assert self.stable or self.version
+ if self.version:
+ return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
+ self.distro_code)
+ else:
+ return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
+ self.distro_code)
+
+ def validate(self) -> None:
+ if self.distro_code.startswith('fc'):
+ raise Error('the Ceph team does not build Fedora-specific packages and therefore cannot add repos for this distro')
+ if self.distro_code == 'el7':
+ if self.stable and self.stable >= 'pacific':
+ raise Error('Ceph does not support pacific or later for this version of this Linux distro and therefore cannot add a repo for it')
+ if self.version and self.version.split('.')[0] >= '16':
+ raise Error('Ceph does not support 16.y.z or later for this version of this Linux distro and therefore cannot add a repo for it')
+
+ if self.stable or self.version:
+ # we know that yum & dnf require there to be a
+ # $base_url/$arch/repodata/repomd.xml so we can test if this URL
+ # is gettable in order to validate the inputs
+ test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
+ try:
+ urlopen(test_url)
+ except HTTPError as err:
+ logger.error('unable to fetch repo metadata: %r', err)
+ raise Error('failed to fetch repository metadata. please check'
+ ' the provided parameters are correct and try again')
+
+ def add_repo(self) -> None:
+ if self.stable or self.version:
+ content = ''
+ for n, t in {
+ 'Ceph': '$basearch',
+ 'Ceph-noarch': 'noarch',
+ 'Ceph-source': 'SRPMS'}.items():
+ content += '[%s]\n' % (n)
+ content += self.custom_repo(
+ name='Ceph %s' % t,
+ baseurl=self.repo_baseurl() + '/' + t,
+ enabled=1,
+ gpgcheck=1,
+ gpgkey=self.repo_gpgkey()[0],
+ )
+ content += '\n\n'
+ else:
+ content = self.query_shaman(self.distro_normalized, self.major,
+ self.branch,
+ self.commit)
+
+ logger.info('Writing repo to %s...' % self.repo_path())
+ with open(self.repo_path(), 'w') as f:
+ f.write(content)
+
+ if self.distro_code.startswith('el'):
+ logger.info('Enabling EPEL...')
+ call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])
+
+ def rm_repo(self) -> None:
+ if os.path.exists(self.repo_path()):
+ os.unlink(self.repo_path())
+
+ def install(self, ls: List[str]) -> None:
+ logger.info('Installing packages %s...' % ls)
+ call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)
+
+ def install_podman(self) -> None:
+ self.install(['podman'])
+
+
+class Zypper(Packager):
+ DISTRO_NAMES = [
+ 'sles',
+ 'opensuse-tumbleweed',
+ 'opensuse-leap'
+ ]
+
+ def __init__(self, ctx: CephadmContext,
+ stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
+ distro: Optional[str], distro_version: Optional[str]) -> None:
+ super(Zypper, self).__init__(ctx, stable=stable, version=version,
+ branch=branch, commit=commit)
+ assert distro is not None
+ self.ctx = ctx
+ self.tool = 'zypper'
+ self.distro = 'opensuse'
+ self.distro_version = '15.1'
+ if 'tumbleweed' not in distro and distro_version is not None:
+ self.distro_version = distro_version
+
+ def custom_repo(self, **kw: Any) -> str:
+ """
+ See YumDnf for format explanation.
+ """
+ lines = []
+
+ # by using tuples (vs a dict) we preserve the order of what we want to
+ # return, like starting with a [repo name]
+ tmpl = (
+ ('reponame', '[%s]'),
+ ('name', 'name=%s'),
+ ('baseurl', 'baseurl=%s'),
+ ('enabled', 'enabled=%s'),
+ ('gpgcheck', 'gpgcheck=%s'),
+ ('_type', 'type=%s'),
+ ('gpgkey', 'gpgkey=%s'),
+ ('proxy', 'proxy=%s'),
+ ('priority', 'priority=%s'),
+ )
+
+ for line in tmpl:
+ tmpl_key, tmpl_value = line # key values from tmpl
+
+ # ensure that there is an actual value (not None nor empty string)
+ if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
+ lines.append(tmpl_value % kw.get(tmpl_key))
+
+ return '\n'.join(lines)
+
+ def repo_path(self) -> str:
+ return '/etc/zypp/repos.d/ceph.repo'
+
+ def repo_baseurl(self) -> str:
+ assert self.stable or self.version
+ if self.version:
+ return '%s/rpm-%s/%s' % (self.ctx.repo_url,
+ self.version, self.distro)
+ else:
+ return '%s/rpm-%s/%s' % (self.ctx.repo_url,
+ self.stable, self.distro)
+
+ def add_repo(self) -> None:
+ if self.stable or self.version:
+ content = ''
+ for n, t in {
+ 'Ceph': '$basearch',
+ 'Ceph-noarch': 'noarch',
+ 'Ceph-source': 'SRPMS'}.items():
+ content += '[%s]\n' % (n)
+ content += self.custom_repo(
+ name='Ceph %s' % t,
+ baseurl=self.repo_baseurl() + '/' + t,
+ enabled=1,
+ gpgcheck=1,
+ gpgkey=self.repo_gpgkey()[0],
+ )
+ content += '\n\n'
+ else:
+ content = self.query_shaman(self.distro, self.distro_version,
+ self.branch,
+ self.commit)
+
+ logger.info('Writing repo to %s...' % self.repo_path())
+ with open(self.repo_path(), 'w') as f:
+ f.write(content)
+
+ def rm_repo(self) -> None:
+ if os.path.exists(self.repo_path()):
+ os.unlink(self.repo_path())
+
+ def install(self, ls: List[str]) -> None:
+ logger.info('Installing packages %s...' % ls)
+ call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)
+
+ def install_podman(self) -> None:
+ self.install(['podman'])
+
+
+def create_packager(ctx: CephadmContext,
+ stable: Optional[str] = None, version: Optional[str] = None,
+ branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
+ distro, distro_version, distro_codename = get_distro()
+ if distro in YumDnf.DISTRO_NAMES:
+ return YumDnf(ctx, stable=stable, version=version,
+ branch=branch, commit=commit,
+ distro=distro, distro_version=distro_version)
+ elif distro in Apt.DISTRO_NAMES:
+ return Apt(ctx, stable=stable, version=version,
+ branch=branch, commit=commit,
+ distro=distro, distro_version=distro_version,
+ distro_codename=distro_codename)
+ elif distro in Zypper.DISTRO_NAMES:
+ return Zypper(ctx, stable=stable, version=version,
+ branch=branch, commit=commit,
+ distro=distro, distro_version=distro_version)
+ raise Error('Distro %s version %s not supported' % (distro, distro_version))
+
+
+def command_add_repo(ctx: CephadmContext) -> None:
+ if ctx.version and ctx.release:
+ raise Error('you can specify either --release or --version but not both')
+ if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
+ raise Error('please supply a --release, --version, --dev or --dev-commit argument')
+ if ctx.version:
+ try:
+ (x, y, z) = ctx.version.split('.')
+ except Exception:
+ raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
+ if ctx.release:
+ # release names are compared case-sensitively ('Pacific' != 'pacific'), so normalize to lowercase to avoid confusion
+ ctx.release = ctx.release.lower()
+
+ pkg = create_packager(ctx, stable=ctx.release,
+ version=ctx.version,
+ branch=ctx.dev,
+ commit=ctx.dev_commit)
+ pkg.validate()
+ pkg.add_repo()
+ logger.info('Completed adding repo.')
+
+
+def command_rm_repo(ctx: CephadmContext) -> None:
+ pkg = create_packager(ctx)
+ pkg.rm_repo()
+
+
+def command_install(ctx: CephadmContext) -> None:
+ pkg = create_packager(ctx)
+ pkg.install(ctx.packages)
+
+
+def command_rescan_disks(ctx: CephadmContext) -> str:
+
+ def probe_hba(scan_path: str) -> None:
+ """Tell the adapter to rescan"""
+ with open(scan_path, 'w') as f:
+ f.write('- - -')
+
+ cmd = ctx.func.__name__.replace('command_', '')
+ logger.info(f'{cmd}: starting')
+ start = time.time()
+
+ all_scan_files = glob('/sys/class/scsi_host/*/scan')
+ scan_files = []
+ skipped = []
+ for scan_path in all_scan_files:
+ adapter_name = os.path.basename(os.path.dirname(scan_path))
+ proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
+ if proc_name in ['unknown', 'usb-storage']:
+ skipped.append(adapter_name)
+ logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
+ continue
+
+ scan_files.append(scan_path)
+
+ if not scan_files:
+ logger.info(f'{cmd}: no compatible HBAs found')
+ return 'Ok. No compatible HBAs found'
+
+ responses = async_run(concurrent_tasks(probe_hba, scan_files))
+ failures = [r for r in responses if r]
+
+ logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')
+
+ elapsed = time.time() - start
+ if failures:
+ plural = 's' if len(failures) > 1 else ''
+ if len(failures) == len(scan_files):
+ return f'Failed. All {len(scan_files)} rescan requests failed'
+ else:
+ return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'
+
+ return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
+
+##################################
+
+
+def get_ipv4_address(ifname):
+ # type: (str) -> str
+ def _extract(sock: socket.socket, offset: int) -> str:
+ return socket.inet_ntop(
+ socket.AF_INET,
+ fcntl.ioctl(
+ sock.fileno(),
+ offset,
+ struct.pack('256s', bytes(ifname[:15], 'utf-8'))
+ )[20:24])
+
+ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ try:
+ addr = _extract(s, 35093)  # 0x8915 = SIOCGIFADDR
+ dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
+ except OSError:
+ # interface does not have an ipv4 address
+ return ''
+
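+ # derive the prefix length by counting set bits in the dotted-quad
+ # netmask, e.g. 255.255.255.0 -> 24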
+ dec_mask = sum([bin(int(i)).count('1')
+ for i in dq_mask.split('.')])
+ return '{}/{}'.format(addr, dec_mask)
+
+
+def get_ipv6_address(ifname):
+ # type: (str) -> str
+ if not os.path.exists('/proc/net/if_inet6'):
+ return ''
+
+ raw = read_file(['/proc/net/if_inet6'])
+ data = raw.splitlines()
+ # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
+ # field 0 is the address, field 2 is the prefix length (in hex)
+ for iface_setting in data:
+ field = iface_setting.split()
+ if field[-1] == ifname:
+ ipv6_raw = field[0]
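+ # the raw address is 32 hex characters with no separators;
+ # re-insert ':' every 4 characters so ipaddress can parse it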
+ ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
+ # apply naming rules using ipaddress module
+ ipv6 = ipaddress.ip_address(ipv6_fmtd)
+ return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
+ return ''
+
+
+def bytes_to_human(num, mode='decimal'):
+ # type: (float, str) -> str
+ """Convert a bytes value into it's human-readable form.
+
+ :param num: number, in bytes, to convert
+ :param mode: Either decimal (default) or binary to determine divisor
+ :returns: string representing the bytes value in a more readable format
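+
+ Examples (doctest-style, illustrative)::
+
+ >>> bytes_to_human(1500)
+ '1.5KB'
+ >>> bytes_to_human(1024, mode='binary')
+ '1.0KiB'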
+ """
+ unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
+ divisor = 1000.0
+ yotta = 'YB'
+
+ if mode == 'binary':
+ unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
+ divisor = 1024.0
+ yotta = 'YiB'
+
+ for unit in unit_list:
+ if abs(num) < divisor:
+ return '%3.1f%s' % (num, unit)
+ num /= divisor
+ return '%.1f%s' % (num, yotta)
+
+
+def read_file(path_list, file_name=''):
+ # type: (List[str], str) -> str
+ """Returns the content of the first file found within the `path_list`
+
+ :param path_list: list of file paths to search
+ :param file_name: optional file_name to be applied to a file path
+ :returns: content of the file or 'Unknown'
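+
+ e.g. read_file(['/proc/meminfo']) returns the file's content, and
+ read_file(['/sys/class/dmi/id'], 'sys_vendor') reads
+ /sys/class/dmi/id/sys_vendor (paths illustrative)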
+ """
+ for path in path_list:
+ if file_name:
+ file_path = os.path.join(path, file_name)
+ else:
+ file_path = path
+ if os.path.exists(file_path):
+ with open(file_path, 'rb') as f:
+ try:
+ content = f.read().decode('utf-8', 'ignore').strip()
+ except OSError:
+ # sysfs may populate the file, but for devices like
+ # virtio reads can fail
+ return 'Unknown'
+ else:
+ return content
+ return 'Unknown'
+
+##################################
+
+
+class Enclosure:
+ def __init__(self, enc_id: str, enc_path: str, dev_path: str):
+ """External disk enclosure metadata
+
+ Args:
+ :param enc_id: enclosure id (normally a WWN)
+ :param enc_path: sysfs path to HBA attached to the enclosure
+ e.g. /sys/class/scsi_generic/sg11/device/enclosure/0:0:9:0
+ :param dev_path: sysfs path to the generic scsi device for the enclosure HBA
+ e.g. /sys/class/scsi_generic/sg2
+ """
+ self._path: str = dev_path
+ self._dev_path: str = os.path.join(dev_path, 'device')
+ self._enc_path: str = enc_path
+ self.ses_paths: List[str] = []
+ self.path_count: int = 0
+ self.vendor: str = ''
+ self.model: str = ''
+ self.enc_id: str = enc_id
+ self.components: Union[int, str] = 0
+ self.device_lookup: Dict[str, str] = {}
+ self.device_count: int = 0
+ self.slot_map: Dict[str, Dict[str, str]] = {}
+
+ self._probe()
+
+ def _probe(self) -> None:
+ """Analyse the dev paths to identify enclosure related information"""
+
+ self.vendor = read_file([os.path.join(self._dev_path, 'vendor')])
+ self.model = read_file([os.path.join(self._dev_path, 'model')])
+ self.components = read_file([os.path.join(self._enc_path, 'components')])
+ slot_paths = glob(os.path.join(self._enc_path, '*', 'slot'))
+ for slot_path in slot_paths:
+ slot = read_file([slot_path])
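+ # vpd_pg80 exposes the SCSI 'unit serial number' VPD page; strip
+ # any non-printable characters from it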
+ serial_path = os.path.join(os.path.dirname(slot_path), 'device', 'vpd_pg80')
+ serial = ''
+ if os.path.exists(serial_path):
+ serial_raw = read_file([serial_path])
+ serial = (''.join(char for char in serial_raw if char in string.printable)).strip()
+ self.device_lookup[serial] = slot
+ slot_dir = os.path.dirname(slot_path)
+ self.slot_map[slot] = {
+ 'status': read_file([os.path.join(slot_dir, 'status')]),
+ 'fault': read_file([os.path.join(slot_dir, 'fault')]),
+ 'locate': read_file([os.path.join(slot_dir, 'locate')]),
+ 'serial': serial,
+ }
+
+ self.device_count = len(self.device_lookup)
+ self.update(os.path.basename(self._path))
+
+ def update(self, dev_id: str) -> None:
+ """Update an enclosure object with a related sg device name
+
+ :param dev_id (str): device name e.g. sg2
+ """
+ self.ses_paths.append(dev_id)
+ self.path_count = len(self.ses_paths)
+
+ def _dump(self) -> Dict[str, Any]:
+ """Return a dict representation of the object"""
+ return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
+
+ def __str__(self) -> str:
+ """Return a formatted json representation of the object as a string"""
+ return json.dumps(self._dump(), indent=2)
+
+ def __repr__(self) -> str:
+ """Return a json representation of the object as a string"""
+ return json.dumps(self._dump())
+
+ def as_json(self) -> Dict[str, Any]:
+ """Return a dict representing the object"""
+ return self._dump()
+
+
+class HostFacts():
+ _dmi_path_list = ['/sys/class/dmi/id']
+ _nic_path_list = ['/sys/class/net']
+ _apparmor_path_list = ['/etc/apparmor']
+ _disk_vendor_workarounds = {
+ '0x1af4': 'Virtio Block Device'
+ }
+ _excluded_block_devices = ('sr', 'zram', 'dm-', 'loop', 'md')
+ _sg_generic_glob = '/sys/class/scsi_generic/*'
+
+ def __init__(self, ctx: CephadmContext):
+ self.ctx: CephadmContext = ctx
+ self.cpu_model: str = 'Unknown'
+ self.sysctl_options: Dict[str, str] = self._populate_sysctl_options()
+ self.cpu_count: int = 0
+ self.cpu_cores: int = 0
+ self.cpu_threads: int = 0
+ self.interfaces: Dict[str, Any] = {}
+
+ self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
+ self._get_cpuinfo()
+ self._process_nics()
+ self.arch: str = platform.processor()
+ self.kernel: str = platform.release()
+ self._enclosures = self._discover_enclosures()
+ self._block_devices = self._get_block_devs()
+ self._device_list = self._get_device_info()
+
+ def _populate_sysctl_options(self) -> Dict[str, str]:
+ sysctl_options = {}
+ out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
+ if out:
+            for line in out.splitlines():
+                if '=' not in line:
+                    # skip continuation lines of multi-line sysctl values
+                    continue
+                # split on the first '=' only - values may themselves contain '='
+                option, value = line.split('=', 1)
+                sysctl_options[option.strip()] = value.strip()
+ return sysctl_options
+
+ def _discover_enclosures(self) -> Dict[str, Enclosure]:
+ """Build a dictionary of discovered scsi enclosures
+
+ Enclosures are detected by walking the scsi generic sysfs hierarchy.
+ Any device tree that holds an 'enclosure' subdirectory is interpreted as
+        an enclosure. Once identified, the enclosure directory is analysed to
+        extract key descriptors that help relate disks to enclosures and
+ disks to enclosure slots.
+
+ :return: Dict[str, Enclosure]: a map of enclosure id (hex) to enclosure object
+ """
+ sg_paths: List[str] = glob(HostFacts._sg_generic_glob)
+ enclosures: Dict[str, Enclosure] = {}
+
+ for sg_path in sg_paths:
+ enc_path = os.path.join(sg_path, 'device', 'enclosure')
+ if os.path.exists(enc_path):
+ enc_dirs = glob(os.path.join(enc_path, '*'))
+ if len(enc_dirs) != 1:
+ # incomplete enclosure spec - expecting ONE dir in the format
+ # host(adapter):bus:target:lun e.g. 16:0:0:0
+ continue
+ enc_path = enc_dirs[0]
+ enc_id = read_file([os.path.join(enc_path, 'id')])
+ if enc_id in enclosures:
+ enclosures[enc_id].update(os.path.basename(sg_path))
+ continue
+
+ enclosure = Enclosure(enc_id, enc_path, sg_path)
+ enclosures[enc_id] = enclosure
+
+ return enclosures
+
+ @property
+ def enclosures(self) -> Dict[str, Dict[str, Any]]:
+ """Dump the enclosure objects as dicts"""
+ return {k: v._dump() for k, v in self._enclosures.items()}
+
+ @property
+ def enclosure_count(self) -> int:
+ """Return the number of enclosures detected"""
+ return len(self._enclosures.keys())
+
+ def _get_cpuinfo(self):
+ # type: () -> None
+ """Determine cpu information via /proc/cpuinfo"""
+ raw = read_file(['/proc/cpuinfo'])
+ output = raw.splitlines()
+ cpu_set = set()
+
+ for line in output:
+ field = [f.strip() for f in line.split(':')]
+ if 'model name' in line:
+ self.cpu_model = field[1]
+ if 'physical id' in line:
+ cpu_set.add(field[1])
+ if 'siblings' in line:
+ self.cpu_threads = int(field[1].strip())
+ if 'cpu cores' in line:
+ self.cpu_cores = int(field[1].strip())
+ self.cpu_count = len(cpu_set)
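+        # e.g. (illustrative) a dual-socket host whose /proc/cpuinfo reports
+        # 'physical id' values {0, 1}, 'cpu cores : 8' and 'siblings : 16'
+        # ends up with cpu_count=2, cpu_cores=8 and cpu_threads=16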
+
+ def _get_block_devs(self):
+ # type: () -> List[str]
+ """Determine the list of block devices by looking at /sys/block"""
+ return [dev for dev in os.listdir('/sys/block')
+ if not dev.startswith(HostFacts._excluded_block_devices)]
+
+ @property
+ def operating_system(self):
+ # type: () -> str
+ """Determine OS version"""
+ raw_info = read_file(['/etc/os-release'])
+ os_release = raw_info.splitlines()
+ rel_str = 'Unknown'
+ rel_dict = dict()
+
+ for line in os_release:
+ if '=' in line:
+ var_name, var_value = line.split('=')
+ rel_dict[var_name] = var_value.strip('"')
+
+ # Would normally use PRETTY_NAME, but NAME and VERSION are more
+ # consistent
+ if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
+ rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
+ return rel_str
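+        # e.g. (illustrative) an /etc/os-release containing
+        # NAME="CentOS Stream" and VERSION="8" yields 'CentOS Stream 8'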
+
+ @property
+ def hostname(self):
+ # type: () -> str
+ """Return the hostname"""
+ return platform.node()
+
+ @property
+ def shortname(self) -> str:
+ return platform.node().split('.', 1)[0]
+
+ @property
+ def fqdn(self) -> str:
+ return get_fqdn()
+
+ @property
+ def subscribed(self):
+ # type: () -> str
+ """Highlevel check to see if the host is subscribed to receive updates/support"""
+ def _red_hat():
+ # type: () -> str
+ # RHEL 7 and RHEL 8
+ entitlements_dir = '/etc/pki/entitlement'
+ if os.path.exists(entitlements_dir):
+ pems = glob('{}/*.pem'.format(entitlements_dir))
+ if len(pems) >= 2:
+ return 'Yes'
+
+ return 'No'
+
+ os_name = self.operating_system
+ if os_name.upper().startswith('RED HAT'):
+ return _red_hat()
+
+ return 'Unknown'
+
+ @property
+ def hdd_count(self):
+ # type: () -> int
+ """Return a count of HDDs (spinners)"""
+ return len(self.hdd_list)
+
+ def _get_capacity(self, dev):
+ # type: (str) -> int
+ """Determine the size of a given device
+
+        The kernel always reports device size in units of a 512 byte
+        sector. For more information see
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/linux/types.h?h=v5.15.63#n120
+ """
+ size_path = os.path.join('/sys/block', dev, 'size')
+ size_blocks = int(read_file([size_path]))
+ return size_blocks * 512
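+        # e.g. (illustrative) a /sys/block/<dev>/size of 41943040 sectors
+        # maps to 41943040 * 512 = 21474836480 bytes, a nominal 20GiB device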
+
+ def _get_capacity_by_type(self, disk_type='hdd'):
+ # type: (str) -> int
+ """Return the total capacity of a category of device (flash or hdd)"""
+ capacity: int = 0
+ for dev in self._device_list:
+ if dev['disk_type'] == disk_type:
+ disk_capacity = cast(int, dev.get('disk_size_bytes', 0))
+ capacity += disk_capacity
+ return capacity
+
+ def _get_device_info(self):
+ # type: () -> List[Dict[str, object]]
+ """Return a 'pretty' name list for each unique device in the `dev_list`"""
+ disk_list = list()
+
+ # serial_num_lookup is a dict of serial number -> List of devices with that serial number
+ serial_num_lookup: Dict[str, List[str]] = {}
+
+ # make a map of devname -> disk path. this path name may indicate the physical slot
+ # of a drive (phyXX)
+ disk_path_map: Dict[str, str] = {}
+ for path in glob('/dev/disk/by-path/*'):
+ tgt_raw = Path(path).resolve()
+ tgt = os.path.basename(str(tgt_raw))
+ disk_path_map[tgt] = path
+
+ # make a map of holder (dm-XX) -> full mpath name
+ dm_device_map: Dict[str, str] = {}
+ for mpath in glob('/dev/mapper/mpath*'):
+ tgt_raw = Path(mpath).resolve()
+ tgt = os.path.basename(str(tgt_raw))
+ dm_device_map[tgt] = mpath
+
+ # main loop to process all eligible block devices
+ for dev in self._block_devices:
+ enclosure_id = ''
+ enclosure_slot = ''
+ scsi_addr = ''
+ mpath = ''
+
+ disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
+ disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
+ disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
+ vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
+ rotational = read_file(['/sys/block/{}/queue/rotational'.format(dev)])
+ holders_raw = glob('/sys/block/{}/holders/*'.format(dev))
+ if len(holders_raw) == 1:
+ # mpath will have 1 holder entry
+ holder = os.path.basename(holders_raw[0])
+ mpath = dm_device_map.get(holder, '')
+
+ disk_type = 'hdd' if rotational == '1' else 'flash'
+ scsi_addr_path = glob('/sys/block/{}/device/bsg/*'.format(dev))
+ if len(scsi_addr_path) == 1:
+ scsi_addr = os.path.basename(scsi_addr_path[0])
+
+ # vpd_pg80 isn't guaranteed (libvirt, vmware for example)
+ serial_raw = read_file(['/sys/block/{}/device/vpd_pg80'.format(dev)])
+ serial = (''.join(i for i in serial_raw if i in string.printable)).strip()
+ if serial.lower() == 'unknown':
+ serial = ''
+ else:
+ if serial in serial_num_lookup:
+ serial_num_lookup[serial].append(dev)
+ else:
+ serial_num_lookup[serial] = [dev]
+ for enc_id, enclosure in self._enclosures.items():
+ if serial in enclosure.device_lookup.keys():
+ enclosure_id = enc_id
+ enclosure_slot = enclosure.device_lookup[serial]
+
+ disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
+ disk_size_bytes = self._get_capacity(dev)
+ disk_list.append({
+ 'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
+ 'vendor': disk_vendor,
+ 'model': disk_model,
+ 'rev': disk_rev,
+ 'wwid': disk_wwid,
+ 'dev_name': dev,
+ 'disk_size_bytes': disk_size_bytes,
+ 'disk_type': disk_type,
+ 'serial': serial,
+ 'alt_dev_name': '',
+ 'scsi_addr': scsi_addr,
+ 'enclosure_id': enclosure_id,
+ 'enclosure_slot': enclosure_slot,
+ 'path_id': disk_path_map.get(dev, ''),
+ 'mpath': mpath,
+ })
+
+ # process the devices to drop duplicate physical devs based on matching
+ # the unique serial number
+ disk_list_unique: List[Dict[str, Any]] = []
+ serials_seen: List[str] = []
+ for dev in disk_list:
+ serial = str(dev['serial'])
+ if serial:
+ if serial in serials_seen:
+ continue
+ else:
+ serials_seen.append(serial)
+ devs = serial_num_lookup[serial].copy()
+ devs.remove(str(dev['dev_name']))
+ dev['alt_dev_name'] = ','.join(devs)
+ disk_list_unique.append(dev)
+
+ return disk_list_unique
+
+ @property
+ def hdd_list(self):
+ # type: () -> List[Dict[str, object]]
+ """Return a list of devices that are HDDs (spinners)"""
+ return [dev for dev in self._device_list if dev['disk_type'] == 'hdd']
+
+ @property
+ def flash_list(self):
+ # type: () -> List[Dict[str, object]]
+ """Return a list of devices that are flash based (SSD, NVMe)"""
+ return [dev for dev in self._device_list if dev['disk_type'] == 'flash']
+
+ @property
+ def hdd_capacity_bytes(self):
+ # type: () -> int
+ """Return the total capacity for all HDD devices (bytes)"""
+ return self._get_capacity_by_type(disk_type='hdd')
+
+ @property
+ def hdd_capacity(self):
+ # type: () -> str
+ """Return the total capacity for all HDD devices (human readable format)"""
+ return bytes_to_human(self.hdd_capacity_bytes)
+
+ @property
+ def cpu_load(self):
+ # type: () -> Dict[str, float]
+ """Return the cpu load average data for the host"""
+ raw = read_file(['/proc/loadavg']).strip()
+ data = raw.split()
+ return {
+ '1min': float(data[0]),
+ '5min': float(data[1]),
+ '15min': float(data[2]),
+ }
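+        # e.g. (illustrative) a /proc/loadavg of '0.52 0.41 0.30 1/123 4567'
+        # yields {'1min': 0.52, '5min': 0.41, '15min': 0.3}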
+
+ @property
+ def flash_count(self):
+ # type: () -> int
+ """Return the number of flash devices in the system (SSD, NVMe)"""
+ return len(self.flash_list)
+
+ @property
+ def flash_capacity_bytes(self):
+ # type: () -> int
+ """Return the total capacity for all flash devices (bytes)"""
+ return self._get_capacity_by_type(disk_type='flash')
+
+ @property
+ def flash_capacity(self):
+ # type: () -> str
+ """Return the total capacity for all Flash devices (human readable format)"""
+ return bytes_to_human(self.flash_capacity_bytes)
+
+ def _process_nics(self):
+ # type: () -> None
+ """Look at the NIC devices and extract network related metadata"""
+ # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
+ hw_lookup = {
+ '1': 'ethernet',
+ '32': 'infiniband',
+ '772': 'loopback',
+ }
+
+ for nic_path in HostFacts._nic_path_list:
+ if not os.path.exists(nic_path):
+ continue
+ for iface in os.listdir(nic_path):
+
+ if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
+ nic_type = 'bridge'
+ elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
+ nic_type = 'bonding'
+ else:
+ nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')
+
+ if nic_type == 'loopback': # skip loopback devices
+ continue
+
+ lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
+ upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]
+
+ try:
+ mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
+ except ValueError:
+ mtu = 0
+
+ operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
+ try:
+ speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
+ except (OSError, ValueError):
+                    # OSError : device doesn't support the ethtool get_link_ksettings
+                    # ValueError : the read failed and read_file returned 'Unknown',
+                    #              which int() cannot parse
+                    #
+                    # Either way, we show a -1 when speed isn't available
+ speed = -1
+
+ dev_link = os.path.join(nic_path, iface, 'device')
+ if os.path.exists(dev_link):
+ iftype = 'physical'
+ driver_path = os.path.join(dev_link, 'driver')
+ if os.path.exists(driver_path):
+ driver = os.path.basename(os.path.realpath(driver_path))
+ else:
+ driver = 'Unknown'
+
+ else:
+ iftype = 'logical'
+ driver = ''
+
+ self.interfaces[iface] = {
+ 'mtu': mtu,
+ 'upper_devs_list': upper_devs_list,
+ 'lower_devs_list': lower_devs_list,
+ 'operstate': operstate,
+ 'iftype': iftype,
+ 'nic_type': nic_type,
+ 'driver': driver,
+ 'speed': speed,
+ 'ipv4_address': get_ipv4_address(iface),
+ 'ipv6_address': get_ipv6_address(iface),
+ }
+
+ @property
+ def nic_count(self):
+ # type: () -> int
+ """Return a total count of all physical NICs detected in the host"""
+ phys_devs = []
+ for iface in self.interfaces:
+ if self.interfaces[iface]['iftype'] == 'physical':
+ phys_devs.append(iface)
+ return len(phys_devs)
+
+ def _get_mem_data(self, field_name):
+ # type: (str) -> int
+ for line in self._meminfo:
+ if line.startswith(field_name):
+ _d = line.split()
+ return int(_d[1])
+ return 0
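+        # e.g. (illustrative) a /proc/meminfo line of
+        # 'MemTotal:       16284724 kB' gives _get_mem_data('MemTotal') == 16284724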
+
+ @property
+ def memory_total_kb(self):
+ # type: () -> int
+ """Determine the memory installed (kb)"""
+ return self._get_mem_data('MemTotal')
+
+ @property
+ def memory_free_kb(self):
+ # type: () -> int
+ """Determine the memory free (not cache, immediately usable)"""
+ return self._get_mem_data('MemFree')
+
+ @property
+ def memory_available_kb(self):
+ # type: () -> int
+ """Determine the memory available to new applications without swapping"""
+ return self._get_mem_data('MemAvailable')
+
+ @property
+ def vendor(self):
+ # type: () -> str
+ """Determine server vendor from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'sys_vendor')
+
+ @property
+ def model(self):
+ # type: () -> str
+ """Determine server model information from DMI data in sysfs"""
+ family = read_file(HostFacts._dmi_path_list, 'product_family')
+ product = read_file(HostFacts._dmi_path_list, 'product_name')
+ if family == 'Unknown' and product:
+ return '{}'.format(product)
+
+ return '{} ({})'.format(family, product)
+
+ @property
+ def bios_version(self):
+ # type: () -> str
+ """Determine server BIOS version from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'bios_version')
+
+ @property
+ def bios_date(self):
+ # type: () -> str
+ """Determine server BIOS date from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'bios_date')
+
+ @property
+ def chassis_serial(self):
+ # type: () -> str
+ """Determine chassis serial number from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'chassis_serial')
+
+ @property
+ def board_serial(self):
+ # type: () -> str
+ """Determine mainboard serial number from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'board_serial')
+
+ @property
+ def product_serial(self):
+ # type: () -> str
+ """Determine server's serial number from DMI data in sysfs"""
+ return read_file(HostFacts._dmi_path_list, 'product_serial')
+
+ @property
+ def timestamp(self):
+ # type: () -> float
+ """Return the current time as Epoch seconds"""
+ return time.time()
+
+ @property
+ def system_uptime(self):
+ # type: () -> float
+ """Return the system uptime (in secs)"""
+ raw_time = read_file(['/proc/uptime'])
+ up_secs, _ = raw_time.split()
+ return float(up_secs)
+
+ @property
+ def kernel_security(self):
+ # type: () -> Dict[str, str]
+ """Determine the security features enabled in the kernel - SELinux, AppArmor"""
+ def _fetch_selinux() -> Dict[str, str]:
+ """Get the selinux status"""
+ security = {}
+ try:
+ out, err, code = call(self.ctx, ['sestatus'],
+ verbosity=CallVerbosity.QUIET)
+ security['type'] = 'SELinux'
+ status, mode, policy = '', '', ''
+ for line in out.split('\n'):
+ if line.startswith('SELinux status:'):
+ k, v = line.split(':')
+ status = v.strip()
+ elif line.startswith('Current mode:'):
+ k, v = line.split(':')
+ mode = v.strip()
+ elif line.startswith('Loaded policy name:'):
+ k, v = line.split(':')
+ policy = v.strip()
+ if status == 'disabled':
+ security['description'] = 'SELinux: Disabled'
+ else:
+ security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
+ except Exception as e:
+ logger.info('unable to get selinux status: %s' % e)
+ return security
+
+ def _fetch_apparmor() -> Dict[str, str]:
+ """Read the apparmor profiles directly, returning an overview of AppArmor status"""
+ security = {}
+ for apparmor_path in HostFacts._apparmor_path_list:
+ if os.path.exists(apparmor_path):
+ security['type'] = 'AppArmor'
+ security['description'] = 'AppArmor: Enabled'
+ try:
+ profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
+ if len(profiles) == 0:
+ return {}
+ except OSError:
+ pass
+ else:
+ summary = {} # type: Dict[str, int]
+                        for line in profiles.split('\n'):
+                            # the mode is the last space-separated token, e.g.
+                            # '/usr/bin/man (enforce)' - rsplit tolerates
+                            # profile names that contain spaces
+                            item, mode = line.rsplit(' ', 1)
+                            mode = mode.strip('()')
+                            if mode in summary:
+                                summary[mode] += 1
+                            else:
+                                summary[mode] = 1
+ summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
+ security = {**security, **summary} # type: ignore
+ security['description'] += '({})'.format(summary_str)
+
+ return security
+ return {}
+
+ ret = {}
+ if os.path.exists('/sys/kernel/security/lsm'):
+ lsm = read_file(['/sys/kernel/security/lsm']).strip()
+ if 'selinux' in lsm:
+ ret = _fetch_selinux()
+ elif 'apparmor' in lsm:
+ ret = _fetch_apparmor()
+ else:
+ return {
+ 'type': 'Unknown',
+ 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
+ }
+
+ if ret:
+ return ret
+
+ return {
+ 'type': 'None',
+ 'description': 'Linux Security Module framework is not available'
+ }
+
+ @property
+ def selinux_enabled(self) -> bool:
+ return (self.kernel_security['type'] == 'SELinux') and \
+ (self.kernel_security['description'] != 'SELinux: Disabled')
+
+ @property
+ def kernel_parameters(self):
+ # type: () -> Dict[str, str]
+ """Get kernel parameters required/used in Ceph clusters"""
+
+ k_param = {}
+ out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
+ if out:
+ param_list = out.split('\n')
+ param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}
+
+ # return only desired parameters
+ if 'net.ipv4.ip_nonlocal_bind' in param_dict:
+ k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
+
+ return k_param
+
+ @staticmethod
+ def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
+ listening_ports = []
+ # Connections state documentation
+ # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
+ # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
+ listening_state = {
+ 'tcp': '0A',
+ 'udp': '07'
+ }
+
+ if protocol not in listening_state.keys():
+ return []
+
+ if os.path.exists(tcp_file):
+ with open(tcp_file) as f:
+ tcp_data = f.readlines()[1:]
+
+ for con in tcp_data:
+ con_info = con.strip().split()
+ if con_info[3] == listening_state[protocol]:
+ local_port = int(con_info[1].split(':')[1], 16)
+ listening_ports.append(local_port)
+
+ return listening_ports
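+        # e.g. (illustrative) a /proc/net/tcp row whose local_address column
+        # ends in ':1A85' with a state column of '0A' (LISTEN) contributes
+        # int('1A85', 16) == 6789, the default ceph-mon v1 port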
+
+ @property
+ def tcp_ports_used(self) -> List[int]:
+ return HostFacts._process_net_data('/proc/net/tcp')
+
+ @property
+ def tcp6_ports_used(self) -> List[int]:
+ return HostFacts._process_net_data('/proc/net/tcp6')
+
+ @property
+ def udp_ports_used(self) -> List[int]:
+ return HostFacts._process_net_data('/proc/net/udp', 'udp')
+
+ @property
+ def udp6_ports_used(self) -> List[int]:
+ return HostFacts._process_net_data('/proc/net/udp6', 'udp')
+
+ def dump(self):
+ # type: () -> str
+ """Return the attributes of this HostFacts object as json"""
+ data = {
+ k: getattr(self, k) for k in dir(self)
+ if not k.startswith('_')
+ and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
+ }
+ return json.dumps(data, indent=2, sort_keys=True)
+
+##################################
+
+
+def command_gather_facts(ctx: CephadmContext) -> None:
+ """gather_facts is intended to provide host related metadata to the caller"""
+ host = HostFacts(ctx)
+ print(host.dump())
+
+
+##################################
+
+
+def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
+ # TODO: UNITTEST
+ return os.path.exists(
+ os.path.join(
+ ctx.unit_dir,
+ f'{subsystem}.target.wants',
+ target_name
+ )
+ )
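+# e.g. systemd_target_state() checks for a path such as
+# /etc/systemd/system/ceph.target.wants/ceph-<fsid>.target
+# (path illustrative - ctx.unit_dir is configurable via --unit-dir)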
+
+
+def target_exists(ctx: CephadmContext) -> bool:
+ return os.path.exists(ctx.unit_dir + '/ceph.target')
+
+
+@infer_fsid
+def command_maintenance(ctx: CephadmContext) -> str:
+ if not ctx.fsid:
+ raise Error('failed - must pass --fsid to specify cluster')
+
+ target = f'ceph-{ctx.fsid}.target'
+
+ if ctx.maintenance_action.lower() == 'enter':
+ logger.info('Requested to place host into maintenance')
+ if systemd_target_state(ctx, target):
+ _out, _err, code = call(ctx,
+ ['systemctl', 'disable', target],
+ verbosity=CallVerbosity.DEBUG)
+ if code:
+ logger.error(f'Failed to disable the {target} target')
+                return 'failed - unable to disable the target'
+ else:
+ # stopping a target waits by default
+ _out, _err, code = call(ctx,
+ ['systemctl', 'stop', target],
+ verbosity=CallVerbosity.DEBUG)
+ if code:
+ logger.error(f'Failed to stop the {target} target')
+                    return 'failed - unable to stop the target'
+ else:
+ return f'success - systemd target {target} disabled'
+
+ else:
+ return 'skipped - target already disabled'
+
+ else:
+ logger.info('Requested to exit maintenance state')
+        # if we've never deployed a daemon on this host there will be no systemd
+        # target to disable, so attempting a disable will fail. We still need to
+        # return success here or the host will be permanently stuck in maintenance
+        # mode, as no daemons can be deployed and so no systemd target will ever
+        # exist to disable.
+ if not target_exists(ctx):
+ return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
+ # exit maintenance request
+ if not systemd_target_state(ctx, target):
+ _out, _err, code = call(ctx,
+ ['systemctl', 'enable', target],
+ verbosity=CallVerbosity.DEBUG)
+ if code:
+ logger.error(f'Failed to enable the {target} target')
+ return 'failed - unable to enable the target'
+ else:
+ # starting a target waits by default
+ _out, _err, code = call(ctx,
+ ['systemctl', 'start', target],
+ verbosity=CallVerbosity.DEBUG)
+ if code:
+ logger.error(f'Failed to start the {target} target')
+ return 'failed - unable to start the target'
+ else:
+ return f'success - systemd target {target} enabled and started'
+        # target was already enabled - nothing to do
+        return f'skipped - systemd target {target} already enabled'
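+# typical invocations (illustrative - see the 'host-maintenance' subparser below):
+#   cephadm host-maintenance --fsid <fsid> enter
+#   cephadm host-maintenance --fsid <fsid> exit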
+
+##################################
+
+
+class ArgumentFacade:
+ def __init__(self) -> None:
+ self.defaults: Dict[str, Any] = {}
+
+ def add_argument(self, *args: Any, **kwargs: Any) -> None:
+ if not args:
+ raise ValueError('expected at least one argument')
+ name = args[0]
+ if not name.startswith('--'):
+ raise ValueError(f'expected long option, got: {name!r}')
+ name = name[2:].replace('-', '_')
+ value = kwargs.pop('default', None)
+ self.defaults[name] = value
+
+ def apply(self, ctx: CephadmContext) -> None:
+ for key, value in self.defaults.items():
+ setattr(ctx, key, value)
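+# e.g. (illustrative) mirroring a single deploy flag onto a context:
+#   facade = ArgumentFacade()
+#   facade.add_argument('--tcp-ports')  # stores defaults['tcp_ports'] = None
+#   facade.apply(ctx)                   # sets ctx.tcp_ports = None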
+
+
+def _add_deploy_parser_args(
+ parser_deploy: Union[argparse.ArgumentParser, ArgumentFacade],
+) -> None:
+ parser_deploy.add_argument(
+ '--config', '-c',
+ help='config file for new daemon')
+ parser_deploy.add_argument(
+ '--config-json',
+ help='Additional configuration information in JSON format')
+ parser_deploy.add_argument(
+ '--keyring',
+ help='keyring for new daemon')
+ parser_deploy.add_argument(
+ '--key',
+ help='key for new daemon')
+ parser_deploy.add_argument(
+ '--osd-fsid',
+ help='OSD uuid, if creating an OSD container')
+ parser_deploy.add_argument(
+ '--skip-firewalld',
+ action='store_true',
+ help='Do not configure firewalld')
+ parser_deploy.add_argument(
+ '--tcp-ports',
+ help='List of tcp ports to open in the host firewall')
+ parser_deploy.add_argument(
+ '--port-ips',
+ help='JSON dict mapping ports to IPs they need to be bound on'
+ )
+ parser_deploy.add_argument(
+ '--reconfig',
+ action='store_true',
+ help='Reconfigure a previously deployed daemon')
+ parser_deploy.add_argument(
+ '--allow-ptrace',
+ action='store_true',
+ help='Allow SYS_PTRACE on daemon container')
+ parser_deploy.add_argument(
+ '--container-init',
+ action='store_true',
+ default=CONTAINER_INIT,
+ help=argparse.SUPPRESS)
+ parser_deploy.add_argument(
+ '--memory-request',
+ help='Container memory request/target'
+ )
+ parser_deploy.add_argument(
+ '--memory-limit',
+ help='Container memory hard limit'
+ )
+ parser_deploy.add_argument(
+ '--meta-json',
+ help='JSON dict of additional metadata'
+ )
+ parser_deploy.add_argument(
+ '--extra-container-args',
+ action='append',
+ default=[],
+ help='Additional container arguments to apply to daemon'
+ )
+ parser_deploy.add_argument(
+ '--extra-entrypoint-args',
+ action='append',
+ default=[],
+        help='Additional entrypoint arguments to apply to daemon'
+ )
+
+
+def _get_parser():
+ # type: () -> argparse.ArgumentParser
+ parser = argparse.ArgumentParser(
+ description='Bootstrap Ceph daemons with systemd and containers.',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ '--image',
+ help='container image. Can also be set via the "CEPHADM_IMAGE" '
+ 'env var')
+ parser.add_argument(
+ '--docker',
+ action='store_true',
+ help='use docker instead of podman')
+ parser.add_argument(
+ '--data-dir',
+ default=DATA_DIR,
+ help='base directory for daemon data')
+ parser.add_argument(
+ '--log-dir',
+ default=LOG_DIR,
+ help='base directory for daemon logs')
+ parser.add_argument(
+ '--logrotate-dir',
+ default=LOGROTATE_DIR,
+ help='location of logrotate configuration files')
+ parser.add_argument(
+ '--sysctl-dir',
+ default=SYSCTL_DIR,
+ help='location of sysctl configuration files')
+ parser.add_argument(
+ '--unit-dir',
+ default=UNIT_DIR,
+ help='base directory for systemd units')
+ parser.add_argument(
+ '--verbose', '-v',
+ action='store_true',
+ help='Show debug-level log messages')
+ parser.add_argument(
+ '--timeout',
+ type=int,
+ default=DEFAULT_TIMEOUT,
+ help='timeout in seconds')
+ parser.add_argument(
+ '--retry',
+ type=int,
+ default=DEFAULT_RETRY,
+ help='max number of retries')
+ parser.add_argument(
+ '--env', '-e',
+ action='append',
+ default=[],
+ help='set environment variable')
+ parser.add_argument(
+ '--no-container-init',
+ action='store_true',
+ default=not CONTAINER_INIT,
+ help='Do not run podman/docker with `--init`')
+ parser.add_argument(
+ '--no-cgroups-split',
+ action='store_true',
+ default=False,
+ help='Do not run containers with --cgroups=split (currently only relevant when using podman)')
+
+ subparsers = parser.add_subparsers(help='sub-command')
+
+ parser_version = subparsers.add_parser(
+ 'version', help='get cephadm version')
+ parser_version.set_defaults(func=command_version)
+
+ parser_pull = subparsers.add_parser(
+ 'pull', help='pull the default container image')
+ parser_pull.set_defaults(func=command_pull)
+ parser_pull.add_argument(
+ '--insecure',
+ action='store_true',
+ help=argparse.SUPPRESS,
+ )
+
+ parser_inspect_image = subparsers.add_parser(
+ 'inspect-image', help='inspect local container image')
+ parser_inspect_image.set_defaults(func=command_inspect_image)
+
+ parser_ls = subparsers.add_parser(
+ 'ls', help='list daemon instances on this host')
+ parser_ls.set_defaults(func=command_ls)
+ parser_ls.add_argument(
+ '--no-detail',
+ action='store_true',
+ help='Do not include daemon status')
+ parser_ls.add_argument(
+ '--legacy-dir',
+ default='/',
+ help='base directory for legacy daemon data')
+
+ parser_list_networks = subparsers.add_parser(
+ 'list-networks', help='list IP networks')
+ parser_list_networks.set_defaults(func=command_list_networks)
+
+ parser_adopt = subparsers.add_parser(
+ 'adopt', help='adopt daemon deployed with a different tool')
+ parser_adopt.set_defaults(func=command_adopt)
+ parser_adopt.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
+ parser_adopt.add_argument(
+ '--style',
+ required=True,
+ help='deployment style (legacy, ...)')
+ parser_adopt.add_argument(
+ '--cluster',
+ default='ceph',
+ help='cluster name')
+ parser_adopt.add_argument(
+ '--legacy-dir',
+ default='/',
+ help='base directory for legacy daemon data')
+ parser_adopt.add_argument(
+ '--config-json',
+ help='Additional configuration information in JSON format')
+ parser_adopt.add_argument(
+ '--skip-firewalld',
+ action='store_true',
+ help='Do not configure firewalld')
+ parser_adopt.add_argument(
+ '--skip-pull',
+ action='store_true',
+ help='do not pull the default image before adopting')
+ parser_adopt.add_argument(
+ '--force-start',
+ action='store_true',
+ help='start newly adopted daemon, even if it was not running previously')
+ parser_adopt.add_argument(
+ '--container-init',
+ action='store_true',
+ default=CONTAINER_INIT,
+ help=argparse.SUPPRESS)
+
+ parser_rm_daemon = subparsers.add_parser(
+ 'rm-daemon', help='remove daemon instance')
+ parser_rm_daemon.set_defaults(func=command_rm_daemon)
+ parser_rm_daemon.add_argument(
+ '--name', '-n',
+ required=True,
+ action=CustomValidation,
+ help='daemon name (type.id)')
+ parser_rm_daemon.add_argument(
+ '--tcp-ports',
+ help='List of tcp ports to close in the host firewall')
+ parser_rm_daemon.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+ parser_rm_daemon.add_argument(
+ '--force',
+ action='store_true',
+ help='proceed, even though this may destroy valuable data')
+ parser_rm_daemon.add_argument(
+ '--force-delete-data',
+ action='store_true',
+ help='delete valuable daemon data instead of making a backup')
+
+ parser_rm_cluster = subparsers.add_parser(
+ 'rm-cluster', help='remove all daemons for a cluster')
+ parser_rm_cluster.set_defaults(func=command_rm_cluster)
+ parser_rm_cluster.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+ parser_rm_cluster.add_argument(
+ '--force',
+ action='store_true',
+ help='proceed, even though this may destroy valuable data')
+ parser_rm_cluster.add_argument(
+ '--keep-logs',
+ action='store_true',
+ help='do not remove log files')
+ parser_rm_cluster.add_argument(
+ '--zap-osds',
+ action='store_true',
+ help='zap OSD devices for this cluster')
+
+ parser_run = subparsers.add_parser(
+ 'run', help='run a ceph daemon, in a container, in the foreground')
+ parser_run.set_defaults(func=command_run)
+ parser_run.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
+ parser_run.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+
+ parser_shell = subparsers.add_parser(
+ 'shell', help='run an interactive shell inside a daemon container')
+ parser_shell.set_defaults(func=command_shell)
+ parser_shell.add_argument(
+ '--shared_ceph_folder',
+ metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several container folders are volume-mapped onto sub-folders of the given ceph source folder')
+ parser_shell.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_shell.add_argument(
+ '--name', '-n',
+ help='daemon name (type.id)')
+ parser_shell.add_argument(
+ '--config', '-c',
+ help='ceph.conf to pass through to the container')
+ parser_shell.add_argument(
+ '--keyring', '-k',
+ help='ceph.keyring to pass through to the container')
+ parser_shell.add_argument(
+ '--mount', '-m',
+        help=('mount a file or directory in the container. '
+              'Supports multiple mounts, '
+              'e.g. `--mount /foo /bar:/bar`. '
+              'When no destination is passed, the default is /mnt'),
+ nargs='+')
+ parser_shell.add_argument(
+ '--env', '-e',
+ action='append',
+ default=[],
+ help='set environment variable')
+ parser_shell.add_argument(
+ '--volume', '-v',
+ action='append',
+ default=[],
+        help='mount a volume inside the container (host-path:container-path)')
+ parser_shell.add_argument(
+ 'command', nargs=argparse.REMAINDER,
+ help='command (optional)')
+ parser_shell.add_argument(
+ '--no-hosts',
+ action='store_true',
+        help='do not pass /etc/hosts through to the container')
+ parser_shell.add_argument(
+ '--dry-run',
+ action='store_true',
+ help='print, but do not execute, the container command to start the shell')
+
+ parser_enter = subparsers.add_parser(
+ 'enter', help='run an interactive shell inside a running daemon container')
+ parser_enter.set_defaults(func=command_enter)
+ parser_enter.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_enter.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
+ parser_enter.add_argument(
+ 'command', nargs=argparse.REMAINDER,
+ help='command')
+
+ parser_ceph_volume = subparsers.add_parser(
+ 'ceph-volume', help='run ceph-volume inside a container')
+ parser_ceph_volume.set_defaults(func=command_ceph_volume)
+ parser_ceph_volume.add_argument(
+ '--shared_ceph_folder',
+ metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several container folders are volume-mapped onto sub-folders of the given ceph source folder')
+ parser_ceph_volume.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_ceph_volume.add_argument(
+ '--config-json',
+ help='JSON file with config and (client.bootstrap-osd) key')
+ parser_ceph_volume.add_argument(
+ '--config', '-c',
+ help='ceph conf file')
+ parser_ceph_volume.add_argument(
+ '--keyring', '-k',
+ help='ceph.keyring to pass through to the container')
+ parser_ceph_volume.add_argument(
+ 'command', nargs=argparse.REMAINDER,
+ help='command')
+
+ parser_zap_osds = subparsers.add_parser(
+ 'zap-osds', help='zap all OSDs associated with a particular fsid')
+ parser_zap_osds.set_defaults(func=command_zap_osds)
+ parser_zap_osds.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+ parser_zap_osds.add_argument(
+ '--force',
+ action='store_true',
+ help='proceed, even though this may destroy valuable data')
+
+ parser_unit = subparsers.add_parser(
+ 'unit', help="operate on the daemon's systemd unit")
+ parser_unit.set_defaults(func=command_unit)
+ parser_unit.add_argument(
+ 'command',
+ help='systemd command (start, stop, restart, enable, disable, ...)')
+ parser_unit.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_unit.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
+
+ parser_logs = subparsers.add_parser(
+ 'logs', help='print journald logs for a daemon container')
+ parser_logs.set_defaults(func=command_logs)
+ parser_logs.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_logs.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
+ parser_logs.add_argument(
+ 'command', nargs='*',
+ help='additional journalctl args')
+
+ parser_bootstrap = subparsers.add_parser(
+ 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
+ parser_bootstrap.set_defaults(func=command_bootstrap)
+ parser_bootstrap.add_argument(
+ '--config', '-c',
+ help='ceph conf file to incorporate')
+ parser_bootstrap.add_argument(
+ '--mon-id',
+ required=False,
+ help='mon id (default: local hostname)')
+ group = parser_bootstrap.add_mutually_exclusive_group()
+ group.add_argument(
+ '--mon-addrv',
+ help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
+ group.add_argument(
+ '--mon-ip',
+ help='mon IP')
+ parser_bootstrap.add_argument(
+ '--mgr-id',
+ required=False,
+ help='mgr id (default: randomly generated)')
+ parser_bootstrap.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_bootstrap.add_argument(
+ '--output-dir',
+ default='/etc/ceph',
+ help='directory to write config, keyring, and pub key files')
+ parser_bootstrap.add_argument(
+ '--output-keyring',
+ help='location to write keyring file with new cluster admin and mon keys')
+ parser_bootstrap.add_argument(
+ '--output-config',
+ help='location to write conf file to connect to new cluster')
+ parser_bootstrap.add_argument(
+ '--output-pub-ssh-key',
+ help="location to write the cluster's public SSH key")
+ parser_bootstrap.add_argument(
+ '--skip-admin-label',
+ action='store_true',
+ help='do not create admin label for ceph.conf and client.admin keyring distribution')
+ parser_bootstrap.add_argument(
+ '--skip-ssh',
+ action='store_true',
+ help='skip setup of ssh key on local host')
+ parser_bootstrap.add_argument(
+ '--initial-dashboard-user',
+ default='admin',
+ help='Initial user for the dashboard')
+ parser_bootstrap.add_argument(
+ '--initial-dashboard-password',
+ help='Initial password for the initial dashboard user')
+ parser_bootstrap.add_argument(
+ '--ssl-dashboard-port',
+ type=int,
+ default=8443,
+ help='Port number used to connect with dashboard using SSL')
+ parser_bootstrap.add_argument(
+ '--dashboard-key',
+ type=argparse.FileType('r'),
+ help='Dashboard key')
+ parser_bootstrap.add_argument(
+ '--dashboard-crt',
+ type=argparse.FileType('r'),
+ help='Dashboard certificate')
+
+ parser_bootstrap.add_argument(
+ '--ssh-config',
+ type=argparse.FileType('r'),
+ help='SSH config')
+ parser_bootstrap.add_argument(
+ '--ssh-private-key',
+ type=argparse.FileType('r'),
+ help='SSH private key')
+ parser_bootstrap.add_argument(
+ '--ssh-public-key',
+ type=argparse.FileType('r'),
+ help='SSH public key')
+ parser_bootstrap.add_argument(
+ '--ssh-signed-cert',
+ type=argparse.FileType('r'),
+ help='Signed cert for setups using CA signed SSH keys')
+ parser_bootstrap.add_argument(
+ '--ssh-user',
+ default='root',
+ help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
+ parser_bootstrap.add_argument(
+ '--skip-mon-network',
+ action='store_true',
+        help='do not set mon public_network based on bootstrap mon ip')
+ parser_bootstrap.add_argument(
+ '--skip-dashboard',
+ action='store_true',
+ help='do not enable the Ceph Dashboard')
+ parser_bootstrap.add_argument(
+ '--dashboard-password-noupdate',
+ action='store_true',
+ help='stop forced dashboard password change')
+ parser_bootstrap.add_argument(
+ '--no-minimize-config',
+ action='store_true',
+ help='do not assimilate and minimize the config file')
+ parser_bootstrap.add_argument(
+ '--skip-ping-check',
+ action='store_true',
+ help='do not verify that mon IP is pingable')
+ parser_bootstrap.add_argument(
+ '--skip-pull',
+ action='store_true',
+ help='do not pull the default image before bootstrapping')
+ parser_bootstrap.add_argument(
+ '--skip-firewalld',
+ action='store_true',
+ help='Do not configure firewalld')
+ parser_bootstrap.add_argument(
+ '--allow-overwrite',
+ action='store_true',
+ help='allow overwrite of existing --output-* config/keyring/ssh files')
+ parser_bootstrap.add_argument(
+ '--cleanup-on-failure',
+ action='store_true',
+ default=False,
+ help='Delete cluster files in case of a failed installation')
+ parser_bootstrap.add_argument(
+ '--allow-fqdn-hostname',
+ action='store_true',
+ help='allow hostname that is fully-qualified (contains ".")')
+ parser_bootstrap.add_argument(
+ '--allow-mismatched-release',
+ action='store_true',
+ help="allow bootstrap of ceph that doesn't match this version of cephadm")
+ parser_bootstrap.add_argument(
+ '--skip-prepare-host',
+ action='store_true',
+ help='Do not prepare host')
+ parser_bootstrap.add_argument(
+ '--orphan-initial-daemons',
+ action='store_true',
+        help='Set the mon and mgr services to `unmanaged`; do not create the crash service')
+ parser_bootstrap.add_argument(
+ '--skip-monitoring-stack',
+ action='store_true',
+ help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
+ parser_bootstrap.add_argument(
+ '--with-centralized-logging',
+ action='store_true',
+ help='Automatically provision centralized logging (promtail, loki)')
+ parser_bootstrap.add_argument(
+ '--apply-spec',
+ help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
+ parser_bootstrap.add_argument(
+ '--shared_ceph_folder',
+ metavar='CEPH_SOURCE_FOLDER',
+        help='Development mode. Several container folders are volume-mapped onto sub-folders of the given ceph source folder')
+
+ parser_bootstrap.add_argument(
+ '--registry-url',
+ help='url for custom registry')
+ parser_bootstrap.add_argument(
+ '--registry-username',
+ help='username for custom registry')
+ parser_bootstrap.add_argument(
+ '--registry-password',
+ help='password for custom registry')
+ parser_bootstrap.add_argument(
+ '--registry-json',
+ help='json file with custom registry login info (URL, Username, Password)')
+ parser_bootstrap.add_argument(
+ '--container-init',
+ action='store_true',
+ default=CONTAINER_INIT,
+ help=argparse.SUPPRESS)
+ parser_bootstrap.add_argument(
+ '--cluster-network',
+ help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
+ parser_bootstrap.add_argument(
+ '--single-host-defaults',
+ action='store_true',
+ help='adjust configuration defaults to suit a single-host cluster')
+ parser_bootstrap.add_argument(
+ '--log-to-file',
+ action='store_true',
+ help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
+
+ parser_deploy = subparsers.add_parser(
+ 'deploy', help='deploy a daemon')
+ parser_deploy.set_defaults(func=command_deploy)
+ parser_deploy.add_argument(
+ '--name',
+ required=True,
+ action=CustomValidation,
+ help='daemon name (type.id)')
+ parser_deploy.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+ _add_deploy_parser_args(parser_deploy)
+
+ parser_orch = subparsers.add_parser(
+ '_orch',
+ )
+ subparsers_orch = parser_orch.add_subparsers(
+ title='Orchestrator Driven Commands',
+ description='Commands that are typically only run by cephadm mgr module',
+ )
+
+ parser_deploy_from = subparsers_orch.add_parser(
+ 'deploy', help='deploy a daemon')
+ parser_deploy_from.set_defaults(func=command_deploy_from)
+ # currently cephadm mgr module passes an fsid option on the CLI too
+ # TODO: remove this and always source fsid from the JSON?
+ parser_deploy_from.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_deploy_from.add_argument(
+ 'source',
+ default='-',
+ nargs='?',
+ help='Configuration input source file',
+ )
+
+ parser_check_host = subparsers.add_parser(
+ 'check-host', help='check host configuration')
+ parser_check_host.set_defaults(func=command_check_host)
+ parser_check_host.add_argument(
+ '--expect-hostname',
+ help='Check that hostname matches an expected value')
+
+ parser_prepare_host = subparsers.add_parser(
+ 'prepare-host', help='prepare a host for cephadm use')
+ parser_prepare_host.set_defaults(func=command_prepare_host)
+ parser_prepare_host.add_argument(
+ '--expect-hostname',
+ help='Set hostname')
+
+ parser_add_repo = subparsers.add_parser(
+ 'add-repo', help='configure package repository')
+ parser_add_repo.set_defaults(func=command_add_repo)
+ parser_add_repo.add_argument(
+ '--release',
+ help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
+ parser_add_repo.add_argument(
+ '--version',
+ help='use specific upstream version (x.y.z)')
+ parser_add_repo.add_argument(
+ '--dev',
+ help='use specified bleeding edge build from git branch or tag')
+ parser_add_repo.add_argument(
+ '--dev-commit',
+ help='use specified bleeding edge build from git commit')
+ parser_add_repo.add_argument(
+ '--gpg-url',
+ help='specify alternative GPG key location')
+ parser_add_repo.add_argument(
+ '--repo-url',
+ default='https://download.ceph.com',
+ help='specify alternative repo location')
+ # TODO: proxy?
+
+ parser_rm_repo = subparsers.add_parser(
+ 'rm-repo', help='remove package repository configuration')
+ parser_rm_repo.set_defaults(func=command_rm_repo)
+
+ parser_install = subparsers.add_parser(
+ 'install', help='install ceph package(s)')
+ parser_install.set_defaults(func=command_install)
+ parser_install.add_argument(
+ 'packages', nargs='*',
+ default=['cephadm'],
+ help='packages')
+
+ parser_registry_login = subparsers.add_parser(
+ 'registry-login', help='log host into authenticated registry')
+ parser_registry_login.set_defaults(func=command_registry_login)
+ parser_registry_login.add_argument(
+ '--registry-url',
+ help='url for custom registry')
+ parser_registry_login.add_argument(
+ '--registry-username',
+ help='username for custom registry')
+ parser_registry_login.add_argument(
+ '--registry-password',
+ help='password for custom registry')
+ parser_registry_login.add_argument(
+ '--registry-json',
+ help='json file with custom registry login info (URL, Username, Password)')
+ parser_registry_login.add_argument(
+ '--fsid',
+ help='cluster FSID')
+
+ parser_gather_facts = subparsers.add_parser(
+ 'gather-facts', help='gather and return host related information (JSON format)')
+ parser_gather_facts.set_defaults(func=command_gather_facts)
+
+ parser_maintenance = subparsers.add_parser(
+ 'host-maintenance', help='Manage the maintenance state of a host')
+ parser_maintenance.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_maintenance.add_argument(
+ 'maintenance_action',
+ type=str,
+ choices=['enter', 'exit'],
+ help='Maintenance action - enter maintenance, or exit maintenance')
+ parser_maintenance.set_defaults(func=command_maintenance)
+
+ parser_agent = subparsers.add_parser(
+ 'agent', help='start cephadm agent')
+ parser_agent.set_defaults(func=command_agent)
+ parser_agent.add_argument(
+ '--fsid',
+ required=True,
+ help='cluster FSID')
+ parser_agent.add_argument(
+ '--daemon-id',
+ help='daemon id for agent')
+
+ parser_disk_rescan = subparsers.add_parser(
+ 'disk-rescan', help='rescan all HBAs to detect new/removed devices')
+ parser_disk_rescan.set_defaults(func=command_rescan_disks)
+
+ return parser
+
+
+def _parse_args(av: List[str]) -> argparse.Namespace:
+ parser = _get_parser()
+
+ args = parser.parse_args(av)
+ if 'command' in args and args.command and args.command[0] == '--':
+ args.command.pop(0)
+
+ # workaround argparse to deprecate the subparser `--container-init` flag
+ # container_init and no_container_init must always be mutually exclusive
+ container_init_args = ('--container-init', '--no-container-init')
+ if set(container_init_args).issubset(av):
+ parser.error('argument %s: not allowed with argument %s' % (container_init_args))
+ elif '--container-init' in av:
+ args.no_container_init = not args.container_init
+ else:
+ args.container_init = not args.no_container_init
+ assert args.container_init is not args.no_container_init
+
+ return args
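+# e.g. (illustrative) _parse_args(['shell', '--', 'ceph', '-s']) strips the
+# leading '--' so args.command == ['ceph', '-s'], and reconciles the
+# container_init / no_container_init flags so exactly one is true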
+
+
+def cephadm_init_ctx(args: List[str]) -> CephadmContext:
+ ctx = CephadmContext()
+ ctx.set_args(_parse_args(args))
+ return ctx
+
+
+def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
+ """Configure the logging for cephadm as well as updating the system
+ to have the expected log dir and logrotate configuration.
+ """
+ logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
+ global logger
+ if not os.path.exists(LOG_DIR):
+ os.makedirs(LOG_DIR)
+ operations = ['bootstrap', 'rm-cluster']
+ if any(op in args for op in operations):
+ dictConfig(interactive_logging_config)
+ else:
+ dictConfig(logging_config)
+
+ logger = logging.getLogger()
+ logger.setLevel(QUIET_LOG_LEVEL)
+
+ if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
+ with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
+ f.write("""# created by cephadm
+/var/log/ceph/cephadm.log {
+ rotate 7
+ daily
+ compress
+ missingok
+ notifempty
+ su root root
+}
+""")
+
+ if ctx.verbose:
+ for handler in logger.handlers:
+ if handler.name in ['console', 'log_file', 'console_stdout']:
+ handler.setLevel(QUIET_LOG_LEVEL)
+ logger.debug('%s\ncephadm %s' % ('-' * 80, args))
+
+
+def cephadm_require_root() -> None:
+ """Exit if the process is not running as root."""
+ if os.geteuid() != 0:
+ sys.stderr.write('ERROR: cephadm should be run as root\n')
+ sys.exit(1)
+
+
+def main() -> None:
+    av: List[str] = sys.argv[1:]
+
+ ctx = cephadm_init_ctx(av)
+ if not ctx.has_function():
+ sys.stderr.write('No command specified; pass -h or --help for usage\n')
+ sys.exit(1)
+
+ if ctx.has_function() and getattr(ctx.func, '_execute_early', False):
+ try:
+ sys.exit(ctx.func(ctx))
+ except Error as e:
+ if ctx.verbose:
+ raise
+ logger.error('ERROR: %s' % e)
+ sys.exit(1)
+
+ cephadm_require_root()
+ cephadm_init_logging(ctx, av)
+ try:
+ # podman or docker?
+ ctx.container_engine = find_container_engine(ctx)
+ if ctx.func not in \
+ [
+ command_check_host,
+ command_prepare_host,
+ command_add_repo,
+ command_rm_repo,
+ command_install
+ ]:
+ check_container_engine(ctx)
+ # command handler
+ r = ctx.func(ctx)
+ except (Error, ClusterAlreadyExists) as e:
+ if ctx.verbose:
+ raise
+ logger.error('ERROR: %s' % e)
+ sys.exit(1)
+ if not r:
+ r = 0
+ sys.exit(r)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/cephadm/containers/keepalived/Dockerfile b/src/cephadm/containers/keepalived/Dockerfile
new file mode 100644
index 000000000..ac305f72b
--- /dev/null
+++ b/src/cephadm/containers/keepalived/Dockerfile
@@ -0,0 +1,24 @@
+FROM registry.access.redhat.com/ubi8/ubi-minimal:latest
+
+RUN microdnf install --nodocs \
+ bash \
+ curl \
+ iproute \
+ keepalived-2.1.5 \
+ && rm /etc/keepalived/keepalived.conf && microdnf clean all
+
+COPY /skel /
+
+RUN chmod +x init.sh
+
+CMD ["./init.sh"]
+
+# Build specific labels
+LABEL maintainer="Guillaume Abrioux <gabrioux@redhat.com>"
+LABEL com.redhat.component="keepalived-container"
+LABEL version=2.1.5
+LABEL name="keepalived"
+LABEL description="keepalived for Ceph"
+LABEL summary="Provides keepalived on RHEL 8 for Ceph."
+LABEL io.k8s.display-name="Keepalived on RHEL 8"
+LABEL io.openshift.tags="Ceph keepalived"
diff --git a/src/cephadm/containers/keepalived/LICENSE b/src/cephadm/containers/keepalived/LICENSE
new file mode 100644
index 000000000..74b10b143
--- /dev/null
+++ b/src/cephadm/containers/keepalived/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 University of Michigan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/cephadm/containers/keepalived/README.md b/src/cephadm/containers/keepalived/README.md
new file mode 100644
index 000000000..bd7b605ac
--- /dev/null
+++ b/src/cephadm/containers/keepalived/README.md
@@ -0,0 +1,233 @@
+# quay.io/ceph/keepalived
+
+A small [ubi8-minimal](https://catalog.redhat.com/software/containers/registry/registry.access.redhat.com/repository/ubi8/ubi-minimal) based Docker container that provides a method of IP high availability via [keepalived](http://www.keepalived.org/) (VRRP failover), and optional Kubernetes API Server monitoring. If allowed to auto-configure (the default behaviour), it will automatically generate a unicast-based failover configuration with a minimal amount of user-supplied information.
+
+For specific information on Keepalived, please see the man page on [keepalived.conf](http://linux.die.net/man/5/keepalived.conf) or the [Keepalived User Guide](http://www.keepalived.org/pdf/UserGuide.pdf).
+
+
+## Index
+- [quay.io/ceph/keepalived](#cephkeepalived)
+ - [Index](#index)
+ - [Prerequisites](#prerequisites)
+ - [Configuration](#configuration)
+ - [Execution Control](#execution-control)
+ - [Autoconfiguration Options](#autoconfiguration-options)
+ - [Kubernetes Options](#kubernetes-options)
+ - [Suggested Kubernetes Settings](#suggested-kubernetes-settings)
+ - [Example Keepalived Configs](#example-keepalived-configs)
+ - [Example Autogenerated Keepalived Master Config](#example-autogenerated-keepalived-master-config)
+ - [Example Autogenerated Keepalived Backup Config](#example-autogenerated-keepalived-backup-config)
+ - [Example Run Commands](#example-run-commands)
+ - [Example Master Run Command](#example-master-run-command)
+ - [Example Backup Run Command](#example-backup-run-command)
+
+
+## Prerequisites
+
+Before attempting to deploy the keepalived container, the host must allow non local binding of ipv4 addresses. To do this, configure the sysctl tunable `net.ipv4.ip_nonlocal_bind=1`.
+
+In addition to enabling the nonlocal binds, the container must be run with both host networking (`--net=host`) and security setting CAP_NET_ADMIN (`--cap-add NET_ADMIN`) capability. These allow the container to manage the host's networking configuration, and this is essential to the function of keepalived.
+
+
+## Configuration
+### Execution Control
+
+| Variable | Default |
+|:---------------------:|:------------------------------------------------:|
+| `KEEPALIVED_AUTOCONF` | `true` |
+| `KEEPALIVED_CONF` | `/etc/keepalived/keepalived.conf` |
+| `KEEPALIVED_CMD` | `/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF` |
+| `KEEPALIVED_DEBUG` | `false` |
+
+* `KEEPALIVED_AUTOCONF` - Enables or disables the auto-configuration of keepalived.
+
+* `KEEPALIVED_CONF` - The path to the keepalived configuration file.
+
+* `KEEPALIVED_CMD` - The command called to execute keepalived.
+
+* `KEEPALIVED_DEBUG` - Enables or disables debug level logging for keepalived (adds `-D` to `KEEPALIVED_CMD`).
+
+
+### Autoconfiguration Options
+
+| Variable | Default |
+|:-------------------------------------------:|:----------------------------------:|
+| `KEEPALIVED_ADVERT_INT` | `1` |
+| `KEEPALIVED_AUTH_PASS` | `pwd$KEEPALIVED_VIRTUAL_ROUTER_ID` |
+| `KEEPALIVED_INTERFACE` | `eth0` |
+| `KEEPALIVED_PRIORITY` | `200` |
+| `KEEPALIVED_STATE` | `MASTER` |
+| `KEEPALIVED_TRACK_INTERFACE_###` | |
+| `KEEPALIVED_UNICAST_SRC_IP` | |
+| `KEEPALIVED_UNICAST_PEER_###` | |
+| `KEEPALIVED_VIRTUAL_IPADDRESS_###` | |
+| `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` | |
+| `KEEPALIVED_VIRTUAL_ROUTER_ID` | `1` |
+| `KEEPALIVED_KUBE_APISERVER_CHECK` | `false` |
+
+* `KEEPALIVED_ADVERT_INT` - The VRRP advertisement interval (in seconds).
+
+* `KEEPALIVED_AUTH_PASS` - A shared password used to authenticate each node in a VRRP group (**Note:** If password is longer than 8 characters, only the first 8 characters are used).
+
+* `KEEPALIVED_INTERFACE` - The host interface that keepalived will monitor and use for VRRP traffic.
+
+* `KEEPALIVED_PRIORITY` - Election value, the server configured with the highest priority will become the Master.
+
+* `KEEPALIVED_STATE` - Defines the server role as Master or Backup. (**Options:** `MASTER` or `BACKUP`).
+
+* `KEEPALIVED_TRACK_INTERFACE_###` - An interface whose state should be monitored (e.g. eth0). More than one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_UNICAST_SRC_IP` - The IP on the host that the keepalived daemon should bind to. **Note:** If not specified, it will be the first IP bound to the interface specified in `KEEPALIVED_INTERFACE`.
+
+* `KEEPALIVED_UNICAST_PEER_###` - The IP of a peer participating in the VRRP group. More than one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_VIRTUAL_IPADDRESS_###` - An instance of an address that will be monitored and failed over from one host to another. Each should be a quoted string of the form `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>`. At a minimum the IP address, mask, and device should be specified, e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.2/24 dev eth0"`. More than one can be supplied as long as the variable name ends in a number from 0-999. **Note:** Keepalived has a hard limit of **20** addresses that can be monitored. Additional addresses can be failed over along with the monitored addresses via `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###`.
+
+* `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` - An instance of an address that will be failed over along with the monitored addresses supplied via `KEEPALIVED_VIRTUAL_IPADDRESS_###`. Each should be a quoted string of the form `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>`. At a minimum the IP address, mask, and device should be specified, e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1"`. More than one can be supplied as long as the variable name ends in a number from 0-999.
+
+* `KEEPALIVED_VIRTUAL_ROUTER_ID` - A unique number from 0 to 255 that should identify the VRRP group. Master and Backup should have the same value. Multiple instances of keepalived can be run on the same host, but each pair **MUST** have a unique virtual router id.
+
+* `KEEPALIVED_KUBE_APISERVER_CHECK` - If enabled, configures a simple health-check script for the Kubernetes API server. For more information on this feature, please see the [Kubernetes Options](#kubernetes-options) section.
+
+
+### Kubernetes Options
+
+
+| **Variable** | **Default** |
+|:-----------------------------:|:----------------------------------------------:|
+| `KUBE_APISERVER_ADDRESS` | parsed from `KEEPALIVED_VIRTUAL_IPADDRESS_###` |
+| `KUBE_APISERVER_PORT` | `6443` |
+| `KUBE_APISERVER_CHK_INTERVAL` | `3` |
+| `KUBE_APISERVER_CHK_FALL` | `10` |
+| `KUBE_APISERVER_CHK_RISE` | `2` |
+| `KUBE_APISERVER_CHK_WEIGHT` | `-50` |
+
+
+
+* `KUBE_APISERVER_ADDRESS` - The Virtual IP being used for the Kube API Server. If none is supplied, it is assumed to be the lowest numbered entry in the `KEEPALIVED_VIRTUAL_IPADDRESS_###` variables.
+
+* `KUBE_APISERVER_PORT` - The port to use in conjunction with the `KUBE_APISERVER_ADDRESS`.
+
+* `KUBE_APISERVER_CHK_INTERVAL` - The interval in seconds between calling the script.
+
+* `KUBE_APISERVER_CHK_FALL` - The number of consecutive non-zero script exits before setting the state to `FAULT`.
+
+* `KUBE_APISERVER_CHK_RISE` - The number of consecutive zero script exits before exiting the `FAULT` state.
+
+* `KUBE_APISERVER_CHK_WEIGHT` - The weight to apply to the priority when the service enters the `FAULT` state.
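+
+Taken together, these options describe a keepalived `vrrp_script` health check. The following sketch illustrates how the defaults would map onto such a stanza; the script name and path are hypothetical, not the container's literal generated output:
+
+```
+vrrp_script chk_kube_apiserver {               # hypothetical name
+    script "/usr/bin/check_kube_apiserver.sh"  # hypothetical path
+    interval 3    # KUBE_APISERVER_CHK_INTERVAL
+    fall 10       # KUBE_APISERVER_CHK_FALL
+    rise 2        # KUBE_APISERVER_CHK_RISE
+    weight -50    # KUBE_APISERVER_CHK_WEIGHT
+}
+```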
+
+
+
+---
+
+### Suggested Kubernetes Settings
+
+Assuming there are three nodes running the kube-apiserver, you cannot rely on setting just the `KEEPALIVED_STATE` parameter to manage failover across the nodes.
+
+To manage kube-apiserver failover, enable the healthcheck option with `KEEPALIVED_KUBE_APISERVER_CHECK`, and set the `KEEPALIVED_PRIORITY` manually for the three instances.
+
+| **Node** | **Priority** |
+|:--------:|:------------:|
+| node-01 | 200 |
+| node-02 | 190 |
+| node-03 | 180 |
+
+With the default weight of `-50`, if `node-01` has an issue, its priority will drop to `150`, allowing `node-02` to take over. The same repeats if `node-02` fails, dropping its priority to `140`, at which point `node-03` takes over.
+
+Recovery occurs in the same order, with the system holding the highest priority being promoted back to master.
+
+### Example Keepalived Configs
+
+##### Example Autogenerated Keepalived Master Config
+```
+vrrp_instance MAIN {
+ state MASTER
+ interface eth0
+ virtual_router_id 2
+ priority 200
+ advert_int 1
+ unicast_src_ip 10.10.0.21
+ unicast_peer {
+ 10.10.0.22
+ }
+ authentication {
+ auth_type PASS
+ auth_pass pwd1
+ }
+ virtual_ipaddress {
+ 10.10.0.2/24 dev eth0
+ }
+ virtual_ipaddress_excluded {
+ 172.16.1.20/24 dev eth1
+ }
+ track_interface {
+ eth0
+ eth1
+ }
+}
+```
+
+##### Example Autogenerated Keepalived Backup Config
+```
+vrrp_instance MAIN {
+ state BACKUP
+ interface eth0
+ virtual_router_id 2
+ priority 100
+ advert_int 1
+ unicast_src_ip 10.10.0.22
+ unicast_peer {
+ 10.10.0.21
+ }
+ authentication {
+ auth_type PASS
+ auth_pass pwd1
+ }
+ virtual_ipaddress {
+ 10.10.0.2/24 dev eth0
+ }
+ virtual_ipaddress_excluded {
+ 172.16.1.20/24 dev eth1
+ }
+ track_interface {
+ eth0
+ eth1
+ }
+}
+```
+
+
+## Example Run Commands
+##### Example Master Run Command
+```bash
+docker run -d --net=host --cap-add NET_ADMIN \
+-e KEEPALIVED_AUTOCONF=true \
+-e KEEPALIVED_STATE=MASTER \
+-e KEEPALIVED_INTERFACE=eth0 \
+-e KEEPALIVED_VIRTUAL_ROUTER_ID=2 \
+-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.21 \
+-e KEEPALIVED_UNICAST_PEER_0=10.10.0.22 \
+-e KEEPALIVED_TRACK_INTERFACE_1=eth0 \
+-e KEEPALIVED_TRACK_INTERFACE_2=eth1 \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \
+quay.io/ceph/keepalived
+```
+
+##### Example Backup Run Command
+```bash
+docker run -d --net=host --cap-add NET_ADMIN \
+-e KEEPALIVED_AUTOCONF=true \
+-e KEEPALIVED_STATE=BACKUP \
+-e KEEPALIVED_INTERFACE=eth0 \
+-e KEEPALIVED_VIRTUAL_ROUTER_ID=2 \
+-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.22 \
+-e KEEPALIVED_UNICAST_PEER_0=10.10.0.21 \
+-e KEEPALIVED_TRACK_INTERFACE_1=eth0 \
+-e KEEPALIVED_TRACK_INTERFACE_2=eth1 \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \
+-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \
+quay.io/ceph/keepalived
+```
diff --git a/src/cephadm/containers/keepalived/skel/init.sh b/src/cephadm/containers/keepalived/skel/init.sh
new file mode 100755
index 000000000..9c86cfad2
--- /dev/null
+++ b/src/cephadm/containers/keepalived/skel/init.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+
+KEEPALIVED_DEBUG=${KEEPALIVED_DEBUG:-false}
+KEEPALIVED_KUBE_APISERVER_CHECK=${KEEPALIVED_KUBE_APISERVER_CHECK:-false}
+KEEPALIVED_CONF=${KEEPALIVED_CONF:-/etc/keepalived/keepalived.conf}
+KEEPALIVED_VAR_RUN=${KEEPALIVED_VAR_RUN:-/var/run/keepalived}
+
+if [[ ${KEEPALIVED_DEBUG,,} == 'true' ]]; then
+ kd_cmd="/usr/sbin/keepalived -n -l -D -f $KEEPALIVED_CONF"
+else
+ kd_cmd="/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF"
+fi
+
+KEEPALIVED_CMD=${KEEPALIVED_CMD:-"$kd_cmd"}
+
+rm -fr "$KEEPALIVED_VAR_RUN"
+
+exec $KEEPALIVED_CMD
\ No newline at end of file
diff --git a/src/cephadm/samples/alertmanager.json b/src/cephadm/samples/alertmanager.json
new file mode 100644
index 000000000..bacbad300
--- /dev/null
+++ b/src/cephadm/samples/alertmanager.json
@@ -0,0 +1,27 @@
+{
+ "files": {
+ "alertmanager.yml": [
+ "global:",
+ " resolve_timeout: 5m",
+ "",
+ "route:",
+ " group_by: ['alertname']",
+ " group_wait: 10s",
+ " group_interval: 10s",
+ " repeat_interval: 1h",
+ " receiver: 'web.hook'",
+ "receivers:",
+ "- name: 'web.hook'",
+ " webhook_configs:",
+ " - url: 'http://127.0.0.1:5001/'",
+ "inhibit_rules:",
+ " - source_match:",
+ " severity: 'critical'",
+ " target_match:",
+ " severity: 'warning'",
+ " equal: ['alertname', 'dev', 'instance']"
+ ]
+ },
+ "peers": []
+}
+
diff --git a/src/cephadm/samples/custom_container.json b/src/cephadm/samples/custom_container.json
new file mode 100644
index 000000000..194a44d2a
--- /dev/null
+++ b/src/cephadm/samples/custom_container.json
@@ -0,0 +1,35 @@
+{
+ "image": "docker.io/prom/alertmanager:v0.20.0",
+ "ports": [9093, 9094],
+ "args": [
+ "-p", "9093:9093",
+ "-p", "9094:9094"
+ ],
+ "dirs": ["etc/alertmanager"],
+ "files": {
+ "etc/alertmanager/alertmanager.yml": [
+ "global:",
+ " resolve_timeout: 5m",
+ "",
+ "route:",
+ " group_by: ['alertname']",
+ " group_wait: 10s",
+ " group_interval: 10s",
+ " repeat_interval: 1h",
+ " receiver: 'web.hook'",
+ "receivers:",
+ "- name: 'web.hook'",
+ " webhook_configs:",
+ " - url: 'http://127.0.0.1:5001/'",
+ "inhibit_rules:",
+ " - source_match:",
+ " severity: 'critical'",
+ " target_match:",
+ " severity: 'warning'",
+ " equal: ['alertname', 'dev', 'instance']"
+ ]
+ },
+ "volume_mounts": {
+ "etc/alertmanager": "/etc/alertmanager"
+ }
+}
diff --git a/src/cephadm/samples/grafana.json b/src/cephadm/samples/grafana.json
new file mode 100644
index 000000000..0e0689b7e
--- /dev/null
+++ b/src/cephadm/samples/grafana.json
@@ -0,0 +1,90 @@
+{
+ "files": {
+ "grafana.ini": [
+ "[users]",
+ " default_theme = light",
+ "[auth.anonymous]",
+ " enabled = true",
+ " org_name = 'Main Org.'",
+ " org_role = 'Viewer'",
+ "[server]",
+ " domain = 'bootstrap.storage.lab'",
+ " protocol = https",
+ " cert_file = /etc/grafana/certs/cert_file",
+ " cert_key = /etc/grafana/certs/cert_key",
+ " http_port = 3000",
+ " http_addr = localhost",
+ "[security]",
+ " admin_user = admin",
+ " admin_password = admin",
+ " allow_embedding = true"
+ ],
+ "provisioning/datasources/ceph-dashboard.yml": [
+ "deleteDatasources:",
+ " - name: 'Dashboard'",
+ " orgId: 1",
+ " ",
+ "datasources:",
+ " - name: 'Dashboard'",
+ " type: 'prometheus'",
+ " access: 'proxy'",
+ " orgId: 1",
+ " url: 'http://localhost:9095'",
+ " basicAuth: false",
+ " isDefault: true",
+ " editable: false"
+ ],
+ "certs/cert_file": [
+ "-----BEGIN CERTIFICATE-----",
+ "MIIDLTCCAhWgAwIBAgIUEH0mq6u93LKsWlNXst5pxWcuqkQwDQYJKoZIhvcNAQEL",
+ "BQAwJjELMAkGA1UECgwCSVQxFzAVBgNVBAMMDmNlcGgtZGFzaGJvYXJkMB4XDTIw",
+ "MDEwNTIyNDYyMFoXDTMwMDEwMjIyNDYyMFowJjELMAkGA1UECgwCSVQxFzAVBgNV",
+ "BAMMDmNlcGgtZGFzaGJvYXJkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC",
+ "AQEAqxh6eO0NTZJe+DoKZG/kozJCf+83eB3gWzwXoNinRmV/49f5WPR20DIxAe0R",
+ "saO6XynJXTrhvXT1bsARUq+LSmjWNFoYXopFuOJhGdWn4dmpuHwtpcFv2kjzNOKj",
+ "U2EG8j6bsRp1jFAzn7kdbSWT0UHySRXp9DPAjDiF3LjykMXiJMReccFXrB1pRi93",
+ "nJxED8d6oT5GazGB44svb+Zi6ABamZu5SDJC1Fr/O5rWFNQkH4hQEqDPj1817H9O",
+ "sm0mZiNy77ZQuAzOgZN153L3QOsyJismwNHfAMGMH9mzPKOjyhc13VlZyeEzml8p",
+ "ZpWQ2gi8P2r/FAr8bFL3MFnHKwIDAQABo1MwUTAdBgNVHQ4EFgQUZg3v7MX4J+hx",
+ "w3HENCrUkMK8tbwwHwYDVR0jBBgwFoAUZg3v7MX4J+hxw3HENCrUkMK8tbwwDwYD",
+ "VR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAaR/XPGKwUgVwH3KXAb6+",
+ "s9NTAt6lCmFdQz1ngoqFSizW7KGSXnOgd6xTiUCR0Tjjo2zKCwhIINaI6mwqMbrg",
+ "BOjb7diaqwFaitRs27AtdmaqMGndUqEBUn/k64Ld3VPGL4p0W2W+tXsyzZg1qQIn",
+ "JXb7c4+oWzXny7gHFheYQTwnHzDcNOf9vJiMGyYYvU1xTOGucu6dwtOVDDe1Z4Nq",
+ "AyIYWDScRr2FeAOXyx4aW2v5bjpTxvP+79/OOBbQ+p4y5F4PDrPeOSweGoo6huTR",
+ "+T+YI9Jfw2XCgV7NHWhfdt3fHHwUQzO6WszWU557pmCODLvXWsQ8P+GRiG7Nywm3",
+ "uA==",
+ "-----END CERTIFICATE-----"
+ ],
+ "certs/cert_key": [
+ "-----BEGIN PRIVATE KEY-----",
+ "MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCrGHp47Q1Nkl74",
+ "Ogpkb+SjMkJ/7zd4HeBbPBeg2KdGZX/j1/lY9HbQMjEB7RGxo7pfKcldOuG9dPVu",
+ "wBFSr4tKaNY0WhheikW44mEZ1afh2am4fC2lwW/aSPM04qNTYQbyPpuxGnWMUDOf",
+ "uR1tJZPRQfJJFen0M8CMOIXcuPKQxeIkxF5xwVesHWlGL3ecnEQPx3qhPkZrMYHj",
+ "iy9v5mLoAFqZm7lIMkLUWv87mtYU1CQfiFASoM+PXzXsf06ybSZmI3LvtlC4DM6B",
+ "k3XncvdA6zImKybA0d8AwYwf2bM8o6PKFzXdWVnJ4TOaXylmlZDaCLw/av8UCvxs",
+ "UvcwWccrAgMBAAECggEAeBv0BiYrm5QwdUORfhaKxAIJavRM1Vbr5EBYOgM90o54",
+ "bEN2ePsM2XUSsE5ziGfu8tVL1dX7GNwdW8UbpBc1ymO0VAYXa27YKUVKcy9o7oS1",
+ "v5v1E5Kq6esiSLL9gw/vJ2nKNFblxD2dL/hs7u1dSp5n7uSiW1tlRUp8toljRzts",
+ "1Cenp0J/a82HwWDE8j/H9NvitTOZ2cdwJ76V8GkBynlvr2ARjRfZGx0WXEJmoZYD",
+ "YUQVU303DB6Q2tkFco4LbPofkuhhMPhXsz3fZ/blHj/c78tqP9L5sQ29oqoPE1pS",
+ "DBOwKC/eoi5FY34RdLNL0dKq9MzbuYqEcCfZOJgxoQKBgQDf+5XF+aXQz2OmSaj6",
+ "1Yr+3KAKdfX/AYp22X1Wy4zWcZlgujgwQ1FG0zay8HVBM0/xn4UgOtcKCoXibePh",
+ "ag1t8aZINdRE1JcMzKmZoSvU9Xk30CNvygizuJVEKsJFPDbPzCpauDSplzcQb4pZ",
+ "wepucPuowkPMBx0iU3x0qSThWwKBgQDDjYs7d30xxSqWWXyCOZshy7UtHMNfqP15",
+ "kDfTXIZzuHvDf6ZNci10VY1eDZbpZfHgc6x1ElbKv2H4dYsgkENJZUi1YQDpVPKq",
+ "4N5teNykgAuagiR7dRFltSju3S7hIE6HInTv3hShaFPymlEE7zuBMuEUcuvYz5YN",
+ "RjxsvypKcQKBgCuuV+Y1KqZPW8K5SNAqRyIvCrMfkCr8NPG6tpvvtHa5zsyzZHPd",
+ "HQOv+1HoXSWrCSM5FfBUKU3XAYdIIRH76cSQRPp+LPiDcTXY0Baa/P5aJRrCZ7bM",
+ "cugBznJt2FdCR/o8eeIZXIPabq2w4w1gKQUC2cFuqWQn2wGvwGzL89pTAoGAAfpx",
+ "mSVpT9KVzrWTC+I3To04BP/QfixAfDVYSzwZZBxOrDijXw8zpISlDHmIuE2+t62T",
+ "5g9Mb3qmLBRMVwT+mUR8CtGzZ6jjV5U0yti5KrTc6TA93D3f8i51/oygR8jC4p0X",
+ "n8GYZdWfW8nx3eHpsTHpkwJinmvjMbkvLU51yBECgYAnUAMyhNOWjbYS5QWd8i1W",
+ "SFQansVDeeT98RebrzmGwlgrCImHItJz0Tz8gkNB3+S2B2balqT0WHaDxQ8vCtwX",
+ "xB4wd+gMomgdYtHGRnRwj1UyRXDk0c1TgGdRjOn3URaezBMibHTQSbFgPciJgAuU",
+ "mEl75h1ToBX9yvnH39o50g==",
+ "-----END PRIVATE KEY-----"
+ ]
+ }
+}
diff --git a/src/cephadm/samples/nfs.json b/src/cephadm/samples/nfs.json
new file mode 100644
index 000000000..2e6625101
--- /dev/null
+++ b/src/cephadm/samples/nfs.json
@@ -0,0 +1,14 @@
+{
+ "pool" : "nfs-ganesha",
+ "namespace" : "nfs-ns",
+ "files": {
+ "ganesha.conf": [
+ "RADOS_URLS {",
+ " userid = admin;",
+ "}",
+ "",
+ "%url rados://nfs-ganesha/nfs-ns/conf-nfs.a",
+ ""
+ ]
+ }
+}
diff --git a/src/cephadm/samples/prometheus.json b/src/cephadm/samples/prometheus.json
new file mode 100644
index 000000000..64727fb59
--- /dev/null
+++ b/src/cephadm/samples/prometheus.json
@@ -0,0 +1,17 @@
+{
+ "files": {
+ "prometheus.yml": [
+ "global:",
+ " scrape_interval: 5s",
+ " evaluation_interval: 10s",
+ "",
+ "rule_files: ",
+ " - '/etc/prometheus/alerting/*'",
+ "",
+ "scrape_configs:",
+ " - job_name: 'prometheus'",
+ " static_configs:",
+ " - targets: ['localhost:9095']"
+ ]
+ }
+}
diff --git a/src/cephadm/samples/rgw_ssl.json b/src/cephadm/samples/rgw_ssl.json
new file mode 100644
index 000000000..3fe6fea1c
--- /dev/null
+++ b/src/cephadm/samples/rgw_ssl.json
@@ -0,0 +1,101 @@
+{
+ "rgw_realm": "default",
+ "rgw_zone": "default",
+ "service_type": "rgw",
+ "placement": {
+ "hosts": [{
+ "hostname": "ironic-moliver",
+ "name": "",
+ "network": ""
+ }],
+ "count": 1
+ },
+ "ssl": true,
+ "rgw_frontend_port": 4343,
+ "rgw_frontend_ssl_certificate": [
+ "-----BEGIN CERTIFICATE-----",
+ "MIIFmjCCA4KgAwIBAgIJAIZ2n35bmwXTMA0GCSqGSIb3DQEBCwUAMGIxCzAJBgNV",
+ "BAYTAkFVMQwwCgYDVQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBp",
+ "bmMuMSYwJAYDVQQDDB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTAeFw0y",
+ "MDAyMDcwMDEzNTFaFw0zMDAyMDQwMDEzNTFaMGIxCzAJBgNVBAYTAkFVMQwwCgYD",
+ "VQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBpbmMuMSYwJAYDVQQD",
+ "DB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTCCAiIwDQYJKoZIhvcNAQEB",
+ "BQADggIPADCCAgoCggIBAMptGJ523QkEbc37za8iuCTahj0Zr6hy+ToSX/Vfdzxj",
+ "iYHuD2PiZZyJB7t2eOqiA8sQ5N513EUtf2ZIBwtnnqFIzD5TqI3BxRajUTlOyXUX",
+ "onMwQwXu2ifDUy3LCmuQfzanOTWvVLac1NmkWbJHpJCXYbUnPb1Nvd0QjTTEH1jt",
+ "5bDHhfxwCIYK6PY+MqC72a09wB2ZF+EKsSdqghOKmibfJHtoJdsqGeLrysBLrzUJ",
+ "e/5ZW3V4Z85T2lja5KZnWgRofrUy5TmJV10HO4Hht92xvWvEi/rmjg2AVYZFUQQx",
+ "xKXpUBbF5T46eSVmaT7IH88Yp5ytgBTaigym7ETCjohp/DfCaK1DUehh0ce7iUq2",
+ "yCLviZsX4WdPYxzkoLflNrqm4YZP6iKcZSUR/A+qPKCzCXgMXFNA1JxilDwEq35F",
+ "zGN++ehJqdNmOQ1eQScsLwZQa6mC97d+upWdCvyntf1+S6vNcXhtRQpjNM4W37oW",
+ "r5nicsGA3/0rpDEHZW85KlkdWO1uCS/6ftgt8UUMaf5ew3PigzusqymBWTlMOjtW",
+ "uAQXxgZZvkRp+xdspn/uTCAP+bNShGD6Q+TO3U6IjTqHk83sGKCvg2dyU/dqgPr9",
+ "2IIzgQBFGk0W0nM/E83E8hUSwX17COLL3drhPZb4VRMChQ8PAa6u9nIymkX2wSVv",
+ "AgMBAAGjUzBRMB0GA1UdDgQWBBSsZHuY7KK80RrZHp+Gx+k16skuRDAfBgNVHSME",
+ "GDAWgBSsZHuY7KK80RrZHp+Gx+k16skuRDAPBgNVHRMBAf8EBTADAQH/MA0GCSqG",
+ "SIb3DQEBCwUAA4ICAQAE+BLtnu0p8FtK7vrBCRcCdvycWaSFGJUt7r5Nm8TD7sKw",
+ "bWeDLgXrRouyA7n6yt/JqQbXYcxt4MLAM0P6NQd5BlNrrnDk4rBnJiJgejppNE+S",
+ "BazR7Dv0uYcs8kPT4DPpwzv4aJ2aXCBaxYrq8Rx2xOqANCPVOrtPUk9yGpaQ5adU",
+ "GfxkVbpgIEz1c71PeQuK1KUU/Wpk7cpm+FQCizl9ftP2lHWsGhSLCuyWoMTjt68P",
+ "gYEWoV54eo/bzwj2ei6TcfNo+uHyzEiiG2qEvMh/cnYUFzs8O1t0mN19WPB1pSh1",
+ "faci5lGdtkRbLgP0g5RvpagE7Lw3mCc5Om8jmHs4mPfuVkssBVV23CrFpqLLrDX3",
+ "Acwb/zRGvA7T4WESBTJMYFOLgm0W0Y+AN8RcYNU9QbDhe++Te0uz/3Sy3GN2Xg5z",
+ "MxfD1+34x6KvMfCh8NjII2mFQ9ukcfrhcfO3oWDLlwsqlVbhkZxNiUOEIx9nzHcF",
+ "kWpZ2ypBDH45h2o3LyqvGjsu/BFkeG6JpEDCWbClKWcjKxOrLVDufhSDduffDjja",
+ "zOsgQJg0Yf//Ubb5p0c54GjHM/XDXEcV3m3sEtbmMYz6xGwuag4bx8P2E/QY8sFp",
+ "JxgIdS8vdl6YhDCjKJ2XzI30JwCdftgDIAiWSE0ivoDc+8+gG1nb11GT52HFzA==",
+ "-----END CERTIFICATE-----",
+ "-----BEGIN PRIVATE KEY-----",
+ "MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDKbRiedt0JBG3N",
+ "+82vIrgk2oY9Ga+ocvk6El/1X3c8Y4mB7g9j4mWciQe7dnjqogPLEOTeddxFLX9m",
+ "SAcLZ56hSMw+U6iNwcUWo1E5Tsl1F6JzMEMF7tonw1MtywprkH82pzk1r1S2nNTZ",
+ "pFmyR6SQl2G1Jz29Tb3dEI00xB9Y7eWwx4X8cAiGCuj2PjKgu9mtPcAdmRfhCrEn",
+ "aoITipom3yR7aCXbKhni68rAS681CXv+WVt1eGfOU9pY2uSmZ1oEaH61MuU5iVdd",
+ "BzuB4bfdsb1rxIv65o4NgFWGRVEEMcSl6VAWxeU+OnklZmk+yB/PGKecrYAU2ooM",
+ "puxEwo6Iafw3wmitQ1HoYdHHu4lKtsgi74mbF+FnT2Mc5KC35Ta6puGGT+oinGUl",
+ "EfwPqjygswl4DFxTQNScYpQ8BKt+RcxjfvnoSanTZjkNXkEnLC8GUGupgve3frqV",
+ "nQr8p7X9fkurzXF4bUUKYzTOFt+6Fq+Z4nLBgN/9K6QxB2VvOSpZHVjtbgkv+n7Y",
+ "LfFFDGn+XsNz4oM7rKspgVk5TDo7VrgEF8YGWb5EafsXbKZ/7kwgD/mzUoRg+kPk",
+ "zt1OiI06h5PN7Bigr4NnclP3aoD6/diCM4EARRpNFtJzPxPNxPIVEsF9ewjiy93a",
+ "4T2W+FUTAoUPDwGurvZyMppF9sElbwIDAQABAoICAQC4sATwP563pXTRpNYq3lCI",
+ "P2COyqq70/qUA0PNygYt8Nr60srz5RG0WknVvefgm2U+lvFaDsqjyzkbhsf2ndnb",
+ "aWH/07BLdeluGB/5W2rvDFtJIVVlSmF8OffgJgohzbpjkPrfglKWMkz5LbwwrrD0",
+ "w0mAUIdB+nYqBfnvlKjNKHCSc9hJU6ZTNg0K7gCfKgUWzOpFlvJ0fp7XSZPYZHL0",
+ "2E6e0Y0Ig0cPBPb9r4/xoe+hRsHtUafUVik3PK+1K0K0FurUQ9VkQ2yUEg83F0v8",
+ "Vzht5OuaRVSB+P8O/JtIamfywAY0YOYhepQhjWikwU5UUzhJ+PqNDD87/+g9bA1B",
+ "xC25eoDxThiQlgDmRoH18ZsWDVf9TuJnm4cpxnZYX6ip+BLm/aidT39auZo0Fl+r",
+ "cJxRn0Qlm0Vm4Tc/6ZG6PQWB+Q6CjVFdoxeOvEQcTSuKA6VZBStLmqX++5In1Lmj",
+ "hVr3/aueHiZvXS5bNIdd2IfzatR+nP+uxzM/ryJRvGO2B2XTS00Cvv/lH84BDJYV",
+ "yt1PJIBoM9Dh7aUAHmKNVfRt83xzvcSPZx9VmSzA6wwqCQcO1GJk6keAuxOuligu",
+ "YdSFcfChOg90WvBcl+NzMblLkwrFSBQR7kgG0+dedv+Wkm4xO4T7B4W2G5+VIJKG",
+ "mrEAq6XQMFnfEJzNVg7JUQKCAQEA91eMvphoVVz+cxa4Ew7OokNXk5kSlvmQ8+Ij",
+ "ngFBvniXPZecxVzFEJglSthH5KI2ZqxwF3GJhKjxUihwf6K13Hx54EM7S/qV57ie",
+ "kVeKdAs+SGv+hRk1gQOoPBInbtKGKTni1V8T7iNginLueC/YikFugzv6IxiliBSG",
+ "3R7zjRepOW69aEoCPecx9amU4CkAwgeLJgBloBoqWD8sKM+bl7p5juQCU2sQ9D4/",
+ "kLnpG9+zPRUNjI4sog3L1wql3zthI6/4gf0TNuDhJTZ68vpMSi02pOUkVa0MmVOA",
+ "ex16luIp0BhxG/sUAeoevFL4KqR0CBbyAstbt2E/oPYOWMJ4MwKCAQEA0YMNXY7I",
+ "RNFOmiZ2Wn8kENCyJguqbOMd/li2+ercgp3MaSgTjC5KDFvZqTwXXlrURPu9hcyv",
+ "sJBSjp45g1T1LsUUq8UJgOIQgxykurIstGg33TAR+TN9VUu/xonLQF23GD8M6Vzd",
+ "EcZEVlBY33hgNXw4mRcBPnaoG5FZCBfHOgdBCExoYKW/RNKcmu0q+h9uhDBCbopv",
+ "04ROzw+HW1qc5qvNPR47buZ9+5QdonVK8s2bguMJ0phXwdSxL21wsjIsXyAO9m7w",
+ "qLHOq/hVokM0Fki09Exg4ppB8cLHC2ITpsVSgn4Dcz5zRtyvhozSKX4R9kMC64a0",
+ "AgMPVMllmGlR1QKCAQBIIGCrh7gNBIQyvXJKJGw/RxH3uZCBNB9/7vbh8Y3hZDr+",
+ "PAL8TpQsxaFCVRWJ53+jqy84tuQaKkXM5nv/zEvqEuZbbl+RRW6HVv/udC+srUap",
+ "Scy7tWEz0QQzGDwlhgCXbwjlnccrD2fsl51QsOsdTf1TCZ9ksqA6sXmua4MsJrUz",
+ "SUa0bbh/oraf46bFQ0+0RQzftQftixPEDg/rirbdpQQjlfvTpYoZHzncE0qV1ULo",
+ "UgZUcXU0gH9rovBBy4gFJyB5j3oV67fb6SorRrAOhWbE6QkSbtcYsw/pVuxTqXn1",
+ "89qwBSSNdl8mHa++h5xKa56BEBobvKEYaAhA+9yfAoIBAQDPFEE5n87Cdj7CjhGd",
+ "EN2M4Tmz8QPZ7AHRS85O5pxdXaqUpU/s1jPKU16nrwVJ9WypYkjI3q8oTP3MiQc/",
+ "j9FnENSFkpL6GHdJoB4Rido11myg6spZDVNr4xsCGWATlo1KIceZQHghAV66EWBG",
+ "QKyXMNigN+S64Hz4AomFPjtkV5cnpJ3mKO0MET9IwfIglsCdVzXSHHK7FaLvdeHL",
+ "oZxDQrvxFNiZnKgY6SUBVf1mT2LN06n5xSm4I4md3wXsmzrQKtefK7gihNxJjYLW",
+ "hqYNAIAalwOL9fwIAQTLc30I8S/EWtj+J1O5TpcO3lE7QahvR3yzXsi81Flq7ETG",
+ "iBKhAoIBAGHGpnjrLlCarNk9axh4Dw1OjgEvwPlEqsiWXt2tylLeab0OGC47MmJx",
+ "RmKwgVukMuxkQb8v4ANSRtih7R+E+qXfexjEFYtzh/uaRP1Z7ZrO/oqq0oLbPpsx",
+ "yTSRDL1i5/fgdIlKVH3N4IF7E8Pc3REgYIwLQxYjTdgVHEAM65XegQ2Lkpr4iae3",
+ "hm4IsD2PrsVITrlsLg65XnfcbsCs/OfQ5GuUp+xUBw5e0bQBmsWEiCaCjrq/EHJa",
+ "/oeJRqS7lyGYDC+wiSsE70x4dvu1um2F+V1Jw4LWjhu8Z8dNSXPSf8vLqXGkWAlk",
+ "805lq+iy7Mkhb+dlr4R9WhMWDyGwgYs=",
+ "-----END PRIVATE KEY-----"
+ ]
+}
diff --git a/src/cephadm/tests/__init__.py b/src/cephadm/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/cephadm/tests/__init__.py
diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py
new file mode 100644
index 000000000..76ac0b44c
--- /dev/null
+++ b/src/cephadm/tests/fixtures.py
@@ -0,0 +1,162 @@
+import mock
+import os
+import pytest
+import time
+
+from contextlib import contextmanager
+from pyfakefs import fake_filesystem
+
+from typing import Dict, List, Optional
+
+
+def import_cephadm():
+ """Import cephadm as a module."""
+ import cephadm as _cephadm
+
+ return _cephadm
+
+
+def mock_docker():
+ _cephadm = import_cephadm()
+ docker = mock.Mock(_cephadm.Docker)
+ docker.path = '/usr/bin/docker'
+ return docker
+
+
+def mock_podman():
+ _cephadm = import_cephadm()
+ podman = mock.Mock(_cephadm.Podman)
+ podman.path = '/usr/bin/podman'
+ podman.version = (2, 1, 0)
+ return podman
+
+
+def _daemon_path():
+ return os.getcwd()
+
+
+def mock_bad_firewalld():
+ def raise_bad_firewalld():
+ raise Exception('Called bad firewalld')
+
+ _cephadm = import_cephadm()
+ f = mock.Mock(_cephadm.Firewalld)
+ f.enable_service_for = lambda _: raise_bad_firewalld()
+ f.apply_rules = lambda: raise_bad_firewalld()
+    f.open_ports = lambda _: raise_bad_firewalld()
+    return f
+
+
+def _mock_scrape_host(obj, interval):
+ try:
+ raise ValueError("wah")
+ except Exception as e:
+ obj._handle_thread_exception(e, 'host')
+
+
+def _mock_run(obj):
+ t = obj._create_thread(obj._scrape_host_facts, 'host', 5)
+ time.sleep(1)
+ if not t.is_alive():
+ obj.cephadm_cache.update_health('host', "inactive", "host thread stopped")
+
+
+@pytest.fixture()
+def cephadm_fs(
+ fs: fake_filesystem.FakeFilesystem,
+):
+ """
+ use pyfakefs to stub filesystem calls
+ """
+ uid = os.getuid()
+ gid = os.getgid()
+
+ def fchown(fd, _uid, _gid):
+ """pyfakefs doesn't provide a working fchown or fchmod.
+ In order to get permissions working generally across renames
+        we need to provide our own implementation.
+ """
+ file_obj = fs.get_open_file(fd).get_object()
+ file_obj.st_uid = _uid
+ file_obj.st_gid = _gid
+
+ _cephadm = import_cephadm()
+ with mock.patch('os.fchown', side_effect=fchown), \
+ mock.patch('os.fchmod'), \
+ mock.patch('platform.processor', return_value='x86_64'), \
+ mock.patch('cephadm.extract_uid_gid', return_value=(uid, gid)):
+
+ try:
+ if not fake_filesystem.is_root():
+ fake_filesystem.set_uid(0)
+ except AttributeError:
+ pass
+
+ fs.create_dir(_cephadm.DATA_DIR)
+ fs.create_dir(_cephadm.LOG_DIR)
+ fs.create_dir(_cephadm.LOCK_DIR)
+ fs.create_dir(_cephadm.LOGROTATE_DIR)
+ fs.create_dir(_cephadm.UNIT_DIR)
+ fs.create_dir('/sys/block')
+
+ yield fs
+
+
+@pytest.fixture()
+def host_sysfs(fs: fake_filesystem.FakeFilesystem):
+ """Create a fake filesystem to represent sysfs"""
+ enc_path = '/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0'
+ dev_path = '/sys/class/scsi_generic/sg2/device'
+ slot_count = 12
+ fs.create_dir(dev_path)
+ fs.create_file(os.path.join(dev_path, 'vendor'), contents="EnclosuresInc")
+ fs.create_file(os.path.join(dev_path, 'model'), contents="D12")
+ fs.create_file(os.path.join(enc_path, 'id'), contents='1')
+ fs.create_file(os.path.join(enc_path, 'components'), contents=str(slot_count))
+ for slot_num in range(slot_count):
+ slot_dir = os.path.join(enc_path, str(slot_num))
+ fs.create_file(os.path.join(slot_dir, 'locate'), contents='0')
+ fs.create_file(os.path.join(slot_dir, 'fault'), contents='0')
+ fs.create_file(os.path.join(slot_dir, 'slot'), contents=str(slot_num))
+ if slot_num < 6:
+ fs.create_file(os.path.join(slot_dir, 'status'), contents='Ok')
+ slot_dev = os.path.join(slot_dir, 'device')
+ fs.create_dir(slot_dev)
+ fs.create_file(os.path.join(slot_dev, 'vpd_pg80'), contents=f'fake{slot_num:0>3}')
+ else:
+ fs.create_file(os.path.join(slot_dir, 'status'), contents='not installed')
+
+ yield fs
+
+
+@contextmanager
+def with_cephadm_ctx(
+ cmd: List[str],
+ list_networks: Optional[Dict[str, Dict[str, List[str]]]] = None,
+ hostname: Optional[str] = None,
+):
+ """
+ :param cmd: cephadm command argv
+ :param list_networks: mock 'list-networks' return
+ :param hostname: mock 'socket.gethostname' return
+ """
+ if not hostname:
+ hostname = 'host1'
+
+ _cephadm = import_cephadm()
+ with mock.patch('cephadm.attempt_bind'), \
+ mock.patch('cephadm.call', return_value=('', '', 0)), \
+ mock.patch('cephadm.call_timeout', return_value=0), \
+ mock.patch('cephadm.find_executable', return_value='foo'), \
+ mock.patch('cephadm.get_container_info', return_value=None), \
+ mock.patch('cephadm.is_available', return_value=True), \
+ mock.patch('cephadm.json_loads_retry', return_value={'epoch' : 1}), \
+ mock.patch('cephadm.logger'), \
+ mock.patch('socket.gethostname', return_value=hostname):
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(cmd)
+ ctx.container_engine = mock_podman()
+ if list_networks is not None:
+ with mock.patch('cephadm.list_networks', return_value=list_networks):
+ yield ctx
+ else:
+ yield ctx
+
diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py
new file mode 100644
index 000000000..f9cf201e2
--- /dev/null
+++ b/src/cephadm/tests/test_agent.py
@@ -0,0 +1,800 @@
+from unittest import mock
+import copy, datetime, json, os, socket, threading
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+from typing import Dict, List, Optional, Set
+
+_cephadm = import_cephadm()
+
+
+FSID = "beefbeef-beef-beef-1234-beefbeefbeef"
+AGENT_ID = 'host1'
+AGENT_DIR = f'/var/lib/ceph/{FSID}/agent.{AGENT_ID}'
+
+
+def test_agent_validate():
+ required_files = _cephadm.CephadmAgent.required_files
+ with with_cephadm_ctx([]) as ctx:
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ for i in range(len(required_files)):
+ incomplete_files = {s: 'text' for s in [f for j, f in enumerate(required_files) if j != i]}
+ with pytest.raises(_cephadm.Error, match=f'required file missing from config: {required_files[i]}'):
+ agent.validate(incomplete_files)
+ all_files = {s: 'text' for s in required_files}
+ agent.validate(all_files)
+
+
+def _check_file(path, content):
+ assert os.path.exists(path)
+ with open(path) as f:
+ fcontent = f.read()
+ assert fcontent == content
+
+
+@mock.patch('cephadm.call_throws')
+def test_agent_deploy_daemon_unit(_call_throws, cephadm_fs):
+ _call_throws.return_value = ('', '', 0)
+ agent_id = AGENT_ID
+
+ with with_cephadm_ctx([]) as ctx:
+ ctx.meta_json = json.dumps({'meta': 'data'})
+ agent = _cephadm.CephadmAgent(ctx, FSID, agent_id)
+ cephadm_fs.create_dir(AGENT_DIR)
+
+ with pytest.raises(_cephadm.Error, match='Agent needs a config'):
+ agent.deploy_daemon_unit()
+
+ config = {s: f'text for {s}' for s in _cephadm.CephadmAgent.required_files}
+ config['not-required-file.txt'] = 'don\'t write me'
+
+ agent.deploy_daemon_unit(config)
+
+ # check required config file were all created
+ for fname in _cephadm.CephadmAgent.required_files:
+ _check_file(f'{AGENT_DIR}/{fname}', f'text for {fname}')
+
+ # assert non-required file was not written
+ assert not os.path.exists(f'{AGENT_DIR}/not-required-file.txt')
+
+ # check unit.run file was created correctly
+ _check_file(f'{AGENT_DIR}/unit.run', agent.unit_run())
+
+ # check unit.meta file created correctly
+ _check_file(f'{AGENT_DIR}/unit.meta', json.dumps({'meta': 'data'}, indent=4) + '\n')
+
+ # check unit file was created correctly
+ _check_file(f'{ctx.unit_dir}/{agent.unit_name()}', agent.unit_file())
+
+ expected_call_throws_calls = [
+ mock.call(ctx, ['systemctl', 'daemon-reload']),
+ mock.call(ctx, ['systemctl', 'enable', '--now', agent.unit_name()]),
+ ]
+ _call_throws.assert_has_calls(expected_call_throws_calls)
+
+ expected_call_calls = [
+ mock.call(ctx, ['systemctl', 'stop', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
+ mock.call(ctx, ['systemctl', 'reset-failed', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
+ ]
+ _cephadm.call.assert_has_calls(expected_call_calls)
+
+
+@mock.patch('threading.Thread.is_alive')
+def test_agent_shutdown(_is_alive):
+ with with_cephadm_ctx([]) as ctx:
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ _is_alive.return_value = True
+ assert agent.stop == False
+ assert agent.mgr_listener.stop == False
+ assert agent.ls_gatherer.stop == False
+ assert agent.volume_gatherer.stop == False
+ agent.shutdown()
+ assert agent.stop == True
+ assert agent.mgr_listener.stop == True
+ assert agent.ls_gatherer.stop == True
+ assert agent.volume_gatherer.stop == True
+
+
+def test_agent_wakeup():
+ with with_cephadm_ctx([]) as ctx:
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ assert agent.event.is_set() == False
+ agent.wakeup()
+ assert agent.event.is_set() == True
+
+
+@mock.patch("cephadm.CephadmAgent.shutdown")
+@mock.patch("cephadm.AgentGatherer.update_func")
+def test_pull_conf_settings(_update_func, _shutdown, cephadm_fs):
+ target_ip = '192.168.0.0'
+ target_port = 9876
+ refresh_period = 20
+ listener_port = 5678
+ host = AGENT_ID
+ device_enhanced_scan = 'True'
+ with with_cephadm_ctx([]) as ctx:
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ full_config = {
+ 'target_ip': target_ip,
+ 'target_port': target_port,
+ 'refresh_period': refresh_period,
+ 'listener_port': listener_port,
+ 'host': host,
+ 'device_enhanced_scan': device_enhanced_scan
+ }
+ cephadm_fs.create_dir(AGENT_DIR)
+ with open(agent.config_path, 'w') as f:
+ f.write(json.dumps(full_config))
+
+ with pytest.raises(_cephadm.Error, match="Failed to get agent keyring:"):
+ agent.pull_conf_settings()
+ _shutdown.assert_called()
+ with open(agent.keyring_path, 'w') as f:
+ f.write('keyring')
+
+ assert agent.device_enhanced_scan == False
+ agent.pull_conf_settings()
+ assert agent.host == host
+ assert agent.target_ip == target_ip
+ assert agent.target_port == target_port
+ assert agent.loop_interval == refresh_period
+ assert agent.starting_port == listener_port
+ assert agent.device_enhanced_scan == True
+ assert agent.keyring == 'keyring'
+ _update_func.assert_called()
+
+ full_config.pop('target_ip')
+ with open(agent.config_path, 'w') as f:
+ f.write(json.dumps(full_config))
+ with pytest.raises(_cephadm.Error, match="Failed to get agent target ip and port from config:"):
+ agent.pull_conf_settings()
+
+
+@mock.patch("cephadm.command_ceph_volume")
+def test_agent_ceph_volume(_ceph_volume):
+
+ def _ceph_volume_outputter(_):
+ print("ceph-volume output")
+
+ def _ceph_volume_empty(_):
+ pass
+
+ with with_cephadm_ctx([]) as ctx:
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+
+ _ceph_volume.side_effect = _ceph_volume_outputter
+ out, _ = agent._ceph_volume(False)
+ assert ctx.command == ['inventory', '--format=json']
+ assert out == "ceph-volume output\n"
+
+ out, _ = agent._ceph_volume(True)
+ assert ctx.command == ['inventory', '--format=json', '--with-lsm']
+ assert out == "ceph-volume output\n"
+
+ _ceph_volume.side_effect = _ceph_volume_empty
+ with pytest.raises(Exception, match='ceph-volume returned empty value'):
+ out, _ = agent._ceph_volume(False)
+
+
+def test_agent_daemon_ls_subset(cephadm_fs):
+ # Basing part of this test on some actual sample output
+
+ # Some sample "podman stats --format '{{.ID}},{{.MemUsage}}' --no-stream" output
+ # 3f2b31d19ecd,456.4MB / 41.96GB
+ # 5aca2499e0f8,7.082MB / 41.96GB
+ # fe0cef07d5f7,35.91MB / 41.96GB
+
+ # Sample "podman ps --format '{{.ID}},{{.Names}}' --no-trunc" output with the same containers
+ # fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mon-host1
+ # 3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mgr-host1-pntmho
+ # 5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-crash-host1
+
+ # Some of the components from that output
+ mgr_cid = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f'
+ mon_cid = 'fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30'
+ crash_cid = '5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a'
+ mgr_short_cid = mgr_cid[0:12]
+ mon_short_cid = mon_cid[0:12]
+ crash_short_cid = crash_cid[0:12]
+
+    # Rebuilding the output but with our testing FSID and components (to allow alteration later for whatever reason)
+ mem_out = f"""{mgr_short_cid},456.4MB / 41.96GB
+{crash_short_cid},7.082MB / 41.96GB
+{mon_short_cid},35.91MB / 41.96GB"""
+
+ ps_out = f"""{mon_cid},ceph-{FSID}-mon-host1
+{mgr_cid},ceph-{FSID}-mgr-host1-pntmho
+{crash_cid},ceph-{FSID}-crash-host1"""
+
+ def _fake_call(ctx, cmd, desc=None, verbosity=_cephadm.CallVerbosity.VERBOSE_ON_FAILURE, timeout=_cephadm.DEFAULT_TIMEOUT, **kwargs):
+ if 'stats' in cmd:
+ return (mem_out, '', 0)
+ elif 'ps' in cmd:
+ return (ps_out, '', 0)
+ return ('out', 'err', 0)
+
+ cephadm_fs.create_dir(AGENT_DIR)
+ cephadm_fs.create_dir(f'/var/lib/ceph/mon/ceph-host1') # legacy daemon
+ cephadm_fs.create_dir(f'/var/lib/ceph/osd/nothing') # improper directory, should be skipped
+ cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/mgr.host1.pntmho') # cephadm daemon
+ cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/crash.host1') # cephadm daemon
+
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ _cephadm.call.side_effect = _fake_call
+ daemons = agent._daemon_ls_subset()
+
+ assert 'agent.host1' in daemons
+ assert 'mgr.host1.pntmho' in daemons
+ assert 'crash.host1' in daemons
+ assert 'mon.host1' in daemons
+
+ assert daemons['mon.host1']['style'] == 'legacy'
+ assert daemons['mgr.host1.pntmho']['style'] == 'cephadm:v1'
+ assert daemons['crash.host1']['style'] == 'cephadm:v1'
+ assert daemons['agent.host1']['style'] == 'cephadm:v1'
+
+ assert daemons['mgr.host1.pntmho']['systemd_unit'] == f'ceph-{FSID}@mgr.host1.pntmho'
+ assert daemons['agent.host1']['systemd_unit'] == f'ceph-{FSID}@agent.host1'
+ assert daemons['crash.host1']['systemd_unit'] == f'ceph-{FSID}@crash.host1'
+
+ assert daemons['mgr.host1.pntmho']['container_id'] == mgr_cid
+ assert daemons['crash.host1']['container_id'] == crash_cid
+
+ assert daemons['mgr.host1.pntmho']['memory_usage'] == 478570086 # 456.4 MB
+ assert daemons['crash.host1']['memory_usage'] == 7426015 # 7.082 MB
+
+
+@mock.patch("cephadm.list_daemons")
+@mock.patch("cephadm.CephadmAgent._daemon_ls_subset")
+def test_agent_get_ls(_ls_subset, _ls, cephadm_fs):
+ ls_out = [{
+ "style": "cephadm:v1",
+ "name": "mgr.host1.pntmho",
+ "fsid": FSID,
+ "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho",
+ "enabled": True,
+ "state": "running",
+ "service_name": "mgr",
+ "memory_request": None,
+ "memory_limit": None,
+ "ports": [
+ 9283,
+ 8765
+ ],
+ "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f",
+ "container_image_name": "quay.io/ceph/ceph:testing",
+ "container_image_id": "3300e39269f0c13ae45026cf233d8b3fff1303d52f2598a69c7fba0bb8405164",
+ "container_image_digests": [
+ "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30"
+ ],
+ "memory_usage": 507510784,
+ "cpu_percentage": "5.95%",
+ "version": "18.0.0-556-gb4d1a199",
+ "started": "2022-10-27T14:19:36.086664Z",
+ "created": "2022-10-27T14:19:36.282281Z",
+ "deployed": "2022-10-27T14:19:35.377275Z",
+ "configured": "2022-10-27T14:22:40.316912Z"
+ },{
+ "style": "cephadm:v1",
+ "name": "agent.host1",
+ "fsid": FSID,
+ "systemd_unit": f"ceph-{FSID}@agent.host1",
+ "enabled": True,
+ "state": "running",
+ "service_name": "agent",
+ "ports": [],
+ "ip": None,
+ "deployed_by": [
+ "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30"
+ ],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "container_id": None,
+ "container_image_name": None,
+ "container_image_id": None,
+ "container_image_digests": None,
+ "version": None,
+ "started": None,
+ "created": "2022-10-27T19:46:49.751594Z",
+ "deployed": None,
+ "configured": "2022-10-27T19:46:49.751594Z"
+ }, {
+ "style": "legacy",
+ "name": "mon.host1",
+ "fsid": FSID,
+ "systemd_unit": "ceph-mon@host1",
+ "enabled": False,
+ "state": "stopped",
+ "host_version": None
+ }]
+
+ ls_subset_out = {
+ 'mgr.host1.pntmho': {
+ "style": "cephadm:v1",
+ "fsid": FSID,
+ "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho",
+ "enabled": True,
+ "state": "running",
+ "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f",
+ "memory_usage": 507510784,
+ },
+ 'agent.host1': {
+ "style": "cephadm:v1",
+ "fsid": FSID,
+ "systemd_unit": f"ceph-{FSID}@agent.host1",
+ "enabled": True,
+ "state": "running",
+ "container_id": None
+ }, 'mon.host1': {
+ "style": "legacy",
+ "name": "mon.host1",
+ "fsid": FSID,
+ "systemd_unit": "ceph-mon@host1",
+ "enabled": False,
+ "state": "stopped",
+ "host_version": None
+ }}
+
+ _ls.return_value = ls_out
+ _ls_subset.return_value = ls_subset_out
+
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+
+ # first pass, no cached daemon metadata
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out
+ assert changed
+
+ # second pass, should recognize that daemons have not changed and just keep cached values
+ daemons, changed = agent._get_ls()
+        assert daemons == ls_out
+ assert not changed
+
+ # change a container id so it needs to get more info
+ ls_subset_out2 = copy.deepcopy(ls_subset_out)
+ ls_out2 = copy.deepcopy(ls_out)
+ ls_subset_out2['mgr.host1.pntmho']['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa'
+ ls_out2[0]['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa'
+ _ls.return_value = ls_out2
+ _ls_subset.return_value = ls_subset_out2
+ assert agent.cached_ls_values['mgr.host1.pntmho']['container_id'] == "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f"
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out2
+ assert changed
+
+ # run again with the same data so it should use cached values
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out2
+ assert not changed
+
+ # change the state of a container so new daemon metadata is needed
+ ls_subset_out3 = copy.deepcopy(ls_subset_out2)
+ ls_out3 = copy.deepcopy(ls_out2)
+ ls_subset_out3['mgr.host1.pntmho']['enabled'] = False
+ ls_out3[0]['enabled'] = False
+ _ls.return_value = ls_out3
+ _ls_subset.return_value = ls_subset_out3
+ assert agent.cached_ls_values['mgr.host1.pntmho']['enabled'] == True
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out3
+ assert changed
+
+ # run again with the same data so it should use cached values
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out3
+ assert not changed
+
+        # remove a daemon so new metadata is needed
+ ls_subset_out4 = copy.deepcopy(ls_subset_out3)
+ ls_out4 = copy.deepcopy(ls_out3)
+ ls_subset_out4.pop('mon.host1')
+ ls_out4.pop()
+ _ls.return_value = ls_out4
+ _ls_subset.return_value = ls_subset_out4
+ assert 'mon.host1' in agent.cached_ls_values
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out4
+ assert changed
+
+ # run again with the same data so it should use cached values
+ daemons, changed = agent._get_ls()
+ assert daemons == ls_out4
+ assert not changed
+
+
+@mock.patch("threading.Event.clear")
+@mock.patch("threading.Event.wait")
+@mock.patch("urllib.request.Request.__init__")
+@mock.patch("cephadm.urlopen")
+@mock.patch("cephadm.list_networks")
+@mock.patch("cephadm.HostFacts.dump")
+@mock.patch("cephadm.HostFacts.__init__", lambda _, __: None)
+@mock.patch("ssl.SSLContext.load_verify_locations")
+@mock.patch("threading.Thread.is_alive")
+@mock.patch("cephadm.MgrListener.start")
+@mock.patch("cephadm.AgentGatherer.start")
+@mock.patch("cephadm.port_in_use")
+@mock.patch("cephadm.CephadmAgent.pull_conf_settings")
+def test_agent_run(_pull_conf_settings, _port_in_use, _gatherer_start,
+ _listener_start, _is_alive, _load_verify_locations,
+ _HF_dump, _list_networks, _urlopen, _RQ_init, _wait, _clear):
+ target_ip = '192.168.0.0'
+ target_port = '9999'
+ refresh_period = 20
+ listener_port = 7770
+ open_listener_port = 7777
+ host = AGENT_ID
+ device_enhanced_scan = False
+
+ def _fake_port_in_use(ctx, endpoint):
+ if endpoint.port == open_listener_port:
+ return False
+ return True
+
+ network_data: Dict[str, Dict[str, Set[str]]] = {
+ "10.2.1.0/24": {
+ "eth1": set(["10.2.1.122"])
+ },
+ "192.168.122.0/24": {
+ "eth0": set(["192.168.122.221"])
+ },
+ "fe80::/64": {
+ "eth0": set(["fe80::5054:ff:fe3f:d94e"]),
+ "eth1": set(["fe80::5054:ff:fe3f:aa4a"]),
+ }
+ }
+
+ # the json serializable version of the networks data
+ # we expect the agent to actually send
+ network_data_no_sets: Dict[str, Dict[str, List[str]]] = {
+ "10.2.1.0/24": {
+ "eth1": ["10.2.1.122"]
+ },
+ "192.168.122.0/24": {
+ "eth0": ["192.168.122.221"]
+ },
+ "fe80::/64": {
+ "eth0": ["fe80::5054:ff:fe3f:d94e"],
+ "eth1": ["fe80::5054:ff:fe3f:aa4a"],
+ }
+ }
+
+ class FakeHTTPResponse():
+ def __init__(self):
+ pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ pass
+
+ def read(self):
+ return json.dumps({'valid': 'output', 'result': '400'})
+
+ _port_in_use.side_effect = _fake_port_in_use
+ _is_alive.return_value = False
+ _HF_dump.return_value = 'Host Facts'
+ _list_networks.return_value = network_data
+ _urlopen.side_effect = lambda *args, **kwargs: FakeHTTPResponse()
+ _RQ_init.side_effect = lambda *args, **kwargs: None
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ agent.keyring = 'agent keyring'
+ agent.ack = 7
+ agent.volume_gatherer.ack = 7
+ agent.volume_gatherer.data = 'ceph-volume inventory data'
+ agent.ls_gatherer.ack = 7
+ agent.ls_gatherer.data = [{'valid_daemon': 'valid_metadata'}]
+
+ def _set_conf():
+ agent.target_ip = target_ip
+ agent.target_port = target_port
+ agent.loop_interval = refresh_period
+ agent.starting_port = listener_port
+ agent.host = host
+ agent.device_enhanced_scan = device_enhanced_scan
+ _pull_conf_settings.side_effect = _set_conf
+
+ # technically the run function loops forever unless the agent
+        # is told to stop. To get around that, we have the
+        # event.wait() (which happens at the end of the loop) throw
+ # a special exception type. If we catch this exception we can
+ # consider it as being a "success" run
+ class EventCleared(Exception):
+ pass
+
+ _clear.side_effect = EventCleared('SUCCESS')
+ with pytest.raises(EventCleared, match='SUCCESS'):
+ agent.run()
+
+ expected_data = {
+ 'host': host,
+ 'ls': [{'valid_daemon': 'valid_metadata'}],
+ 'networks': network_data_no_sets,
+ 'facts': 'Host Facts',
+ 'volume': 'ceph-volume inventory data',
+ 'ack': str(7),
+ 'keyring': 'agent keyring',
+ 'port': str(open_listener_port)
+ }
+ _RQ_init.assert_called_with(
+ f'https://{target_ip}:{target_port}/data/',
+ json.dumps(expected_data).encode('ascii'),
+ {'Content-Type': 'application/json'}
+ )
+ _listener_start.assert_called()
+ _gatherer_start.assert_called()
+ _urlopen.assert_called()
+
+ # agent should not go down if connections fail
+ _urlopen.side_effect = Exception()
+ with pytest.raises(EventCleared, match='SUCCESS'):
+ agent.run()
+
+ # should fail if no ports are open for listener
+ _port_in_use.side_effect = lambda _, __: True
+ agent.listener_port = None
+ with pytest.raises(Exception, match='Failed to pick port for agent to listen on: All 1000 ports starting at 7770 taken.'):
+ agent.run()
+
+
+@mock.patch("cephadm.CephadmAgent.pull_conf_settings")
+@mock.patch("cephadm.CephadmAgent.wakeup")
+def test_mgr_listener_handle_json_payload(_agent_wakeup, _pull_conf_settings, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ cephadm_fs.create_dir(AGENT_DIR)
+
+ data_no_config = {
+ 'counter': 7
+ }
+ agent.mgr_listener.handle_json_payload(data_no_config)
+ _agent_wakeup.assert_not_called()
+ _pull_conf_settings.assert_not_called()
+ assert not any(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files)
+
+ data_with_config = {
+ 'counter': 7,
+ 'config': {
+ 'unrequired-file': 'unrequired-text'
+ }
+ }
+ data_with_config['config'].update({s: f'{s} text' for s in agent.required_files if s != agent.required_files[2]})
+ agent.mgr_listener.handle_json_payload(data_with_config)
+ _agent_wakeup.assert_called()
+ _pull_conf_settings.assert_called()
+ assert all(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files if s != agent.required_files[2])
+ assert not os.path.exists(os.path.join(AGENT_DIR, agent.required_files[2]))
+ assert not os.path.exists(os.path.join(AGENT_DIR, 'unrequired-file'))
+
+
+@mock.patch("socket.socket")
+@mock.patch("ssl.SSLContext.wrap_socket")
+@mock.patch("cephadm.MgrListener.handle_json_payload")
+@mock.patch("ssl.SSLContext.load_verify_locations")
+@mock.patch("ssl.SSLContext.load_cert_chain")
+def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json_payload,
+ _wrap_context, _socket, cephadm_fs):
+
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ cephadm_fs.create_dir(AGENT_DIR)
+
+ payload = json.dumps({'counter': 3,
+ 'config': {s: f'{s} text' for s in agent.required_files if s != agent.required_files[1]}})
+
+ class FakeSocket:
+
+ def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, fileno=None):
+ self.family = family
+ self.type = type
+
+ def bind(*args, **kwargs):
+ return
+
+ def settimeout(*args, **kwargs):
+ return
+
+ def listen(*args, **kwargs):
+ return
+
+ class FakeSecureSocket:
+
+ def __init__(self, pload):
+ self.payload = pload
+ self._conn = FakeConn(self.payload)
+ self.accepted = False
+
+ def accept(self):
+ # to make mgr listener run loop stop running,
+ # set it to stop after accepting a "connection"
+ # on our fake socket so only one iteration of the loop
+ # actually happens
+ agent.mgr_listener.stop = True
+            self.accepted = True
+ return self._conn, None
+
+ def load_cert_chain(*args, **kwargs):
+ return
+
+ def load_verify_locations(*args, **kwargs):
+ return
+
+ class FakeConn:
+
+ def __init__(self, payload: str = ''):
+ payload_len_str = str(len(payload.encode('utf-8')))
+ while len(payload_len_str.encode('utf-8')) < 10:
+ payload_len_str = '0' + payload_len_str
+ self.payload = (payload_len_str + payload).encode('utf-8')
+ self.buffer_len = len(self.payload)
+
+ def recv(self, len: Optional[int] = None):
+ if not len or len >= self.buffer_len:
+ ret = self.payload
+ self.payload = b''
+ self.buffer_len = 0
+ return ret
+ else:
+ ret = self.payload[:len]
+ self.payload = self.payload[len:]
+ self.buffer_len = self.buffer_len - len
+ return ret
+
+ FSS_good_data = FakeSecureSocket(payload)
+ FSS_bad_json = FakeSecureSocket('bad json')
+ _socket = FakeSocket
+ agent.listener_port = 7777
+
+ # first run, should successfully receive properly structured json payload
+ _wrap_context.side_effect = [FSS_good_data]
+ agent.mgr_listener.stop = False
+ FakeConn.send = mock.Mock(return_value=None)
+ agent.mgr_listener.run()
+
+ # verify payload was correctly extracted
+        _handle_json_payload.assert_called_with(json.loads(payload))
+ FakeConn.send.assert_called_once_with(b'ACK')
+
+ # second run, with bad json data received
+ _wrap_context.side_effect = [FSS_bad_json]
+ agent.mgr_listener.stop = False
+ FakeConn.send = mock.Mock(return_value=None)
+ agent.mgr_listener.run()
+ FakeConn.send.assert_called_once_with(b'Failed to extract json payload from message: Expecting value: line 1 column 1 (char 0)')
+
+        # third run, no proper length at the beginning of the payload
+ FSS_no_length = FakeSecureSocket(payload)
+ FSS_no_length.payload = FSS_no_length.payload[10:]
+ FSS_no_length._conn.payload = FSS_no_length._conn.payload[10:]
+ FSS_no_length._conn.buffer_len -= 10
+ _wrap_context.side_effect = [FSS_no_length]
+ agent.mgr_listener.stop = False
+ FakeConn.send = mock.Mock(return_value=None)
+ agent.mgr_listener.run()
+ FakeConn.send.assert_called_once_with(b'Failed to extract length of payload from message: invalid literal for int() with base 10: \'{"counter"\'')
+
+ # some exception handling for full coverage
+ FSS_exc_testing = FakeSecureSocket(payload)
+ FSS_exc_testing.accept = mock.MagicMock()
+
+ def _accept(*args, **kwargs):
+ if not FSS_exc_testing.accepted:
+ FSS_exc_testing.accepted = True
+ raise socket.timeout()
+ else:
+ agent.mgr_listener.stop = True
+ raise Exception()
+
+ FSS_exc_testing.accept.side_effect = _accept
+ _wrap_context.side_effect = [FSS_exc_testing]
+ agent.mgr_listener.stop = False
+ FakeConn.send = mock.Mock(return_value=None)
+ agent.mgr_listener.run()
+ FakeConn.send.assert_not_called()
+ FSS_exc_testing.accept.call_count == 3
+
+
+@mock.patch("cephadm.CephadmAgent._get_ls")
+def test_gatherer_update_func(_get_ls, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ cephadm_fs.create_dir(AGENT_DIR)
+
+ def _sample_func():
+ return 7
+
+ agent.ls_gatherer.func()
+ _get_ls.assert_called()
+
+ _get_ls = mock.MagicMock()
+ agent.ls_gatherer.update_func(_sample_func)
+ out = agent.ls_gatherer.func()
+ assert out == 7
+ _get_ls.assert_not_called()
+
+
+@mock.patch("cephadm.CephadmAgent.wakeup")
+@mock.patch("time.monotonic")
+@mock.patch("threading.Event.wait")
+def test_gatherer_run(_wait, _time, _agent_wakeup, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID)
+ cephadm_fs.create_dir(AGENT_DIR)
+ agent.loop_interval = 30
+ agent.ack = 23
+
+ _sample_func = lambda *args, **kwargs: ('sample out', True)
+ agent.ls_gatherer.update_func(_sample_func)
+ agent.ls_gatherer.ack = 20
+ agent.ls_gatherer.stop = False
+
+ def _fake_clear(*args, **kwargs):
+ agent.ls_gatherer.stop = True
+
+ _time.side_effect = [0, 20, 0, 20, 0, 20] # start at time 0, complete at time 20
+ _wait.return_value = None
+
+ with mock.patch("threading.Event.clear") as _clear:
+ _clear.side_effect = _fake_clear
+ agent.ls_gatherer.run()
+
+ _wait.assert_called_with(10) # agent loop_interval - run time
+ assert agent.ls_gatherer.data == 'sample out'
+ assert agent.ls_gatherer.ack == 23
+ _agent_wakeup.assert_called_once()
+ _clear.assert_called_once()
+
+ _exc_func = lambda *args, **kwargs: Exception()
+ agent.ls_gatherer.update_func(_exc_func)
+ agent.ls_gatherer.ack = 20
+ agent.ls_gatherer.stop = False
+
+ with mock.patch("threading.Event.clear") as _clear:
+ _clear.side_effect = _fake_clear
+ agent.ls_gatherer.run()
+ assert agent.ls_gatherer.data is None
+ assert agent.ls_gatherer.ack == agent.ack
+ # should have run full loop despite exception
+ _clear.assert_called_once()
+
+ # test general exception for full coverage
+ _agent_wakeup.side_effect = [Exception()]
+ agent.ls_gatherer.update_func(_sample_func)
+ agent.ls_gatherer.stop = False
+ # just to force only one iteration
+ _time.side_effect = _fake_clear
+ with mock.patch("threading.Event.clear") as _clear:
+ _clear.side_effect = Exception()
+ agent.ls_gatherer.run()
+ assert agent.ls_gatherer.data == 'sample out'
+ assert agent.ls_gatherer.ack == agent.ack
+ # should not have gotten to end of loop
+ _clear.assert_not_called()
+
+
+@mock.patch("cephadm.CephadmAgent.run")
+def test_command_agent(_agent_run, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ ctx.fsid = FSID
+ ctx.daemon_id = AGENT_ID
+
+ with pytest.raises(Exception, match=f"Agent daemon directory {AGENT_DIR} does not exist. Perhaps agent was never deployed?"):
+ _cephadm.command_agent(ctx)
+
+ cephadm_fs.create_dir(AGENT_DIR)
+ _cephadm.command_agent(ctx)
+ _agent_run.assert_called()
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
new file mode 100644
index 000000000..d310215f6
--- /dev/null
+++ b/src/cephadm/tests/test_cephadm.py
@@ -0,0 +1,2708 @@
+# type: ignore
+
+import errno
+import json
+import mock
+import os
+import pytest
+import socket
+import unittest
+from textwrap import dedent
+
+from .fixtures import (
+ cephadm_fs,
+ mock_docker,
+ mock_podman,
+ with_cephadm_ctx,
+ mock_bad_firewalld,
+ import_cephadm,
+)
+
+from pyfakefs import fake_filesystem
+from pyfakefs import fake_filesystem_unittest
+
+_cephadm = import_cephadm()
+
+
+def get_ceph_conf(
+ fsid='00000000-0000-0000-0000-0000deadbeef',
+ mon_host='[v2:192.168.1.1:3300/0,v1:192.168.1.1:6789/0]'):
+ return f'''
+# minimal ceph.conf for {fsid}
+[global]
+ fsid = {fsid}
+ mon_host = {mon_host}
+'''
+
+class TestCephAdm(object):
+
+ def test_docker_unit_file(self):
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_docker()
+ r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
+ assert 'Requires=docker.service' in r
+ ctx.container_engine = mock_podman()
+ r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
+ assert 'Requires=docker.service' not in r
+
+ @mock.patch('cephadm.logger')
+ def test_attempt_bind(self, _logger):
+ ctx = None
+ address = None
+ port = 0
+
+ def os_error(errno):
+ _os_error = OSError()
+ _os_error.errno = errno
+ return _os_error
+
+ for side_effect, expected_exception in (
+ (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError),
+ (os_error(errno.EAFNOSUPPORT), OSError),
+ (os_error(errno.EADDRNOTAVAIL), OSError),
+ (None, None),
+ ):
+ _socket = mock.Mock()
+ _socket.bind.side_effect = side_effect
+ try:
+ _cephadm.attempt_bind(ctx, _socket, address, port)
+ except Exception as e:
+ assert isinstance(e, expected_exception)
+ else:
+ if expected_exception is not None:
+ assert False
+
+ @mock.patch('cephadm.attempt_bind')
+ @mock.patch('cephadm.logger')
+ def test_port_in_use(self, _logger, _attempt_bind):
+ empty_ctx = None
+
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+ _attempt_bind.side_effect = _cephadm.PortOccupiedError('msg')
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == True
+
+ os_error = OSError()
+ os_error.errno = errno.EADDRNOTAVAIL
+ _attempt_bind.side_effect = os_error
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+ os_error = OSError()
+ os_error.errno = errno.EAFNOSUPPORT
+ _attempt_bind.side_effect = os_error
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False
+
+ @mock.patch('cephadm.socket.socket.bind')
+ @mock.patch('cephadm.logger')
+ def test_port_in_use_special_cases(self, _logger, _bind):
+ # port_in_use has special handling for
+ # EAFNOSUPPORT and EADDRNOTAVAIL errno OSErrors.
+ # If we get those specific errors when attempting
+ # to bind to the ip:port we should not say the
+ # port is in use
+
+ def os_error(errno):
+ _os_error = OSError()
+ _os_error.errno = errno
+ return _os_error
+
+ _bind.side_effect = os_error(errno.EADDRNOTAVAIL)
+ in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+ assert in_use == False
+
+ _bind.side_effect = os_error(errno.EAFNOSUPPORT)
+ in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+ assert in_use == False
+
+ # this time, have it raise the actual port taken error
+ # so it should report the port is in use
+ _bind.side_effect = os_error(errno.EADDRINUSE)
+ in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000))
+ assert in_use == True
+
+ @mock.patch('cephadm.attempt_bind')
+ @mock.patch('cephadm.logger')
+ def test_port_in_use_with_specific_ips(self, _logger, _attempt_bind):
+ empty_ctx = None
+
+ def _fake_attempt_bind(ctx, s: socket.socket, addr: str, port: int) -> None:
+ occupied_error = _cephadm.PortOccupiedError('msg')
+ if addr.startswith('200'):
+ raise occupied_error
+ if addr.startswith('100'):
+ if port == 4567:
+ raise occupied_error
+
+ _attempt_bind.side_effect = _fake_attempt_bind
+
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('200.0.0.0', 9100)) == True
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 9100)) == False
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 4567)) == True
+ assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('155.0.0.0', 4567)) == False
+
+ @mock.patch('socket.socket')
+ @mock.patch('cephadm.logger')
+ def test_check_ip_port_success(self, _logger, _socket):
+ ctx = _cephadm.CephadmContext()
+ ctx.skip_ping_check = False # enables executing port check with `check_ip_port`
+
+ for address, address_family in (
+ ('0.0.0.0', socket.AF_INET),
+ ('::', socket.AF_INET6),
+ ):
+ try:
+ _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 9100))
+ except Exception:
+ assert False
+ else:
+ assert _socket.call_args == mock.call(address_family, socket.SOCK_STREAM)
+
+ @mock.patch('socket.socket')
+ @mock.patch('cephadm.logger')
+ def test_check_ip_port_failure(self, _logger, _socket):
+ ctx = _cephadm.CephadmContext()
+ ctx.skip_ping_check = False # enables executing port check with `check_ip_port`
+
+ def os_error(errno):
+ _os_error = OSError()
+ _os_error.errno = errno
+ return _os_error
+
+ for address, address_family in (
+ ('0.0.0.0', socket.AF_INET),
+ ('::', socket.AF_INET6),
+ ):
+ for side_effect, expected_exception in (
+ (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError),
+ (os_error(errno.EADDRNOTAVAIL), OSError),
+ (os_error(errno.EAFNOSUPPORT), OSError),
+ (None, None),
+ ):
+ mock_socket_obj = mock.Mock()
+ mock_socket_obj.bind.side_effect = side_effect
+ _socket.return_value = mock_socket_obj
+ try:
+ _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 9100))
+ except Exception as e:
+ assert isinstance(e, expected_exception)
+ else:
+ if side_effect is not None:
+ assert False
+
+ def test_is_not_fsid(self):
+ assert not _cephadm.is_fsid('no-uuid')
+
+ def test_is_fsid(self):
+ assert _cephadm.is_fsid('e863154d-33c7-4350-bca5-921e0467e55b')
+
+ def test__get_parser_image(self):
+ args = _cephadm._parse_args(['--image', 'foo', 'version'])
+ assert args.image == 'foo'
+
+ def test_check_required_global_args(self):
+ ctx = _cephadm.CephadmContext()
+ mock_fn = mock.Mock()
+ mock_fn.return_value = 0
+ require_image = _cephadm.require_image(mock_fn)
+
+ with pytest.raises(_cephadm.Error, match='This command requires the global --image option to be set'):
+ require_image(ctx)
+
+ ctx.image = 'sample-image'
+ require_image(ctx)
+
+ @mock.patch('cephadm.logger')
+ def test_parse_mem_usage(self, _logger):
+ _, summary = _cephadm._parse_mem_usage(0, 'c6290e3f1489,-- / --')
+ assert summary == {}
+
+ def test_CustomValidation(self):
+ assert _cephadm._parse_args(['deploy', '--name', 'mon.a', '--fsid', 'fsid'])
+
+ with pytest.raises(SystemExit):
+ _cephadm._parse_args(['deploy', '--name', 'wrong', '--fsid', 'fsid'])
+
+ @pytest.mark.parametrize("test_input, expected", [
+ ("1.6.2", (1,6,2)),
+ ("1.6.2-stable2", (1,6,2)),
+ ])
+ def test_parse_podman_version(self, test_input, expected):
+ assert _cephadm._parse_podman_version(test_input) == expected
+
+ def test_parse_podman_version_invalid(self):
+ with pytest.raises(ValueError) as res:
+ _cephadm._parse_podman_version('inval.id')
+ assert 'inval' in str(res.value)
+
+ @mock.patch('cephadm.logger')
+ def test_is_ipv6(self, _logger):
+ for good in ("[::1]", "::1",
+ "fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"):
+ assert _cephadm.is_ipv6(good)
+ for bad in ("127.0.0.1",
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg",
+ "1:2:3:4:5:6:7:8:9", "fd00::1::1", "[fg::1]"):
+ assert not _cephadm.is_ipv6(bad)
+
+ def test_unwrap_ipv6(self):
+ def unwrap_test(address, expected):
+ assert _cephadm.unwrap_ipv6(address) == expected
+
+ tests = [
+ ('::1', '::1'), ('[::1]', '::1'),
+ ('[fde4:8dba:82e1:0:5054:ff:fe6a:357]', 'fde4:8dba:82e1:0:5054:ff:fe6a:357'),
+ ('can actually be any string', 'can actually be any string'),
+ ('[but needs to be stripped] ', '[but needs to be stripped] ')]
+ for address, expected in tests:
+ unwrap_test(address, expected)
+
+ def test_wrap_ipv6(self):
+ def wrap_test(address, expected):
+ assert _cephadm.wrap_ipv6(address) == expected
+
+ tests = [
+ ('::1', '[::1]'), ('[::1]', '[::1]'),
+ ('fde4:8dba:82e1:0:5054:ff:fe6a:357',
+ '[fde4:8dba:82e1:0:5054:ff:fe6a:357]'),
+ ('myhost.example.com', 'myhost.example.com'),
+ ('192.168.0.1', '192.168.0.1'),
+ ('', ''), ('fd00::1::1', 'fd00::1::1')]
+ for address, expected in tests:
+ wrap_test(address, expected)
+
+ @mock.patch('cephadm.Firewalld', mock_bad_firewalld)
+ @mock.patch('cephadm.logger')
+ def test_skip_firewalld(self, _logger, cephadm_fs):
+ """
+ test --skip-firewalld actually skips changing firewall
+ """
+
+ ctx = _cephadm.CephadmContext()
+ with pytest.raises(Exception):
+ _cephadm.update_firewalld(ctx, 'mon')
+
+ ctx.skip_firewalld = True
+ _cephadm.update_firewalld(ctx, 'mon')
+
+ ctx.skip_firewalld = False
+ with pytest.raises(Exception):
+ _cephadm.update_firewalld(ctx, 'mon')
+
+ ctx = _cephadm.CephadmContext()
+ ctx.ssl_dashboard_port = 8888
+ ctx.dashboard_key = None
+ ctx.dashboard_password_noupdate = True
+ ctx.initial_dashboard_password = 'password'
+ ctx.initial_dashboard_user = 'User'
+ with pytest.raises(Exception):
+ _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
+
+ ctx.skip_firewalld = True
+ _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
+
+ ctx.skip_firewalld = False
+ with pytest.raises(Exception):
+ _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
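+
+ # mock_bad_firewalld (patched in above) is assumed to raise whenever the
+ # firewall would actually be modified, so a raised exception proves the
+ # firewalld path ran and a clean return proves it was skipped.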
+
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.fetch_custom_config_files')
+ @mock.patch('cephadm.get_container')
+ def test_get_deployment_container(self, _get_container, _get_config, _logger):
+ """
+ test get_deployment_container properly makes use of extra container args and custom conf files
+ """
+
+ ctx = _cephadm.CephadmContext()
+ ctx.config_json = '-'
+ ctx.extra_container_args = [
+ '--pids-limit=12345',
+ '--something',
+ ]
+ ctx.data_dir = 'data'
+ _get_config.return_value = [
+ {
+ 'mount_path': '/etc/testing.str',
+ 'content': 'this\nis\na\nstring',
+ }
+ ]
+ _get_container.return_value = _cephadm.CephContainer.for_daemon(
+ ctx,
+ fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+ daemon_type='grafana',
+ daemon_id='host1',
+ entrypoint='',
+ args=[],
+ container_args=[],
+ volume_mounts={},
+ bind_mounts=[],
+ envs=[],
+ privileged=False,
+ ptrace=False,
+ host_network=True,
+ )
+ c = _cephadm.get_deployment_container(ctx,
+ '9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+ 'grafana',
+ 'host1',)
+
+ assert '--pids-limit=12345' in c.container_args
+ assert '--something' in c.container_args
+ assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts
+ assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str'
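+
+ # The mount key asserted above follows the (assumed) host-side layout for
+ # custom config files, conceptually:
+ #
+ # host_path = os.path.join(ctx.data_dir, fsid, 'custom_config_files',
+ #                          f'{daemon_type}.{daemon_id}',
+ #                          os.path.basename(conf['mount_path']))
+ # volume_mounts[host_path] = conf['mount_path']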
+
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.FileLock')
+ @mock.patch('cephadm.deploy_daemon')
+ @mock.patch('cephadm.fetch_configs')
+ @mock.patch('cephadm.make_var_run')
+ @mock.patch('cephadm.migrate_sysctl_dir')
+ @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
+ @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
+ @mock.patch('cephadm.get_deployment_container')
+ @mock.patch('cephadm.read_configuration_source', lambda c: {})
+ @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None)
+ @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: ('ceph', 'ceph'))
+ def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _fetch_configs, _deploy_daemon, _file_lock, _logger):
+ """
+ test that crush location for mon is set if it is included in config_json
+ """
+
+ ctx = _cephadm.CephadmContext()
+ ctx.name = 'mon.test'
+ ctx.fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+ ctx.reconfig = False
+ ctx.container_engine = mock_docker()
+ ctx.allow_ptrace = True
+ ctx.config_json = '-'
+ ctx.osd_fsid = '0'
+ ctx.tcp_ports = '3300 6789'
+ _fetch_configs.return_value = {
+ 'crush_location': 'database=a'
+ }
+
+ _get_deployment_container.return_value = _cephadm.CephContainer.for_daemon(
+ ctx,
+ fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9',
+ daemon_type='mon',
+ daemon_id='test',
+ entrypoint='',
+ args=[],
+ container_args=[],
+ volume_mounts={},
+ bind_mounts=[],
+ envs=[],
+ privileged=False,
+ ptrace=False,
+ host_network=True,
+ )
+
+ def _crush_location_checker(ctx, fsid, daemon_type, daemon_id, container, uid, gid, **kwargs):
+ print(container.args)
+ raise Exception(' '.join(container.args))
+
+ _deploy_daemon.side_effect = _crush_location_checker
+
+ with pytest.raises(Exception, match='--set-crush-location database=a'):
+ _cephadm.command_deploy_from(ctx)
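+
+ # The deploy path is assumed to turn the 'crush_location' key of the config
+ # JSON into an extra mon argument, conceptually:
+ #
+ # if daemon_type == 'mon' and 'crush_location' in config:
+ #     args.append(f'--set-crush-location {config["crush_location"]}')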
+
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.fetch_custom_config_files')
+ def test_write_custom_conf_files(self, _get_config, _logger, cephadm_fs):
+ """
+ test _write_custom_conf_files writes the conf files correctly
+ """
+
+ ctx = _cephadm.CephadmContext()
+ ctx.config_json = '-'
+ ctx.data_dir = _cephadm.DATA_DIR
+ _get_config.return_value = [
+ {
+ 'mount_path': '/etc/testing.str',
+ 'content': 'this\nis\na\nstring',
+ },
+ {
+ 'mount_path': '/etc/testing.conf',
+ 'content': 'very_cool_conf_setting: very_cool_conf_value\nx: y',
+ },
+ {
+ 'mount_path': '/etc/no-content.conf',
+ },
+ ]
+ _cephadm._write_custom_conf_files(ctx, 'mon', 'host1', 'fsid', 0, 0)
+ with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.str'), 'r') as f:
+ assert 'this\nis\na\nstring' == f.read()
+ with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.conf'), 'r') as f:
+ assert 'very_cool_conf_setting: very_cool_conf_value\nx: y' == f.read()
+ with pytest.raises(FileNotFoundError):
+ open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'no-content.conf'), 'r')
+
+ @mock.patch('cephadm.call_throws')
+ @mock.patch('cephadm.get_parm')
+ @mock.patch('cephadm.logger')
+ def test_registry_login(self, _logger, _get_parm, _call_throws):
+ # test normal valid login with url, username and password specified
+ _call_throws.return_value = '', '', 0
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['registry-login', '--registry-url', 'sample-url',
+ '--registry-username', 'sample-user', '--registry-password',
+ 'sample-pass'])
+ ctx.container_engine = mock_docker()
+ retval = _cephadm.command_registry_login(ctx)
+ assert retval == 0
+
+ # test bad login attempt with invalid arguments given
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['registry-login', '--registry-url', 'bad-args-url'])
+ with pytest.raises(Exception) as e:
+ assert _cephadm.command_registry_login(ctx)
+ assert str(e.value) == ('Invalid custom registry arguments received. To login to a custom registry include '
+ '--registry-url, --registry-username and --registry-password options or --registry-json option')
+
+ # test normal valid login with json file
+ _get_parm.return_value = {"url": "sample-url", "username": "sample-username", "password": "sample-password"}
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['registry-login', '--registry-json', 'sample-json'])
+ ctx.container_engine = mock_docker()
+ retval = _cephadm.command_registry_login(ctx)
+ assert retval == 0
+
+ # test bad login attempt with bad json file
+ _get_parm.return_value = {"bad-json": "bad-json"}
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['registry-login', '--registry-json', 'sample-json'])
+ with pytest.raises(Exception) as e:
+ assert _cephadm.command_registry_login(ctx)
+ assert str(e.value) == ("json provided for custom registry login did not include all necessary fields. "
+ "Please setup json file as\n"
+ "{\n"
+ " \"url\": \"REGISTRY_URL\",\n"
+ " \"username\": \"REGISTRY_USERNAME\",\n"
+ " \"password\": \"REGISTRY_PASSWORD\"\n"
+ "}\n")
+
+ # test login attempt with valid arguments where login command fails
+ _call_throws.side_effect = Exception
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['registry-login', '--registry-url', 'sample-url',
+ '--registry-username', 'sample-user', '--registry-password',
+ 'sample-pass'])
+ with pytest.raises(Exception) as e:
+ _cephadm.command_registry_login(ctx)
+ assert str(e.value) == "Failed to login to custom registry @ sample-url as sample-user with given password"
+
+ def test_get_image_info_from_inspect(self):
+ # podman
+ out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]"""
+ r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest')
+ print(r)
+ assert r == {
+ 'image_id': '204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1',
+ 'repo_digests': ['docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992']
+ }
+
+ # docker
+ out = """sha256:16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552,[quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f]"""
+ r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest')
+ assert r == {
+ 'image_id': '16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552',
+ 'repo_digests': ['quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f']
+ }
+
+ # multiple digests (podman)
+ out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]"""
+ r = _cephadm.get_image_info_from_inspect(out, 'registry/prom/prometheus:latest')
+ assert r == {
+ 'image_id': 'e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42',
+ 'repo_digests': [
+ 'docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4',
+ 'docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a',
+ ]
+ }
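+
+ # For reference, the inspect output parsed above is assumed to be the
+ # comma-separated pair '<image_id>,[<digest> <digest> ...]', conceptually:
+ #
+ # image_id, digests = out.split(',', 1)
+ # image_id = image_id.split(':')[-1]          # drop docker's 'sha256:' prefix
+ # repo_digests = digests.strip().strip('[]').split()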
+
+ def test_dict_get(self):
+ result = _cephadm.dict_get({'a': 1}, 'a', require=True)
+ assert result == 1
+ result = _cephadm.dict_get({'a': 1}, 'b')
+ assert result is None
+ result = _cephadm.dict_get({'a': 1}, 'b', default=2)
+ assert result == 2
+
+ def test_dict_get_error(self):
+ with pytest.raises(_cephadm.Error):
+ _cephadm.dict_get({'a': 1}, 'b', require=True)
+
+ def test_dict_get_join(self):
+ result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo')
+ assert result == 'a\nb'
+ result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo')
+ assert result == '1\n2'
+ result = _cephadm.dict_get_join({'bar': 'a'}, 'bar')
+ assert result == 'a'
+ result = _cephadm.dict_get_join({'a': 1}, 'a')
+ assert result == 1
+
+ @mock.patch('os.listdir', return_value=[])
+ @mock.patch('cephadm.logger')
+ def test_infer_local_ceph_image(self, _logger, _listdir):
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = '00000000-0000-0000-0000-0000deadbeez'
+ ctx.container_engine = mock_podman()
+
+ # make sure the right image is selected when container is found
+ cinfo = _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+ 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+ '514e6a882f6e74806a5856468489eeff8d7106095557578da96935e4d0ba4d9d',
+ '2022-04-19 13:45:20.97146228 +0000 UTC',
+ '')
+ out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
+ quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
+ docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+ with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+ with mock.patch('cephadm.get_container_info', return_value=cinfo):
+ image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+ assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e'
+
+ # make sure first valid image is used when no container_info is found
+ out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
+ quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
+ docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+ with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+ with mock.patch('cephadm.get_container_info', return_value=None):
+ image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+ assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185'
+
+ # make sure images without digest are discarded (no container_info is found)
+ out = '''quay.ceph.io/ceph-ci/ceph@|||
+ docker.io/ceph/ceph@|||
+ docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
+ with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
+ with mock.patch('cephadm.get_container_info', return_value=None):
+ image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
+ assert image == 'docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508'
+
+ @pytest.mark.parametrize('daemon_filter, by_name, daemon_list, container_stats, output',
+ [
+ # get container info by type ('mon')
+ (
+ 'mon',
+ False,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+ 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+ '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+ '2022-04-19 13:45:20.97146228 +0000 UTC',
+ '')
+ ),
+ # get container info by name ('mon.ceph-node-0')
+ (
+ 'mon.ceph-node-0',
+ True,
+ [
+ {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+ 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+ '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+ '2022-04-19 13:45:20.97146228 +0000 UTC',
+ '')
+ ),
+ # get container info by name (same daemon but two different fsids)
+ (
+ 'mon.ceph-node-0',
+ True,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
+ 'registry.hub.docker.com/rkachach/ceph:custom-v0.5',
+ '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4',
+ '2022-04-19 13:45:20.97146228 +0000 UTC',
+ '')
+ ),
+ # get container info by type (bad container stats: 127 code)
+ (
+ 'mon',
+ False,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-FFFF-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("",
+ "",
+ 127),
+ None
+ ),
+ # get container info by name (bad container stats: 127 code)
+ (
+ 'mon.ceph-node-0',
+ True,
+ [
+ {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("",
+ "",
+ 127),
+ None
+ ),
+ # get container info by invalid name (doesn't contain '.')
+ (
+ 'mon-ceph-node-0',
+ True,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ None
+ ),
+ # get container info by invalid name (empty)
+ (
+ '',
+ True,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ None
+ ),
+ # get container info by invalid type (empty)
+ (
+ '',
+ False,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ None
+ ),
+ # get container info by name: no match (invalid fsid)
+ (
+ 'mon',
+ False,
+ [
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-1111-0000-0000-0000deadbeef'},
+ {'name': 'mon.ceph-node-0', 'fsid': '00000000-2222-0000-0000-0000deadbeef'},
+ ],
+ ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,",
+ "",
+ 0),
+ None
+ ),
+ # get container info by name: no match
+ (
+ 'mon.ceph-node-0',
+ True,
+ [],
+ None,
+ None
+ ),
+ # get container info by type: no match
+ (
+ 'mgr',
+ False,
+ [],
+ None,
+ None
+ ),
+ ])
+ @mock.patch('cephadm.logger')
+ def test_get_container_info(self, _logger, daemon_filter, by_name, daemon_list, container_stats, output):
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = '00000000-0000-0000-0000-0000deadbeef'
+ ctx.container_engine = mock_podman()
+ with mock.patch('cephadm.list_daemons', return_value=daemon_list):
+ with mock.patch('cephadm.get_container_stats', return_value=container_stats):
+ assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output
+
+ def test_should_log_to_journald(self):
+ ctx = _cephadm.CephadmContext()
+ # explicit
+ ctx.log_to_journald = True
+ assert _cephadm.should_log_to_journald(ctx)
+
+ ctx.log_to_journald = None
+ # enabled if podman supports --cgroups=split
+ ctx.container_engine = mock_podman()
+ ctx.container_engine.version = (2, 1, 0)
+ assert _cephadm.should_log_to_journald(ctx)
+
+ # disable on old podman
+ ctx.container_engine.version = (2, 0, 0)
+ assert not _cephadm.should_log_to_journald(ctx)
+
+ # disable on docker
+ ctx.container_engine = mock_docker()
+ assert not _cephadm.should_log_to_journald(ctx)
+
+ def test_normalize_image_digest(self):
+ s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1'
+ assert _cephadm.normalize_image_digest(s) == s
+
+ s = 'ceph/ceph:latest'
+ assert _cephadm.normalize_image_digest(s) == f'{_cephadm.DEFAULT_REGISTRY}/{s}'
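+
+ # Sketch of the assumed rule (looks_like_registry_host is a hypothetical
+ # helper for illustration: a first path component containing '.' or ':'
+ # names a registry):
+ #
+ # def normalize_image_digest(digest):
+ #     if looks_like_registry_host(digest.split('/')[0]):
+ #         return digest                       # already fully qualified
+ #     return f'{DEFAULT_REGISTRY}/{digest}'   # bare repo/name:tag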
+
+ @pytest.mark.parametrize('fsid, ceph_conf, list_daemons, result, err, ',
+ [
+ (
+ None,
+ None,
+ [],
+ None,
+ None,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ [],
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ [
+ {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+ {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+ ],
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ ),
+ (
+ None,
+ None,
+ [
+ {'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ ),
+ (
+ None,
+ None,
+ [
+ {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+ {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+ ],
+ None,
+ r'Cannot infer an fsid',
+ ),
+ (
+ None,
+ get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+ [],
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ ),
+ (
+ None,
+ get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+ [
+ {'fsid': '00000000-0000-0000-0000-0000deadbeef'},
+ ],
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ ),
+ (
+ None,
+ get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'),
+ [
+ {'fsid': '10000000-0000-0000-0000-0000deadbeef'},
+ {'fsid': '20000000-0000-0000-0000-0000deadbeef'},
+ ],
+ None,
+ r'Cannot infer an fsid',
+ ),
+ ])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ def test_infer_fsid(self, _logger, _call, fsid, ceph_conf, list_daemons, result, err, cephadm_fs):
+ # build the context
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = fsid
+
+ # mock the decorator
+ mock_fn = mock.Mock()
+ mock_fn.return_value = 0
+ infer_fsid = _cephadm.infer_fsid(mock_fn)
+
+ # mock the ceph.conf file content
+ if ceph_conf:
+ f = cephadm_fs.create_file('ceph.conf', contents=ceph_conf)
+ ctx.config = f.path
+
+ # test
+ with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+ if err:
+ with pytest.raises(_cephadm.Error, match=err):
+ infer_fsid(ctx)
+ else:
+ infer_fsid(ctx)
+ assert ctx.fsid == result
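+
+ # Inference order exercised above (assumed): an explicit ctx.fsid wins;
+ # otherwise a single fsid found among deployed daemons or in ceph.conf is
+ # adopted; multiple conflicting candidates raise 'Cannot infer an fsid'.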
+
+ @pytest.mark.parametrize('fsid, other_conf_files, config, name, list_daemons, result, ',
+ [
+ # per-cluster conf takes precedence over the default conf
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ [_cephadm.CEPH_DEFAULT_CONF],
+ None,
+ None,
+ [],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+ ),
+ # mon daemon conf takes precedence over the cluster conf and the default conf
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+ _cephadm.CEPH_DEFAULT_CONF],
+ None,
+ None,
+ [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+ ),
+ # daemon conf (--name option) takes precedence over the cluster, default and mon confs
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+ _cephadm.CEPH_DEFAULT_CONF],
+ None,
+ 'osd.0',
+ [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'},
+ {'name': 'osd.0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config',
+ ),
+ # user provided conf ('/foo/ceph.conf') takes precedence over any other conf
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+ _cephadm.CEPH_DEFAULT_CONF,
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config'],
+ '/foo/ceph.conf',
+ None,
+ [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+ '/foo/ceph.conf',
+ ),
+ ])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ def test_infer_config_precedence(self, _logger, _call, other_conf_files, fsid, config, name, list_daemons, result, cephadm_fs):
+ # build the context
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = fsid
+ ctx.config = config
+ ctx.name = name
+
+ # mock the decorator
+ mock_fn = mock.Mock()
+ mock_fn.return_value = 0
+ infer_config = _cephadm.infer_config(mock_fn)
+
+ # mock the config file
+ cephadm_fs.create_file(result)
+
+ # mock other potential config files
+ for f in other_conf_files:
+ cephadm_fs.create_file(f)
+
+ # test
+ with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+ infer_config(ctx)
+ assert ctx.config == result
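+
+ # Precedence summary for the cases above (highest first): user --config,
+ # daemon config selected via --name, the running mon's config, the
+ # per-cluster config under /var/lib/ceph/<fsid>/config/, then
+ # _cephadm.CEPH_DEFAULT_CONF.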
+
+ @pytest.mark.parametrize('fsid, config, name, list_daemons, result, ',
+ [
+ (
+ None,
+ '/foo/bar.conf',
+ None,
+ [],
+ '/foo/bar.conf',
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [],
+ _cephadm.CEPH_DEFAULT_CONF,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf',
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [{'name': 'mon.a', 'fsid': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'style': 'cephadm:v1'}],
+ _cephadm.CEPH_DEFAULT_CONF,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'legacy'}],
+ _cephadm.CEPH_DEFAULT_CONF,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ None,
+ [{'name': 'osd.0'}],
+ _cephadm.CEPH_DEFAULT_CONF,
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ '/foo/bar.conf',
+ 'mon.a',
+ [{'name': 'mon.a', 'style': 'cephadm:v1'}],
+ '/foo/bar.conf',
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ 'mon.a',
+ [],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config',
+ ),
+ (
+ '00000000-0000-0000-0000-0000deadbeef',
+ None,
+ 'osd.0',
+ [],
+ '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config',
+ ),
+ (
+ None,
+ None,
+ None,
+ [],
+ _cephadm.CEPH_DEFAULT_CONF,
+ ),
+ ])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ def test_infer_config(self, _logger, _call, fsid, config, name, list_daemons, result, cephadm_fs):
+ # build the context
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = fsid
+ ctx.config = config
+ ctx.name = name
+
+ # mock the decorator
+ mock_fn = mock.Mock()
+ mock_fn.return_value = 0
+ infer_config = _cephadm.infer_config(mock_fn)
+
+ # mock the config file
+ cephadm_fs.create_file(result)
+
+ # test
+ with mock.patch('cephadm.list_daemons', return_value=list_daemons):
+ infer_config(ctx)
+ assert ctx.config == result
+
+ @mock.patch('cephadm.call')
+ def test_extract_uid_gid_fail(self, _call):
+ err = """Error: container_linux.go:370: starting container process caused: process_linux.go:459: container init caused: process_linux.go:422: setting cgroup config for procHooks process caused: Unit libpod-056038e1126191fba41d8a037275136f2d7aeec9710b9ee
+ff792c06d8544b983.scope not found.: OCI runtime error"""
+ _call.return_value = ('', err, 127)
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_podman()
+ with pytest.raises(_cephadm.Error, match='OCI'):
+ _cephadm.extract_uid_gid(ctx)
+
+ @pytest.mark.parametrize('test_input, expected', [
+ ([_cephadm.make_fsid(), _cephadm.make_fsid(), _cephadm.make_fsid()], 3),
+ ([_cephadm.make_fsid(), 'invalid-fsid', _cephadm.make_fsid(), '0b87e50c-8e77-11ec-b890-'], 2),
+ (['f6860ec2-8e76-11ec-', '0b87e50c-8e77-11ec-b890-', ''], 0),
+ ([], 0),
+ ])
+ def test_get_ceph_cluster_count(self, test_input, expected):
+ ctx = _cephadm.CephadmContext()
+ with mock.patch('os.listdir', return_value=test_input):
+ assert _cephadm.get_ceph_cluster_count(ctx) == expected
+
+ def test_set_image_minimize_config(self):
+ def throw_cmd(cmd):
+ raise _cephadm.Error(' '.join(cmd))
+ ctx = _cephadm.CephadmContext()
+ ctx.image = 'test_image'
+ ctx.no_minimize_config = True
+ fake_cli = lambda cmd, __=None, ___=None: throw_cmd(cmd)
+ with pytest.raises(_cephadm.Error, match='config set global container_image test_image'):
+ _cephadm.finish_bootstrap_config(
+ ctx=ctx,
+ fsid=_cephadm.make_fsid(),
+ config='',
+ mon_id='a', mon_dir='mon_dir',
+ mon_network=None, ipv6=False,
+ cli=fake_cli,
+ cluster_network=None,
+ ipv6_cluster_network=False
+ )
+
+
+class TestCustomContainer(unittest.TestCase):
+ cc: _cephadm.CustomContainer
+
+ def setUp(self):
+ self.cc = _cephadm.CustomContainer(
+ 'e863154d-33c7-4350-bca5-921e0467e55b',
+ 'container',
+ config_json={
+ 'entrypoint': 'bash',
+ 'gid': 1000,
+ 'args': [
+ '--no-healthcheck',
+ '-p 6800:6800'
+ ],
+ 'envs': ['SECRET=password'],
+ 'ports': [8080, 8443],
+ 'volume_mounts': {
+ '/CONFIG_DIR': '/foo/conf',
+ 'bar/config': '/bar:ro'
+ },
+ 'bind_mounts': [
+ [
+ 'type=bind',
+ 'source=/CONFIG_DIR',
+ 'destination=/foo/conf',
+ ''
+ ],
+ [
+ 'type=bind',
+ 'source=bar/config',
+ 'destination=/bar:ro',
+ 'ro=true'
+ ]
+ ]
+ },
+ image='docker.io/library/hello-world:latest'
+ )
+
+ def test_entrypoint(self):
+ self.assertEqual(self.cc.entrypoint, 'bash')
+
+ def test_uid_gid(self):
+ self.assertEqual(self.cc.uid, 65534)
+ self.assertEqual(self.cc.gid, 1000)
+
+ def test_ports(self):
+ self.assertEqual(self.cc.ports, [8080, 8443])
+
+ def test_get_container_args(self):
+ result = self.cc.get_container_args()
+ self.assertEqual(result, [
+ '--no-healthcheck',
+ '-p 6800:6800'
+ ])
+
+ def test_get_container_envs(self):
+ result = self.cc.get_container_envs()
+ self.assertEqual(result, ['SECRET=password'])
+
+ def test_get_container_mounts(self):
+ result = self.cc.get_container_mounts('/xyz')
+ self.assertDictEqual(result, {
+ '/CONFIG_DIR': '/foo/conf',
+ '/xyz/bar/config': '/bar:ro'
+ })
+
+ def test_get_container_binds(self):
+ result = self.cc.get_container_binds('/xyz')
+ self.assertEqual(result, [
+ [
+ 'type=bind',
+ 'source=/CONFIG_DIR',
+ 'destination=/foo/conf',
+ ''
+ ],
+ [
+ 'type=bind',
+ 'source=/xyz/bar/config',
+ 'destination=/bar:ro',
+ 'ro=true'
+ ]
+ ])
+
+
+class TestMaintenance:
+ systemd_target = "ceph.00000000-0000-0000-0000-000000c0ffee.target"
+ fsid = '0ea8cdd0-1bbf-11ec-a9c7-5254002763fa'
+
+ def test_systemd_target_OK(self, tmp_path):
+ base = tmp_path
+ wants = base / "ceph.target.wants"
+ wants.mkdir()
+ target = wants / TestMaintenance.systemd_target
+ target.touch()
+ ctx = _cephadm.CephadmContext()
+ ctx.unit_dir = str(base)
+
+ assert _cephadm.systemd_target_state(ctx, target.name)
+
+ def test_systemd_target_NOTOK(self, tmp_path):
+ base = tmp_path
+ ctx = _cephadm.CephadmContext()
+ ctx.unit_dir = str(base)
+ assert not _cephadm.systemd_target_state(ctx, TestMaintenance.systemd_target)
+
+ def test_parser_OK(self):
+ args = _cephadm._parse_args(['host-maintenance', 'enter'])
+ assert args.maintenance_action == 'enter'
+
+ def test_parser_BAD(self):
+ with pytest.raises(SystemExit):
+ _cephadm._parse_args(['host-maintenance', 'wah'])
+
+ @mock.patch('os.listdir', return_value=[])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.systemd_target_state')
+ def test_enter_failure_1(self, _target_state, _logger, _call, _listdir):
+ _call.return_value = '', '', 999
+ _target_state.return_value = True
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
+ ctx.container_engine = mock_podman()
+ retval = _cephadm.command_maintenance(ctx)
+ assert retval.startswith('failed')
+
+ @mock.patch('os.listdir', return_value=[])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.systemd_target_state')
+ def test_enter_failure_2(self, _target_state, _logger, _call, _listdir):
+ _call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)]
+ _target_state.return_value = True
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
+ ctx.container_engine = mock_podman()
+ retval = _cephadm.command_maintenance(ctx)
+ assert retval.startswith('failed')
+
+ @mock.patch('os.listdir', return_value=[])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.systemd_target_state')
+ @mock.patch('cephadm.target_exists')
+ def test_exit_failure_1(self, _target_exists, _target_state, _logger, _call, _listdir):
+ _call.return_value = '', '', 999
+ _target_state.return_value = False
+ _target_exists.return_value = True
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
+ ctx.container_engine = mock_podman()
+ retval = _cephadm.command_maintenance(ctx)
+ assert retval.startswith('failed')
+
+ @mock.patch('os.listdir', return_value=[])
+ @mock.patch('cephadm.call')
+ @mock.patch('cephadm.logger')
+ @mock.patch('cephadm.systemd_target_state')
+ @mock.patch('cephadm.target_exists')
+ def test_exit_failure_2(self, _target_exists, _target_state, _logger, _call, _listdir):
+ _call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)]
+ _target_state.return_value = False
+ _target_exists.return_value = True
+ ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
+ ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
+ ctx.container_engine = mock_podman()
+ retval = _cephadm.command_maintenance(ctx)
+ assert retval.startswith('failed')
+
+
+class TestMonitoring(object):
+ @mock.patch('cephadm.call')
+ def test_get_version_alertmanager(self, _call):
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_podman()
+ daemon_type = 'alertmanager'
+
+ # binary `prometheus`
+ _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
+ version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ assert version == '0.16.1'
+
+ # binary `prometheus-alertmanager`
+ _call.side_effect = (
+ ('', '', 1),
+ ('', '{}, version 0.16.1'.format(daemon_type), 0),
+ )
+ version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ assert version == '0.16.1'
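+
+ # Monitoring.get_version is assumed to exec '<binary> --version' inside the
+ # container, parse 'name, version X.Y.Z' from the output, and retry with an
+ # alternate binary name for alertmanager when the first invocation exits
+ # non-zero (the side_effect pair above models exactly that fallback).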
+
+ @mock.patch('cephadm.call')
+ def test_get_version_prometheus(self, _call):
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_podman()
+ daemon_type = 'prometheus'
+ _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
+ version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ assert version == '0.16.1'
+
+ def test_prometheus_external_url(self):
+ ctx = _cephadm.CephadmContext()
+ ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
+ daemon_type = 'prometheus'
+ daemon_id = 'home'
+ fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
+ args = _cephadm.get_daemon_args(ctx, fsid, daemon_type, daemon_id)
+ assert any([x.startswith('--web.external-url=http://') for x in args])
+
+ @mock.patch('cephadm.call')
+ def test_get_version_node_exporter(self, _call):
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_podman()
+ daemon_type = 'node-exporter'
+ _call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0
+ version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ assert version == '0.16.1'
+
+ def test_create_daemon_dirs_prometheus(self, cephadm_fs):
+ """
+ Ensures the required and optional files given in the configuration are
+ created and mapped correctly inside the container. Tests absolute and
+ relative file paths given in the configuration.
+ """
+
+ fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
+ daemon_type = 'prometheus'
+ uid, gid = 50, 50
+ daemon_id = 'home'
+ ctx = _cephadm.CephadmContext()
+ ctx.data_dir = '/somedir'
+ ctx.config_json = json.dumps({
+ 'files': {
+ 'prometheus.yml': 'foo',
+ '/etc/prometheus/alerting/ceph_alerts.yml': 'bar'
+ }
+ })
+
+ _cephadm.create_daemon_dirs(ctx,
+ fsid,
+ daemon_type,
+ daemon_id,
+ uid,
+ gid,
+ config=None,
+ keyring=None)
+
+ prefix = '{data_dir}/{fsid}/{daemon_type}.{daemon_id}'.format(
+ data_dir=ctx.data_dir,
+ fsid=fsid,
+ daemon_type=daemon_type,
+ daemon_id=daemon_id
+ )
+
+ expected = {
+ 'etc/prometheus/prometheus.yml': 'foo',
+ 'etc/prometheus/alerting/ceph_alerts.yml': 'bar',
+ }
+
+ for file,content in expected.items():
+ file = os.path.join(prefix, file)
+ assert os.path.exists(file)
+ with open(file) as f:
+ assert f.read() == content
+
+ # assert uid/gid after redeploy
+ new_uid = uid+1
+ new_gid = gid+1
+ _cephadm.create_daemon_dirs(ctx,
+ fsid,
+ daemon_type,
+ daemon_id,
+ new_uid,
+ new_gid,
+ config=None,
+ keyring=None)
+ for file,content in expected.items():
+ file = os.path.join(prefix, file)
+ assert os.stat(file).st_uid == new_uid
+ assert os.stat(file).st_gid == new_gid
+
+
+class TestBootstrap(object):
+
+ @staticmethod
+ def _get_cmd(*args):
+ return [
+ 'bootstrap',
+ '--allow-mismatched-release',
+ '--skip-prepare-host',
+ '--skip-dashboard',
+ *args,
+ ]
+
+
+###############################################
+
+ def test_config(self, cephadm_fs):
+ conf_file = 'foo'
+ cmd = self._get_cmd(
+ '--mon-ip', '192.168.1.1',
+ '--skip-mon-network',
+ '--config', conf_file,
+ )
+
+ with with_cephadm_ctx(cmd) as ctx:
+ msg = r'No such file or directory'
+ with pytest.raises(_cephadm.Error, match=msg):
+ _cephadm.command_bootstrap(ctx)
+
+ cephadm_fs.create_file(conf_file)
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+ def test_no_mon_addr(self, cephadm_fs):
+ cmd = self._get_cmd()
+ with with_cephadm_ctx(cmd) as ctx:
+ msg = r'must specify --mon-ip or --mon-addrv'
+ with pytest.raises(_cephadm.Error, match=msg):
+ _cephadm.command_bootstrap(ctx)
+
+ def test_skip_mon_network(self, cephadm_fs):
+ cmd = self._get_cmd('--mon-ip', '192.168.1.1')
+
+ with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+ msg = r'--skip-mon-network'
+ with pytest.raises(_cephadm.Error, match=msg):
+ _cephadm.command_bootstrap(ctx)
+
+ cmd += ['--skip-mon-network']
+ with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+ @pytest.mark.parametrize('mon_ip, list_networks, result',
+ [
+ # IPv4
+ (
+ 'eth0',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ False,
+ ),
+ (
+ '0.0.0.0',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ False,
+ ),
+ (
+ '192.168.1.0',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ False,
+ ),
+ (
+ '192.168.1.1',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ True,
+ ),
+ (
+ '192.168.1.1:1234',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ True,
+ ),
+ (
+ '192.168.1.1:0123',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ True,
+ ),
+ # IPv6
+ (
+ '::',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ False,
+ ),
+ (
+ '::ffff:192.168.1.0',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ False,
+ ),
+ (
+ '::ffff:192.168.1.1',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ True,
+ ),
+ (
+ '::ffff:c0a8:101',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ True,
+ ),
+ (
+ '[::ffff:c0a8:101]:1234',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ True,
+ ),
+ (
+ '[::ffff:c0a8:101]:0123',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ True,
+ ),
+ (
+ '0000:0000:0000:0000:0000:FFFF:C0A8:0101',
+ {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}},
+ True,
+ ),
+ ])
+ def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs):
+ cmd = self._get_cmd('--mon-ip', mon_ip)
+ if not result:
+ with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ msg = r'--skip-mon-network'
+ with pytest.raises(_cephadm.Error, match=msg):
+ _cephadm.command_bootstrap(ctx)
+ else:
+ with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+ @pytest.mark.parametrize('mon_addrv, list_networks, err',
+ [
+ # IPv4
+ (
+ '192.168.1.1',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ r'must use square brackets',
+ ),
+ (
+ '[192.168.1.1]',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ r'must include port number',
+ ),
+ (
+ '[192.168.1.1:1234]',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ None,
+ ),
+ (
+ '[192.168.1.1:0123]',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ None,
+ ),
+ (
+ '[v2:192.168.1.1:3300,v1:192.168.1.1:6789]',
+ {'192.168.1.0/24': {'eth0': ['192.168.1.1']}},
+ None,
+ ),
+ # IPv6
+ (
+ '[::ffff:192.168.1.1:1234]',
+ {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+ None,
+ ),
+ (
+ '[::ffff:192.168.1.1:0123]',
+ {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+ None,
+ ),
+ (
+ '[0000:0000:0000:0000:0000:FFFF:C0A8:0101:1234]',
+ {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+ None,
+ ),
+ (
+ '[v2:0000:0000:0000:0000:0000:FFFF:C0A8:0101:3300,v1:0000:0000:0000:0000:0000:FFFF:C0A8:0101:6789]',
+ {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}},
+ None,
+ ),
+ ])
+ def test_mon_addrv(self, mon_addrv, list_networks, err, cephadm_fs):
+ cmd = self._get_cmd('--mon-addrv', mon_addrv)
+ if err:
+ with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_bootstrap(ctx)
+ else:
+ with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+ def test_allow_fqdn_hostname(self, cephadm_fs):
+ hostname = 'foo.bar'
+ cmd = self._get_cmd(
+ '--mon-ip', '192.168.1.1',
+ '--skip-mon-network',
+ )
+
+ with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+ msg = r'--allow-fqdn-hostname'
+ with pytest.raises(_cephadm.Error, match=msg):
+ _cephadm.command_bootstrap(ctx)
+
+ cmd += ['--allow-fqdn-hostname']
+ with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+ @pytest.mark.parametrize('fsid, err',
+ [
+ ('', None),
+ ('00000000-0000-0000-0000-0000deadbeef', None),
+ ('00000000-0000-0000-0000-0000deadbeez', 'not an fsid'),
+ ])
+ def test_fsid(self, fsid, err, cephadm_fs):
+ cmd = self._get_cmd(
+ '--mon-ip', '192.168.1.1',
+ '--skip-mon-network',
+ '--fsid', fsid,
+ )
+
+ with with_cephadm_ctx(cmd) as ctx:
+ if err:
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_bootstrap(ctx)
+ else:
+ retval = _cephadm.command_bootstrap(ctx)
+ assert retval == 0
+
+
+class TestShell(object):
+
+ def test_fsid(self, cephadm_fs):
+ fsid = '00000000-0000-0000-0000-0000deadbeef'
+
+ cmd = ['shell', '--fsid', fsid]
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.fsid == fsid
+
+ cmd = ['shell', '--fsid', '00000000-0000-0000-0000-0000deadbeez']
+ with with_cephadm_ctx(cmd) as ctx:
+ err = 'not an fsid'
+ with pytest.raises(_cephadm.Error, match=err):
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 1
+ assert ctx.fsid == None
+
+ s = get_ceph_conf(fsid=fsid)
+ f = cephadm_fs.create_file('ceph.conf', contents=s)
+
+ cmd = ['shell', '--fsid', fsid, '--config', f.path]
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.fsid == fsid
+
+ cmd = ['shell', '--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path]
+ with with_cephadm_ctx(cmd) as ctx:
+ err = 'fsid does not match ceph.conf'
+ with pytest.raises(_cephadm.Error, match=err):
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 1
+ assert ctx.fsid == None
+
+ def test_name(self, cephadm_fs):
+ cmd = ['shell', '--name', 'foo']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+
+ cmd = ['shell', '--name', 'foo.bar']
+ with with_cephadm_ctx(cmd) as ctx:
+ err = r'must pass --fsid'
+ with pytest.raises(_cephadm.Error, match=err):
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 1
+
+ fsid = '00000000-0000-0000-0000-0000deadbeef'
+ cmd = ['shell', '--name', 'foo.bar', '--fsid', fsid]
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+
+ def test_config(self, cephadm_fs):
+ cmd = ['shell']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.config == None
+
+ cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_CONF)
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.config == _cephadm.CEPH_DEFAULT_CONF
+
+ cmd = ['shell', '--config', 'foo']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.config == 'foo'
+
+ def test_keyring(self, cephadm_fs):
+ cmd = ['shell']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.keyring == None
+
+ cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_KEYRING)
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.keyring == _cephadm.CEPH_DEFAULT_KEYRING
+
+ cmd = ['shell', '--keyring', 'foo']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert ctx.keyring == 'foo'
+
+ @mock.patch('cephadm.CephContainer')
+ def test_mount_no_dst(self, _ceph_container, cephadm_fs):
+ cmd = ['shell', '--mount', '/etc/foo']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/mnt/foo'
+
+ @mock.patch('cephadm.CephContainer')
+ def test_mount_with_dst_no_opt(self, _ceph_container, cephadm_fs):
+ cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar'
+
+ @mock.patch('cephadm.CephContainer')
+ def test_mount_with_dst_and_opt(self, _ceph_container, cephadm_fs):
+ cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar:Z']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 0
+ assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar:Z'
+
+class TestCephVolume(object):
+
+ @staticmethod
+ def _get_cmd(*args):
+ return [
+ 'ceph-volume',
+ *args,
+ '--', 'inventory', '--format', 'json'
+ ]
+
+ def test_noop(self, cephadm_fs):
+ cmd = self._get_cmd()
+ with with_cephadm_ctx(cmd) as ctx:
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.fsid == None
+ assert ctx.config == None
+ assert ctx.keyring == None
+ assert ctx.config_json == None
+
+ def test_fsid(self, cephadm_fs):
+ fsid = '00000000-0000-0000-0000-0000deadbeef'
+
+ cmd = self._get_cmd('--fsid', fsid)
+ with with_cephadm_ctx(cmd) as ctx:
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.fsid == fsid
+
+ cmd = self._get_cmd('--fsid', '00000000-0000-0000-0000-0000deadbeez')
+ with with_cephadm_ctx(cmd) as ctx:
+ err = 'not an fsid'
+ with pytest.raises(_cephadm.Error, match=err):
+ retval = _cephadm.command_shell(ctx)
+ assert retval == 1
+ assert ctx.fsid == None
+
+ s = get_ceph_conf(fsid=fsid)
+ f = cephadm_fs.create_file('ceph.conf', contents=s)
+
+ cmd = self._get_cmd('--fsid', fsid, '--config', f.path)
+ with with_cephadm_ctx(cmd) as ctx:
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.fsid == fsid
+
+ cmd = self._get_cmd('--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path)
+ with with_cephadm_ctx(cmd) as ctx:
+ err = 'fsid does not match ceph.conf'
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.fsid == None
+
+ def test_config(self, cephadm_fs):
+ cmd = self._get_cmd('--config', 'foo')
+ with with_cephadm_ctx(cmd) as ctx:
+ err = r'No such file or directory'
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_ceph_volume(ctx)
+
+ cephadm_fs.create_file('bar')
+ cmd = self._get_cmd('--config', 'bar')
+ with with_cephadm_ctx(cmd) as ctx:
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.config == 'bar'
+
+ def test_keyring(self, cephadm_fs):
+ cmd = self._get_cmd('--keyring', 'foo')
+ with with_cephadm_ctx(cmd) as ctx:
+ err = r'No such file or directory'
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_ceph_volume(ctx)
+
+ cephadm_fs.create_file('bar')
+ cmd = self._get_cmd('--keyring', 'bar')
+ with with_cephadm_ctx(cmd) as ctx:
+ _cephadm.command_ceph_volume(ctx)
+ assert ctx.keyring == 'bar'
+
+
+class TestIscsi:
+ def test_unit_run(self, cephadm_fs):
+ fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+ config_json = {
+ 'files': {'iscsi-gateway.cfg': ''}
+ }
+ with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx:
+ ctx.container_engine = mock_docker()
+ ctx.config_json = json.dumps(config_json)
+ ctx.fsid = fsid
+ _cephadm.get_parm.return_value = config_json
+ c = _cephadm.get_container(ctx, fsid, 'iscsi', 'daemon_id')
+
+ _cephadm.make_data_dir(ctx, fsid, 'iscsi', 'daemon_id')
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'iscsi',
+ 'daemon_id',
+ c,
+ True, True
+ )
+
+ with open('/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/unit.run') as f:
+ assert f.read() == """set -e
+if ! grep -qs /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs /proc/mounts; then mount -t configfs none /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs; fi
+# iscsi tcmu-runner container
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id-tcmu 2> /dev/null
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null
+/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph &
+# iscsi.daemon_id
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id 2> /dev/null
+! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id 2> /dev/null
+/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph
+"""
+
+ def test_get_container(self):
+ """
+ Due to a combination of socket.getfqdn() and podman's behavior of adding
+ the container name to the /etc/hosts file, we cannot use periods in
+ container names, but we still need to be able to detect old existing
+ containers. Assert this behaviour. This check can likely be removed in Ceph R.
+ """
+ fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
+ with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx:
+ ctx.fsid = fsid
+ c = _cephadm.get_container(ctx, fsid, 'iscsi', 'something')
+ assert c.cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-something'
+ assert c.old_cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.something'
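+
+ # In other words, with name = f'{daemon_type}.{daemon_id}' the (assumed)
+ # naming rule is:
+ #
+ # cname     = f'ceph-{fsid}-{name.replace(".", "-")}'   # new, period-free
+ # old_cname = f'ceph-{fsid}-{name}'                     # legacy, dotted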
+
+
+class TestCheckHost:
+
+ @mock.patch('cephadm.find_executable', return_value='foo')
+ @mock.patch('cephadm.check_time_sync', return_value=True)
+ @mock.patch('cephadm.logger')
+ def test_container_engine(self, _logger, _find_executable, _check_time_sync):
+ ctx = _cephadm.CephadmContext()
+
+ ctx.container_engine = None
+ err = r'No container engine binary found'
+ with pytest.raises(_cephadm.Error, match=err):
+ _cephadm.command_check_host(ctx)
+
+ ctx.container_engine = mock_podman()
+ _cephadm.command_check_host(ctx)
+
+ ctx.container_engine = mock_docker()
+ _cephadm.command_check_host(ctx)
+
+
+class TestRmRepo:
+
+ @pytest.mark.parametrize('os_release',
+ [
+ # Apt
+ dedent("""
+ NAME="Ubuntu"
+ VERSION="20.04 LTS (Focal Fossa)"
+ ID=ubuntu
+ ID_LIKE=debian
+ PRETTY_NAME="Ubuntu 20.04 LTS"
+ VERSION_ID="20.04"
+ HOME_URL="https://www.ubuntu.com/"
+ SUPPORT_URL="https://help.ubuntu.com/"
+ BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+ PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+ VERSION_CODENAME=focal
+ UBUNTU_CODENAME=focal
+ """),
+
+ # YumDnf
+ dedent("""
+ NAME="CentOS Linux"
+ VERSION="8 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="8"
+ PLATFORM_ID="platform:el8"
+ PRETTY_NAME="CentOS Linux 8 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:8"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-8"
+ CENTOS_MANTISBT_PROJECT_VERSION="8"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="8"
+ """),
+
+ # Zypper
+ dedent("""
+ NAME="openSUSE Tumbleweed"
+ # VERSION="20210810"
+ ID="opensuse-tumbleweed"
+ ID_LIKE="opensuse suse"
+ VERSION_ID="20210810"
+ PRETTY_NAME="openSUSE Tumbleweed"
+ ANSI_COLOR="0;32"
+ CPE_NAME="cpe:/o:opensuse:tumbleweed:20210810"
+ BUG_REPORT_URL="https://bugs.opensuse.org"
+ HOME_URL="https://www.opensuse.org/"
+ DOCUMENTATION_URL="https://en.opensuse.org/Portal:Tumbleweed"
+ LOGO="distributor-logo"
+ """),
+ ])
+ @mock.patch('cephadm.find_executable', return_value='foo')
+ def test_container_engine(self, _find_executable, os_release, cephadm_fs):
+ cephadm_fs.create_file('/etc/os-release', contents=os_release)
+ ctx = _cephadm.CephadmContext()
+
+ ctx.container_engine = None
+ _cephadm.command_rm_repo(ctx)
+
+ ctx.container_engine = mock_podman()
+ _cephadm.command_rm_repo(ctx)
+
+ ctx.container_engine = mock_docker()
+ _cephadm.command_rm_repo(ctx)
+
+
+class TestValidateRepo:
+
+ @pytest.mark.parametrize('values',
+ [
+ # Apt - no checks
+ dict(
+ version="",
+ release="pacific",
+ err_text="",
+ os_release=dedent("""
+ NAME="Ubuntu"
+ VERSION="20.04 LTS (Focal Fossa)"
+ ID=ubuntu
+ ID_LIKE=debian
+ PRETTY_NAME="Ubuntu 20.04 LTS"
+ VERSION_ID="20.04"
+ HOME_URL="https://www.ubuntu.com/"
+ SUPPORT_URL="https://help.ubuntu.com/"
+ BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+ PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+ VERSION_CODENAME=focal
+ UBUNTU_CODENAME=focal
+ """)),
+
+ # YumDnf on Centos8 - OK
+ dict(
+ version="",
+ release="pacific",
+ err_text="",
+ os_release=dedent("""
+ NAME="CentOS Linux"
+ VERSION="8 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="8"
+ PLATFORM_ID="platform:el8"
+ PRETTY_NAME="CentOS Linux 8 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:8"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-8"
+ CENTOS_MANTISBT_PROJECT_VERSION="8"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="8"
+ """)),
+
+ # YumDnf on Fedora - Fedora not supported
+ dict(
+ version="",
+ release="pacific",
+ err_text="does not build Fedora",
+ os_release=dedent("""
+ NAME="Fedora Linux"
+ VERSION="35 (Cloud Edition)"
+ ID=fedora
+ VERSION_ID=35
+ VERSION_CODENAME=""
+ PLATFORM_ID="platform:f35"
+ PRETTY_NAME="Fedora Linux 35 (Cloud Edition)"
+ ANSI_COLOR="0;38;2;60;110;180"
+ LOGO=fedora-logo-icon
+ CPE_NAME="cpe:/o:fedoraproject:fedora:35"
+ HOME_URL="https://fedoraproject.org/"
+ DOCUMENTATION_URL="https://docs.fedoraproject.org/en-US/fedora/f35/system-administrators-guide/"
+ SUPPORT_URL="https://ask.fedoraproject.org/"
+ BUG_REPORT_URL="https://bugzilla.redhat.com/"
+ REDHAT_BUGZILLA_PRODUCT="Fedora"
+ REDHAT_BUGZILLA_PRODUCT_VERSION=35
+ REDHAT_SUPPORT_PRODUCT="Fedora"
+ REDHAT_SUPPORT_PRODUCT_VERSION=35
+ PRIVACY_POLICY_URL="https://fedoraproject.org/wiki/Legal:PrivacyPolicy"
+ VARIANT="Cloud Edition"
+ VARIANT_ID=cloud
+ """)),
+
+ # YumDnf on Centos 7 - no pacific
+ dict(
+ version="",
+ release="pacific",
+ err_text="does not support pacific",
+ os_release=dedent("""
+ NAME="CentOS Linux"
+ VERSION="7 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="7"
+ PRETTY_NAME="CentOS Linux 7 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:7"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-7"
+ CENTOS_MANTISBT_PROJECT_VERSION="7"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="7"
+ """)),
+
+ # YumDnf on Centos 7 - nothing after pacific
+ dict(
+ version="",
+ release="zillions",
+ err_text="does not support pacific",
+ os_release=dedent("""
+ NAME="CentOS Linux"
+ VERSION="7 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="7"
+ PRETTY_NAME="CentOS Linux 7 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:7"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-7"
+ CENTOS_MANTISBT_PROJECT_VERSION="7"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="7"
+ """)),
+
+        # YumDnf on Centos 7 - no v16 or higher builds
+ dict(
+ version="v16.1.3",
+ release="",
+ err_text="does not support",
+ os_release=dedent("""
+ NAME="CentOS Linux"
+ VERSION="7 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="7"
+ PRETTY_NAME="CentOS Linux 7 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:7"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-7"
+ CENTOS_MANTISBT_PROJECT_VERSION="7"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="7"
+ """)),
+ ])
+ @mock.patch('cephadm.find_executable', return_value='foo')
+ def test_distro_validation(self, _find_executable, values, cephadm_fs):
+ os_release = values['os_release']
+ release = values['release']
+ version = values['version']
+ err_text = values['err_text']
+
+ cephadm_fs.create_file('/etc/os-release', contents=os_release)
+ ctx = _cephadm.CephadmContext()
+ ctx.repo_url = 'http://localhost'
+ pkg = _cephadm.create_packager(ctx, stable=release, version=version)
+
+ if err_text:
+ with pytest.raises(_cephadm.Error, match=err_text):
+ pkg.validate()
+ else:
+ with mock.patch('cephadm.urlopen', return_value=None):
+ pkg.validate()
+
+ @pytest.mark.parametrize('values',
+ [
+ # Apt - not checked
+ dict(
+ version="",
+ release="pacific",
+ err_text="",
+ os_release=dedent("""
+ NAME="Ubuntu"
+ VERSION="20.04 LTS (Focal Fossa)"
+ ID=ubuntu
+ ID_LIKE=debian
+ PRETTY_NAME="Ubuntu 20.04 LTS"
+ VERSION_ID="20.04"
+ HOME_URL="https://www.ubuntu.com/"
+ SUPPORT_URL="https://help.ubuntu.com/"
+ BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+ PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+ VERSION_CODENAME=focal
+ UBUNTU_CODENAME=focal
+ """)),
+
+ # YumDnf on Centos8 - force failure
+ dict(
+ version="",
+ release="foobar",
+ err_text="failed to fetch repository metadata",
+ os_release=dedent("""
+ NAME="CentOS Linux"
+ VERSION="8 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="8"
+ PLATFORM_ID="platform:el8"
+ PRETTY_NAME="CentOS Linux 8 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:8"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-8"
+ CENTOS_MANTISBT_PROJECT_VERSION="8"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="8"
+ """)),
+ ])
+ @mock.patch('cephadm.find_executable', return_value='foo')
+ @mock.patch('cephadm.logger')
+ def test_http_validation(self, _logger, _find_executable, values, cephadm_fs):
+ from urllib.error import HTTPError
+
+ os_release = values['os_release']
+ release = values['release']
+ version = values['version']
+ err_text = values['err_text']
+
+ cephadm_fs.create_file('/etc/os-release', contents=os_release)
+ ctx = _cephadm.CephadmContext()
+ ctx.repo_url = 'http://localhost'
+ pkg = _cephadm.create_packager(ctx, stable=release, version=version)
+
+ with mock.patch('cephadm.urlopen') as _urlopen:
+ _urlopen.side_effect = HTTPError(ctx.repo_url, 404, "not found", None, fp=None)
+ if err_text:
+ with pytest.raises(_cephadm.Error, match=err_text):
+ pkg.validate()
+ else:
+ pkg.validate()
+
+
+class TestPull:
+
+ @mock.patch('time.sleep')
+ @mock.patch('cephadm.call', return_value=('', '', 0))
+ @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
+ @mock.patch('cephadm.logger')
+ def test_error(self, _logger, _get_image_info_from_inspect, _call, _sleep):
+ ctx = _cephadm.CephadmContext()
+ ctx.container_engine = mock_podman()
+ ctx.insecure = False
+
+ _call.return_value = ('', '', 0)
+ retval = _cephadm.command_pull(ctx)
+ assert retval == 0
+
+ err = 'maximum retries reached'
+
+ _call.return_value = ('', 'foobar', 1)
+ with pytest.raises(_cephadm.Error) as e:
+ _cephadm.command_pull(ctx)
+ assert err not in str(e.value)
+
+ _call.return_value = ('', 'net/http: TLS handshake timeout', 1)
+ with pytest.raises(_cephadm.Error) as e:
+ _cephadm.command_pull(ctx)
+ assert err in str(e.value)
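+        # Only failures that look transient (such as the TLS handshake
+        # timeout above) trigger the retry loop, which is why only the
+        # second error is wrapped in the 'maximum retries reached' message.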
+
+ @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
+ @mock.patch('cephadm.infer_local_ceph_image', return_value='last_local_ceph_image')
+ def test_image(self, _infer_local_ceph_image, _get_image_info_from_inspect):
+ cmd = ['pull']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_pull(ctx)
+ assert retval == 0
+ assert ctx.image == _cephadm.DEFAULT_IMAGE
+
+ with mock.patch.dict(os.environ, {"CEPHADM_IMAGE": 'cephadm_image_environ'}):
+ cmd = ['pull']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_pull(ctx)
+ assert retval == 0
+ assert ctx.image == 'cephadm_image_environ'
+
+ cmd = ['--image', 'cephadm_image_param', 'pull']
+ with with_cephadm_ctx(cmd) as ctx:
+ retval = _cephadm.command_pull(ctx)
+ assert retval == 0
+ assert ctx.image == 'cephadm_image_param'
+
+
+class TestApplySpec:
+
+ def test_extract_host_info_from_applied_spec(self, cephadm_fs):
+ yaml = '''---
+service_type: host
+hostname: vm-00
+addr: 192.168.122.44
+labels:
+ - example1
+ - example2
+---
+service_type: host
+hostname: vm-01
+addr: 192.168.122.247
+labels:
+ - grafana
+---
+service_type: host
+hostname: vm-02
+---
+---
+service_type: rgw
+service_id: myrgw
+spec:
+ rgw_frontend_ssl_certificate: |
+ -----BEGIN PRIVATE KEY-----
+ V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
+ ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
+ IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
+ YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
+ ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
+ -----END PRIVATE KEY-----
+ -----BEGIN CERTIFICATE-----
+ V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
+ ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
+ IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
+ YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
+ ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
+ -----END CERTIFICATE-----
+ ssl: true
+---
+'''
+
+ cephadm_fs.create_file('spec.yml', contents=yaml)
+ retdic = [{'hostname': 'vm-00', 'addr': '192.168.122.44'},
+ {'hostname': 'vm-01', 'addr': '192.168.122.247'},
+ {'hostname': 'vm-02',}]
+
+ with open('spec.yml') as f:
+ dic = _cephadm._extract_host_info_from_applied_spec(f)
+ assert dic == retdic
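+        # The spec file above is a multi-document YAML stream; a minimal
+        # sketch of splitting out the host documents (assuming PyYAML;
+        # an illustration, not cephadm's actual parsing code):
+        #
+        #   import yaml
+        #   with open('spec.yml') as f:
+        #       hosts = [d for d in yaml.safe_load_all(f)
+        #                if d and d.get('service_type') == 'host']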
+
+ @mock.patch('cephadm.call', return_value=('', '', 0))
+ @mock.patch('cephadm.logger')
+ def test_distribute_ssh_keys(self, _logger, _call):
+ ctx = _cephadm.CephadmContext()
+ ctx.ssh_public_key = None
+ ctx.ssh_user = 'root'
+
+ host_spec = {'service_type': 'host', 'hostname': 'vm-02', 'addr': '192.168.122.165'}
+
+ retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname')
+
+ assert retval == 0
+
+ _call.return_value = ('', '', 1)
+
+ retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname')
+
+ assert retval == 1
+
+
+class TestSNMPGateway:
+ V2c_config = {
+ 'snmp_community': 'public',
+ 'destination': '192.168.1.10:162',
+ 'snmp_version': 'V2c',
+ }
+ V3_no_priv_config = {
+ 'destination': '192.168.1.10:162',
+ 'snmp_version': 'V3',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ }
+ V3_priv_config = {
+ 'destination': '192.168.1.10:162',
+ 'snmp_version': 'V3',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_priv_protocol': 'DES',
+ 'snmp_v3_priv_password': 'mysecret',
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ }
+ no_destination_config = {
+ 'snmp_version': 'V3',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_priv_protocol': 'DES',
+ 'snmp_v3_priv_password': 'mysecret',
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ }
+ bad_version_config = {
+ 'snmp_community': 'public',
+ 'destination': '192.168.1.10:162',
+ 'snmp_version': 'V1',
+ }
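+    # Each config dict above maps roughly one-to-one onto snmp-notifier CLI
+    # flags (see the unit.run assertions in the tests below): 'snmp_version'
+    # selects --snmp.version, 'destination' becomes --snmp.destination, and
+    # the V3 auth/priv fields enable the corresponding --snmp.* options.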
+
+ def test_unit_run_V2c(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.V2c_config)
+ ctx.fsid = fsid
+ ctx.tcp_ports = '9464'
+ _cephadm.get_parm.return_value = self.V2c_config
+ c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+ conf = f.read().rstrip()
+ assert conf == 'SNMP_NOTIFIER_COMMUNITY=public'
+
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'snmp-gateway',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl')
+
+ def test_unit_run_V3_noPriv(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.V3_no_priv_config)
+ ctx.fsid = fsid
+ ctx.tcp_ports = '9465'
+ _cephadm.get_parm.return_value = self.V3_no_priv_config
+ c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+ conf = f.read()
+ assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\n'
+
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'snmp-gateway',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000')
+
+ def test_unit_run_V3_Priv(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.V3_priv_config)
+ ctx.fsid = fsid
+ ctx.tcp_ports = '9464'
+ _cephadm.get_parm.return_value = self.V3_priv_config
+ c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id')
+
+ _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0)
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f:
+ conf = f.read()
+ assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\nSNMP_NOTIFIER_PRIV_PASSWORD=mysecret\n'
+
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'snmp-gateway',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES')
+
+ def test_unit_run_no_dest(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.no_destination_config)
+ ctx.fsid = fsid
+ ctx.tcp_ports = '9464'
+ _cephadm.get_parm.return_value = self.no_destination_config
+
+            with pytest.raises(Exception) as e:
+                c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+            assert str(e.value) == "config is missing destination attribute(<ip>:<port>) of the target SNMP listener"
+
+ def test_unit_run_bad_version(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.bad_version_config)
+ ctx.fsid = fsid
+ ctx.tcp_ports = '9464'
+ _cephadm.get_parm.return_value = self.bad_version_config
+
+            with pytest.raises(Exception) as e:
+                c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id')
+            assert str(e.value) == 'not a valid snmp version: V1'
+
+
+class TestNetworkValidation:
+
+ def test_ipv4_subnet(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.1.0/24')
+ assert rc == 0 and v[0] == 4
+
+ def test_ipv4_subnet_list(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,10.90.90.0/24')
+ assert rc == 0 and not msg
+
+ def test_ipv4_subnet_list_with_spaces(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.1.0/24, 10.90.90.0/24 ')
+ assert rc == 0 and not msg
+
+ def test_ipv4_subnet_badlist(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,192.168.1.1')
+ assert rc == 1 and msg
+
+ def test_ipv4_subnet_mixed(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.100.0/24,fe80::/64')
+ assert rc == 0 and v == [4,6]
+
+ def test_ipv6_subnet(self):
+ rc, v, msg = _cephadm.check_subnet('fe80::/64')
+ assert rc == 0 and v[0] == 6
+
+ def test_subnet_mask_missing(self):
+ rc, v, msg = _cephadm.check_subnet('192.168.1.58')
+ assert rc == 1 and msg
+
+ def test_subnet_mask_junk(self):
+ rc, v, msg = _cephadm.check_subnet('wah')
+ assert rc == 1 and msg
+
+ def test_ip_in_subnet(self):
+ # valid ip and only one valid subnet
+ rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24')
+ assert rc is True
+
+ # valid ip and valid subnets list without spaces
+ rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24,10.90.90.0/24')
+ assert rc is True
+
+ # valid ip and valid subnets list with spaces
+ rc = _cephadm.ip_in_subnets('10.90.90.2', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24')
+ assert rc is True
+
+ # valid ip that doesn't belong to any subnet
+ rc = _cephadm.ip_in_subnets('192.168.100.2', '192.168.50.0/24, 10.90.90.0/24')
+ assert rc is False
+
+ # valid ip that doesn't belong to the subnet (only 14 hosts)
+ rc = _cephadm.ip_in_subnets('192.168.100.20', '192.168.100.0/28')
+ assert rc is False
+
+ # valid ip and valid IPV6 network
+ rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', 'fe80::/64')
+ assert rc is True
+
+ # valid wrapped ip and valid IPV6 network
+ rc = _cephadm.ip_in_subnets('[fe80::5054:ff:fef4:873a]', 'fe80::/64')
+ assert rc is True
+
+        # valid ip that doesn't belong to the IPV6 network
+ rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', '2001:db8:85a3::/64')
+ assert rc is False
+
+ # invalid IPv4 and valid subnets list
+ with pytest.raises(Exception):
+            rc = _cephadm.ip_in_subnets('10.90.200.', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24')
+
+ # invalid IPv6 and valid subnets list
+ with pytest.raises(Exception):
+            rc = _cephadm.ip_in_subnets('fe80:2030:31:24', 'fe80::/64')
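+        # A minimal sketch of the membership check exercised above, using
+        # only the standard library (an illustration, not cephadm's code):
+        #
+        #   import ipaddress
+        #   def in_subnets(ip: str, subnets: str) -> bool:
+        #       addr = ipaddress.ip_address(ip.strip('[]'))
+        #       return any(addr in ipaddress.ip_network(s.strip())
+        #                  for s in subnets.split(','))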
+
+ @pytest.mark.parametrize("conf", [
+ """[global]
+public_network='1.1.1.0/24,2.2.2.0/24'
+cluster_network="3.3.3.0/24, 4.4.4.0/24"
+""",
+ """[global]
+public_network=" 1.1.1.0/24,2.2.2.0/24 "
+cluster_network=3.3.3.0/24, 4.4.4.0/24
+""",
+ """[global]
+ public_network= 1.1.1.0/24, 2.2.2.0/24
+ cluster_network='3.3.3.0/24,4.4.4.0/24'
+"""])
+ @mock.patch('cephadm.list_networks')
+ @mock.patch('cephadm.logger')
+ def test_get_networks_from_conf(self, _logger, _list_networks, conf, cephadm_fs):
+ cephadm_fs.create_file('ceph.conf', contents=conf)
+ _list_networks.return_value = {'1.1.1.0/24': {'eth0': ['1.1.1.1']},
+ '2.2.2.0/24': {'eth1': ['2.2.2.2']},
+ '3.3.3.0/24': {'eth2': ['3.3.3.3']},
+ '4.4.4.0/24': {'eth3': ['4.4.4.4']}}
+ ctx = _cephadm.CephadmContext()
+ ctx.config = 'ceph.conf'
+ ctx.mon_ip = '1.1.1.1'
+ ctx.cluster_network = None
+        # the cephadm mgr module post-processes the public network string
+        # with [x.strip() for x in out.split(',')], so we must make sure
+        # that our output still yields correctly formatted networks after
+        # that transformation
+ def _str_to_networks(s):
+ return [x.strip() for x in s.split(',')]
+ public_network = _cephadm.get_public_net_from_cfg(ctx)
+ assert _str_to_networks(public_network) == ['1.1.1.0/24', '2.2.2.0/24']
+ cluster_network, ipv6 = _cephadm.prepare_cluster_network(ctx)
+ assert not ipv6
+ assert _str_to_networks(cluster_network) == ['3.3.3.0/24', '4.4.4.0/24']
+
+
+class TestSysctl:
+ @mock.patch('cephadm.sysctl_get')
+ def test_filter_sysctl_settings(self, _sysctl_get):
+ ctx = _cephadm.CephadmContext()
+ input = [
+ # comment-only lines should be ignored
+ "# just a comment",
+            # as should whitespace-only lines
+ " \t ",
+ " = \t ",
+ # inline comments are stripped when querying
+ "something = value # inline comment",
+ "fs.aio-max-nr = 1048576",
+ "kernel.pid_max = 4194304",
+ "vm.lowmem_reserve_ratio = 256\t256\t32\t0\t0",
+ " vm.max_map_count = 65530 ",
+ " vm.max_map_count = 65530 ",
+ ]
+ _sysctl_get.side_effect = [
+ "value",
+ "1",
+ "4194304",
+ "256\t256\t32\t0\t0",
+ "65530",
+ "something else",
+ ]
+ result = _cephadm.filter_sysctl_settings(ctx, input)
+ assert len(_sysctl_get.call_args_list) == 6
+ assert _sysctl_get.call_args_list[0].args[1] == "something"
+ assert _sysctl_get.call_args_list[1].args[1] == "fs.aio-max-nr"
+ assert _sysctl_get.call_args_list[2].args[1] == "kernel.pid_max"
+ assert _sysctl_get.call_args_list[3].args[1] == "vm.lowmem_reserve_ratio"
+ assert _sysctl_get.call_args_list[4].args[1] == "vm.max_map_count"
+ assert _sysctl_get.call_args_list[5].args[1] == "vm.max_map_count"
+ assert result == [
+ "fs.aio-max-nr = 1048576",
+ " vm.max_map_count = 65530 ",
+ ]
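+        # In essence the filter drops every setting whose current kernel
+        # value already matches the requested one; roughly (an illustrative
+        # sketch, not the actual implementation):
+        #
+        #   def wanted(ctx, line: str) -> bool:
+        #       key, _, value = line.split('#')[0].partition('=')
+        #       return bool(key.strip()) and \
+        #           sysctl_get(ctx, key.strip()) != value.strip()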
+
+
+class TestJaeger:
+ single_es_node_conf = {
+ 'elasticsearch_nodes': 'http://192.168.0.1:9200'}
+ multiple_es_nodes_conf = {
+ 'elasticsearch_nodes': 'http://192.168.0.1:9200,http://192.168.0.2:9300'}
+ agent_conf = {
+ 'collector_nodes': 'test:14250'}
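+    # The collector configs above surface as container environment variables
+    # (SPAN_STORAGE_TYPE and ES_SERVER_URLS), while the agent's
+    # 'collector_nodes' becomes the --reporter.grpc.host-port flag; see the
+    # unit.run assertions in the tests below.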
+
+ def test_single_es(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.single_es_node_conf)
+ ctx.fsid = fsid
+ c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id')
+ _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0)
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'jaeger-collector',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200 quay.io/jaegertracing/jaeger-collector:1.29')
+
+ def test_multiple_es(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.multiple_es_nodes_conf)
+ ctx.fsid = fsid
+ c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id')
+ _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0)
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'jaeger-collector',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200,http://192.168.0.2:9300 quay.io/jaegertracing/jaeger-collector:1.29')
+
+ def test_jaeger_agent(self, cephadm_fs):
+ fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
+ with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-agent:1.29'], list_networks={}) as ctx:
+ import json
+ ctx.config_json = json.dumps(self.agent_conf)
+ ctx.fsid = fsid
+ c = _cephadm.get_container(ctx, fsid, 'jaeger-agent', 'daemon_id')
+ _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-agent', 'daemon_id', 0, 0)
+ _cephadm.deploy_daemon_units(
+ ctx,
+ fsid,
+ 0, 0,
+ 'jaeger-agent',
+ 'daemon_id',
+ c,
+ True, True
+ )
+ with open(f'/var/lib/ceph/{fsid}/jaeger-agent.daemon_id/unit.run', 'r') as f:
+ run_cmd = f.readlines()[-1].rstrip()
+ assert run_cmd.endswith('quay.io/jaegertracing/jaeger-agent:1.29 --reporter.grpc.host-port=test:14250 --processor.jaeger-compact.server-host-port=6799')
+
+
+class TestRescan(fake_filesystem_unittest.TestCase):
+
+ def setUp(self):
+ self.setUpPyfakefs()
+ if not fake_filesystem.is_root():
+ fake_filesystem.set_uid(0)
+
+ self.fs.create_dir('/sys/class')
+ self.ctx = _cephadm.CephadmContext()
+ self.ctx.func = _cephadm.command_rescan_disks
+
+ @mock.patch('cephadm.logger')
+ def test_no_hbas(self, _logger):
+ out = _cephadm.command_rescan_disks(self.ctx)
+ assert out == 'Ok. No compatible HBAs found'
+
+ @mock.patch('cephadm.logger')
+ def test_success(self, _logger):
+ self.fs.create_file('/sys/class/scsi_host/host0/scan')
+ self.fs.create_file('/sys/class/scsi_host/host1/scan')
+ out = _cephadm.command_rescan_disks(self.ctx)
+ assert out.startswith('Ok. 2 adapters detected: 2 rescanned, 0 skipped, 0 failed')
+
+ @mock.patch('cephadm.logger')
+ def test_skip_usb_adapter(self, _logger):
+ self.fs.create_file('/sys/class/scsi_host/host0/scan')
+ self.fs.create_file('/sys/class/scsi_host/host1/scan')
+ self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='usb-storage')
+ out = _cephadm.command_rescan_disks(self.ctx)
+ assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed')
+
+ @mock.patch('cephadm.logger')
+ def test_skip_unknown_adapter(self, _logger):
+ self.fs.create_file('/sys/class/scsi_host/host0/scan')
+ self.fs.create_file('/sys/class/scsi_host/host1/scan')
+ self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='unknown')
+ out = _cephadm.command_rescan_disks(self.ctx)
+ assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed')
diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py
new file mode 100644
index 000000000..433f01270
--- /dev/null
+++ b/src/cephadm/tests/test_container_engine.py
@@ -0,0 +1,54 @@
+from unittest import mock
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+def test_container_engine():
+ with pytest.raises(NotImplementedError):
+ _cephadm.ContainerEngine()
+
+ class PhonyContainerEngine(_cephadm.ContainerEngine):
+ EXE = "true"
+
+ with mock.patch("cephadm.find_program") as find_program:
+ find_program.return_value = "/usr/bin/true"
+ pce = PhonyContainerEngine()
+ assert str(pce) == "true (/usr/bin/true)"
+
+
+def test_podman():
+ with mock.patch("cephadm.find_program") as find_program:
+ find_program.return_value = "/usr/bin/podman"
+ pm = _cephadm.Podman()
+ find_program.assert_called()
+ with pytest.raises(RuntimeError):
+ pm.version
+ with mock.patch("cephadm.call_throws") as call_throws:
+ call_throws.return_value = ("4.9.9", None, None)
+ with with_cephadm_ctx([]) as ctx:
+ pm.get_version(ctx)
+ assert pm.version == (4, 9, 9)
+ assert str(pm) == "podman (/usr/bin/podman) version 4.9.9"
+
+
+def test_podman_badversion():
+ with mock.patch("cephadm.find_program") as find_program:
+ find_program.return_value = "/usr/bin/podman"
+ pm = _cephadm.Podman()
+ find_program.assert_called()
+ with mock.patch("cephadm.call_throws") as call_throws:
+ call_throws.return_value = ("4.10.beta2", None, None)
+ with with_cephadm_ctx([]) as ctx:
+ with pytest.raises(ValueError):
+ pm.get_version(ctx)
+
+
+def test_docker():
+ with mock.patch("cephadm.find_program") as find_program:
+ find_program.return_value = "/usr/bin/docker"
+ docker = _cephadm.Docker()
+ assert str(docker) == "docker (/usr/bin/docker)"
diff --git a/src/cephadm/tests/test_enclosure.py b/src/cephadm/tests/test_enclosure.py
new file mode 100644
index 000000000..1ea419fb3
--- /dev/null
+++ b/src/cephadm/tests/test_enclosure.py
@@ -0,0 +1,72 @@
+import pytest
+
+from unittest import mock
+from tests.fixtures import host_sysfs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+@pytest.fixture
+def enclosure(host_sysfs):
+ e = _cephadm.Enclosure(
+ enc_id='1',
+ enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0',
+ dev_path='/sys/class/scsi_generic/sg2')
+ yield e
+
+
+class TestEnclosure:
+
+ def test_enc_metadata(self, enclosure):
+ """Check metadata for the enclosure e.g. vendor and model"""
+
+ assert enclosure.vendor == "EnclosuresInc"
+ assert enclosure.components == '12'
+ assert enclosure.model == "D12"
+ assert enclosure.enc_id == '1'
+
+ assert enclosure.ses_paths == ['sg2']
+ assert enclosure.path_count == 1
+
+ def test_enc_slots(self, enclosure):
+ """Check slot count"""
+
+ assert len(enclosure.slot_map) == 12
+
+ def test_enc_slot_format(self, enclosure):
+ """Check the attributes of a slot are as expected"""
+
+ assert all(k in ['fault', 'locate', 'serial', 'status']
+ for k, _v in enclosure.slot_map['0'].items())
+
+ def test_enc_slot_status(self, enclosure):
+ """Check the number of occupied slots is correct"""
+
+ occupied_slots = [slot_id for slot_id in enclosure.slot_map
+ if enclosure.slot_map[slot_id].get('status').upper() == 'OK']
+
+ assert len(occupied_slots) == 6
+
+ def test_enc_disk_count(self, enclosure):
+ """Check the disks found matches the slot info"""
+
+ assert len(enclosure.device_lookup) == 6
+ assert enclosure.device_count == 6
+
+ def test_enc_device_serial(self, enclosure):
+ """Check the device serial numbers are as expected"""
+
+ assert all(fake_serial in enclosure.device_lookup.keys()
+ for fake_serial in [
+ 'fake000',
+ 'fake001',
+ 'fake002',
+ 'fake003',
+ 'fake004',
+ 'fake005'])
+
+ def test_enc_slot_to_serial(self, enclosure):
+ """Check serial number to slot matches across slot_map and device_lookup"""
+
+ for serial, slot in enclosure.device_lookup.items():
+ assert enclosure.slot_map[slot].get('serial') == serial
diff --git a/src/cephadm/tests/test_ingress.py b/src/cephadm/tests/test_ingress.py
new file mode 100644
index 000000000..798c73708
--- /dev/null
+++ b/src/cephadm/tests/test_ingress.py
@@ -0,0 +1,350 @@
+from unittest import mock
+import json
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae"
+SAMPLE_HAPROXY_IMAGE = "registry.example.net/haproxy/haproxy:latest"
+SAMPLE_KEEPALIVED_IMAGE = "registry.example.net/keepalive/keepalived:latest"
+
+
+def good_haproxy_json():
+ return haproxy_json(files=True)
+
+
+def haproxy_json(**kwargs):
+ if kwargs.get("files"):
+ return {
+ "files": {
+ "haproxy.cfg": "",
+ },
+ }
+ return {}
+
+
+def good_keepalived_json():
+ return keepalived_json(files=True)
+
+
+def keepalived_json(**kwargs):
+ if kwargs.get("files"):
+ return {
+ "files": {
+ "keepalived.conf": "",
+ },
+ }
+ return {}
+
+
+@pytest.mark.parametrize(
+ "args",
+ # args: <fsid>, <daemon_id>, <config_json>, <image>
+ [
+ # fail due to: invalid fsid
+ (["foobar", "wilma", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]),
+ # fail due to: invalid daemon_id
+ ([SAMPLE_UUID, "", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]),
+ # fail due to: invalid image
+ ([SAMPLE_UUID, "wilma", good_haproxy_json(), ""]),
+ # fail due to: no files in config_json
+ (
+ [
+ SAMPLE_UUID,
+ "wilma",
+ haproxy_json(files=False),
+ SAMPLE_HAPROXY_IMAGE,
+ ]
+ ),
+ ],
+)
+def test_haproxy_validation_errors(args):
+ with pytest.raises(_cephadm.Error):
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.HAproxy(ctx, *args)
+
+
+def test_haproxy_init():
+ with with_cephadm_ctx([]) as ctx:
+ ctx.config_json = json.dumps(good_haproxy_json())
+ ctx.image = SAMPLE_HAPROXY_IMAGE
+ hap = _cephadm.HAproxy.init(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ )
+ assert hap.fsid == SAMPLE_UUID
+ assert hap.daemon_id == "wilma"
+ assert hap.image == SAMPLE_HAPROXY_IMAGE
+
+
+def test_haproxy_container_mounts():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ cmounts = hap.get_container_mounts("/var/tmp")
+ assert len(cmounts) == 1
+ assert cmounts["/var/tmp/haproxy"] == "/var/lib/haproxy"
+
+
+def test_haproxy_get_daemon_name():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ assert hap.get_daemon_name() == "haproxy.wilma"
+
+
+def test_haproxy_get_container_name():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ name1 = hap.get_container_name()
+ assert (
+ name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma"
+ )
+ name2 = hap.get_container_name(desc="extra")
+ assert (
+ name2
+ == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma-extra"
+ )
+
+
+def test_haproxy_get_daemon_args():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ args = hap.get_daemon_args()
+ assert args == ["haproxy", "-f", "/var/lib/haproxy/haproxy.cfg"]
+
+
+@mock.patch("cephadm.logger")
+def test_haproxy_create_daemon_dirs(_logger, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ with pytest.raises(OSError):
+ hap.create_daemon_dirs("/var/tmp", 45, 54)
+ cephadm_fs.create_dir("/var/tmp")
+ hap.create_daemon_dirs("/var/tmp", 45, 54)
+ # TODO: make assertions about the dirs created
+
+
+def test_haproxy_extract_uid_gid_haproxy():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ with mock.patch("cephadm.CephContainer") as cc:
+ cc.return_value.run.return_value = "500 500"
+ uid, gid = hap.extract_uid_gid_haproxy()
+ cc.return_value.run.assert_called()
+ assert uid == 500
+ assert gid == 500
+
+
+def test_haproxy_get_sysctl_settings():
+ with with_cephadm_ctx([]) as ctx:
+ hap = _cephadm.HAproxy(
+ ctx,
+ SAMPLE_UUID,
+ "wilma",
+ good_haproxy_json(),
+ SAMPLE_HAPROXY_IMAGE,
+ )
+ ss = hap.get_sysctl_settings()
+ assert len(ss) == 3
+
+
+@pytest.mark.parametrize(
+ "args",
+ # args: <fsid>, <daemon_id>, <config_json>, <image>
+ [
+ # fail due to: invalid fsid
+ (
+ [
+ "foobar",
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ ]
+ ),
+ # fail due to: invalid daemon_id
+ ([SAMPLE_UUID, "", good_keepalived_json(), SAMPLE_KEEPALIVED_IMAGE]),
+ # fail due to: invalid image
+ ([SAMPLE_UUID, "barney", good_keepalived_json(), ""]),
+ # fail due to: no files in config_json
+ (
+ [
+ SAMPLE_UUID,
+ "barney",
+ keepalived_json(files=False),
+ SAMPLE_KEEPALIVED_IMAGE,
+ ]
+ ),
+ ],
+)
+def test_keepalived_validation_errors(args):
+ with pytest.raises(_cephadm.Error):
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.Keepalived(ctx, *args)
+
+
+def test_keepalived_init():
+ with with_cephadm_ctx([]) as ctx:
+ ctx.config_json = json.dumps(good_keepalived_json())
+ ctx.image = SAMPLE_KEEPALIVED_IMAGE
+ kad = _cephadm.Keepalived.init(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ )
+ assert kad.fsid == SAMPLE_UUID
+ assert kad.daemon_id == "barney"
+ assert kad.image == SAMPLE_KEEPALIVED_IMAGE
+
+
+def test_keepalived_container_mounts():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ cmounts = kad.get_container_mounts("/var/tmp")
+ assert len(cmounts) == 1
+ assert (
+ cmounts["/var/tmp/keepalived.conf"]
+ == "/etc/keepalived/keepalived.conf"
+ )
+
+
+def test_keepalived_get_daemon_name():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ assert kad.get_daemon_name() == "keepalived.barney"
+
+
+def test_keepalived_get_container_name():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ name1 = kad.get_container_name()
+ assert (
+ name1
+ == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney"
+ )
+ name2 = kad.get_container_name(desc="extra")
+ assert (
+ name2
+ == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney-extra"
+ )
+
+
+def test_keepalived_get_container_envs():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ args = kad.get_container_envs()
+ assert args == [
+ "KEEPALIVED_AUTOCONF=false",
+ "KEEPALIVED_CONF=/etc/keepalived/keepalived.conf",
+ "KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf",
+ "KEEPALIVED_DEBUG=false",
+ ]
+
+
+@mock.patch("cephadm.logger")
+def test_keepalived_create_daemon_dirs(_logger, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ with pytest.raises(OSError):
+ kad.create_daemon_dirs("/var/tmp", 45, 54)
+ cephadm_fs.create_dir("/var/tmp")
+ kad.create_daemon_dirs("/var/tmp", 45, 54)
+ # TODO: make assertions about the dirs created
+
+
+def test_keepalived_extract_uid_gid_keepalived():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ with mock.patch("cephadm.CephContainer") as cc:
+ cc.return_value.run.return_value = "500 500"
+ uid, gid = kad.extract_uid_gid_keepalived()
+ cc.return_value.run.assert_called()
+ assert uid == 500
+ assert gid == 500
+
+
+def test_keepalived_get_sysctl_settings():
+ with with_cephadm_ctx([]) as ctx:
+ kad = _cephadm.Keepalived(
+ ctx,
+ SAMPLE_UUID,
+ "barney",
+ good_keepalived_json(),
+ SAMPLE_KEEPALIVED_IMAGE,
+ )
+ ss = kad.get_sysctl_settings()
+ assert len(ss) == 3
diff --git a/src/cephadm/tests/test_networks.py b/src/cephadm/tests/test_networks.py
new file mode 100644
index 000000000..7c0575046
--- /dev/null
+++ b/src/cephadm/tests/test_networks.py
@@ -0,0 +1,233 @@
+import json
+from textwrap import dedent
+from unittest import mock
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+class TestCommandListNetworks:
+ @pytest.mark.parametrize("test_input, expected", [
+ (
+ dedent("""
+ default via 192.168.178.1 dev enxd89ef3f34260 proto dhcp metric 100
+ 10.0.0.0/8 via 10.4.0.1 dev tun0 proto static metric 50
+ 10.3.0.0/21 via 10.4.0.1 dev tun0 proto static metric 50
+ 10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50
+ 137.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+ 138.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+ 139.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+ 140.1.0.0/17 via 10.4.0.1 dev tun0 proto static metric 50
+ 141.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50
+ 172.16.100.34 via 172.16.100.34 dev eth1 proto kernel scope link src 172.16.100.34
+ 192.168.122.1 dev ens3 proto dhcp scope link src 192.168.122.236 metric 100
+ 169.254.0.0/16 dev docker0 scope link metric 1000
+ 172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1
+ 192.168.39.0/24 dev virbr1 proto kernel scope link src 192.168.39.1 linkdown
+ 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+ 192.168.178.0/24 dev enxd89ef3f34260 proto kernel scope link src 192.168.178.28 metric 100
+ 192.168.178.1 dev enxd89ef3f34260 proto static scope link metric 100
+ 195.135.221.12 via 192.168.178.1 dev enxd89ef3f34260 proto static metric 100
+ """),
+ {
+ '172.16.100.34/32': {'eth1': {'172.16.100.34'}},
+ '192.168.122.1/32': {'ens3': {'192.168.122.236'}},
+ '10.4.0.1/32': {'tun0': {'10.4.0.2'}},
+ '172.17.0.0/16': {'docker0': {'172.17.0.1'}},
+ '192.168.39.0/24': {'virbr1': {'192.168.39.1'}},
+ '192.168.122.0/24': {'virbr0': {'192.168.122.1'}},
+ '192.168.178.0/24': {'enxd89ef3f34260': {'192.168.178.28'}}
+ }
+ ), (
+ dedent("""
+ default via 10.3.64.1 dev eno1 proto static metric 100
+ 10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.23 metric 100
+ 10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.27 metric 100
+ 10.88.0.0/16 dev cni-podman0 proto kernel scope link src 10.88.0.1 linkdown
+ 172.21.0.0/20 via 172.21.3.189 dev tun0
+ 172.21.1.0/20 via 172.21.3.189 dev tun0
+ 172.21.2.1 via 172.21.3.189 dev tun0
+ 172.21.3.1 dev tun0 proto kernel scope link src 172.21.3.2
+ 172.21.4.0/24 via 172.21.3.1 dev tun0
+ 172.21.5.0/24 via 172.21.3.1 dev tun0
+ 172.21.6.0/24 via 172.21.3.1 dev tun0
+ 172.21.7.0/24 via 172.21.3.1 dev tun0
+ 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+ 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+ 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+ 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown
+ """),
+ {
+ '10.3.64.0/24': {'eno1': {'10.3.64.23', '10.3.64.27'}},
+ '10.88.0.0/16': {'cni-podman0': {'10.88.0.1'}},
+ '172.21.3.1/32': {'tun0': {'172.21.3.2'}},
+ '192.168.122.0/24': {'virbr0': {'192.168.122.1'}}
+ }
+ ),
+ ])
+ def test_parse_ipv4_route(self, test_input, expected):
+ assert _cephadm._parse_ipv4_route(test_input) == expected
+
+ @pytest.mark.parametrize("test_routes, test_ips, expected", [
+ (
+ dedent("""
+ ::1 dev lo proto kernel metric 256 pref medium
+ fe80::/64 dev eno1 proto kernel metric 100 pref medium
+ fe80::/64 dev br-3d443496454c proto kernel metric 256 linkdown pref medium
+ fe80::/64 dev tun0 proto kernel metric 256 pref medium
+ fe80::/64 dev br-4355f5dbb528 proto kernel metric 256 pref medium
+ fe80::/64 dev docker0 proto kernel metric 256 linkdown pref medium
+ fe80::/64 dev cni-podman0 proto kernel metric 256 linkdown pref medium
+ fe80::/64 dev veth88ba1e8 proto kernel metric 256 pref medium
+ fe80::/64 dev vethb6e5fc7 proto kernel metric 256 pref medium
+ fe80::/64 dev vethaddb245 proto kernel metric 256 pref medium
+ fe80::/64 dev vethbd14d6b proto kernel metric 256 pref medium
+ fe80::/64 dev veth13e8fd2 proto kernel metric 256 pref medium
+ fe80::/64 dev veth1d3aa9e proto kernel metric 256 pref medium
+ fe80::/64 dev vethe485ca9 proto kernel metric 256 pref medium
+ """),
+ dedent("""
+ 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+ inet6 ::1/128 scope host
+ valid_lft forever preferred_lft forever
+ 2: eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+ inet6 fe80::225:90ff:fee5:26e8/64 scope link noprefixroute
+ valid_lft forever preferred_lft forever
+ 6: br-3d443496454c: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN
+ inet6 fe80::42:23ff:fe9d:ee4/64 scope link
+ valid_lft forever preferred_lft forever
+ 7: br-4355f5dbb528: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::42:6eff:fe35:41fe/64 scope link
+ valid_lft forever preferred_lft forever
+ 8: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN
+ inet6 fe80::42:faff:fee6:40a0/64 scope link
+ valid_lft forever preferred_lft forever
+ 11: tun0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 state UNKNOWN qlen 100
+ inet6 fe80::98a6:733e:dafd:350/64 scope link stable-privacy
+ valid_lft forever preferred_lft forever
+ 28: cni-podman0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN qlen 1000
+ inet6 fe80::3449:cbff:fe89:b87e/64 scope link
+ valid_lft forever preferred_lft forever
+ 31: vethaddb245@if30: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::90f7:3eff:feed:a6bb/64 scope link
+ valid_lft forever preferred_lft forever
+ 33: veth88ba1e8@if32: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::d:f5ff:fe73:8c82/64 scope link
+ valid_lft forever preferred_lft forever
+ 35: vethbd14d6b@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::b44f:8ff:fe6f:813d/64 scope link
+ valid_lft forever preferred_lft forever
+ 37: vethb6e5fc7@if36: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::4869:c6ff:feaa:8afe/64 scope link
+ valid_lft forever preferred_lft forever
+ 39: veth13e8fd2@if38: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::78f4:71ff:fefe:eb40/64 scope link
+ valid_lft forever preferred_lft forever
+ 41: veth1d3aa9e@if40: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::24bd:88ff:fe28:5b18/64 scope link
+ valid_lft forever preferred_lft forever
+ 43: vethe485ca9@if42: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP
+ inet6 fe80::6425:87ff:fe42:b9f0/64 scope link
+ valid_lft forever preferred_lft forever
+ """),
+ {
+ "fe80::/64": {
+ "eno1": {"fe80::225:90ff:fee5:26e8"},
+ "br-3d443496454c": {"fe80::42:23ff:fe9d:ee4"},
+ "tun0": {"fe80::98a6:733e:dafd:350"},
+ "br-4355f5dbb528": {"fe80::42:6eff:fe35:41fe"},
+ "docker0": {"fe80::42:faff:fee6:40a0"},
+ "cni-podman0": {"fe80::3449:cbff:fe89:b87e"},
+ "veth88ba1e8": {"fe80::d:f5ff:fe73:8c82"},
+ "vethb6e5fc7": {"fe80::4869:c6ff:feaa:8afe"},
+ "vethaddb245": {"fe80::90f7:3eff:feed:a6bb"},
+ "vethbd14d6b": {"fe80::b44f:8ff:fe6f:813d"},
+ "veth13e8fd2": {"fe80::78f4:71ff:fefe:eb40"},
+ "veth1d3aa9e": {"fe80::24bd:88ff:fe28:5b18"},
+ "vethe485ca9": {"fe80::6425:87ff:fe42:b9f0"},
+ }
+ }
+ ),
+ (
+ dedent("""
+ ::1 dev lo proto kernel metric 256 pref medium
+ 2001:1458:301:eb::100:1a dev ens20f0 proto kernel metric 100 pref medium
+ 2001:1458:301:eb::/64 dev ens20f0 proto ra metric 100 pref medium
+ fd01:1458:304:5e::/64 dev ens20f0 proto ra metric 100 pref medium
+ fe80::/64 dev ens20f0 proto kernel metric 100 pref medium
+ default proto ra metric 100
+ nexthop via fe80::46ec:ce00:b8a0:d3c8 dev ens20f0 weight 1
+ nexthop via fe80::46ec:ce00:b8a2:33c8 dev ens20f0 weight 1 pref medium
+ """),
+ dedent("""
+ 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+ inet6 ::1/128 scope host
+ valid_lft forever preferred_lft forever
+ 2: ens20f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+ inet6 2001:1458:301:eb::100:1a/128 scope global dynamic noprefixroute
+ valid_lft 590879sec preferred_lft 590879sec
+ inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+ valid_lft forever preferred_lft forever
+ inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+ valid_lft forever preferred_lft forever
+ inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute
+ valid_lft forever preferred_lft forever
+ """),
+ {
+ '2001:1458:301:eb::100:1a/128': {
+ 'ens20f0': {
+ '2001:1458:301:eb::100:1a'
+ },
+ },
+ '2001:1458:301:eb::/64': {
+ 'ens20f0': set(),
+ },
+ 'fe80::/64': {
+ 'ens20f0': {'fe80::2e60:cff:fef8:da41'},
+ },
+ 'fd01:1458:304:5e::/64': {
+ 'ens20f0': set()
+ },
+ }
+ ),
+ (
+ dedent("""
+ ::1 dev lo proto kernel metric 256 pref medium
+ fe80::/64 dev ceph-brx proto kernel metric 256 pref medium
+ fe80::/64 dev brx.0 proto kernel metric 256 pref medium
+ default via fe80::327c:5e00:6487:71e0 dev enp3s0f1 proto ra metric 1024 expires 1790sec hoplimit 64 pref medium """),
+ dedent("""
+ 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000
+ inet6 ::1/128 scope host
+ valid_lft forever preferred_lft forever
+ 5: enp3s0f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+ inet6 fe80::ec4:7aff:fe8f:cb83/64 scope link noprefixroute
+ valid_lft forever preferred_lft forever
+ 6: ceph-brx: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+ inet6 fe80::d8a1:69ff:fede:8f58/64 scope link
+ valid_lft forever preferred_lft forever
+ 7: brx.0@eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
+ inet6 fe80::a4cb:54ff:fecc:f2a2/64 scope link
+ valid_lft forever preferred_lft forever
+ """),
+ {
+ 'fe80::/64': {
+ 'brx.0': {'fe80::a4cb:54ff:fecc:f2a2'},
+ 'ceph-brx': {'fe80::d8a1:69ff:fede:8f58'}
+ }
+ }
+ ),
+ ])
+ def test_parse_ipv6_route(self, test_routes, test_ips, expected):
+ assert _cephadm._parse_ipv6_route(test_routes, test_ips) == expected
+
+ @mock.patch.object(_cephadm, 'call_throws', return_value=('10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50\n', '', ''))
+ def test_command_list_networks(self, cephadm_fs, capsys):
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.command_list_networks(ctx)
+ assert json.loads(capsys.readouterr().out) == {
+ '10.4.0.1/32': {'tun0': ['10.4.0.2']}
+ }
diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py
new file mode 100644
index 000000000..0649ef934
--- /dev/null
+++ b/src/cephadm/tests/test_nfs.py
@@ -0,0 +1,239 @@
+from unittest import mock
+import json
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae"
+
+
+def good_nfs_json():
+ return nfs_json(
+ pool=True,
+ files=True,
+ )
+
+
+def nfs_json(**kwargs):
+ result = {}
+ if kwargs.get("pool"):
+ result["pool"] = "party"
+ if kwargs.get("files"):
+ result["files"] = {
+ "ganesha.conf": "",
+ }
+ if kwargs.get("rgw_content"):
+ result["rgw"] = dict(kwargs["rgw_content"])
+ elif kwargs.get("rgw"):
+ result["rgw"] = {
+ "keyring": "foobar",
+ "user": "jsmith",
+ }
+ return result
+
+
+@pytest.mark.parametrize(
+ "args,kwargs",
+ # args: <fsid>, <daemon_id>, <config_json>; kwargs: <image>
+ [
+ # fail due to: invalid fsid
+ (["foobar", "fred", good_nfs_json()], {}),
+ # fail due to: invalid daemon_id
+ ([SAMPLE_UUID, "", good_nfs_json()], {}),
+ # fail due to: invalid image
+ (
+ [SAMPLE_UUID, "fred", good_nfs_json()],
+ {"image": ""},
+ ),
+ # fail due to: no files in config_json
+ (
+ [
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(pool=True),
+ ],
+ {},
+ ),
+ # fail due to: no pool in config_json
+ (
+ [
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(files=True),
+ ],
+ {},
+ ),
+ # fail due to: bad rgw content
+ (
+ [
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(pool=True, files=True, rgw_content={"foo": True}),
+ ],
+ {},
+ ),
+ # fail due to: rgw keyring given but no user
+ (
+ [
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(
+ pool=True, files=True, rgw_content={"keyring": "foo"}
+ ),
+ ],
+ {},
+ ),
+ ],
+)
+def test_nfsganesha_validation_errors(args, kwargs):
+ with pytest.raises(_cephadm.Error):
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.NFSGanesha(ctx, *args, **kwargs)
+
+
+def test_nfsganesha_init():
+ with with_cephadm_ctx([]) as ctx:
+ ctx.config_json = json.dumps(good_nfs_json())
+ ctx.image = "test_image"
+ nfsg = _cephadm.NFSGanesha.init(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ )
+ assert nfsg.fsid == SAMPLE_UUID
+ assert nfsg.daemon_id == "fred"
+ assert nfsg.pool == "party"
+
+
+def test_nfsganesha_container_mounts():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ cmounts = nfsg.get_container_mounts("/var/tmp")
+ assert len(cmounts) == 3
+ assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
+ assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
+ assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z"
+
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(pool=True, files=True, rgw=True),
+ )
+ cmounts = nfsg.get_container_mounts("/var/tmp")
+ assert len(cmounts) == 4
+ assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
+ assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
+ assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z"
+ assert (
+ cmounts["/var/tmp/keyring.rgw"]
+ == "/var/lib/ceph/radosgw/ceph-jsmith/keyring:z"
+ )
+
+
+def test_nfsganesha_container_envs():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ envs = nfsg.get_container_envs()
+ assert len(envs) == 1
+ assert envs[0] == "CEPH_CONF=/etc/ceph/ceph.conf"
+
+
+def test_nfsganesha_get_version():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+
+ with mock.patch("cephadm.call") as _call:
+ _call.return_value = ("NFS-Ganesha Release = V100", "", 0)
+ ver = nfsg.get_version(ctx, "fake_version")
+ _call.assert_called()
+ assert ver == "100"
+
+
+def test_nfsganesha_get_daemon_name():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ assert nfsg.get_daemon_name() == "nfs.fred"
+
+
+def test_nfsganesha_get_container_name():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ name1 = nfsg.get_container_name()
+ assert name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred"
+ name2 = nfsg.get_container_name(desc="extra")
+ assert (
+ name2 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred-extra"
+ )
+
+
+def test_nfsganesha_get_daemon_args():
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ args = nfsg.get_daemon_args()
+ assert args == ["-F", "-L", "STDERR"]
+
+
+@mock.patch("cephadm.logger")
+def test_nfsganesha_create_daemon_dirs(_logger, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ good_nfs_json(),
+ )
+ with pytest.raises(OSError):
+ nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+ cephadm_fs.create_dir("/var/tmp")
+ nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+ # TODO: make assertions about the dirs created
+
+
+@mock.patch("cephadm.logger")
+def test_nfsganesha_create_daemon_dirs_rgw(_logger, cephadm_fs):
+ with with_cephadm_ctx([]) as ctx:
+ nfsg = _cephadm.NFSGanesha(
+ ctx,
+ SAMPLE_UUID,
+ "fred",
+ nfs_json(pool=True, files=True, rgw=True),
+ )
+ cephadm_fs.create_dir("/var/tmp")
+ nfsg.create_daemon_dirs("/var/tmp", 45, 54)
+ # TODO: make assertions about the dirs created
diff --git a/src/cephadm/tests/test_util_funcs.py b/src/cephadm/tests/test_util_funcs.py
new file mode 100644
index 000000000..270753a55
--- /dev/null
+++ b/src/cephadm/tests/test_util_funcs.py
@@ -0,0 +1,808 @@
+# Tests for assorted utility functions found within cephadm
+#
+from unittest import mock
+
+import functools
+import io
+import os
+import sys
+
+import pytest
+
+from tests.fixtures import with_cephadm_ctx, import_cephadm
+
+_cephadm = import_cephadm()
+
+
+class TestCopyTree:
+ def _copy_tree(self, *args, **kwargs):
+ with with_cephadm_ctx([]) as ctx:
+ with mock.patch("cephadm.extract_uid_gid") as eug:
+ eug.return_value = (os.getuid(), os.getgid())
+ _cephadm.copy_tree(ctx, *args, **kwargs)
+
+ def test_one_dir(self, tmp_path):
+ """Copy one dir into a non-existing dest dir."""
+ src1 = tmp_path / "src1"
+ dst = tmp_path / "dst"
+ src1.mkdir(parents=True)
+
+ with (src1 / "foo.txt").open("w") as fh:
+ fh.write("hello\n")
+ fh.write("earth\n")
+
+ assert not (dst / "foo.txt").exists()
+
+ self._copy_tree([src1], dst)
+ assert (dst / "foo.txt").exists()
+
+ def test_one_existing_dir(self, tmp_path):
+ """Copy one dir into an existing dest dir."""
+ src1 = tmp_path / "src1"
+ dst = tmp_path / "dst"
+ src1.mkdir(parents=True)
+ dst.mkdir(parents=True)
+
+ with (src1 / "foo.txt").open("w") as fh:
+ fh.write("hello\n")
+ fh.write("earth\n")
+
+ assert not (dst / "src1").exists()
+
+ self._copy_tree([src1], dst)
+ assert (dst / "src1/foo.txt").exists()
+
+ def test_two_dirs(self, tmp_path):
+ """Copy two source directories into an existing dest dir."""
+ src1 = tmp_path / "src1"
+ src2 = tmp_path / "src2"
+ dst = tmp_path / "dst"
+ src1.mkdir(parents=True)
+ src2.mkdir(parents=True)
+ dst.mkdir(parents=True)
+
+ with (src1 / "foo.txt").open("w") as fh:
+ fh.write("hello\n")
+ fh.write("earth\n")
+ with (src2 / "bar.txt").open("w") as fh:
+ fh.write("goodbye\n")
+ fh.write("mars\n")
+
+ assert not (dst / "src1").exists()
+ assert not (dst / "src2").exists()
+
+ self._copy_tree([src1, src2], dst)
+ assert (dst / "src1/foo.txt").exists()
+ assert (dst / "src2/bar.txt").exists()
+
+ def test_one_dir_set_uid(self, tmp_path):
+ """Explicity pass uid/gid values and assert these are passed to chown."""
+ # Because this test will often be run by non-root users it is necessary
+ # to mock os.chown or we too easily run into perms issues.
+ src1 = tmp_path / "src1"
+ dst = tmp_path / "dst"
+ src1.mkdir(parents=True)
+
+ with (src1 / "foo.txt").open("w") as fh:
+ fh.write("hello\n")
+ fh.write("earth\n")
+
+ assert not (dst / "foo.txt").exists()
+
+ with mock.patch("os.chown") as _chown:
+ _chown.return_value = None
+ self._copy_tree([src1], dst, uid=0, gid=0)
+ assert len(_chown.mock_calls) >= 2
+ for c in _chown.mock_calls:
+ assert c == mock.call(mock.ANY, 0, 0)
+ assert (dst / "foo.txt").exists()
+
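+
+# The tests above mirror shutil.copytree semantics: a destination that does
+# not yet exist is created with the source's contents, while an existing
+# destination has the source directory nested inside it. A minimal sketch of
+# that pattern (an illustration under assumptions, not cephadm's actual
+# implementation):
+def _copy_tree_sketch(src_dirs, dst, uid=None, gid=None):
+    import shutil
+
+    for src in src_dirs:
+        # nest under an existing dir; otherwise create dst itself
+        dest = dst / src.name if dst.is_dir() else dst
+        shutil.copytree(src, dest)
+        if uid is not None and gid is not None:
+            _cephadm.recursive_chown(str(dest), uid=uid, gid=gid)
+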
+
+class TestCopyFiles:
+ def _copy_files(self, *args, **kwargs):
+ with with_cephadm_ctx([]) as ctx:
+ with mock.patch("cephadm.extract_uid_gid") as eug:
+ eug.return_value = (os.getuid(), os.getgid())
+ _cephadm.copy_files(ctx, *args, **kwargs)
+
+ def test_one_file(self, tmp_path):
+ """Copy one file into the dest dir."""
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+ dst.mkdir(parents=True)
+
+ with file1.open("w") as fh:
+ fh.write("its test time\n")
+
+ self._copy_files([file1], dst)
+ assert (dst / "f1.txt").exists()
+
+ def test_one_file_nodest(self, tmp_path):
+ """Copy one file to the given destination path."""
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+
+ with file1.open("w") as fh:
+ fh.write("its test time\n")
+
+ self._copy_files([file1], dst)
+ assert not dst.is_dir()
+ assert dst.is_file()
+ assert dst.open("r").read() == "its test time\n"
+
+ def test_three_files(self, tmp_path):
+ """Copy one file into the dest dir."""
+ file1 = tmp_path / "f1.txt"
+ file2 = tmp_path / "f2.txt"
+ file3 = tmp_path / "f3.txt"
+ dst = tmp_path / "dst"
+ dst.mkdir(parents=True)
+
+ with file1.open("w") as fh:
+ fh.write("its test time\n")
+ with file2.open("w") as fh:
+ fh.write("f2\n")
+ with file3.open("w") as fh:
+ fh.write("f3\n")
+
+ self._copy_files([file1, file2, file3], dst)
+ assert (dst / "f1.txt").exists()
+ assert (dst / "f2.txt").exists()
+ assert (dst / "f3.txt").exists()
+
+ def test_three_files_nodest(self, tmp_path):
+ """Copy files to dest path (not a dir). This is not a useful operation."""
+ file1 = tmp_path / "f1.txt"
+ file2 = tmp_path / "f2.txt"
+ file3 = tmp_path / "f3.txt"
+ dst = tmp_path / "dst"
+
+ with file1.open("w") as fh:
+ fh.write("its test time\n")
+ with file2.open("w") as fh:
+ fh.write("f2\n")
+ with file3.open("w") as fh:
+ fh.write("f3\n")
+
+ self._copy_files([file1, file2, file3], dst)
+ assert not dst.is_dir()
+ assert dst.is_file()
+ assert dst.open("r").read() == "f3\n"
+
+ def test_one_file_set_uid(self, tmp_path):
+ """Explicity pass uid/gid values and assert these are passed to chown."""
+ # Because this test will often be run by non-root users it is necessary
+ # to mock os.chown or we too easily run into perms issues.
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+ dst.mkdir(parents=True)
+
+ with file1.open("w") as fh:
+ fh.write("its test time\n")
+
+ assert not (dst / "f1.txt").exists()
+
+ with mock.patch("os.chown") as _chown:
+ _chown.return_value = None
+ self._copy_files([file1], dst, uid=0, gid=0)
+ assert len(_chown.mock_calls) >= 1
+ for c in _chown.mock_calls:
+ assert c == mock.call(mock.ANY, 0, 0)
+ assert (dst / "f1.txt").exists()
+
+
+class TestMoveFiles:
+ def _move_files(self, *args, **kwargs):
+ with with_cephadm_ctx([]) as ctx:
+ with mock.patch("cephadm.extract_uid_gid") as eug:
+ eug.return_value = (os.getuid(), os.getgid())
+ _cephadm.move_files(ctx, *args, **kwargs)
+
+ def test_one_file(self, tmp_path):
+ """Move a named file to test dest path."""
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+
+ with file1.open("w") as fh:
+ fh.write("lets moove\n")
+
+ assert not dst.exists()
+ assert file1.is_file()
+
+ self._move_files([file1], dst)
+ assert dst.is_file()
+ assert not file1.exists()
+
+ def test_one_file_destdir(self, tmp_path):
+ """Move a file into an existing dest dir."""
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+ dst.mkdir(parents=True)
+
+ with file1.open("w") as fh:
+ fh.write("lets moove\n")
+
+ assert not (dst / "f1.txt").exists()
+ assert file1.is_file()
+
+ self._move_files([file1], dst)
+ assert (dst / "f1.txt").is_file()
+ assert not file1.exists()
+
+ def test_one_file_one_link(self, tmp_path):
+ """Move a file and a symlink to that file to a dest dir."""
+ file1 = tmp_path / "f1.txt"
+ link1 = tmp_path / "lnk"
+ dst = tmp_path / "dst"
+ dst.mkdir(parents=True)
+
+ with file1.open("w") as fh:
+ fh.write("lets moove\n")
+ os.symlink("f1.txt", link1)
+
+ assert not (dst / "f1.txt").exists()
+ assert file1.is_file()
+ assert link1.exists()
+
+ self._move_files([file1, link1], dst)
+ assert (dst / "f1.txt").is_file()
+ assert (dst / "lnk").is_symlink()
+ assert not file1.exists()
+ assert not link1.exists()
+ assert (dst / "f1.txt").open("r").read() == "lets moove\n"
+ assert (dst / "lnk").open("r").read() == "lets moove\n"
+
+ def test_one_file_set_uid(self, tmp_path):
+ """Explicity pass uid/gid values and assert these are passed to chown."""
+ # Because this test will often be run by non-root users it is necessary
+ # to mock os.chown or we too easily run into perms issues.
+ file1 = tmp_path / "f1.txt"
+ dst = tmp_path / "dst"
+
+ with file1.open("w") as fh:
+ fh.write("lets moove\n")
+
+ assert not dst.exists()
+ assert file1.is_file()
+
+ with mock.patch("os.chown") as _chown:
+ _chown.return_value = None
+ self._move_files([file1], dst, uid=0, gid=0)
+ assert len(_chown.mock_calls) >= 1
+ for c in _chown.mock_calls:
+ assert c == mock.call(mock.ANY, 0, 0)
+ assert dst.is_file()
+ assert not file1.exists()
+
+
+def test_recursive_chown(tmp_path):
+ d1 = tmp_path / "dir1"
+ d2 = d1 / "dir2"
+ f1 = d2 / "file1.txt"
+ d2.mkdir(parents=True)
+
+ with f1.open("w") as fh:
+ fh.write("low down\n")
+
+ with mock.patch("os.chown") as _chown:
+ _chown.return_value = None
+ _cephadm.recursive_chown(str(d1), uid=500, gid=500)
+ assert len(_chown.mock_calls) == 3
+ assert _chown.mock_calls[0] == mock.call(str(d1), 500, 500)
+ assert _chown.mock_calls[1] == mock.call(str(d2), 500, 500)
+ assert _chown.mock_calls[2] == mock.call(str(f1), 500, 500)
+
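+
+# The call order asserted above (parent dir, then child dir, then file) is
+# what a topdown os.walk yields. A sketch of the presumed shape (not
+# necessarily cephadm's exact code):
+def _recursive_chown_sketch(path, uid, gid):
+    for dirpath, _dirnames, filenames in os.walk(path):
+        os.chown(dirpath, uid, gid)
+        for name in filenames:
+            os.chown(os.path.join(dirpath, name), uid, gid)
+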
+
+class TestFindExecutable:
+ def test_standard_exe(self):
+ # pretty much every system will have `true` on the path. It's a safe choice
+ # for the first assertion
+ exe = _cephadm.find_executable("true")
+ assert exe.endswith("true")
+
+ def test_custom_path(self, tmp_path):
+ foo_sh = tmp_path / "foo.sh"
+ with open(foo_sh, "w") as fh:
+ fh.write("#!/bin/sh\n")
+ fh.write("echo foo\n")
+ foo_sh.chmod(0o755)
+
+ exe = _cephadm.find_executable(foo_sh)
+ assert str(exe) == str(foo_sh)
+
+ def test_no_path(self, monkeypatch):
+ monkeypatch.delenv("PATH")
+ exe = _cephadm.find_executable("true")
+ assert exe.endswith("true")
+
+ def test_no_path_no_confstr(self, monkeypatch):
+ def _fail(_):
+ raise ValueError("fail")
+
+ monkeypatch.delenv("PATH")
+ monkeypatch.setattr("os.confstr", _fail)
+ exe = _cephadm.find_executable("true")
+ assert exe.endswith("true")
+
+ def test_unset_path(self):
+ exe = _cephadm.find_executable("true", path="")
+ assert exe is None
+
+ def test_no_such_exe(self):
+ exe = _cephadm.find_executable("foo_bar-baz.noway")
+ assert exe is None
+
+
+def test_find_program():
+ exe = _cephadm.find_program("true")
+ assert exe.endswith("true")
+
+ with pytest.raises(ValueError):
+ _cephadm.find_program("foo_bar-baz.noway")
+
+
+def _mk_fake_call(enabled, active):
+ def _fake_call(ctx, cmd, **kwargs):
+ if "is-enabled" in cmd:
+ if isinstance(enabled, Exception):
+ raise enabled
+ return enabled
+ if "is-active" in cmd:
+ if isinstance(active, Exception):
+ raise active
+ return active
+ raise ValueError("should not get here")
+
+ return _fake_call
+
+
+@pytest.mark.parametrize(
+ "enabled_out, active_out, expected",
+ [
+ (
+ # ok, all is well
+ ("", "", 0),
+ ("active", "", 0),
+ (True, "running", True),
+ ),
+ (
+ # disabled, unknown if active
+ ("disabled", "", 1),
+ ("", "", 0),
+ (False, "unknown", True),
+ ),
+ (
+            # is-enabled error (not disabled, unknown if active)
+ ("bleh", "", 1),
+ ("", "", 0),
+ (False, "unknown", False),
+ ),
+ (
+ # is-enabled ok, inactive is stopped
+ ("", "", 0),
+ ("inactive", "", 0),
+ (True, "stopped", True),
+ ),
+ (
+ # is-enabled ok, failed is error
+ ("", "", 0),
+ ("failed", "", 0),
+ (True, "error", True),
+ ),
+ (
+ # is-enabled ok, auto-restart is error
+ ("", "", 0),
+ ("auto-restart", "", 0),
+ (True, "error", True),
+ ),
+ (
+ # error exec'ing is-enabled cmd
+ ValueError("bonk"),
+ ("active", "", 0),
+ (False, "running", False),
+ ),
+ (
+            # error exec'ing is-active cmd
+ ("", "", 0),
+ ValueError("blat"),
+ (True, "unknown", True),
+ ),
+ ],
+)
+def test_check_unit(enabled_out, active_out, expected):
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.call.side_effect = _mk_fake_call(
+ enabled=enabled_out,
+ active=active_out,
+ )
+ enabled, state, installed = _cephadm.check_unit(ctx, "foobar")
+ assert (enabled, state, installed) == expected
+
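+
+# Reading the table above: check_unit appears to map systemd's is-active
+# output to active -> "running", inactive -> "stopped", and
+# failed/auto-restart -> "error", with anything unrecognized reported as
+# "unknown"; the final element of the tuple reflects whether the unit
+# looked installed at all.
+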
+
+class FakeEnabler:
+ def __init__(self, should_be_called):
+ self._should_be_called = should_be_called
+ self._services = []
+
+ def enable_service(self, service):
+ self._services.append(service)
+
+ def check_expected(self):
+ if not self._should_be_called:
+ assert not self._services
+ return
+        # there are currently seven chrony/ntp-type services that
+        # cephadm looks for. Make sure it probed for each of them,
+        # or more in case someone adds to the list.
+ assert len(self._services) >= 7
+ assert "chrony.service" in self._services
+ assert "ntp.service" in self._services
+
+
+@pytest.mark.parametrize(
+ "call_fn, enabler, expected",
+ [
+ # Test that time sync services are not enabled
+ (
+ _mk_fake_call(
+ enabled=("", "", 1),
+ active=("", "", 1),
+ ),
+ None,
+ False,
+ ),
+ # Test that time sync service is enabled
+ (
+ _mk_fake_call(
+ enabled=("", "", 0),
+ active=("active", "", 0),
+ ),
+ None,
+ True,
+ ),
+        # Test that time sync services are not enabled, then try to enable
+        # them. This case must be installed but not running in order to
+        # trigger the enabler, which should be called with every known
+        # service name.
+ (
+ _mk_fake_call(
+ enabled=("disabled", "", 1),
+ active=("", "", 1),
+ ),
+ FakeEnabler(True),
+ False,
+ ),
+ # Test that time sync is enabled, with an enabler passed which
+ # will check that the enabler was never called.
+ (
+ _mk_fake_call(
+ enabled=("", "", 0),
+ active=("active", "", 0),
+ ),
+ FakeEnabler(False),
+ True,
+ ),
+ ],
+)
+def test_check_time_sync(call_fn, enabler, expected):
+ """The check_time_sync call actually checks if a time synchronization service
+ is enabled. It is also the only consumer of check_units.
+ """
+ with with_cephadm_ctx([]) as ctx:
+ _cephadm.call.side_effect = call_fn
+ result = _cephadm.check_time_sync(ctx, enabler=enabler)
+ assert result == expected
+ if enabler is not None:
+ enabler.check_expected()
+
+
+@pytest.mark.parametrize(
+ "content, expected",
+ [
+ (
+ """#JUNK
+ FOO=1
+ """,
+ (None, None, None),
+ ),
+ (
+ """# A sample from a real centos system
+NAME="CentOS Stream"
+VERSION="8"
+ID="centos"
+ID_LIKE="rhel fedora"
+VERSION_ID="8"
+PLATFORM_ID="platform:el8"
+PRETTY_NAME="CentOS Stream 8"
+ANSI_COLOR="0;31"
+CPE_NAME="cpe:/o:centos:centos:8"
+HOME_URL="https://centos.org/"
+BUG_REPORT_URL="https://bugzilla.redhat.com/"
+REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 8"
+REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream"
+ """,
+ ("centos", "8", None),
+ ),
+ (
+ """# Minimal but complete, made up vals
+ID="hpec"
+VERSION_ID="33"
+VERSION_CODENAME="hpec nimda"
+ """,
+ ("hpec", "33", "hpec nimda"),
+ ),
+ (
+ """# Minimal but complete, no quotes
+ID=hpec
+VERSION_ID=33
+VERSION_CODENAME=hpec nimda
+ """,
+ ("hpec", "33", "hpec nimda"),
+ ),
+ ],
+)
+def test_get_distro(monkeypatch, content, expected):
+ def _fake_open(*args, **kwargs):
+ return io.StringIO(content)
+
+ monkeypatch.setattr("builtins.open", _fake_open)
+ assert _cephadm.get_distro() == expected
+
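+
+# get_distro evidently parses os-release style KEY=VALUE lines, strips
+# optional quotes, and returns (ID, VERSION_ID, VERSION_CODENAME) with None
+# for anything missing. A minimal parser with those semantics (a sketch
+# matching the cases above, not the real implementation) might be:
+def _parse_os_release_sketch(text):
+    distro = version = codename = None
+    for line in text.splitlines():
+        line = line.strip()
+        if line.startswith("#") or "=" not in line:
+            continue
+        key, val = line.split("=", 1)
+        val = val.strip().strip('"')
+        if key == "ID":
+            distro = val
+        elif key == "VERSION_ID":
+            version = val
+        elif key == "VERSION_CODENAME":
+            codename = val
+    return distro, version, codename
+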
+
+class FakeContext:
+ """FakeContext is a minimal type for passing as a ctx, when
+ with_cephadm_ctx is not appropriate (it enables too many mocks, etc).
+ """
+
+ timeout = 30
+
+
+def _has_non_zero_exit(clog):
+ assert any("Non-zero exit" in ll for _, _, ll in clog.record_tuples)
+
+
+def _has_values_somewhere(clog, values, non_zero=True):
+ if non_zero:
+ _has_non_zero_exit(clog)
+ for value in values:
+ assert any(value in ll for _, _, ll in clog.record_tuples)
+
+
+@pytest.mark.parametrize(
+ "pyline, expected, call_kwargs, log_check",
+ [
+ pytest.param(
+ "import time; time.sleep(0.1)",
+ ("", "", 0),
+ {},
+ None,
+ id="brief-sleep",
+ ),
+ pytest.param(
+ "import sys; sys.exit(2)",
+ ("", "", 2),
+ {},
+ _has_non_zero_exit,
+ id="exit-non-zero",
+ ),
+ pytest.param(
+ "import sys; sys.exit(0)",
+ ("", "", 0),
+ {"desc": "success"},
+ None,
+ id="success-with-desc",
+ ),
+ pytest.param(
+ "print('foo'); print('bar')",
+ ("foo\nbar\n", "", 0),
+ {"desc": "stdout"},
+ None,
+ id="stdout-print",
+ ),
+ pytest.param(
+ "import sys; sys.stderr.write('la\\nla\\nla\\n')",
+ ("", "la\nla\nla\n", 0),
+ {"desc": "stderr"},
+ None,
+ id="stderr-print",
+ ),
+ pytest.param(
+ "for i in range(501): print(i, flush=True)",
+ lambda r: r[2] == 0 and r[1] == "" and "500" in r[0].splitlines(),
+ {},
+ None,
+ id="stdout-long",
+ ),
+ pytest.param(
+ "for i in range(1000000): print(i, flush=True)",
+ lambda r: r[2] == 0
+ and r[1] == ""
+ and len(r[0].splitlines()) == 1000000,
+ {},
+ None,
+ id="stdout-very-long",
+ ),
+ pytest.param(
+ "import sys; sys.stderr.write('pow\\noof\\nouch\\n'); sys.exit(1)",
+ ("", "pow\noof\nouch\n", 1),
+ {"desc": "stderr"},
+ functools.partial(
+ _has_values_somewhere,
+ values=["pow", "oof", "ouch"],
+ non_zero=True,
+ ),
+ id="stderr-logged-non-zero",
+ ),
+ pytest.param(
+ "import time; time.sleep(4)",
+ ("", "", 124),
+ {"timeout": 1},
+ None,
+ id="long-sleep",
+ ),
+ pytest.param(
+ "import time\nfor i in range(100):\n\tprint(i, flush=True); time.sleep(0.01)",
+ ("", "", 124),
+ {"timeout": 0.5},
+ None,
+ id="slow-print-timeout",
+ ),
+        # Commands that time out collect no logs and return empty std{out,err} strings.
+ ],
+)
+def test_call(caplog, monkeypatch, pyline, expected, call_kwargs, log_check):
+ import logging
+
+ caplog.set_level(logging.INFO)
+ monkeypatch.setattr("cephadm.logger", logging.getLogger())
+ ctx = FakeContext()
+ result = _cephadm.call(ctx, [sys.executable, "-c", pyline], **call_kwargs)
+ if callable(expected):
+ assert expected(result)
+ else:
+ assert result == expected
+ if callable(log_check):
+ log_check(caplog)
+
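+
+# Note: the 124 codes expected in the timeout cases above match the exit
+# status convention of GNU timeout(1), which call() appears to follow when
+# a command exceeds its deadline.
+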
+
+class TestWriteNew:
+ def test_success(self, tmp_path):
+ "Test the simple basic feature of writing a file."
+ dest = tmp_path / "foo.txt"
+ with _cephadm.write_new(dest) as fh:
+ fh.write("something\n")
+ fh.write("something else\n")
+
+ with open(dest, "r") as fh:
+ assert fh.read() == "something\nsomething else\n"
+
+    def test_write_owner_mode(self, tmp_path):
+ "Test that the owner and perms options function."
+ dest = tmp_path / "foo.txt"
+
+        # if this test is run as non-root, we can't really change ownership
+ uid = os.getuid()
+ gid = os.getgid()
+
+ with _cephadm.write_new(dest, owner=(uid, gid), perms=0o600) as fh:
+ fh.write("xomething\n")
+ fh.write("xomething else\n")
+
+ with open(dest, "r") as fh:
+ assert fh.read() == "xomething\nxomething else\n"
+ sr = os.fstat(fh.fileno())
+ assert sr.st_uid == uid
+ assert sr.st_gid == gid
+ assert (sr.st_mode & 0o777) == 0o600
+
+ def test_encoding(self, tmp_path):
+ "Test that the encoding option functions."
+ dest = tmp_path / "foo.txt"
+ msg = "\u2603\u26C5\n"
+ with _cephadm.write_new(dest, encoding='utf-8') as fh:
+ fh.write(msg)
+ with open(dest, "rb") as fh:
+ b1 = fh.read()
+ assert b1.decode('utf-8') == msg
+
+ dest = tmp_path / "foo2.txt"
+ with _cephadm.write_new(dest, encoding='utf-16le') as fh:
+ fh.write(msg)
+ with open(dest, "rb") as fh:
+ b2 = fh.read()
+ assert b2.decode('utf-16le') == msg
+
+ # the binary data should differ due to the different encodings
+ assert b1 != b2
+
+ def test_cleanup(self, tmp_path):
+ "Test that an exception during write leaves no file behind."
+ dest = tmp_path / "foo.txt"
+ with pytest.raises(ValueError):
+ with _cephadm.write_new(dest) as fh:
+ fh.write("hello\n")
+ raise ValueError("foo")
+ fh.write("world\n")
+ assert not dest.exists()
+ assert not dest.with_name(dest.name+".new").exists()
+ assert list(dest.parent.iterdir()) == []
+
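+
+# The cleanup test above implies write_new stages output in a "<name>.new"
+# file and only moves it into place on success. The general pattern looks
+# roughly like this sketch (assumed shape, not the actual cephadm code):
+import contextlib
+
+
+@contextlib.contextmanager
+def _write_new_sketch(path, perms=0o600):
+    tmp = str(path) + ".new"
+    with open(tmp, "w") as fh:
+        try:
+            os.chmod(tmp, perms)
+            yield fh
+        except Exception:
+            os.unlink(tmp)  # leave nothing behind on failure
+            raise
+    os.rename(tmp, str(path))  # rename is atomic on POSIX
+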
+
+class CompareContext1:
+ cfg_data = {
+ "name": "mane",
+ "fsid": "foobar",
+ "image": "fake.io/noway/nohow:gndn",
+ "meta": {
+ "fruit": "banana",
+ "vegetable": "carrot",
+ },
+ "params": {
+ "osd_fsid": "robble",
+ "tcp_ports": [404, 9999],
+ },
+ "config_blobs": {
+ "alpha": {"sloop": "John B"},
+ "beta": {"forest": "birch"},
+ "gamma": {"forest": "pine"},
+ },
+ }
+
+ def check(self, ctx):
+ assert ctx.name == 'mane'
+ assert ctx.fsid == 'foobar'
+ assert ctx.image == 'fake.io/noway/nohow:gndn'
+ assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"}
+ assert ctx.config_blobs == {
+ "alpha": {"sloop": "John B"},
+ "beta": {"forest": "birch"},
+ "gamma": {"forest": "pine"},
+ }
+ assert ctx.osd_fsid == "robble"
+ assert ctx.tcp_ports == [404, 9999]
+
+
+class CompareContext2:
+ cfg_data = {
+ "name": "cc2",
+ "fsid": "foobar",
+ "meta": {
+ "fruit": "banana",
+ "vegetable": "carrot",
+ },
+ "params": {},
+ "config_blobs": {
+ "alpha": {"sloop": "John B"},
+ "beta": {"forest": "birch"},
+ "gamma": {"forest": "pine"},
+ },
+ }
+
+ def check(self, ctx):
+ assert ctx.name == 'cc2'
+ assert ctx.fsid == 'foobar'
+ assert ctx.image == 'quay.io/ceph/ceph:v18'
+ assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"}
+ assert ctx.config_blobs == {
+ "alpha": {"sloop": "John B"},
+ "beta": {"forest": "birch"},
+ "gamma": {"forest": "pine"},
+ }
+ assert ctx.osd_fsid is None
+ assert ctx.tcp_ports is None
+
+
+@pytest.mark.parametrize(
+ "cc",
+ [
+ CompareContext1(),
+ CompareContext2(),
+ ],
+)
+def test_apply_deploy_config_to_ctx(cc, monkeypatch):
+ import logging
+
+ monkeypatch.setattr("cephadm.logger", logging.getLogger())
+ ctx = FakeContext()
+ _cephadm.apply_deploy_config_to_ctx(cc.cfg_data, ctx)
+ cc.check(ctx)
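+
+
+# Judging by the two contexts above, apply_deploy_config_to_ctx copies the
+# top-level fields onto ctx (with "meta" landing as meta_properties),
+# flattens "params" into individual attributes, and falls back to a default
+# image when none is given. A rough sketch of that shape (assumptions
+# flagged inline, not the real implementation):
+def _apply_deploy_config_sketch(config_data, ctx):
+    ctx.name = config_data["name"]
+    ctx.fsid = config_data["fsid"]
+    # the default image value is assumed from CompareContext2's expectation
+    ctx.image = config_data.get("image", "quay.io/ceph/ceph:v18")
+    ctx.meta_properties = config_data.get("meta")
+    ctx.config_blobs = config_data.get("config_blobs")
+    params = config_data.get("params", {})
+    ctx.osd_fsid = params.get("osd_fsid")
+    ctx.tcp_ports = params.get("tcp_ports")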
diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini
new file mode 100644
index 000000000..2cbfca70f
--- /dev/null
+++ b/src/cephadm/tox.ini
@@ -0,0 +1,77 @@
+[tox]
+envlist =
+ py3
+ mypy
+ fix
+ flake8
+skipsdist = true
+
+[flake8]
+max-line-length = 100
+inline-quotes = '
+ignore =
+ E501, \
+ W503,
+exclude =
+ .tox, \
+ .vagrant, \
+ __pycache__, \
+ *.pyc, \
+ templates, \
+ .eggs
+statistics = True
+
+[autopep8]
+addopts =
+ --max-line-length {[flake8]max-line-length} \
+ --ignore "{[flake8]ignore}" \
+ --exclude "{[flake8]exclude}" \
+ --in-place \
+ --recursive \
+ --ignore-local-config
+
+[testenv]
+skip_install=true
+deps =
+ pyfakefs == 4.5.6 ; python_version < "3.7"
+ pyfakefs >= 5, < 6 ; python_version >= "3.7"
+ mock
+ pytest
+commands=pytest {posargs}
+
+[testenv:mypy]
+basepython = python3
+deps =
+ mypy
+ -c{toxinidir}/../mypy-constrains.txt
+commands = mypy --config-file ../mypy.ini {posargs:cephadm.py}
+
+[testenv:fix]
+basepython = python3
+deps =
+ autopep8
+commands =
+ python --version
+ autopep8 {[autopep8]addopts} {posargs: cephadm.py}
+
+[testenv:flake8]
+basepython = python3
+allowlist_externals = bash
+deps =
+ flake8 == 5.0.4
+ flake8-quotes
+commands =
+ flake8 --config=tox.ini {posargs:cephadm.py}
+ bash -c "test $(grep -c 'docker.io' cephadm.py) == 11"
+# The check above is meant to catch new references to docker.io creeping in
+# unnoticed; downstream distributions may choose to alter the expected count.
+
+# coverage env is intentionally left out of the envlist. It is here for developers
+# to run locally to generate and review test coverage of cephadm.
+[testenv:coverage]
+skip_install=true
+deps =
+ {[testenv]deps}
+ pytest-cov
+commands =
+ pytest -v --cov=cephadm --cov-report=term-missing --cov-report=html {posargs}
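+# Typical local invocation (assuming tox is installed): `tox -e coverage`,
+# then open htmlcov/index.html to browse the report.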
diff --git a/src/cephadm/vstart-cleanup.sh b/src/cephadm/vstart-cleanup.sh
new file mode 100755
index 000000000..facbdd100
--- /dev/null
+++ b/src/cephadm/vstart-cleanup.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -ex
+
+bin/ceph mon rm `hostname`
+for f in `bin/ceph orch ls | grep -v NAME | awk '{print $1}'` ; do
+ bin/ceph orch rm $f --force
+done
diff --git a/src/cephadm/vstart-smoke.sh b/src/cephadm/vstart-smoke.sh
new file mode 100755
index 000000000..ecdb59d18
--- /dev/null
+++ b/src/cephadm/vstart-smoke.sh
@@ -0,0 +1,86 @@
+#!/bin/bash -ex
+
+# This is a smoke test meant to be run against vstart.sh.
+
+host="$(hostname)"
+
+bin/init-ceph stop || true
+MON=1 OSD=1 MDS=0 MGR=1 ../src/vstart.sh -d -n -x -l --cephadm
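+# MON/OSD/MDS/MGR set how many of each daemon vstart.sh brings up;
+# --cephadm (presumably) prepares the dev cluster for cephadm/orchestrator use.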
+
+export CEPH_DEV=1
+
+bin/ceph orch ls
+bin/ceph orch apply mds foo 1
+bin/ceph orch ls | grep foo
+while ! bin/ceph orch ps | grep mds.foo ; do sleep 1 ; done
+bin/ceph orch ps
+
+bin/ceph orch host ls
+
+bin/ceph orch rm crash
+! bin/ceph orch ls | grep crash
+bin/ceph orch apply crash '*'
+bin/ceph orch ls | grep crash
+
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+bin/ceph orch ps | grep crash.$host | grep running
+bin/ceph orch ls | grep crash | grep 1/1
+bin/ceph orch daemon rm crash.$host
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+
+bin/ceph orch daemon stop crash.$host
+bin/ceph orch daemon start crash.$host
+bin/ceph orch daemon restart crash.$host
+bin/ceph orch daemon reconfig crash.$host
+bin/ceph orch daemon redeploy crash.$host
+
+bin/ceph orch host ls | grep $host
+bin/ceph orch host label add $host fooxyz
+bin/ceph orch host ls | grep $host | grep fooxyz
+bin/ceph orch host label rm $host fooxyz
+! bin/ceph orch host ls | grep $host | grep fooxyz
+bin/ceph orch host set-addr $host $host
+
+bin/ceph cephadm check-host $host
+#! bin/ceph cephadm check-host $host 1.2.3.4
+#bin/ceph orch host set-addr $host 1.2.3.4
+#! bin/ceph cephadm check-host $host
+bin/ceph orch host set-addr $host $host
+bin/ceph cephadm check-host $host
+
+bin/ceph orch apply mgr 1
+bin/ceph orch rm mgr --force # we don't want a mgr to take over for ours
+
+bin/ceph orch daemon add mon $host:127.0.0.1
+
+while ! bin/ceph mon dump | grep 'epoch 2' ; do sleep 1 ; done
+
+bin/ceph orch apply rbd-mirror 1
+
+bin/ceph orch apply node-exporter '*'
+bin/ceph orch apply prometheus 1
+bin/ceph orch apply alertmanager 1
+bin/ceph orch apply grafana 1
+
+while ! bin/ceph dashboard get-grafana-api-url | grep $host ; do sleep 1 ; done
+
+bin/ceph orch apply rgw foo --placement=1
+
+bin/ceph orch ps
+bin/ceph orch ls
+
+# clean up
+bin/ceph orch rm mds.foo
+bin/ceph orch rm rgw.myrealm.myzone
+bin/ceph orch rm rbd-mirror
+bin/ceph orch rm node-exporter
+bin/ceph orch rm alertmanager
+bin/ceph orch rm grafana
+bin/ceph orch rm prometheus
+bin/ceph orch rm crash
+
+bin/ceph mon rm $host
+! bin/ceph orch daemon rm mon.$host
+bin/ceph orch daemon rm mon.$host --force
+
+echo OK