diff options
Diffstat (limited to '')
41 files changed, 18388 insertions, 0 deletions
# Build rules for the cephadm binary (src/cephadm/CMakeLists.txt).

# Register the tox environments (py3, mypy, flake8) as a test when tests
# are enabled for this build.
if(WITH_TESTS)
  include(AddCephTest)
  add_tox_test(cephadm TOX_ENVS py3 mypy flake8)
endif()

# Location of the generated cephadm program inside the build tree.
set(bin_target_file "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cephadm")

# Generate the program by running build.py from the source directory.
# Every expansion is quoted so that an empty value or a value containing
# ';' or spaces still reaches build.py as exactly one argument, and
# VERBATIM makes the argument escaping identical on every generator.
add_custom_command(
  OUTPUT "${bin_target_file}"
  DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/cephadm.py"
    "${CMAKE_CURRENT_SOURCE_DIR}/build.py"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  COMMAND "${Python3_EXECUTABLE}" build.py
    "--set-version-var=CEPH_GIT_VER=${CEPH_GIT_VER}"
    "--set-version-var=CEPH_GIT_NICE_VER=${CEPH_GIT_NICE_VER}"
    "--set-version-var=CEPH_RELEASE=${CEPH_RELEASE}"
    "--set-version-var=CEPH_RELEASE_NAME=${CEPH_RELEASE_NAME}"
    "--set-version-var=CEPH_RELEASE_TYPE=${CEPH_RELEASE_TYPE}"
    "${bin_target_file}"
  COMMENT "Building cephadm"
  VERBATIM
)

# Build the program as part of the default target.
add_custom_target(cephadm ALL
  DEPENDS "${bin_target_file}")

install(PROGRAMS
  "${bin_target_file}"
  DESTINATION "${CMAKE_INSTALL_SBINDIR}")
# stable/Dockerfile
#
# Build a Podman container image from the latest
# stable version of Podman on the Fedoras Updates System.
# https://bodhi.fedoraproject.org/updates/?search=podman
# This image can be used to create a secured container
# that runs safely with privileges within the container.
#
FROM fedora:34

ENV CEPHADM_PATH=/usr/local/sbin/cephadm
RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume

# Don't include container-selinux and remove
# directories used by yum that are just taking
# up space.
RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \
yum -y install podman fuse-overlayfs --exclude container-selinux; \
rm -rf /var/cache /var/log/dnf* /var/log/yum.*

RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y

ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/podman-containers.conf /root/.config/containers/containers.conf

RUN mkdir -p /root/.local/share/containers; # chown podman:podman -R /home/podman

# Note VOLUME options must always happen after the chown call above
# RUN commands can not modify existing volumes
VOLUME /var/lib/containers
VOLUME /root/.local/share/containers

# chmod containers.conf and adjust storage.conf to enable Fuse storage.
RUN chmod 644 /etc/containers/containers.conf; sed -i -e 's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var/lib/shared",' -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' /etc/containers/storage.conf
RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers /var/lib/shared/vfs-images /var/lib/shared/vfs-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock; touch /var/lib/shared/vfs-images/images.lock; touch /var/lib/shared/vfs-layers/layers.lock

RUN echo 'root:root' | chpasswd

RUN dnf install -y adjtimex # adjtimex syscall doesn't exist in fedora 35+ therefore we have to install it manually
                            # so chronyd works
RUN dnf install -y strace sysstat # debugging tools
RUN dnf -y install hostname iproute udev
ENV _CONTAINERS_USERNS_CONFIGURED=""

# The root entries are appended (>>). Writing them with '>' replaced the
# podman entries created on the two preceding lines, so only the root
# mapping survived in /etc/subuid and /etc/subgid.
RUN useradd podman; \
echo podman:0:5000 > /etc/subuid; \
echo podman:0:5000 > /etc/subgid; \
echo root:0:65535 >> /etc/subuid; \
echo root:0:65535 >> /etc/subgid;

# NOTE(review): this VOLUME is declared before the chown of /home/podman
# below, which contradicts the ordering note earlier in this file — confirm
# the ownership of the volume directory is what is intended.
VOLUME /home/podman/.local/share/containers

# NOTE(review): this second ADD writes /etc/containers/containers.conf again,
# replacing the copy fetched from containers/podman above — confirm which
# source is wanted.
ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf
ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/podman-containers.conf /home/podman/.config/containers/containers.conf

RUN chown podman:podman -R /home/podman

RUN echo 'podman:podman' | chpasswd
RUN touch /.box_container # empty file to check if inside a container

EXPOSE 8443
EXPOSE 22

ENTRYPOINT ["/usr/sbin/init"]
# The image tarball lets cephadm load the ceph image instead of pulling it
# every time a cluster is deployed. You are responsible for refreshing the
# image yourself with `./box.py -v cluster setup`.
CEPH_IMAGE_TAR = 'docker/ceph/image/quay.ceph.image.tar'
CEPH_ROOT = '../../../'
DASHBOARD_PATH = '../../../src/pybind/mgr/dashboard/frontend/'

root_error_msg = """
WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
sudo with this script can kill your computer, try again without sudo
if you value your time.
"""


def remove_ceph_image_tar():
    """Delete the saved ceph image tarball if it exists."""
    if os.path.exists(CEPH_IMAGE_TAR):
        os.remove(CEPH_IMAGE_TAR)


def cleanup_box() -> None:
    """Remove the OSD loopback devices and the saved ceph image tarball."""
    osd.cleanup_osds()
    remove_ceph_image_tar()


def image_exists(image_name: str):
    """Return True when ``image_name`` ("repository:tag") is listed by the engine.

    The tag is split off at the LAST colon so that a registry with a port
    ("host:5000/repo:tag") is handled.

    Raises:
        ValueError: if ``image_name`` carries no tag. The original code
            raised the same exception type from its tuple unpacking; the
            preceding ``assert image_name.find(':')`` could not catch the
            problem because ``find`` returns -1 (truthy) on a miss.
    """
    repository, separator, tag = image_name.rpartition(':')
    # A '/' after the last colon means that colon belongs to a registry port.
    if not separator or not repository or not tag or '/' in tag:
        raise ValueError(f'expected "<repository>:<tag>", got {image_name!r}')

    engine = get_container_engine()
    REPOSITORY_COLUMN = 0
    TAG_COLUMN = 1
    for row in engine.run('image ls').split('\n'):
        columns = row.split()
        # Blank or truncated rows cannot describe an image; skipping them
        # avoids an IndexError.
        if len(columns) <= TAG_COLUMN:
            continue
        if columns[REPOSITORY_COLUMN] == repository and columns[TAG_COLUMN] == tag:
            return True
    return False


def get_ceph_image():
    """Pull the ceph image, rebuild it from docker/ceph and save it as a tarball."""
    print('Getting ceph image')
    engine = get_container_engine()
    engine.run(f'pull {CEPH_IMAGE}')
    # update
    engine.run(f'build -t {CEPH_IMAGE} docker/ceph')
    # exist_ok avoids the check-then-create race when several setup jobs run.
    os.makedirs(os.path.dirname(CEPH_IMAGE_TAR), exist_ok=True)

    remove_ceph_image_tar()

    engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
    run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
    print('Ceph image added')


def get_box_image():
    """Build the cephadm-box image from the engine's dockerfile."""
    print('Getting box image')
    engine = get_container_engine()
    engine.run(f'build -t cephadm-box -f {engine.dockerfile} .')
    print('Box image added')


def check_dashboard():
    """Warn when the dashboard frontend has not been built.

    The build output is looked up under DASHBOARD_PATH; the original checked
    for ``dist`` directly under the repository root (CEPH_ROOT).
    NOTE(review): confirm ``<frontend>/dist`` is the directory ``npm run build``
    produces for the dashboard.
    """
    if not os.path.exists(os.path.join(DASHBOARD_PATH, 'dist')):
        print(colored('Missing build in dashboard', Colors.WARNING))


def check_cgroups():
    """Exit unless the unified cgroup v2 hierarchy is present on this host."""
    if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
        print(colored('cgroups v1 is not supported', Colors.FAIL))
        print('Enable cgroups v2 please')
        # NOTE: POSIX keeps only the low 8 bits of the status, so the parent
        # process observes 154 rather than 666.
        sys.exit(666)
def check_selinux():
    """Warn when SELinux is not reported as disabled.

    ``getenforce`` is run with ``expect_error=True`` so that a host without
    the SELinux tooling produces no output instead of aborting the caller.
    """
    selinux = run_shell_command('getenforce', expect_error=True)
    if not selinux:
        # No output: getenforce is missing or failed, nothing to report.
        return
    if 'Disabled' not in selinux:
        print(colored('selinux should be disabled, please disable it if you '
                       'don\'t want unexpected behaviour.', Colors.WARNING))


def dashboard_setup():
    """Install the dashboard frontend dependencies and build it with npm."""
    command = f'cd {DASHBOARD_PATH} && npm install'
    run_shell_command(command)
    command = f'cd {DASHBOARD_PATH} && npm run build'
    run_shell_command(command)


class Cluster(Target):
    """`cluster` target: set up, start, bootstrap, list and tear down boxes.

    Option values are read back through ``Config.get`` using the argparse
    destination names, i.e. with underscores (``--skip-dashboard`` is stored
    as ``skip_dashboard``).
    """
    _help = 'Manage docker cephadm boxes'
    actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup']

    def set_args(self):
        """Register the command line arguments of the `cluster` target."""
        self.parser.add_argument(
            'action', choices=Cluster.actions, help='Action to perform on the box'
        )
        self.parser.add_argument('--osds', type=int, default=3, help='Number of osds')

        self.parser.add_argument('--hosts', type=int, default=1, help='Number of hosts')
        self.parser.add_argument('--skip-deploy-osds', action='store_true', help='skip deploy osd')
        self.parser.add_argument('--skip-create-loop', action='store_true', help='skip create loopback device')
        self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack')
        self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard')
        self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds')
        self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel')

    @ensure_outside_container
    def setup(self):
        """Fetch the ceph image, build the box image and build the dashboard.

        The three steps run in a process pool sized by ``--jobs``. A failing
        step is reported and makes the command exit with status 1; the
        original waited with ``result.wait()``, which discards exceptions
        raised in the workers, and never closed the pool.
        """
        check_cgroups()
        check_selinux()

        targets = [
            get_ceph_image,
            get_box_image,
            dashboard_setup
        ]
        jobs = Config.get('jobs')
        jobs = int(jobs) if jobs else None

        failed = []
        with Pool(jobs) as pool:
            pending = [(target.__name__, pool.apply_async(target)) for target in targets]
            for name, result in pending:
                try:
                    # get() re-raises whatever the worker raised.
                    result.get()
                except Exception as exc:
                    failed.append(name)
                    print(colored(f'{name} failed: {exc}', Colors.FAIL))
        if failed:
            sys.exit(1)

    @ensure_outside_container
    def cleanup(self):
        """Remove loopback devices and the saved ceph image tarball."""
        cleanup_box()

    @ensure_inside_container
    def bootstrap(self):
        """Bootstrap the cluster with cephadm; must run inside the seed box.

        Raises:
            RuntimeError: if the CEPHADM_PATH environment variable is unset.
        """
        print('Running bootstrap on seed')
        cephadm_path = os.environ.get('CEPHADM_PATH')
        if not cephadm_path:
            # str(None) used to reach os.stat() as the literal path 'None'.
            raise RuntimeError('CEPHADM_PATH is not set in the environment')

        engine = get_container_engine()
        # NOTE(review): only the restart is assumed to be conditional on the
        # docker engine — confirm against the original indentation.
        if isinstance(engine, DockerEngine):
            engine.restart()
        st = os.stat(cephadm_path)
        os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)

        engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
        # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
        # instead of main branch's tag.
        # The variables are set in this process' environment so the commands
        # started below inherit them; `export` executed through
        # run_shell_command only affected its own short-lived shell.
        os.environ['CEPH_SOURCE_FOLDER'] = '/ceph'
        os.environ['CEPHADM_IMAGE'] = CEPH_IMAGE
        run_shell_command(
            'echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main" >> ~/.bashrc'
        )

        # cephadm prints in warning, let's redirect it to the output so shell_command doesn't
        # complain
        # NOTE(review): '2>&0' points stderr at stdin; '2>&1' may have been
        # intended — left unchanged.
        extra_args = ' '.join(['--skip-pull', '2>&0'])
        skip_monitoring_stack = (
            '--skip-monitoring-stack' if Config.get('skip_monitoring_stack') else ''
        )
        skip_dashboard = '--skip-dashboard' if Config.get('skip_dashboard') else ''

        fsid = Config.get('fsid')
        config_folder = str(Config.get('config_folder'))
        config = str(Config.get('config'))
        keyring = str(Config.get('keyring'))
        if not os.path.exists(config_folder):
            os.mkdir(config_folder)

        # --output-config used to be passed twice with the same value.
        cephadm_bootstrap_command = (
            '$CEPHADM_PATH --verbose bootstrap '
            '--mon-ip "$(hostname -i)" '
            '--allow-fqdn-hostname '
            '--initial-dashboard-password admin '
            '--dashboard-password-noupdate '
            '--shared_ceph_folder /ceph '
            '--allow-overwrite '
            f'--output-config {config} '
            f'--output-keyring {keyring} '
            f'--fsid "{fsid}" '
            '--log-to-file '
            f'{skip_dashboard} '
            f'{skip_monitoring_stack} '
            f'{extra_args} '
        )

        print('Running cephadm bootstrap...')
        run_shell_command(cephadm_bootstrap_command, expect_exit_code=120)
        print('Cephadm bootstrap complete')

        run_shell_command('sudo vgchange --refresh')
        run_shell_command('cephadm ls')
        run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')

        run_cephadm_shell_command('ceph -s')

        print('Bootstrap completed!')

    @ensure_outside_container
    def start(self):
        """Create the boxes, prepare them and bootstrap the cluster on the seed."""
        check_cgroups()
        check_selinux()
        osds = int(Config.get('osds'))
        hosts = int(Config.get('hosts'))
        engine = get_container_engine()

        # ensure boxes don't exist
        self.down()

        # podman is ran without sudo
        if isinstance(engine, PodmanEngine):
            I_am = run_shell_command('whoami')
            if 'root' in I_am:
                print(root_error_msg)
                sys.exit(1)

        print('Checking docker images')
        if not image_exists(CEPH_IMAGE):
            get_ceph_image()
        if not image_exists(BOX_IMAGE):
            get_box_image()

        if not Config.get('skip_create_loop'):
            print('Creating OSD devices...')
            osd.create_loopback_devices(osds)
            print(f'Added {osds} logical volumes in a loopback device')

        print('Starting containers')

        engine.up(hosts)

        containers = engine.get_containers()
        seed = engine.get_seed()
        # Umounting somehow brings back the contents of the host /sys/dev/block.
        # On startup /sys/dev/block is empty. After umount, we can see symlinks again
        # so that lsblk is able to run as expected
        run_dc_shell_command('umount /sys/dev/block', seed)

        run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
        run_shell_command('sudo iptables -P FORWARD ACCEPT')

        # don't update clock with chronyd / setup chronyd on all boxes
        chronyd_setup = """
        sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i
        systemctl daemon-reload
        systemctl start chronyd
        systemctl status --no-pager chronyd
        """
        for container in containers:
            print(colored('Got container:', Colors.OKCYAN), str(container))
        for container in containers:
            run_dc_shell_commands(chronyd_setup, container)

        print('Seting up host ssh servers')
        for container in containers:
            print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container))
            host._setup_ssh(container)

        # argparse stores '--skip-deploy-osds' as 'skip_deploy_osds' (and so
        # on); the hyphenated keys used before never matched, so the flags
        # were silently dropped.
        verbose = '-v' if Config.get('verbose') else ''
        skip_deploy = '--skip-deploy-osds' if Config.get('skip_deploy_osds') else ''
        skip_monitoring_stack = (
            '--skip-monitoring-stack' if Config.get('skip_monitoring_stack') else ''
        )
        skip_dashboard = '--skip-dashboard' if Config.get('skip_dashboard') else ''
        box_bootstrap_command = (
            f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap '
            f'--osds {osds} '
            f'--hosts {hosts} '
            f'{skip_deploy} '
            f'{skip_dashboard} '
            f'{skip_monitoring_stack} '
        )
        print(box_bootstrap_command)
        run_dc_shell_command(box_bootstrap_command, seed)

        # NOTE(review): the nesting below (host setup and osd deployment both
        # guarded by --expanded) is reconstructed — confirm.
        expanded = Config.get('expanded')
        if expanded:
            info = get_boxes_container_info()
            ips = info['ips']
            hostnames = info['hostnames']
            print(ips)
            if hosts > 0:
                host._copy_cluster_ssh_key(ips)
                host._add_hosts(ips, hostnames)
            if not Config.get('skip_deploy_osds'):
                print('Deploying osds... This could take up to minutes')
                osd.deploy_osds(osds)
                print('Osds deployed')

        dashboard_ip = 'localhost'
        info = get_boxes_container_info(with_seed=True)
        if isinstance(engine, DockerEngine):
            for i in range(info['size']):
                if get_seed_name() in info['container_names'][i]:
                    dashboard_ip = info["ips"][i]
        print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN))

        print('Bootstrap finished successfully')

    @ensure_outside_container
    def down(self):
        """Kill and remove the box containers (and pods, with podman)."""
        engine = get_container_engine()
        if isinstance(engine, PodmanEngine):
            containers = json.loads(engine.run('container ls --format json'))
            for container in containers:
                for name in container['Names']:
                    if name.startswith('box_hosts_'):
                        engine.run(f'container kill {name}')
                        engine.run(f'container rm {name}')
            pods = json.loads(engine.run('pod ls --format json'))
            for pod in pods:
                if 'Name' in pod and pod['Name'].startswith('box_pod_host'):
                    name = pod['Name']
                    engine.run(f'pod kill {name}')
                    engine.run(f'pod rm {name}')
        else:
            run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down')
        print('Successfully killed all boxes')

    @ensure_outside_container
    def list(self):
        """Print name, ip and hostname of every box, seed included."""
        info = get_boxes_container_info(with_seed=True)
        for i in range(info['size']):
            ip = info['ips'][i]
            name = info['container_names'][i]
            hostname = info['hostnames'][i]
            print(f'{name} \t{ip} \t{hostname}')

    @ensure_outside_container
    def bash(self):
        """Open an interactive bash session in the seed box."""
        # we need verbose to see the prompt after running shell command
        Config.set('verbose', True)
        print('Seed bash')
        engine = get_container_engine()
        engine.run(f'exec -it {engine.seed_name} bash')


# Maps the first positional command line word to the class implementing it.
targets = {
    'cluster': Cluster,
    'osd': osd.Osd,
    'host': host.Host,
}


def main():
    """Parse the global options and dispatch to the selected target.

    The first element of ``sys.argv`` that names a target selects it; the
    arguments after it are handed to that target's own parser. Without a
    target the help text is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v', action='store_true', dest='verbose', help='be more verbose'
    )
    parser.add_argument(
        '--engine', type=str, default='podman',
        dest='engine', help='choose engine between "docker" and "podman"'
    )

    subparsers = parser.add_subparsers()
    target_instances = {}
    for name, target in targets.items():
        target_instances[name] = target(None, subparsers)

    # count is the index of the element AFTER arg, i.e. where the target's
    # own arguments start.
    for count, arg in enumerate(sys.argv, 1):
        if arg in targets:
            instance = target_instances[arg]
            if hasattr(instance, 'main'):
                instance.argv = sys.argv[count:]
                instance.set_args()
                args = parser.parse_args()
                Config.add_args(vars(args))
                instance.main()
                sys.exit(0)

    parser.print_help()


if __name__ == '__main__':
    main()
+ environment: + - CEPH_BRANCH=master + image: cephadm-box + privileged: true + stop_signal: RTMIN+3 + volumes: + - ../../../:/ceph + - ..:/cephadm + - ./daemon.json:/etc/docker/daemon.json + # dangerous, maybe just map the loopback + # https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container + - /dev:/dev + networks: + - public + mem_limit: "20g" + scale: -1 + seed: + extends: + service: cephadm-host-base + ports: + - "3000:3000" + - "8443:8443" + - "9095:9095" + scale: 1 + hosts: + extends: + service: cephadm-host-base + scale: 3 + + +volumes: + var-lib-docker: +networks: + public: diff --git a/src/cephadm/box/docker-compose.cgroup1.yml b/src/cephadm/box/docker-compose.cgroup1.yml new file mode 100644 index 000000000..ea23dec1e --- /dev/null +++ b/src/cephadm/box/docker-compose.cgroup1.yml @@ -0,0 +1,10 @@ +version: "2.4" + +# If cgroups v2 is disabled then add cgroup fs +services: + seed: + volumes: + - "/sys/fs/cgroup:/sys/fs/cgroup:ro" + hosts: + volumes: + - "/sys/fs/cgroup:/sys/fs/cgroup:ro" diff --git a/src/cephadm/box/docker/ceph/.bashrc b/src/cephadm/box/docker/ceph/.bashrc new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/cephadm/box/docker/ceph/.bashrc diff --git a/src/cephadm/box/docker/ceph/Dockerfile b/src/cephadm/box/docker/ceph/Dockerfile new file mode 100644 index 000000000..b950750e9 --- /dev/null +++ b/src/cephadm/box/docker/ceph/Dockerfile @@ -0,0 +1,3 @@ +FROM quay.ceph.io/ceph-ci/ceph:main +RUN pip3 install packaging +EXPOSE 8443 diff --git a/src/cephadm/box/docker/ceph/locale.conf b/src/cephadm/box/docker/ceph/locale.conf new file mode 100644 index 000000000..00d76c8cd --- /dev/null +++ b/src/cephadm/box/docker/ceph/locale.conf @@ -0,0 +1,2 @@ +LANG="en_US.UTF-8" +LC_ALL="en_US.UTF-8" diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py new file mode 100644 index 000000000..aae16d07f --- /dev/null +++ b/src/cephadm/box/host.py @@ -0,0 +1,120 @@ +import os 
def _as_list(values: Union[List[str], str]) -> List[str]:
    """Return ``values`` as a list; a plain string is split on whitespace."""
    if isinstance(values, str):
        return values.split()
    return list(values)


def _setup_ssh(container: HostContainer):
    """Enable password based root ssh logins on ``container``.

    Inside a box the local sshd is configured and restarted. Outside, the
    same action is re-invoked through box.py inside ``container``.
    """
    if inside_container():
        if not os.path.exists('/root/.ssh/known_hosts'):
            run_shell_command('echo "y" | ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""',
                              expect_error=True)

        run_shell_command('echo "root:root" | chpasswd')
        with open('/etc/ssh/sshd_config', 'a+') as f:
            # Only append options that are not present yet, so repeated runs
            # do not keep growing the file. In 'a+' mode writes always go to
            # the end, whatever the read position is.
            f.seek(0)
            present = {line.strip() for line in f}
            for option in ('PermitRootLogin yes', 'PasswordAuthentication yes'):
                if option not in present:
                    f.write(option + '\n')
            f.flush()
        run_shell_command('systemctl restart sshd')
    else:
        print('Redirecting to _setup_ssh to container')
        verbose = '-v' if Config.get('verbose') else ''
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container.name}',
            container
        )


def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]):
    """Add each (hostname, ip) pair to the orchestrator.

    Inside a box ``ceph orch host add`` is run per pair. Outside, the call is
    forwarded to box.py on the seed box.
    """
    ips = _as_list(ips)
    hostnames = _as_list(hostnames)
    if inside_container():
        assert len(ips) == len(hostnames)
        for hostname, ip in zip(hostnames, ips):
            run_cephadm_shell_command(f'ceph orch host add {hostname} {ip}')
    else:
        print('Redirecting to _add_hosts to container')
        verbose = '-v' if Config.get('verbose') else ''
        print(ips)
        joined_ips = ' '.join(ips)
        joined_hostnames = ' '.join(hostnames)
        seed = get_container_engine().get_seed()
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts {seed.name} --ips {joined_ips} --hostnames {joined_hostnames}',
            seed
        )


def _copy_cluster_ssh_key(ips: Union[List[str], str]):
    """Install /etc/ceph/ceph.pub as an authorized key for root on every ip.

    Inside a box the key is copied with ssh-copy-id to every address that is
    not one of this host's own. Outside, the call is forwarded to box.py on
    the seed box.
    """
    ips = _as_list(ips)
    if inside_container():
        # `hostname -i` may print several addresses separated by spaces, so
        # compare against each of them rather than the whole output.
        local_ips = set(run_shell_command('hostname -i').split())
        for ip in ips:
            if ip not in local_ips:
                run_shell_command(
                    (
                        'sshpass -p "root" ssh-copy-id -f '
                        f'-o StrictHostKeyChecking=no -i /etc/ceph/ceph.pub "root@{ip}"'
                    )
                )

    else:
        print('Redirecting to _copy_cluster_ssh to container')
        verbose = '-v' if Config.get('verbose') else ''
        print(ips)
        joined_ips = ' '.join(ips)
        # assume we only have one seed
        seed = get_container_engine().get_seed()
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {seed.name} --ips {joined_ips}',
            seed
        )


class Host(Target):
    """`host` target: ssh setup, ssh key distribution and host registration."""
    _help = 'Run seed/host related commands'
    actions = ['setup_ssh', 'copy_cluster_ssh_key', 'add_hosts']

    def set_args(self):
        """Register the command line arguments of the `host` target."""
        self.parser.add_argument('action', choices=Host.actions)
        self.parser.add_argument(
            'container_name',
            type=str,
            help='box_{type}_{index}. In docker, type can be seed or hosts. In podman only hosts.'
        )
        self.parser.add_argument('--ips', nargs='*', help='List of host ips')
        self.parser.add_argument(
            '--hostnames', nargs='*', help='List of hostnames ips(relative to ip list)'
        )

    def setup_ssh(self):
        """Set up ssh on the container named by the `container_name` argument."""
        container_name = Config.get('container_name')
        engine = get_container_engine()
        _setup_ssh(engine.get_container(container_name))

    def add_hosts(self):
        """Add hosts; ips and hostnames default to those of the running boxes."""
        ips = Config.get('ips')
        if not ips:
            ips = get_boxes_container_info()['ips']
        hostnames = Config.get('hostnames')
        if not hostnames:
            hostnames = get_boxes_container_info()['hostnames']
        _add_hosts(ips, hostnames)

    def copy_cluster_ssh_key(self):
        """Copy the cluster ssh key; ips default to those of the running boxes."""
        ips = Config.get('ips')
        if not ips:
            ips = get_boxes_container_info()['ips']
        _copy_cluster_ssh_key(ips)
def remove_loop_img() -> None:
    """Delete the file configured as `loop_img`, if it exists."""
    loop_image = Config.get('loop_img')
    if os.path.exists(loop_image):
        os.remove(loop_image)


def create_loopback_devices(osds: int) -> Dict[int, Dict[str, str]]:
    """Create one loopback device per osd and record them in DEVICES_FILE.

    Existing devices are cleaned up first. Returns a dict mapping the osd
    index to ``{'img_name': ..., 'device': ...}``.
    """
    assert osds
    cleanup_osds()
    osd_devs = dict()

    for i in range(osds):
        img_name = f'osd{i}'
        loop_dev = create_loopback_device(img_name)
        osd_devs[i] = dict(img_name=img_name, device=loop_dev)
    with open(DEVICES_FILE, 'w') as dev_file:
        dev_file.write(json.dumps(osd_devs))
    return osd_devs


def create_loopback_device(img_name, size_gb=5):
    """Create a sparse image of ``size_gb`` GB and attach it to a free loop device.

    Returns the path of the loop device.

    Raises:
        RuntimeError: if the output of ``losetup -f`` does not look like a
            numbered device path and the device node has to be created.
    """
    loop_img_dir = Config.get('loop_img_dir')
    run_shell_command(f'mkdir -p {loop_img_dir}')
    loop_img = os.path.join(loop_img_dir, img_name)
    run_shell_command(f'rm -f {loop_img}')
    # count=0 with seek only sets the file size; no data blocks are written.
    run_shell_command(f'dd if=/dev/zero of={loop_img} bs=1 count=0 seek={size_gb}G')
    loop_dev = run_shell_command('sudo losetup -f')
    if not os.path.exists(loop_dev):
        # The node does not exist yet: create it with the minor number taken
        # from the device name.
        match = re.match(r'\/dev\/[^\d]+(\d+)', loop_dev)
        if match is None:
            raise RuntimeError(f'unexpected loop device name: {loop_dev!r}')
        dev_minor = match.group(1)
        run_shell_command(f'sudo mknod -m777 {loop_dev} b 7 {dev_minor}')
        run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
    if os.path.ismount(loop_dev):
        # The os module has no umount(); the original call would have raised
        # AttributeError here.
        run_shell_command(f'sudo umount {loop_dev}')
    run_shell_command(f'sudo losetup {loop_dev} {loop_img}')
    run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
    return loop_dev


def get_lvm_osd_data(data: str) -> Dict[str, str]:
    """Parse ``ceph-volume lvm list <data>`` into a flat dict.

    For every non-header line the last word is the value and the preceding
    words, joined with '_', are the key ("block device" -> "block_device").
    """
    osd_lvm_info = run_cephadm_shell_command(f'ceph-volume lvm list {data}')
    osd_data = {}
    for line in osd_lvm_info.split('\n'):
        words = line.split()
        if not words:
            continue
        if words[0].startswith('===') or words[0].startswith('[block]'):
            continue
        # "block device" key -> "block_device"
        key = '_'.join(words[:-1])
        osd_data[key] = words[-1]
    return osd_data


def load_osd_devices():
    """Return the devices recorded in DEVICES_FILE, or an empty dict."""
    if not os.path.exists(DEVICES_FILE):
        return dict()
    with open(DEVICES_FILE) as dev_file:
        return json.load(dev_file)


@ensure_inside_container
def deploy_osd(data: str, hostname: str) -> bool:
    """Ask the orchestrator to create a raw osd on ``hostname:data``.

    Returns True when the command output reports a created osd.
    """
    out = run_cephadm_shell_command(f'ceph orch daemon add osd {hostname}:{data} raw')
    return 'Created osd(s)' in out


def cleanup_osds() -> None:
    """Detach the recorded loop devices and delete their image files."""
    loop_img_dir = Config.get('loop_img_dir')
    osd_devs = load_osd_devices()
    for osd in osd_devs.values():
        device = osd['device']
        if 'loop' in device:
            loop_img = os.path.join(loop_img_dir, osd['img_name'])
            run_shell_command(f'sudo losetup -d {device}', expect_error=True)
            if os.path.exists(loop_img):
                os.remove(loop_img)
    run_shell_command(f'rm -rf {loop_img_dir}')


def deploy_osds(count: int):
    """Deploy every recorded device as an osd, spreading them over the hosts.

    Each device is retried on its host until the seed reports success. The
    5 second pause now happens only between attempts; the original also
    slept after a successful deployment.

    Args:
        count: accepted for compatibility; the devices recorded in
            DEVICES_FILE decide how many osds are deployed.

    Raises:
        RuntimeError: if there are devices to deploy but no hosts.
    """
    osd_devs = load_osd_devices()
    hosts = get_orch_hosts()
    if osd_devs and not hosts:
        raise RuntimeError('No orchestrator hosts available to deploy osds on')
    host_index = 0
    seed = get_container_engine().get_seed()
    v = '-v' if Config.get('verbose') else ''
    for osd in osd_devs.values():
        while True:
            print(hosts)
            hostname = hosts[host_index]['hostname']
            output = run_dc_shell_command(
                f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}',
                seed
            ).lower()
            if 'created osd' in output or 'already created?' in output:
                break
            print('Waiting 5 seconds to re-run deploy osd...')
            time.sleep(5)
        # NOTE(review): the host is assumed to advance once per device, not
        # once per attempt — confirm against the original indentation.
        host_index = (host_index + 1) % len(hosts)


class Osd(Target):
    """`osd` target: deploy osds and manage their loopback devices."""
    _help = """
    Deploy osds and create needed block devices with loopback devices:
    Actions:
    - deploy: Deploy an osd given a block device
    - create_loop: Create needed loopback devices and block devices in logical volumes
    for a number of osds.
    - destroy: Remove all osds and the underlying loopback devices.
    """
    actions = ['deploy', 'create_loop', 'destroy']

    def set_args(self):
        """Register the command line arguments of the `osd` target."""
        self.parser.add_argument('action', choices=Osd.actions)
        self.parser.add_argument('--data', type=str, help='path to a block device')
        self.parser.add_argument('--hostname', type=str, help='host to deploy osd')
        self.parser.add_argument('--osds', type=int, default=0, help='number of osds')

    def deploy(self):
        """Deploy the device given with --data, or every recorded device."""
        data = Config.get('data')
        hostname = Config.get('hostname')
        if not hostname:
            # assume this host
            hostname = run_shell_command('hostname')
        if not data:
            deploy_osds(Config.get('osds'))
        else:
            deploy_osd(data, hostname)

    @ensure_outside_container
    def create_loop(self):
        """Create the loopback devices for --osds osds."""
        osds = Config.get('osds')
        create_loopback_devices(int(osds))
        print('Successfully created loopback devices')

    @ensure_outside_container
    def destroy(self):
        """Detach and delete all recorded loopback devices."""
        cleanup_osds()
+ """ + actions = ['deploy', 'create_loop', 'destroy'] + + def set_args(self): + self.parser.add_argument('action', choices=Osd.actions) + self.parser.add_argument('--data', type=str, help='path to a block device') + self.parser.add_argument('--hostname', type=str, help='host to deploy osd') + self.parser.add_argument('--osds', type=int, default=0, help='number of osds') + + def deploy(self): + data = Config.get('data') + hostname = Config.get('hostname') + if not hostname: + # assume this host + hostname = run_shell_command('hostname') + if not data: + deploy_osds(Config.get('osds')) + else: + deploy_osd(data, hostname) + + @ensure_outside_container + def create_loop(self): + osds = Config.get('osds') + create_loopback_devices(int(osds)) + print('Successfully created loopback devices') + + @ensure_outside_container + def destroy(self): + cleanup_osds() diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py new file mode 100644 index 000000000..7dcf883f8 --- /dev/null +++ b/src/cephadm/box/util.py @@ -0,0 +1,421 @@ +import json +import os +import subprocess +import sys +import copy +from abc import ABCMeta, abstractmethod +from enum import Enum +from typing import Any, Callable, Dict, List + +class Colors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +class Config: + args = { + 'fsid': '00000000-0000-0000-0000-0000deadbeef', + 'config_folder': '/etc/ceph/', + 'config': '/etc/ceph/ceph.conf', + 'keyring': '/etc/ceph/ceph.keyring', + 'loop_img': 'loop-images/loop.img', + 'engine': 'podman', + 'docker_yaml': 'docker-compose-docker.yml', + 'docker_v1_yaml': 'docker-compose.cgroup1.yml', + 'podman_yaml': 'docker-compose-podman.yml', + 'loop_img_dir': 'loop-images', + } + + @staticmethod + def set(key, value): + Config.args[key] = value + + @staticmethod + def get(key): + if key in Config.args: + return 
def run_shell_command(command: str, expect_error=False, verbose=True, expect_exit_code=0) -> str:
    """Run ``command`` through the shell and return its stripped stdout.

    stdout is streamed as it arrives so that, when the ``verbose`` config
    option is set and ``verbose`` is true, output is echoed in real time.
    stderr is collected concurrently and only used for error reporting.

    :param command: shell command line to execute (run with ``shell=True``).
    :param expect_error: when true, a non-zero exit status is not an error.
    :param verbose: when false, suppress echoing of the command's stdout even
        if the ``verbose`` config option is set.
    :param expect_exit_code: an additional exit status that is accepted as
        success.
    :returns: the command's stdout, decoded as latin-1 and stripped.
    :raises RuntimeError: if the command exits non-zero, ``expect_error`` is
        false and the status differs from ``expect_exit_code``.
    """
    # Local import: this module does not import threading at file level.
    import threading

    echo_enabled = bool(Config.get('verbose'))
    if echo_enabled:
        print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}')

    # The context manager closes both pipes and reaps the child on exit.
    with subprocess.Popen(
        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    ) as process:
        err_chunks: List[bytes] = []

        def _drain_stderr() -> None:
            # Reading stderr in parallel keeps the child from blocking on a
            # full stderr pipe while we are still consuming stdout.
            err_chunks.append(process.stderr.read())

        stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_reader.start()

        out_chunks: List[str] = []
        while True:
            # read1() returns as soon as some data is available, so output is
            # still shown in real time without reading one byte per call.
            chunk = process.stdout.read1(4096)
            if not chunk:
                break  # EOF on stdout
            # latin1 maps every byte to one character, so decoding chunk by
            # chunk can never split a multi-byte sequence.
            text = chunk.decode('latin1')
            if echo_enabled and verbose:
                sys.stdout.write(text)
                sys.stdout.flush()
            out_chunks.append(text)

        process.wait()
        stderr_reader.join()

    err = b''.join(err_chunks).decode('latin1').strip()
    out = ''.join(out_chunks).strip()

    if process.returncode != 0 and not expect_error and process.returncode != expect_exit_code:
        err = colored(err, Colors.FAIL)
        raise RuntimeError(f'Failed command: {command}\n{err}\nexit code: {process.returncode}')
    return out
def get_orch_hosts():
    """Return the parsed JSON result of ``ceph orch host ls``.

    Inside a box container the query goes through the cephadm shell helper;
    otherwise it is executed in the seed container through the container
    engine. Only the last line of the command output is parsed as JSON.
    """
    if inside_container():
        raw_output = run_cephadm_shell_command('ceph orch host ls --format json')
    else:
        raw_output = run_dc_shell_command(
            'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json',
            get_container_engine().get_seed(),
        )
    # Anything printed before the JSON document is discarded.
    last_line = raw_output.split('\n')[-1]
    return json.loads(last_line)
def get_containers(self) -> List[HostContainer]:
    """List the running box containers known to this engine.

    Runs ``<engine> ps --format json`` using this engine's own command
    (previously ``podman`` was hard-coded, so the docker engine listed
    podman's containers) and classifies each container by name: the one
    named ``seed_name`` is the seed, names starting with
    ``host_name_prefix`` are hosts, everything else is ignored.

    :returns: the seed and host containers, in the order reported.
    :raises RuntimeError: if a reported container has no name.
    """
    raw = run_shell_command(f'{self.command} ps --format json')
    try:
        ps_out = json.loads(raw)
    except json.JSONDecodeError:
        # NOTE(review): docker is understood to emit one JSON object per
        # line rather than a single array -- confirm against the docker
        # version in use.
        ps_out = [json.loads(line) for line in raw.splitlines() if line.strip()]
    if isinstance(ps_out, dict):
        ps_out = [ps_out]

    containers = []
    for container in ps_out:
        names = container.get('Names')
        if isinstance(names, str):
            # NOTE(review): presumably docker reports names as one
            # comma-separated string where podman reports a list -- confirm.
            names = [n.strip() for n in names.split(',') if n.strip()]
        if not names:
            raise RuntimeError(f'Container {container} missing name')
        name = names[0]
        if name == self.seed_name:
            containers.append(HostContainer(name, BoxType.SEED))
        elif name.startswith(self.host_name_prefix):
            containers.append(HostContainer(name, BoxType.HOST))
    return containers
class PodmanEngine(ContainerEngine):
    """Container engine implementation that drives the box with podman."""

    command = 'podman'
    seed_name = 'box_hosts_0'
    dockerfile = 'DockerfilePodman'

    # Capabilities added to every box container (passed as --cap-add).
    CAPS = [
        "SYS_ADMIN",
        "NET_ADMIN",
        "SYS_TIME",
        "SYS_RAWIO",
        "MKNOD",
        "NET_RAW",
        "SETUID",
        "SETGID",
        "CHOWN",
        "SYS_PTRACE",
        "SYS_TTY_CONFIG",
        "CAP_AUDIT_WRITE",
        "CAP_AUDIT_CONTROL",
    ]

    # Bind mounts passed as -v to every box container.
    # NOTE(review): '/sys/block:/dev/block' maps to a different path than the
    # other entries do -- confirm this is intentional.
    VOLUMES = [
        '../../../:/ceph:z',
        '../:/cephadm:z',
        '/run/udev:/run/udev',
        '/sys/dev/block:/sys/dev/block',
        '/sys/fs/cgroup:/sys/fs/cgroup:ro',
        '/dev/fuse:/dev/fuse',
        '/dev/disk:/dev/disk',
        '/sys/devices/virtual/block:/sys/devices/virtual/block',
        '/sys/block:/dev/block',
        '/dev/mapper:/dev/mapper',
        '/dev/mapper/control:/dev/mapper/control',
    ]

    # Paths mounted as tmpfs in every box container.
    TMPFS = ['/run', '/tmp']

    # FIXME: right now we are assuming every service will be exposed through the seed, but this is far
    # from the truth. Services can be deployed on different hosts so we need a system to manage this.
    SEED_PORTS = [
        8443,  # dashboard
        3000,  # grafana
        9093,  # alertmanager
        9095   # prometheus
    ]

    def setup_podman_env(self, hosts: int = 1, osd_devs=None):
        """Create the box network if needed and start the box containers.

        :param hosts: number of host containers; one extra container
            (index 0) is always started as the seed.
        :param osd_devs: mapping whose values are dicts with a ``"device"``
            key; each device is passed through to every container. ``None``
            (the default) means no devices. The default used to be a shared
            mutable ``{}``.
        """
        osd_devs = osd_devs or {}

        network_name = 'box_network'
        networks = run_shell_command('podman network ls')
        if network_name not in networks:
            run_shell_command(f'podman network create -d bridge {network_name}')

        # Options shared by the seed and all hosts.
        args = [
            '--group-add', 'keep-groups',
            '--device', '/dev/fuse',
            '-it',
            '-d',
            '-e', 'CEPH_BRANCH=main',
            '--stop-signal', 'RTMIN+3'
        ]

        for cap in self.CAPS:
            args.extend(['--cap-add', cap])

        for volume in self.VOLUMES:
            args.extend(['-v', volume])

        for tmp in self.TMPFS:
            args.extend(['--tmpfs', tmp])

        for osd_dev in osd_devs.values():
            device = osd_dev["device"]
            args.extend(['--device', f'{device}:{device}'])

        for host in range(hosts + 1):  # 0 will be the seed
            options = copy.copy(args)
            options.extend(['--name', f'box_hosts_{host}'])
            options.extend(['--network', f'{network_name}'])
            if host == 0:
                # Only the seed publishes the service ports.
                for port in self.SEED_PORTS:
                    options.extend(['-p', f'{port}:{port}'])

            options.append('cephadm-box')
            options = ' '.join(options)

            run_shell_command(f'podman run {options}')

    def up(self, hosts: int):
        """Start the seed plus ``hosts`` host containers with the stored OSD devices."""
        # Imported here rather than at module level, as in the original.
        import osd
        self.setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices())
+ +import argparse +import compileall +import logging +import os +import pathlib +import shutil +import subprocess +import tempfile +import sys + +HAS_ZIPAPP = False +try: + import zipapp + + HAS_ZIPAPP = True +except ImportError: + pass + + +log = logging.getLogger(__name__) + + +_VALID_VERS_VARS = [ + "CEPH_GIT_VER", + "CEPH_GIT_NICE_VER", + "CEPH_RELEASE", + "CEPH_RELEASE_NAME", + "CEPH_RELEASE_TYPE", +] + + +def _reexec(python): + """Switch to the selected version of python by exec'ing into the desired + python path. + Sets the _BUILD_PYTHON_SET env variable as a sentinel to indicate exec has + been performed. + """ + env = os.environ.copy() + env["_BUILD_PYTHON_SET"] = python + os.execvpe(python, [python, __file__] + sys.argv[1:], env) + + +def _did_rexec(): + """Returns true if the process has already exec'ed into the desired python + version. + """ + return bool(os.environ.get("_BUILD_PYTHON_SET", "")) + + +def _build(dest, src, versioning_vars=None): + """Build the binary.""" + os.chdir(src) + tempdir = pathlib.Path(tempfile.mkdtemp(suffix=".cephadm.build")) + log.debug("working in %s", tempdir) + try: + if os.path.isfile("requirements.txt"): + _install_deps(tempdir) + log.info("Copying contents") + # TODO: currently the only file relevant to a compiled cephadm is the + # cephadm.py file. Once cephadm is broken up into multiple py files + # (and possibly other libs from python-common, etc) we'll want some + # sort organized structure to track what gets copied into the + # dir to be zipped. For now we just have a simple call to copy + # (and rename) the one file we care about. 
def _compile(dest, tempdir):
    """Byte-compile the staged sources and pack them into a zipapp.

    ``tempdir`` is the staging directory holding the sources; ``dest`` is the
    path of the archive to create. A compressed archive is attempted first;
    if ``zipapp.create_archive`` rejects the ``compressed`` argument with a
    ``TypeError`` the archive is built uncompressed instead.
    """
    log.info("Byte-compiling py to pyc")
    compileall.compile_dir(tempdir, maxlevels=16, legacy=True, quiet=1, workers=0)
    # TODO we could explicitly pass a python version here
    log.info("Constructing the zipapp file")
    archive_kwargs = {
        "source": tempdir,
        "target": dest,
        "interpreter": sys.executable,
    }
    try:
        zipapp.create_archive(compressed=True, **archive_kwargs)
        log.info("Zipapp created with compression")
    except TypeError:
        # automatically fall back to uncompressed
        zipapp.create_archive(**archive_kwargs)
        log.info("Zipapp created without compression")
def version_kv_pair(value):
    """argparse ``type=`` callable that parses a ``KEY=VALUE`` argument.

    The text is split at the first ``=``. Returns a ``(key, value)`` tuple.
    Raises ``argparse.ArgumentTypeError`` when there is no ``=`` or when the
    key is not listed in ``_VALID_VERS_VARS``.
    """
    name, separator, setting = value.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError(f"not a key=value pair: {value!r}")
    if name in _VALID_VERS_VARS:
        return name, setting
    raise argparse.ArgumentTypeError(f"Unexpected key: {name!r}")
#!/bin/bash -ex
# Thin wrapper that runs build.py (located next to this script) with python3,
# forwarding all command line arguments unchanged.

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# The path is quoted so a checkout directory containing spaces is not
# word-split into several arguments.
exec python3 "$SCRIPT_DIR/build.py" "$@"
'quay.io/ceph/ceph:v18' +DEFAULT_IMAGE_IS_MAIN = False +DEFAULT_IMAGE_RELEASE = 'reef' +DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0' +DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0' +DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0' +DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0' +DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0' +DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7' +DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' +DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4' +DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1' +DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' +DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' +DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' +DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' +DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' +DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this +# ------------------------------------------------------------------------------ + +LATEST_STABLE_RELEASE = 'reef' +DATA_DIR = '/var/lib/ceph' +LOG_DIR = '/var/log/ceph' +LOCK_DIR = '/run/cephadm' +LOGROTATE_DIR = '/etc/logrotate.d' +SYSCTL_DIR = '/etc/sysctl.d' +UNIT_DIR = '/etc/systemd/system' +CEPH_CONF_DIR = 'config' +CEPH_CONF = 'ceph.conf' +CEPH_PUBKEY = 'ceph.pub' +CEPH_KEYRING = 'ceph.client.admin.keyring' +CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}' +CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}' +CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}' +LOG_DIR_MODE = 0o770 +DATA_DIR_MODE = 0o700 +DEFAULT_MODE = 0o600 +CONTAINER_INIT = True +MIN_PODMAN_VERSION = (2, 0, 2) +CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0) +PIDS_LIMIT_UNLIMITED_PODMAN_VERSION = (3, 4, 1) +CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ ' +DEFAULT_TIMEOUT = None # in seconds +DEFAULT_RETRY = 15 +DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' 
class EndPoint:
    """An ``ip:port`` pair; ``str()`` and ``repr()`` both render as ``ip:port``."""

    def __init__(self, ip: str, port: int) -> None:
        self.ip, self.port = ip, port

    def __str__(self) -> str:
        return '{}:{}'.format(self.ip, self.port)

    def __repr__(self) -> str:
        return '{}:{}'.format(self.ip, self.port)
class CephadmContext:
    """Single object exposing both the base configuration and parsed CLI args.

    Attribute reads and writes are forwarded to the ``BaseConfig`` instance
    first and to the ``argparse.Namespace`` second; anything neither of them
    knows about lives on the context object itself.
    """

    def __init__(self) -> None:
        # Written through __dict__ to bypass __setattr__ below, which reads
        # self._conf / self._args before they would exist.
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        """Copy matching values from ``args`` into the config and keep ``args``."""
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        """Return whether the stored args namespace contains ``func``.

        NOTE(review): presumably ``func`` is the sub-command handler; this
        assumes ``set_args`` was called first -- confirm against callers.
        """
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        """Support ``name in ctx`` as a shorthand for ``hasattr(ctx, name)``."""
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        """Resolve attributes not found normally: config first, then args."""
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            # Falls back to the default lookup for names found in neither.
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        """Store on the config if it has ``name``, else on args, else on self."""
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
class ExcludeErrorsFilter(logging.Filter):
    """Logging filter that drops records at WARNING level and above."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True only for records whose level is below WARNING."""
        if record.levelno >= logging.WARNING:
            return False
        return True
class SNMPGateway:
    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
    daemon_type = 'snmp-gateway'
    SUPPORTED_VERSIONS = ['V2c', 'V3']
    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    DEFAULT_PORT = 9464
    env_filename = 'snmp-gateway.conf'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str,
                 daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: Optional[str] = None) -> None:
        """Read the gateway settings from ``config_json`` and validate them.

        Missing keys fall back to the defaults given below. Raises ``Error``
        (via ``validate``) when the settings are unusable.
        """
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image or SNMPGateway.default_image

        self.uid = config_json.get('uid', 0)
        self.gid = config_json.get('gid', 0)

        self.destination = config_json.get('destination', '')
        self.snmp_version = config_json.get('snmp_version', 'V2c')
        self.snmp_community = config_json.get('snmp_community', 'public')
        self.log_level = config_json.get('log_level', 'info')
        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'SNMPGateway':
        """Build an instance from the configs fetched for ``ctx``."""
        cfgs = fetch_configs(ctx)
        assert cfgs  # assert some config data was found
        return cls(ctx, fsid, daemon_id, cfgs, ctx.image)

    @staticmethod
    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
        """Return the version of the notifier from its http endpoint

        The port is taken from the daemon's ``unit.meta`` file. ``None`` is
        returned when the metadata cannot be read, lists no ports, the
        endpoint cannot be fetched, or no version string is found in the
        page.
        """
        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
        try:
            with open(path, 'r') as env:
                metadata = json.loads(env.read())
        except (OSError, json.JSONDecodeError):
            return None

        ports = metadata.get('ports', [])
        if not ports:
            return None

        try:
            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
                html = r.read().decode('utf-8').split('\n')
        except (HTTPError, URLError):
            return None

        for h in html:
            stripped = h.strip()
            if stripped.startswith(('<pre>', '<PRE>')) and \
               stripped.endswith(('</pre>', '</PRE>')):
                # <pre>(version=1.2.1, branch=HEAD, revision=7...
                pieces = stripped.split(',')[0].split('version=')
                # A <pre> line without 'version=' used to raise IndexError;
                # skip it and keep scanning instead.
                if len(pieces) > 1:
                    return pieces[1]

        return None

    @property
    def port(self) -> int:
        """First TCP port fetched for the context, or ``DEFAULT_PORT`` if none."""
        endpoints = fetch_tcp_ports(self.ctx)
        if not endpoints:
            return self.DEFAULT_PORT
        return endpoints[0].port

    def get_daemon_args(self) -> List[str]:
        """Return the command line arguments for the snmp-notifier daemon."""
        v3_args = []
        base_args = [
            f'--web.listen-address=:{self.port}',
            f'--snmp.destination={self.destination}',
            f'--snmp.version={self.snmp_version}',
            f'--log.level={self.log_level}',
            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
        ]

        if self.snmp_version == 'V3':
            # common auth settings
            v3_args.extend([
                '--snmp.authentication-enabled',
                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
            ])
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:
                v3_args.extend([
                    '--snmp.private-enabled',
                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
                ])

        return base_args + v3_args

    @property
    def data_dir(self) -> str:
        """Directory holding this daemon's data.

        NOTE(review): this uses ``self.ctx.fsid`` while the rest of the class
        uses ``self.fsid`` -- confirm the two are always the same value.
        """
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        """Path of the environment file inside ``data_dir``."""
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        with write_new(self.conf_file_path) as f:
            if self.snmp_version == 'V2c':
                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
            else:
                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
                if self.snmp_v3_priv_password:
                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: destination IP and port address missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
@contextmanager
def write_new(
    destination: Union[str, Path],
    *,
    owner: Optional[Tuple[int, int]] = None,
    perms: Optional[int] = DEFAULT_MODE,
    encoding: Optional[str] = None,
) -> Generator[IO, None, None]:
    """Write a new file in a robust manner, optionally specifying the owner,
    permissions, or encoding. This function takes care to never leave a file in
    a partially-written state due to a crash or power outage by writing to a
    temporary file and then renaming that temp file over to the final
    destination once all data is written. Note that the temporary files can be
    leaked but only for a "crash" or power outage - regular exceptions will
    clean up the temporary file.

    :param destination: final path of the file; made absolute before use.
    :param owner: optional ``(uid, gid)`` applied to the file before rename.
    :param perms: optional permission bits applied before rename; ``None``
        leaves the mode untouched.
    :param encoding: optional text encoding passed to ``open``.
    :returns: a context manager yielding the open, writable temp file.
    :raises Exception: whatever the body or the file operations raise, after
        the temporary file has been removed.
    """
    destination = os.path.abspath(destination)
    tempname = f'{destination}.new'
    open_kwargs: Dict[str, Any] = {}
    if encoding:
        open_kwargs['encoding'] = encoding
    try:
        with open(tempname, 'w', **open_kwargs) as fh:
            yield fh
            # Push the data to disk before metadata changes and the rename.
            fh.flush()
            os.fsync(fh.fileno())
            if owner is not None:
                os.fchown(fh.fileno(), *owner)
            if perms is not None:
                os.fchmod(fh.fileno(), perms)
    except Exception:
        try:
            os.unlink(tempname)
        except FileNotFoundError:
            # open() itself failed, so there is no temp file to remove; do
            # not let the cleanup error mask the original exception.
            pass
        raise
    os.rename(tempname, destination)
get_container_mounts(self, data_dir): + # type: (str) -> Dict[str, str] + mounts = dict() + mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' + mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' + mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z' + if self.rgw: + cluster = self.rgw.get('cluster', 'ceph') + rgw_user = self.rgw.get('user', 'admin') + mounts[os.path.join(data_dir, 'keyring.rgw')] = \ + '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user) + return mounts + + @staticmethod + def get_container_envs(): + # type: () -> List[str] + envs = [ + 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF) + ] + return envs + + @staticmethod + def get_version(ctx, container_id): + # type: (CephadmContext, str) -> Optional[str] + version = None + out, err, code = call(ctx, + [ctx.container_engine.path, 'exec', container_id, + NFSGanesha.entrypoint, '-v'], + verbosity=CallVerbosity.QUIET) + if code == 0: + match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out) + if match: + version = match.group(1) + return version + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error('required file missing from config-json: %s' % fname) + + # check for an RGW config + if self.rgw: + if not self.rgw.get('keyring'): + raise Error('RGW keyring is missing') + if not self.rgw.get('user'): + raise Error('RGW user is missing') + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' 
% (cname, desc) + return cname + + def get_daemon_args(self): + # type: () -> List[str] + return self.daemon_args + self.extra_args + + def create_daemon_dirs(self, data_dir, uid, gid): + # type: (str, int, int) -> None + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + logger.info('Creating ganesha config...') + + # create the ganesha conf dir + config_dir = os.path.join(data_dir, 'etc/ganesha') + makedirs(config_dir, uid, gid, 0o755) + + # populate files from the config-json + populate_files(config_dir, self.files, uid, gid) + + # write the RGW keyring + if self.rgw: + keyring_path = os.path.join(data_dir, 'keyring.rgw') + with write_new(keyring_path, owner=(uid, gid)) as f: + f.write(self.rgw.get('keyring', '')) + +################################## + + +class CephIscsi(object): + """Defines a Ceph-Iscsi container""" + + daemon_type = 'iscsi' + entrypoint = '/usr/bin/rbd-target-api' + + required_files = ['iscsi-gateway.cfg'] + + def __init__(self, + ctx, + fsid, + daemon_id, + config_json, + image=DEFAULT_IMAGE): + # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.files = dict_get(config_json, 'files', {}) + + # validate the supplied args + self.validate() + + @classmethod + def init(cls, ctx, fsid, daemon_id): + # type: (CephadmContext, str, Union[int, str]) -> CephIscsi + return cls(ctx, fsid, daemon_id, + fetch_configs(ctx), ctx.image) + + @staticmethod + def get_container_mounts(data_dir, log_dir): + # type: (str, str) -> Dict[str, str] + mounts = dict() + mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' + mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' + mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z' + mounts[os.path.join(data_dir, 'configfs')] = 
'/sys/kernel/config' + mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh' + mounts[log_dir] = '/var/log:z' + mounts['/dev'] = '/dev' + return mounts + + @staticmethod + def get_container_binds(): + # type: () -> List[List[str]] + binds = [] + lib_modules = ['type=bind', + 'source=/lib/modules', + 'destination=/lib/modules', + 'ro=true'] + binds.append(lib_modules) + return binds + + @staticmethod + def get_version(ctx, container_id): + # type: (CephadmContext, str) -> Optional[str] + version = None + out, err, code = call(ctx, + [ctx.container_engine.path, 'exec', container_id, + '/usr/bin/python3', '-c', + "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"], + verbosity=CallVerbosity.QUIET) + if code == 0: + version = out.strip() + return version + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error('required file missing from config-json: %s' % fname) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + def create_daemon_dirs(self, data_dir, uid, gid): + # type: (str, int, int) -> None + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + logger.info('Creating ceph-iscsi config...') + configfs_dir = os.path.join(data_dir, 'configfs') + makedirs(configfs_dir, uid, gid, 0o755) + + # set 
up the tcmu-runner entrypoint script + # to be mounted into the container. For more info + # on why we need this script, see the + # tcmu_runner_entrypoint_script function + self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script() + + # populate files from the config-json + populate_files(data_dir, self.files, uid, gid) + + # we want the tcmu runner entrypoint script to be executable + # populate_files will give it 0o600 by default + os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700) + + @staticmethod + def configfs_mount_umount(data_dir, mount=True): + # type: (str, bool) -> List[str] + mount_path = os.path.join(data_dir, 'configfs') + if mount: + cmd = 'if ! grep -qs {0} /proc/mounts; then ' \ + 'mount -t configfs none {0}; fi'.format(mount_path) + else: + cmd = 'if grep -qs {0} /proc/mounts; then ' \ + 'umount {0}; fi'.format(mount_path) + return cmd.split() + + @staticmethod + def tcmu_runner_entrypoint_script() -> str: + # since we are having tcmu-runner be a background + # process in its systemd unit (rbd-target-api being + # the main process) systemd will not restart it when + # it fails. in order to try and get around that for now + # we can have a script mounted in the container that + # that attempts to do the restarting for us. This script + # can then become the entrypoint for the tcmu-runner + # container + + # This is intended to be dropped for a better solution + # for at least the squid release onward + return """#!/bin/bash +RUN_DIR=/var/run/tcmu-runner + +if [ ! 
class CephNvmeof(object):
    """Defines a Ceph-Nvmeof container"""

    daemon_type = 'nvmeof'
    required_files = ['ceph-nvmeof.conf']
    default_image = DEFAULT_NVMEOF_IMAGE

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_NVMEOF_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx, self.fsid = ctx, fsid
        self.daemon_id, self.image = daemon_id, image

        # 'files' is the only config-json option read here
        self.files = dict_get(config_json, 'files', {})

        # reject unusable arguments right away
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
        """Build an instance from the configs fetched for ``ctx``."""
        config_json = fetch_configs(ctx)
        return cls(ctx, fsid, daemon_id, config_json, ctx.image)

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map host paths (mostly below ``data_dir``) to container paths."""
        return {
            os.path.join(data_dir, 'config'): '/etc/ceph/ceph.conf:z',
            os.path.join(data_dir, 'keyring'): '/etc/ceph/keyring:z',
            os.path.join(data_dir, 'ceph-nvmeof.conf'): '/src/ceph-nvmeof.conf:z',
            os.path.join(data_dir, 'configfs'): '/sys/kernel/config',
            '/dev/hugepages': '/dev/hugepages',
            '/dev/vfio/vfio': '/dev/vfio/vfio',
        }

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        """Return the bind mounts: a read-only bind of /lib/modules."""
        return [[
            'type=bind',
            'source=/lib/modules',
            'destination=/lib/modules',
            'ro=true',
        ]]

    @staticmethod
    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
        """Read the ``io.ceph.version`` label of ``ctx.image``.

        ``container_id`` is accepted but not used by this implementation.
        Returns None when the inspect command exits non-zero.
        """
        out, _err, ret = call(ctx,
                              [ctx.container_engine.path, 'inspect',
                               '--format', '{{index .Config.Labels "io.ceph.version"}}',
                               ctx.image])
        return out.strip() if ret == 0 else None

    def validate(self):
        # type: () -> None
        """Raise ``Error`` on a bad fsid, daemon id, image or missing file."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # every required file has to be supplied through config-json
        for fname in self.required_files or []:
            if fname not in self.files:
                raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        # NOTE(review): no 'ceph-' prefix here, unlike the sibling daemon
        # classes in this file -- confirm whether that is intentional.
        base = '%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-nvmeof config...')
        makedirs(os.path.join(data_dir, 'configfs'), uid, gid, 0o755)

        # write out everything supplied under 'files' in the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Return the whitespace-split shell snippet that mounts (or, with
        ``mount=False``, unmounts) configfs at ``<data_dir>/configfs`` only
        when /proc/mounts says it is needed."""
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            template = 'if ! grep -qs {0} /proc/mounts; then ' \
                       'mount -t configfs none {0}; fi'
        else:
            template = 'if grep -qs {0} /proc/mounts; then ' \
                       'umount {0}; fi'
        return template.format(mount_path).split()

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            'vm.nr_hugepages = 4096',
        ]
class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx, self.fsid = ctx, fsid
        self.daemon_id, self.image = daemon_id, image

        # 'files' is the only config-json option read here
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Build an instance from the configs fetched for ``ctx``."""
        config_json = fetch_configs(ctx)
        return cls(ctx, fsid, daemon_id, config_json, ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # HAproxy gets its own sub-directory; create it on first use and
        # write the config-json files into it
        haproxy_dir = os.path.join(data_dir, 'haproxy')
        if not os.path.isdir(haproxy_dir):
            makedirs(haproxy_dir, uid, gid, DATA_DIR_MODE)

        populate_files(haproxy_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise ``Error`` on a bad fsid, daemon id, image or missing file."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # every required file has to be supplied through config-json
        for fname in self.required_files or []:
            if fname not in self.files:
                raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        base = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Mount ``<data_dir>/haproxy`` at /var/lib/haproxy."""
        return {os.path.join(data_dir, 'haproxy'): '/var/lib/haproxy'}

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]
makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE) + + # populate files from the config-json + populate_files(data_dir, self.files, uid, gid) + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error('required file missing from config-json: %s' % fname) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + @staticmethod + def get_container_envs(): + # type: () -> List[str] + envs = [ + 'KEEPALIVED_AUTOCONF=false', + 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', + 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf', + 'KEEPALIVED_DEBUG=false' + ] + return envs + + @staticmethod + def get_sysctl_settings() -> List[str]: + return [ + '# IP forwarding and non-local bind', + 'net.ipv4.ip_forward = 1', + 'net.ipv4.ip_nonlocal_bind = 1', + ] + + def extract_uid_gid_keepalived(self) -> Tuple[int, int]: + # better directory for this? 
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: 'CephadmContext',
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Build an instance from the configs fetched for ``ctx``."""
        return cls(fsid, daemon_id,
                   fetch_configs(ctx), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.

        :param data_dir: existing directory that receives the entries.
        :param uid: owner applied to every created directory and file.
        :param gid: group applied to every created directory and file.
        :raises OSError: if ``data_dir`` is not a directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            # strip slashes so the entry always lands below data_dir
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join leaves an absolute source untouched
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        The configured ``bind_mounts`` are left untouched; a new list of new
        lists is returned.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        binds = []
        for bind in self.bind_mounts:
            resolved = []
            for value in bind:
                match = re.match(r'^source=(.+)$', value)
                if match:
                    value = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
                resolved.append(value)
            binds.append(resolved)
        return binds

##################################


def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create ``file_path`` if needed (updating its timestamps otherwise)
    and, when both ``uid`` and ``gid`` are given, chown it to them.

    Ids are compared against None so that 0 (root) is honoured.
    """
    Path(file_path).touch()
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
def dict_get_join(d: Dict[str, Any], key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break. A missing key yields `None`.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value

##################################


def get_supported_daemons():
    # type: () -> List[str]
    """Return the daemon type names collected from the daemon classes."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.append(NFSGanesha.daemon_type)
    supported_daemons.append(CephIscsi.daemon_type)
    supported_daemons.append(CephNvmeof.daemon_type)
    supported_daemons.append(CustomContainer.daemon_type)
    supported_daemons.append(HAproxy.daemon_type)
    supported_daemons.append(Keepalived.daemon_type)
    supported_daemons.append(CephadmAgent.daemon_type)
    supported_daemons.append(SNMPGateway.daemon_type)
    supported_daemons.extend(Tracing.components)
    # sanity check: no daemon type may be registered twice
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons

##################################


class PortOccupiedError(Error):
    """Raised by attempt_bind when the address/port is already in use."""
    pass


def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try to bind socket ``s`` to ``(address, port)``; always close ``s``.

    :raises PortOccupiedError: the bind failed with EADDRINUSE.
    :raises OSError: any other OS-level failure, re-raised unchanged.
    :raises Error: any non-OSError failure, wrapped.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            logger.warning(msg)
            raise PortOccupiedError(msg) from e
        # bare raise keeps the original traceback intact
        raise
    except Exception as e:
        raise Error(e) from e
    finally:
        s.close()
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        """
        :param lock_file: path of the lock file that could not be acquired.
        """
        #: The path of the file lock.
        self.lock_file = lock_file
        return None

    def __str__(self) -> str:
        temp = "The file lock '{}' could not be acquired."\
            .format(self.lock_file)
        return temp


class _Acquire_ReturnProxy(object):
    """Context-manager proxy returned by FileLock.acquire(); leaving the
    ``with`` block releases the wrapped lock once."""

    def __init__(self, lock: 'FileLock') -> None:
        self.lock = lock
        return None

    def __enter__(self) -> 'FileLock':
        return self.lock

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.lock.release()
        return None


class FileLock(object):
    """flock()-based lock on ``<LOCK_DIR>/<name>.lock`` with a counter that
    allows nested acquire/release pairs on the same object."""

    def __init__(self, ctx: 'CephadmContext', name: str, timeout: int = -1) -> None:
        # Initialise the lock state before anything that can raise, so that
        # __del__ finds a consistent object even if setup below fails.

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        self.timeout = timeout
        self.ctx = ctx

        if not os.path.exists(LOCK_DIR):
            try:
                os.mkdir(LOCK_DIR, 0o700)
            except FileExistsError:
                # another process created it between the check and mkdir
                pass
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        return None

    @property
    def is_locked(self) -> bool:
        # getattr: stay usable on an instance whose __init__ did not finish
        return getattr(self, '_lock_file_fd', None) is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
                               lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
                               lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.log(
                        QUIET_LOG_LEVEL,
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completely released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # No logging here: release() also runs from __del__ during
                # interpreter shutdown, where the logging machinery may
                # already be torn down (NameError: name 'open' is not defined).
                self._release()
                self._lock_counter = 0

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        """Make one non-blocking attempt; on success remember the fd."""
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # lock not obtained; the caller decides whether to retry
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        """Unlock and close the held descriptor."""
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        finally:
            # always close, even if the unlock call fails
            os.close(fd)  # type: ignore
        return None
Normal Operation: INFO, Error: INFO + VERBOSE = 5 + + def success_log_level(self) -> int: + _verbosity_level_to_log_level = { + self.SILENT: 0, + self.QUIET: QUIET_LOG_LEVEL, + self.DEBUG: logging.DEBUG, + self.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL, + self.VERBOSE_ON_FAILURE: logging.DEBUG, + self.VERBOSE: logging.INFO + } + return _verbosity_level_to_log_level[self] # type: ignore + + def error_log_level(self) -> int: + _verbosity_level_to_log_level = { + self.SILENT: 0, + self.QUIET: QUIET_LOG_LEVEL, + self.DEBUG: logging.DEBUG, + self.QUIET_UNLESS_ERROR: logging.INFO, + self.VERBOSE_ON_FAILURE: logging.INFO, + self.VERBOSE: logging.INFO + } + return _verbosity_level_to_log_level[self] # type: ignore + + +# disable coverage for the next block. this is copy-n-paste +# from other code for compatibilty on older python versions +if sys.version_info < (3, 8): # pragma: no cover + import itertools + import threading + import warnings + from asyncio import events + + class ThreadedChildWatcher(asyncio.AbstractChildWatcher): + """Threaded child watcher implementation. + The watcher uses a thread per process + for waiting for the process finish. + It doesn't require subscription on POSIX signal + but a thread creation is not free. + The watcher has O(1) complexity, its performance doesn't depend + on amount of spawn processes. 
+ """ + + def __init__(self) -> None: + self._pid_counter = itertools.count(0) + self._threads: Dict[Any, Any] = {} + + def is_active(self) -> bool: + return True + + def close(self) -> None: + self._join_threads() + + def _join_threads(self) -> None: + """Internal: Join all non-daemon threads""" + threads = [thread for thread in list(self._threads.values()) + if thread.is_alive() and not thread.daemon] + for thread in threads: + thread.join() + + def __enter__(self) -> Any: + return self + + def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + pass + + def __del__(self, _warn: Any = warnings.warn) -> None: + threads = [thread for thread in list(self._threads.values()) + if thread.is_alive()] + if threads: + _warn(f'{self.__class__} has registered but not finished child processes', + ResourceWarning, + source=self) + + def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None: + loop = events.get_event_loop() + thread = threading.Thread(target=self._do_waitpid, + name=f'waitpid-{next(self._pid_counter)}', + args=(loop, pid, callback, args), + daemon=True) + self._threads[pid] = thread + thread.start() + + def remove_child_handler(self, pid: Any) -> bool: + # asyncio never calls remove_child_handler() !!! + # The method is no-op but is implemented because + # abstract base classe requires it + return True + + def attach_loop(self, loop: Any) -> None: + pass + + def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None: + assert expected_pid > 0 + + try: + pid, status = os.waitpid(expected_pid, 0) + except ChildProcessError: + # The child process is already reaped + # (may happen if waitpid() is called elsewhere). 
+ pid = expected_pid + returncode = 255 + logger.warning( + 'Unknown child process pid %d, will report returncode 255', + pid) + else: + if os.WIFEXITED(status): + returncode = os.WEXITSTATUS(status) + elif os.WIFSIGNALED(status): + returncode = -os.WTERMSIG(status) + else: + raise ValueError(f'unknown wait status {status}') + if loop.get_debug(): + logger.debug('process %s exited with returncode %s', + expected_pid, returncode) + + if loop.is_closed(): + logger.warning('Loop %r that handles pid %r is closed', loop, pid) + else: + loop.call_soon_threadsafe(callback, pid, returncode, *args) + + self._threads.pop(expected_pid) + + # unlike SafeChildWatcher which handles SIGCHLD in the main thread, + # ThreadedChildWatcher runs in a separated thread, hence allows us to + # run create_subprocess_exec() in non-main thread, see + # https://bugs.python.org/issue35621 + asyncio.set_child_watcher(ThreadedChildWatcher()) + + +try: + from asyncio import run as async_run # type: ignore[attr-defined] +except ImportError: # pragma: no cover + # disable coverage for this block. 
it should be a copy-n-paste from + # from newer libs for compatibilty on older python versions + def async_run(coro): # type: ignore + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete(coro) + finally: + try: + loop.run_until_complete(loop.shutdown_asyncgens()) + finally: + asyncio.set_event_loop(None) + loop.close() + + +def call(ctx: CephadmContext, + command: List[str], + desc: Optional[str] = None, + verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs: Any) -> Tuple[str, str, int]: + """ + Wrap subprocess.Popen to + + - log stdout/stderr to a logger, + - decode utf-8 + - cleanly return out, err, returncode + + :param timeout: timeout in seconds + """ + + prefix = command[0] if desc is None else desc + if prefix: + prefix += ': ' + timeout = timeout or ctx.timeout + + async def run_with_timeout() -> Tuple[str, str, int]: + process = await asyncio.create_subprocess_exec( + *command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + env=os.environ.copy()) + assert process.stdout + assert process.stderr + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(), + timeout, + ) + except asyncio.TimeoutError: + # try to terminate the process assuming it is still running. It's + # possible that even after killing the process it will not + # complete, particularly if it is D-state. If that happens the + # process.wait call will block, but we're no worse off than before + # when the timeout did not work. Additionally, there are other + # corner-cases we could try and handle here but we decided to start + # simple. 
+ process.kill() + await process.wait() + logger.info(prefix + f'timeout after {timeout} seconds') + return '', '', 124 + else: + assert process.returncode is not None + return ( + stdout.decode('utf-8'), + stderr.decode('utf-8'), + process.returncode, + ) + + stdout, stderr, returncode = async_run(run_with_timeout()) + log_level = verbosity.success_log_level() + if returncode != 0: + log_level = verbosity.error_log_level() + logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}') + for line in stdout.splitlines(): + logger.log(log_level, prefix + 'stdout ' + line) + for line in stderr.splitlines(): + logger.log(log_level, prefix + 'stderr ' + line) + return stdout, stderr, returncode + + +def call_throws( + ctx: CephadmContext, + command: List[str], + desc: Optional[str] = None, + verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs: Any) -> Tuple[str, str, int]: + out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs) + if ret: + for s in (out, err): + if s.strip() and len(s.splitlines()) <= 2: # readable message? 
+ raise RuntimeError(f'Failed command: {" ".join(command)}: {s}') + raise RuntimeError('Failed command: %s' % ' '.join(command)) + return out, err, ret + + +def call_timeout(ctx, command, timeout): + # type: (CephadmContext, List[str], int) -> int + logger.debug('Running command (timeout=%s): %s' + % (timeout, ' '.join(command))) + + def raise_timeout(command, timeout): + # type: (List[str], int) -> NoReturn + msg = 'Command `%s` timed out after %s seconds' % (command, timeout) + logger.debug(msg) + raise TimeoutExpired(msg) + + try: + return subprocess.call(command, timeout=timeout, env=os.environ.copy()) + except subprocess.TimeoutExpired: + raise_timeout(command, timeout) + +################################## + + +def json_loads_retry(cli_func: Callable[[], str]) -> Any: + for sleep_secs in [1, 4, 4]: + try: + return json.loads(cli_func()) + except json.JSONDecodeError: + logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs) + time.sleep(sleep_secs) + return json.loads(cli_func()) + + +def is_available(ctx, what, func): + # type: (CephadmContext, str, Callable[[], bool]) -> None + """ + Wait for a service to become available + + :param what: the name of the service + :param func: the callable object that determines availability + """ + retry = ctx.retry + logger.info('Waiting for %s...' % what) + num = 1 + while True: + if func(): + logger.info('%s is available' + % what) + break + elif num > retry: + raise Error('%s not available after %s tries' + % (what, retry)) + + logger.info('%s not available, waiting (%s/%s)...' 
+ % (what, num, retry)) + + num += 1 + time.sleep(2) + + +def read_config(fn): + # type: (Optional[str]) -> ConfigParser + cp = ConfigParser() + if fn: + cp.read(fn) + return cp + + +def pathify(p): + # type: (str) -> str + p = os.path.expanduser(p) + return os.path.abspath(p) + + +def get_file_timestamp(fn): + # type: (str) -> Optional[str] + try: + mt = os.path.getmtime(fn) + return datetime.datetime.fromtimestamp( + mt, tz=datetime.timezone.utc + ).strftime(DATEFMT) + except Exception: + return None + + +def try_convert_datetime(s): + # type: (str) -> Optional[str] + # This is super irritating because + # 1) podman and docker use different formats + # 2) python's strptime can't parse either one + # + # I've seen: + # docker 18.09.7: 2020-03-03T09:21:43.636153304Z + # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00 + # 2020-03-03 15:52:30.136257504 -0600 CST + # (In the podman case, there is a different string format for + # 'inspect' and 'inspect --format {{.Created}}'!!) + + # In *all* cases, the 9 digit second precision is too much for + # python's strptime. Shorten it to 6 digits. + p = re.compile(r'(\.[\d]{6})[\d]*') + s = p.sub(r'\1', s) + + # replace trailing Z with -0000, since (on python 3.6.8) it won't parse + if s and s[-1] == 'Z': + s = s[:-1] + '-0000' + + # cut off the redundant 'CST' part that strptime can't parse, if + # present. + v = s.split(' ') + s = ' '.join(v[0:3]) + + # try parsing with several format strings + fmts = [ + '%Y-%m-%dT%H:%M:%S.%f%z', + '%Y-%m-%d %H:%M:%S.%f %z', + ] + for f in fmts: + try: + # return timestamp normalized to UTC, rendered as DATEFMT. + return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT) + except ValueError: + pass + return None + + +def _parse_podman_version(version_str): + # type: (str) -> Tuple[int, ...] 
+ def to_int(val: str, org_e: Optional[Exception] = None) -> int: + if not val and org_e: + raise org_e + try: + return int(val) + except ValueError as e: + return to_int(val[0:-1], org_e or e) + + return tuple(map(to_int, version_str.split('.'))) + + +def get_hostname(): + # type: () -> str + return socket.gethostname() + + +def get_short_hostname(): + # type: () -> str + return get_hostname().split('.', 1)[0] + + +def get_fqdn(): + # type: () -> str + return socket.getfqdn() or socket.gethostname() + + +def get_ip_addresses(hostname: str) -> Tuple[List[str], List[str]]: + items = socket.getaddrinfo(hostname, None, + flags=socket.AI_CANONNAME, + type=socket.SOCK_STREAM) + ipv4_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET] + ipv6_addresses = [i[4][0] for i in items if i[0] == socket.AF_INET6] + return ipv4_addresses, ipv6_addresses + + +def get_arch(): + # type: () -> str + return platform.uname().machine + + +def generate_service_id(): + # type: () -> str + return get_short_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase) + for _ in range(6)) + + +def generate_password(): + # type: () -> str + return ''.join(random.choice(string.ascii_lowercase + string.digits) + for i in range(10)) + + +def normalize_container_id(i): + # type: (str) -> str + # docker adds the sha256: prefix, but AFAICS both + # docker (18.09.7 in bionic at least) and podman + # both always use sha256, so leave off the prefix + # for consistency. 
+ prefix = 'sha256:' + if i.startswith(prefix): + i = i[len(prefix):] + return i + + +def make_fsid(): + # type: () -> str + return str(uuid.uuid1()) + + +def is_fsid(s): + # type: (str) -> bool + try: + uuid.UUID(s) + except ValueError: + return False + return True + + +def validate_fsid(func: FuncT) -> FuncT: + @wraps(func) + def _validate_fsid(ctx: CephadmContext) -> Any: + if 'fsid' in ctx and ctx.fsid: + if not is_fsid(ctx.fsid): + raise Error('not an fsid: %s' % ctx.fsid) + return func(ctx) + return cast(FuncT, _validate_fsid) + + +def infer_fsid(func: FuncT) -> FuncT: + """ + If we only find a single fsid in /var/lib/ceph/*, use that + """ + @infer_config + @wraps(func) + def _infer_fsid(ctx: CephadmContext) -> Any: + if 'fsid' in ctx and ctx.fsid: + logger.debug('Using specified fsid: %s' % ctx.fsid) + return func(ctx) + + fsids = set() + + cp = read_config(ctx.config) + if cp.has_option('global', 'fsid'): + fsids.add(cp.get('global', 'fsid')) + + daemon_list = list_daemons(ctx, detail=False) + for daemon in daemon_list: + if not is_fsid(daemon['fsid']): + # 'unknown' fsid + continue + elif 'name' not in ctx or not ctx.name: + # ctx.name not specified + fsids.add(daemon['fsid']) + elif daemon['name'] == ctx.name: + # ctx.name is a match + fsids.add(daemon['fsid']) + fsids = sorted(fsids) + + if not fsids: + # some commands do not always require an fsid + pass + elif len(fsids) == 1: + logger.info('Inferring fsid %s' % fsids[0]) + ctx.fsid = fsids[0] + else: + raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids) + return func(ctx) + + return cast(FuncT, _infer_fsid) + + +def infer_config(func: FuncT) -> FuncT: + """ + Infer the cluster configuration using the following priority order: + 1- if the user has provided custom conf file (-c option) use it + 2- otherwise if daemon --name has been provided use daemon conf + 3- otherwise find the mon daemon conf file and use it (if v1) + 4- otherwise if 
{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it + 5- finally: fallback to the default file /etc/ceph/ceph.conf + """ + @wraps(func) + def _infer_config(ctx: CephadmContext) -> Any: + + def config_path(daemon_type: str, daemon_name: str) -> str: + data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name) + return os.path.join(data_dir, 'config') + + def get_mon_daemon_name(fsid: str) -> Optional[str]: + daemon_list = list_daemons(ctx, detail=False) + for daemon in daemon_list: + if ( + daemon.get('name', '').startswith('mon.') + and daemon.get('fsid', '') == fsid + and daemon.get('style', '') == 'cephadm:v1' + and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1])) + ): + return daemon['name'] + return None + + ctx.config = ctx.config if 'config' in ctx else None + # check if user has provided conf by using -c option + if ctx.config and (ctx.config != CEPH_DEFAULT_CONF): + logger.debug(f'Using specified config: {ctx.config}') + return func(ctx) + + if 'fsid' in ctx and ctx.fsid: + name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid) + if name is not None: + # daemon name has been specified (or inferred from mon), let's use its conf + ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1]) + else: + # no daemon, in case the cluster has a config dir then use it + ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}' + if os.path.exists(ceph_conf): + ctx.config = ceph_conf + + if ctx.config: + logger.info(f'Inferring config {ctx.config}') + elif os.path.exists(CEPH_DEFAULT_CONF): + logger.debug(f'Using default config {CEPH_DEFAULT_CONF}') + ctx.config = CEPH_DEFAULT_CONF + return func(ctx) + + return cast(FuncT, _infer_config) + + +def _get_default_image(ctx: CephadmContext) -> str: + if DEFAULT_IMAGE_IS_MAIN: + warn = """This is a development version of cephadm. 
+For information regarding the latest stable release: + https://docs.ceph.com/docs/{}/cephadm/install +""".format(LATEST_STABLE_RELEASE) + for line in warn.splitlines(): + logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end)) + return DEFAULT_IMAGE + + +def infer_image(func: FuncT) -> FuncT: + """ + Use the most recent ceph image + """ + @wraps(func) + def _infer_image(ctx: CephadmContext) -> Any: + if not ctx.image: + ctx.image = os.environ.get('CEPHADM_IMAGE') + if not ctx.image: + ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path) + if not ctx.image: + ctx.image = _get_default_image(ctx) + return func(ctx) + + return cast(FuncT, _infer_image) + + +def require_image(func: FuncT) -> FuncT: + """ + Require the global --image flag to be set + """ + @wraps(func) + def _require_image(ctx: CephadmContext) -> Any: + if not ctx.image: + raise Error('This command requires the global --image option to be set') + return func(ctx) + + return cast(FuncT, _require_image) + + +def default_image(func: FuncT) -> FuncT: + @wraps(func) + def _default_image(ctx: CephadmContext) -> Any: + update_default_image(ctx) + return func(ctx) + + return cast(FuncT, _default_image) + + +def update_default_image(ctx: CephadmContext) -> None: + if getattr(ctx, 'image', None): + return + ctx.image = None # ensure ctx.image exists to avoid repeated `getattr`s + name = getattr(ctx, 'name', None) + if name: + type_ = name.split('.', 1)[0] + if type_ in Monitoring.components: + ctx.image = Monitoring.components[type_]['image'] + if type_ == 'haproxy': + ctx.image = HAproxy.default_image + if type_ == 'keepalived': + ctx.image = Keepalived.default_image + if type_ == SNMPGateway.daemon_type: + ctx.image = SNMPGateway.default_image + if type_ == CephNvmeof.daemon_type: + ctx.image = CephNvmeof.default_image + if type_ in Tracing.components: + ctx.image = Tracing.components[type_]['image'] + if not ctx.image: + ctx.image = os.environ.get('CEPHADM_IMAGE') + if not 
ctx.image: + ctx.image = _get_default_image(ctx) + + +def executes_early(func: FuncT) -> FuncT: + """Decorator that indicates the command function is meant to have no + dependencies and no environmental requirements and can therefore be + executed as non-root and with no logging, etc. Commands that have this + decorator applied must be simple and self-contained. + """ + cast(Any, func)._execute_early = True + return func + + +def deprecated_command(func: FuncT) -> FuncT: + @wraps(func) + def _deprecated_command(ctx: CephadmContext) -> Any: + logger.warning(f'Deprecated command used: {func}') + if NO_DEPRECATED: + raise Error('running deprecated commands disabled') + return func(ctx) + + return cast(FuncT, _deprecated_command) + + +def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]: + """ + :param ctx: Cephadm context + :param daemon_filter: daemon name or type + :param by_name: must be set to True if daemon name is provided + :return: Container information or None + """ + def daemon_name_or_type(daemon: Dict[str, str]) -> str: + return daemon['name'] if by_name else daemon['name'].split('.', 1)[0] + + if by_name and '.' 
not in daemon_filter: + logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}') + return None + daemons = list_daemons(ctx, detail=False) + matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid] + if matching_daemons: + d_type, d_id = matching_daemons[0]['name'].split('.', 1) + out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id) + if not code: + (container_id, image_name, image_id, start, version) = out.strip().split(',') + return ContainerInfo(container_id, image_name, image_id, start, version) + return None + + +def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]: + """ + Infer the local ceph image based on the following priority criteria: + 1- the image specified by --image arg (if provided). + 2- the same image as the daemon container specified by --name arg (if provided). + 3- image used by any ceph container running on the host. In this case we use daemon types. + 4- if no container is found then we use the most ceph recent image on the host. + + Note: any selected container must have the same fsid inferred previously. + + :return: The most recent local ceph image (already pulled) + """ + # '|' special character is used to separate the output fields into: + # - Repository@digest + # - Image Id + # - Image Tag + # - Image creation date + out, _, _ = call_throws(ctx, + [container_path, 'images', + '--filter', 'label=ceph=True', + '--filter', 'dangling=false', + '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}']) + + container_info = None + daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' 
in ctx.name) else None + daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons # daemon types: 'mon', 'mgr', etc + for daemon in daemons_ls: + container_info = get_container_info(ctx, daemon, daemon_name is not None) + if container_info is not None: + logger.debug(f"Using container info for daemon '{daemon}'") + break + + for image in out.splitlines(): + if image and not image.isspace(): + (digest, image_id, tag, created_date) = image.lstrip().split('|') + if container_info is not None and image_id not in container_info.image_id: + continue + if digest and not digest.endswith('@'): + logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}") + return digest + return None + + +def write_tmp(s, uid, gid): + # type: (str, int, int) -> IO[str] + tmp_f = tempfile.NamedTemporaryFile(mode='w', + prefix='ceph-tmp') + os.fchown(tmp_f.fileno(), uid, gid) + tmp_f.write(s) + tmp_f.flush() + + return tmp_f + + +def makedirs(dir, uid, gid, mode): + # type: (str, int, int, int) -> None + if not os.path.exists(dir): + os.makedirs(dir, mode=mode) + else: + os.chmod(dir, mode) + os.chown(dir, uid, gid) + os.chmod(dir, mode) # the above is masked by umask... 
+ + +def get_data_dir(fsid, data_dir, t, n): + # type: (str, str, str, Union[int, str]) -> str + return os.path.join(data_dir, fsid, '%s.%s' % (t, n)) + + +def get_log_dir(fsid, log_dir): + # type: (str, str) -> str + return os.path.join(log_dir, fsid) + + +def make_data_dir_base(fsid, data_dir, uid, gid): + # type: (str, str, int, int) -> str + data_dir_base = os.path.join(data_dir, fsid) + makedirs(data_dir_base, uid, gid, DATA_DIR_MODE) + makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE) + makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid, + DATA_DIR_MODE) + return data_dir_base + + +def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None): + # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str + if uid is None or gid is None: + uid, gid = extract_uid_gid(ctx) + make_data_dir_base(fsid, ctx.data_dir, uid, gid) + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + makedirs(data_dir, uid, gid, DATA_DIR_MODE) + return data_dir + + +def make_log_dir(ctx, fsid, uid=None, gid=None): + # type: (CephadmContext, str, Optional[int], Optional[int]) -> str + if uid is None or gid is None: + uid, gid = extract_uid_gid(ctx) + log_dir = get_log_dir(fsid, ctx.log_dir) + makedirs(log_dir, uid, gid, LOG_DIR_MODE) + return log_dir + + +def make_var_run(ctx, fsid, uid, gid): + # type: (CephadmContext, str, int, int) -> None + call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid), + '/var/run/ceph/%s' % fsid]) + + +def copy_tree(ctx, src, dst, uid=None, gid=None): + # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None + """ + Copy a directory tree from src to dst + """ + if uid is None or gid is None: + (uid, gid) = extract_uid_gid(ctx) + + for src_dir in src: + dst_dir = dst + if os.path.isdir(dst): + dst_dir = os.path.join(dst, os.path.basename(src_dir)) + + logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir)) + 
shutil.rmtree(dst_dir, ignore_errors=True) + shutil.copytree(src_dir, dst_dir) # dirs_exist_ok needs python 3.8 + + for dirpath, dirnames, filenames in os.walk(dst_dir): + logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath)) + os.chown(dirpath, uid, gid) + for filename in filenames: + logger.debug('chown %s:%s `%s`' % (uid, gid, filename)) + os.chown(os.path.join(dirpath, filename), uid, gid) + + +def copy_files(ctx, src, dst, uid=None, gid=None): + # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None + """ + Copy a files from src to dst + """ + if uid is None or gid is None: + (uid, gid) = extract_uid_gid(ctx) + + for src_file in src: + dst_file = dst + if os.path.isdir(dst): + dst_file = os.path.join(dst, os.path.basename(src_file)) + + logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file)) + shutil.copyfile(src_file, dst_file) + + logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file)) + os.chown(dst_file, uid, gid) + + +def move_files(ctx, src, dst, uid=None, gid=None): + # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None + """ + Move files from src to dst + """ + if uid is None or gid is None: + (uid, gid) = extract_uid_gid(ctx) + + for src_file in src: + dst_file = dst + if os.path.isdir(dst): + dst_file = os.path.join(dst, os.path.basename(src_file)) + + if os.path.islink(src_file): + # shutil.move() in py2 does not handle symlinks correctly + src_rl = os.readlink(src_file) + logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl)) + os.symlink(src_rl, dst_file) + os.unlink(src_file) + else: + logger.debug("move file '%s' -> '%s'" % (src_file, dst_file)) + shutil.move(src_file, dst_file) + logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file)) + os.chown(dst_file, uid, gid) + + +def recursive_chown(path: str, uid: int, gid: int) -> None: + for dirpath, dirnames, filenames in os.walk(path): + os.chown(dirpath, uid, gid) + for filename in filenames: + os.chown(os.path.join(dirpath, filename), 
uid, gid) + + +# copied from distutils +def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]: + """Tries to find 'executable' in the directories listed in 'path'. + A string listing directories separated by 'os.pathsep'; defaults to + os.environ['PATH']. Returns the complete filename or None if not found. + """ + _, ext = os.path.splitext(executable) + if (sys.platform == 'win32') and (ext != '.exe'): + executable = executable + '.exe' # pragma: no cover + + if os.path.isfile(executable): + return executable + + if path is None: + path = os.environ.get('PATH', None) + if path is None: + try: + path = os.confstr('CS_PATH') + except (AttributeError, ValueError): + # os.confstr() or CS_PATH is not available + path = os.defpath + # bpo-35755: Don't use os.defpath if the PATH environment variable is + # set to an empty string + + # PATH='' doesn't match, whereas PATH=':' looks in the current directory + if not path: + return None + + paths = path.split(os.pathsep) + for p in paths: + f = os.path.join(p, executable) + if os.path.isfile(f): + # the file exists, we have a shot at spawn working + return f + return None + + +def find_program(filename): + # type: (str) -> str + name = find_executable(filename) + if name is None: + raise ValueError('%s not found' % filename) + return name + + +def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]: + if ctx.docker: + return Docker() + else: + for i in CONTAINER_PREFERENCE: + try: + return i() + except Exception: + pass + return None + + +def check_container_engine(ctx: CephadmContext) -> ContainerEngine: + engine = ctx.container_engine + if not isinstance(engine, CONTAINER_PREFERENCE): + # See https://github.com/python/mypy/issues/8993 + exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE] # type: ignore + raise Error('No container engine binary found ({}). 
Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes))) + elif isinstance(engine, Podman): + engine.get_version(ctx) + if engine.version < MIN_PODMAN_VERSION: + raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION) + return engine + + +def get_unit_name(fsid, daemon_type, daemon_id=None): + # type: (str, str, Optional[Union[int, str]]) -> str + # accept either name or type + id + if daemon_id is not None: + return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id) + else: + return 'ceph-%s@%s' % (fsid, daemon_type) + + +def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str: + daemon = get_daemon_description(ctx, fsid, name) + try: + return daemon['systemd_unit'] + except KeyError: + raise Error('Failed to get unit name for {}'.format(daemon)) + + +def check_unit(ctx, unit_name): + # type: (CephadmContext, str) -> Tuple[bool, str, bool] + # NOTE: we ignore the exit code here because systemctl outputs + # various exit codes based on the state of the service, but the + # string result is more explicit (and sufficient). 
+ enabled = False + installed = False + try: + out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name], + verbosity=CallVerbosity.QUIET) + if code == 0: + enabled = True + installed = True + elif 'disabled' in out: + installed = True + except Exception as e: + logger.warning('unable to run systemctl: %s' % e) + enabled = False + installed = False + + state = 'unknown' + try: + out, err, code = call(ctx, ['systemctl', 'is-active', unit_name], + verbosity=CallVerbosity.QUIET) + out = out.strip() + if out in ['active']: + state = 'running' + elif out in ['inactive']: + state = 'stopped' + elif out in ['failed', 'auto-restart']: + state = 'error' + else: + state = 'unknown' + except Exception as e: + logger.warning('unable to run systemctl: %s' % e) + state = 'unknown' + return (enabled, state, installed) + + +def check_units(ctx, units, enabler=None): + # type: (CephadmContext, List[str], Optional[Packager]) -> bool + for u in units: + (enabled, state, installed) = check_unit(ctx, u) + if enabled and state == 'running': + logger.info('Unit %s is enabled and running' % u) + return True + if enabler is not None: + if installed: + logger.info('Enabling unit %s' % u) + enabler.enable_service(u) + return False + + +def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool: + if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']: + # these are non-containerized daemon types + return False + return bool(get_running_container_name(ctx, c)) + + +def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]: + for name in [c.cname, c.old_cname]: + out, err, ret = call(ctx, [ + ctx.container_engine.path, 'container', 'inspect', + '--format', '{{.State.Status}}', name + ]) + if out.strip() == 'running': + return name + return None + + +def get_legacy_config_fsid(cluster, legacy_dir=None): + # type: (str, Optional[str]) -> Optional[str] + config_file = '/etc/ceph/%s.conf' % cluster + if legacy_dir is not None: + 
config_file = os.path.abspath(legacy_dir + config_file) + + if os.path.exists(config_file): + config = read_config(config_file) + if config.has_section('global') and config.has_option('global', 'fsid'): + return config.get('global', 'fsid') + return None + + +def get_legacy_daemon_fsid(ctx, cluster, + daemon_type, daemon_id, legacy_dir=None): + # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str] + fsid = None + if daemon_type == 'osd': + try: + fsid_file = os.path.join(ctx.data_dir, + daemon_type, + 'ceph-%s' % daemon_id, + 'ceph_fsid') + if legacy_dir is not None: + fsid_file = os.path.abspath(legacy_dir + fsid_file) + with open(fsid_file, 'r') as f: + fsid = f.read().strip() + except IOError: + pass + if not fsid: + fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir) + return fsid + + +def should_log_to_journald(ctx: CephadmContext) -> bool: + if ctx.log_to_journald is not None: + return ctx.log_to_journald + return isinstance(ctx.container_engine, Podman) and \ + ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION + + +def get_daemon_args(ctx, fsid, daemon_type, daemon_id): + # type: (CephadmContext, str, str, Union[int, str]) -> List[str] + r = list() # type: List[str] + + if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']: + r += [ + '--setuser', 'ceph', + '--setgroup', 'ceph', + '--default-log-to-file=false', + ] + log_to_journald = should_log_to_journald(ctx) + if log_to_journald: + r += [ + '--default-log-to-journald=true', + '--default-log-to-stderr=false', + ] + else: + r += [ + '--default-log-to-stderr=true', + '--default-log-stderr-prefix=debug ', + ] + if daemon_type == 'mon': + r += [ + '--default-mon-cluster-log-to-file=false', + ] + if log_to_journald: + r += [ + '--default-mon-cluster-log-to-journald=true', + '--default-mon-cluster-log-to-stderr=false', + ] + else: + r += ['--default-mon-cluster-log-to-stderr=true'] + elif daemon_type in Monitoring.components: + 
metadata = Monitoring.components[daemon_type] + r += metadata.get('args', list()) + # set ip and port to bind to for nodeexporter,alertmanager,prometheus + if daemon_type not in ['grafana', 'loki', 'promtail']: + ip = '' + port = Monitoring.port_map[daemon_type][0] + meta = fetch_meta(ctx) + if meta: + if 'ip' in meta and meta['ip']: + ip = meta['ip'] + if 'ports' in meta and meta['ports']: + port = meta['ports'][0] + r += [f'--web.listen-address={ip}:{port}'] + if daemon_type == 'prometheus': + config = fetch_configs(ctx) + retention_time = config.get('retention_time', '15d') + retention_size = config.get('retention_size', '0') # default to disabled + r += [f'--storage.tsdb.retention.time={retention_time}'] + r += [f'--storage.tsdb.retention.size={retention_size}'] + scheme = 'http' + host = get_fqdn() + # in case host is not an fqdn then we use the IP to + # avoid producing a broken web.external-url link + if '.' not in host: + ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname()) + # use the first ipv4 (if any) otherwise use the first ipv6 + addr = next(iter(ipv4_addrs or ipv6_addrs), None) + host = wrap_ipv6(addr) if addr else host + r += [f'--web.external-url={scheme}://{host}:{port}'] + if daemon_type == 'alertmanager': + config = fetch_configs(ctx) + peers = config.get('peers', list()) # type: ignore + for peer in peers: + r += ['--cluster.peer={}'.format(peer)] + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + # some alertmanager, by default, look elsewhere for a config + r += ['--config.file=/etc/alertmanager/alertmanager.yml'] + if daemon_type == 'promtail': + r += ['--config.expand-env'] + if daemon_type == 'prometheus': + config = fetch_configs(ctx) + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + if daemon_type == 'node-exporter': + config = fetch_configs(ctx) + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + r += 
['--path.procfs=/host/proc', + '--path.sysfs=/host/sys', + '--path.rootfs=/rootfs'] + elif daemon_type == 'jaeger-agent': + r.extend(Tracing.components[daemon_type]['daemon_args']) + elif daemon_type == NFSGanesha.daemon_type: + nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + r += nfs_ganesha.get_daemon_args() + elif daemon_type == CephExporter.daemon_type: + ceph_exporter = CephExporter.init(ctx, fsid, daemon_id) + r.extend(ceph_exporter.get_daemon_args()) + elif daemon_type == HAproxy.daemon_type: + haproxy = HAproxy.init(ctx, fsid, daemon_id) + r += haproxy.get_daemon_args() + elif daemon_type == CustomContainer.daemon_type: + cc = CustomContainer.init(ctx, fsid, daemon_id) + r.extend(cc.get_daemon_args()) + elif daemon_type == SNMPGateway.daemon_type: + sc = SNMPGateway.init(ctx, fsid, daemon_id) + r.extend(sc.get_daemon_args()) + + return r + + +def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid, + config=None, keyring=None): + # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None + data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid) + + if daemon_type in Ceph.daemons: + make_log_dir(ctx, fsid, uid=uid, gid=gid) + + if config: + config_path = os.path.join(data_dir, 'config') + with write_new(config_path, owner=(uid, gid)) as f: + f.write(config) + + if keyring: + keyring_path = os.path.join(data_dir, 'keyring') + with write_new(keyring_path, owner=(uid, gid)) as f: + f.write(keyring) + + if daemon_type in Monitoring.components.keys(): + config_json = fetch_configs(ctx) + + # Set up directories specific to the monitoring component + config_dir = '' + data_dir_root = '' + if daemon_type == 'prometheus': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/prometheus' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755) + 
makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) + recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid) + recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid) + elif daemon_type == 'grafana': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/grafana' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) + touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid) + elif daemon_type == 'alertmanager': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/alertmanager' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755) + elif daemon_type == 'promtail': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/promtail' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) + elif daemon_type == 'loki': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/loki' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) + elif daemon_type == 'node-exporter': + data_dir_root = get_data_dir(fsid, ctx.data_dir, + daemon_type, daemon_id) + config_dir = 'etc/node-exporter' + makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) + recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid) + + # populate the config directory for the component from the config-json + if 'files' in config_json: + for fname in config_json['files']: + # work around mypy 
weirdness where it thinks `str`s aren't Anys + # when used for dictionary values! feels like possibly a mypy bug?! + cfg = cast(Dict[str, Any], config_json['files']) + content = dict_get_join(cfg, fname) + if os.path.isabs(fname): + fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep)) + else: + fpath = os.path.join(data_dir_root, config_dir, fname) + with write_new(fpath, owner=(uid, gid), encoding='utf-8') as f: + f.write(content) + + elif daemon_type == NFSGanesha.daemon_type: + nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + nfs_ganesha.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == CephIscsi.daemon_type: + ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + ceph_iscsi.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == CephNvmeof.daemon_type: + ceph_nvmeof = CephNvmeof.init(ctx, fsid, daemon_id) + ceph_nvmeof.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == HAproxy.daemon_type: + haproxy = HAproxy.init(ctx, fsid, daemon_id) + haproxy.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == Keepalived.daemon_type: + keepalived = Keepalived.init(ctx, fsid, daemon_id) + keepalived.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == CustomContainer.daemon_type: + cc = CustomContainer.init(ctx, fsid, daemon_id) + cc.create_daemon_dirs(data_dir, uid, gid) + + elif daemon_type == SNMPGateway.daemon_type: + sg = SNMPGateway.init(ctx, fsid, daemon_id) + sg.create_daemon_conf() + + _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid) + + +def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None: + # mostly making this its own function to make unit testing easier + ccfiles = fetch_custom_config_files(ctx) + if not ccfiles: + return + custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}') + if not os.path.exists(custom_config_dir): + makedirs(custom_config_dir, 
uid, gid, 0o755) + mandatory_keys = ['mount_path', 'content'] + for ccf in ccfiles: + if all(k in ccf for k in mandatory_keys): + file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path'])) + with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f: + f.write(ccf['content']) + # temporary workaround to make custom config files work for tcmu-runner + # container we deploy with iscsi until iscsi is refactored + if daemon_type == 'iscsi': + tcmu_config_dir = custom_config_dir + '.tcmu' + if not os.path.exists(tcmu_config_dir): + makedirs(tcmu_config_dir, uid, gid, 0o755) + tcmu_file_path = os.path.join(tcmu_config_dir, os.path.basename(ccf['mount_path'])) + with write_new(tcmu_file_path, owner=(uid, gid), encoding='utf-8') as f: + f.write(ccf['content']) + + +def get_parm(option: str) -> Dict[str, str]: + js = _get_config_json(option) + # custom_config_files is a special field that may be in the config + # dict. It is used for mounting custom config files into daemon's containers + # and should be accessed through the "fetch_custom_config_files" function. + # For get_parm we need to discard it. + js.pop('custom_config_files', None) + return js + + +def _get_config_json(option: str) -> Dict[str, Any]: + if not option: + return dict() + + global cached_stdin + if option == '-': + if cached_stdin is not None: + j = cached_stdin + else: + j = sys.stdin.read() + cached_stdin = j + else: + # inline json string + if option[0] == '{' and option[-1] == '}': + j = option + # json file + elif os.path.exists(option): + with open(option, 'r') as f: + j = f.read() + else: + raise Error('Config file {} not found'.format(option)) + + try: + js = json.loads(j) + except ValueError as e: + raise Error('Invalid JSON in {}: {}'.format(option, e)) + else: + return js + + +def fetch_meta(ctx: CephadmContext) -> Dict[str, Any]: + """Return a dict containing metadata about a deployment. 
+ """ + meta = getattr(ctx, 'meta_properties', None) + if meta is not None: + return meta + mjson = getattr(ctx, 'meta_json', None) + if mjson is not None: + meta = json.loads(mjson) or {} + ctx.meta_properties = meta + return meta + return {} + + +def fetch_configs(ctx: CephadmContext) -> Dict[str, str]: + """Return a dict containing arbitrary configuration parameters. + This function filters out the key 'custom_config_files' which + must not be part of a deployment's configuration key-value pairs. + To access custom configuration file data, use `fetch_custom_config_files`. + """ + # ctx.config_blobs is *always* a dict. it is created once when + # a command is parsed/processed and stored "forever" + cfg_blobs = getattr(ctx, 'config_blobs', None) + if cfg_blobs: + cfg_blobs = dict(cfg_blobs) + cfg_blobs.pop('custom_config_files', None) + return cfg_blobs + # ctx.config_json is the legacy equivalent of config_blobs. it is a + # string that either contains json or refers to a file name where + # the file contains json. + cfg_json = getattr(ctx, 'config_json', None) + if cfg_json: + jdata = _get_config_json(cfg_json) or {} + jdata.pop('custom_config_files', None) + return jdata + return {} + + +def fetch_custom_config_files(ctx: CephadmContext) -> List[Dict[str, Any]]: + """Return a list containing dicts that can be used to populate + custom configuration files for containers. + """ + # NOTE: this function works like the opposite of fetch_configs. + # instead of filtering out custom_config_files, it returns only + # the content in that key. 
+ cfg_blobs = getattr(ctx, 'config_blobs', None) + if cfg_blobs: + return cfg_blobs.get('custom_config_files', []) + cfg_json = getattr(ctx, 'config_json', None) + if cfg_json: + jdata = _get_config_json(cfg_json) + return jdata.get('custom_config_files', []) + return [] + + +def fetch_tcp_ports(ctx: CephadmContext) -> List[EndPoint]: + """Return a list of Endpoints, which have a port and ip attribute + """ + ports = getattr(ctx, 'tcp_ports', None) + if ports is None: + ports = [] + if isinstance(ports, str): + ports = list(map(int, ports.split())) + port_ips: Dict[str, str] = {} + port_ips_attr: Union[str, Dict[str, str], None] = getattr(ctx, 'port_ips', None) + if isinstance(port_ips_attr, str): + port_ips = json.loads(port_ips_attr) + elif port_ips_attr is not None: + # if it's not None or a str, assume it's already the dict we want + port_ips = port_ips_attr + + endpoints: List[EndPoint] = [] + for port in ports: + if str(port) in port_ips: + endpoints.append(EndPoint(port_ips[str(port)], port)) + else: + endpoints.append(EndPoint('0.0.0.0', port)) + + return endpoints + + +def get_config_and_keyring(ctx): + # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]] + config = None + keyring = None + + d = fetch_configs(ctx) + if d: + config = d.get('config') + keyring = d.get('keyring') + if config and keyring: + return config, keyring + + if 'config' in ctx and ctx.config: + try: + with open(ctx.config, 'r') as f: + config = f.read() + except FileNotFoundError as e: + raise Error(e) + + if 'key' in ctx and ctx.key: + keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key) + elif 'keyring' in ctx and ctx.keyring: + try: + with open(ctx.keyring, 'r') as f: + keyring = f.read() + except FileNotFoundError as e: + raise Error(e) + + return config, keyring + + +def get_container_binds(ctx, fsid, daemon_type, daemon_id): + # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]] + binds = list() + + if daemon_type == CephIscsi.daemon_type: + 
binds.extend(CephIscsi.get_container_binds()) + if daemon_type == CephNvmeof.daemon_type: + binds.extend(CephNvmeof.get_container_binds()) + elif daemon_type == CustomContainer.daemon_type: + assert daemon_id + cc = CustomContainer.init(ctx, fsid, daemon_id) + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + binds.extend(cc.get_container_binds(data_dir)) + + return binds + + +def get_container_mounts(ctx, fsid, daemon_type, daemon_id, + no_config=False): + # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str] + mounts = dict() + + if daemon_type in Ceph.daemons: + if fsid: + run_path = os.path.join('/var/run/ceph', fsid) + if os.path.exists(run_path): + mounts[run_path] = '/var/run/ceph:z' + log_dir = get_log_dir(fsid, ctx.log_dir) + mounts[log_dir] = '/var/log/ceph:z' + crash_dir = '/var/lib/ceph/%s/crash' % fsid + if os.path.exists(crash_dir): + mounts[crash_dir] = '/var/lib/ceph/crash:z' + if daemon_type != 'crash' and should_log_to_journald(ctx): + journald_sock_dir = '/run/systemd/journal' + mounts[journald_sock_dir] = journald_sock_dir + + if daemon_type in Ceph.daemons and daemon_id: + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + if daemon_type == 'rgw': + cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id) + else: + cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id) + if daemon_type != 'crash': + mounts[data_dir] = cdata_dir + ':z' + if not no_config: + mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' + if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']: + # these do not search for their keyrings in a data directory + mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id) + + if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']: + mounts['/dev'] = '/dev' # FIXME: narrow this down? 
+ mounts['/run/udev'] = '/run/udev' + if daemon_type in ['osd', 'clusterless-ceph-volume']: + mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ... + mounts['/run/lvm'] = '/run/lvm' + mounts['/run/lock/lvm'] = '/run/lock/lvm' + if daemon_type == 'osd': + # selinux-policy in the container may not match the host. + if HostFacts(ctx).selinux_enabled: + cluster_dir = f'{ctx.data_dir}/{fsid}' + selinux_folder = f'{cluster_dir}/selinux' + if os.path.exists(cluster_dir): + if not os.path.exists(selinux_folder): + os.makedirs(selinux_folder, mode=0o755) + mounts[selinux_folder] = '/sys/fs/selinux:ro' + else: + logger.error(f'Cluster direcotry {cluster_dir} does not exist.') + mounts['/'] = '/rootfs' + + try: + if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development + ceph_folder = pathify(ctx.shared_ceph_folder) + if os.path.exists(ceph_folder): + mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume' + mounts[ceph_folder + '/src/cephadm/cephadm.py'] = '/usr/sbin/cephadm' + mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr' + mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph' + mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard' + mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml' + else: + logger.error('{}{}{}'.format(termcolor.red, + 'Ceph shared source folder does not exist.', + termcolor.end)) + except AttributeError: + pass + + if daemon_type in Monitoring.components and daemon_id: + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + log_dir = get_log_dir(fsid, ctx.log_dir) + if daemon_type == 'prometheus': + mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z' + mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' + elif daemon_type == 'loki': + mounts[os.path.join(data_dir, 
'etc/loki')] = '/etc/loki:Z' + mounts[os.path.join(data_dir, 'data')] = '/loki:Z' + elif daemon_type == 'promtail': + mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z' + mounts[log_dir] = '/var/log/ceph:z' + mounts[os.path.join(data_dir, 'data')] = '/promtail:Z' + elif daemon_type == 'node-exporter': + mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z' + mounts['/proc'] = '/host/proc:ro' + mounts['/sys'] = '/host/sys:ro' + mounts['/'] = '/rootfs:ro' + elif daemon_type == 'grafana': + mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z' + mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z' + mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z' + mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z' + elif daemon_type == 'alertmanager': + mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z' + + if daemon_type == NFSGanesha.daemon_type: + assert daemon_id + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + mounts.update(nfs_ganesha.get_container_mounts(data_dir)) + + if daemon_type == HAproxy.daemon_type: + assert daemon_id + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + mounts.update(HAproxy.get_container_mounts(data_dir)) + + if daemon_type == CephNvmeof.daemon_type: + assert daemon_id + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + mounts.update(CephNvmeof.get_container_mounts(data_dir)) + + if daemon_type == CephIscsi.daemon_type: + assert daemon_id + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + # Removes ending ".tcmu" from data_dir as tcmu-runner uses the same data_dir + # as rbd-runner-api + if data_dir.endswith('.tcmu'): + data_dir = re.sub(r'\.tcmu$', '', data_dir) + log_dir = get_log_dir(fsid, 
ctx.log_dir) + mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir)) + + if daemon_type == Keepalived.daemon_type: + assert daemon_id + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + mounts.update(Keepalived.get_container_mounts(data_dir)) + + if daemon_type == CustomContainer.daemon_type: + assert daemon_id + cc = CustomContainer.init(ctx, fsid, daemon_id) + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + mounts.update(cc.get_container_mounts(data_dir)) + + # Modifications podman makes to /etc/hosts causes issues with + # certain daemons (specifically referencing "host.containers.internal" entry + # being added to /etc/hosts in this case). To avoid that, but still + # allow users to use /etc/hosts for hostname resolution, we can + # mount the host's /etc/hosts file. + # https://tracker.ceph.com/issues/58532 + # https://tracker.ceph.com/issues/57018 + if isinstance(ctx.container_engine, Podman): + if os.path.exists('/etc/hosts'): + if '/etc/hosts' not in mounts: + mounts['/etc/hosts'] = '/etc/hosts:ro' + + return mounts + + +def get_ceph_volume_container(ctx: CephadmContext, + privileged: bool = True, + cname: str = '', + volume_mounts: Dict[str, str] = {}, + bind_mounts: Optional[List[List[str]]] = None, + args: List[str] = [], + envs: Optional[List[str]] = None) -> 'CephContainer': + if envs is None: + envs = [] + envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes') + envs.append('CEPH_VOLUME_DEBUG=1') + + return CephContainer( + ctx, + image=ctx.image, + entrypoint='/usr/sbin/ceph-volume', + args=args, + volume_mounts=volume_mounts, + bind_mounts=bind_mounts, + envs=envs, + privileged=privileged, + cname=cname, + memory_request=ctx.memory_request, + memory_limit=ctx.memory_limit, + ) + + +def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None: + # set container's pids-limit to unlimited rather than default (Docker 4096 / Podman 2048) + # Useful for daemons like iscsi where the 
default pids-limit limits the number of luns + # per iscsi target or rgw where increasing the rgw_thread_pool_size to a value near + # the default pids-limit may cause the container to crash. + if ( + isinstance(ctx.container_engine, Podman) + and ctx.container_engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION + ): + container_args.append('--pids-limit=-1') + else: + container_args.append('--pids-limit=0') + + +def get_container(ctx: CephadmContext, + fsid: str, daemon_type: str, daemon_id: Union[int, str], + privileged: bool = False, + ptrace: bool = False, + container_args: Optional[List[str]] = None) -> 'CephContainer': + entrypoint: str = '' + name: str = '' + ceph_args: List[str] = [] + envs: List[str] = [] + host_network: bool = True + + if daemon_type in Ceph.daemons: + envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') + if container_args is None: + container_args = [] + if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways: + set_pids_limit_unlimited(ctx, container_args) + if daemon_type in ['mon', 'osd']: + # mon and osd need privileged in order for libudev to query devices + privileged = True + if daemon_type == 'rgw': + entrypoint = '/usr/bin/radosgw' + name = 'client.rgw.%s' % daemon_id + elif daemon_type == 'rbd-mirror': + entrypoint = '/usr/bin/rbd-mirror' + name = 'client.rbd-mirror.%s' % daemon_id + elif daemon_type == 'cephfs-mirror': + entrypoint = '/usr/bin/cephfs-mirror' + name = 'client.cephfs-mirror.%s' % daemon_id + elif daemon_type == 'crash': + entrypoint = '/usr/bin/ceph-crash' + name = 'client.crash.%s' % daemon_id + elif daemon_type in ['mon', 'mgr', 'mds', 'osd']: + entrypoint = '/usr/bin/ceph-' + daemon_type + name = '%s.%s' % (daemon_type, daemon_id) + elif daemon_type in Monitoring.components: + entrypoint = '' + elif daemon_type in Tracing.components: + entrypoint = '' + name = '%s.%s' % (daemon_type, daemon_id) + config = fetch_configs(ctx) + Tracing.set_configuration(config, daemon_type) + 
envs.extend(Tracing.components[daemon_type].get('envs', [])) + elif daemon_type == NFSGanesha.daemon_type: + entrypoint = NFSGanesha.entrypoint + name = '%s.%s' % (daemon_type, daemon_id) + envs.extend(NFSGanesha.get_container_envs()) + elif daemon_type == CephExporter.daemon_type: + entrypoint = CephExporter.entrypoint + name = 'client.ceph-exporter.%s' % daemon_id + elif daemon_type == HAproxy.daemon_type: + name = '%s.%s' % (daemon_type, daemon_id) + container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user + elif daemon_type == Keepalived.daemon_type: + name = '%s.%s' % (daemon_type, daemon_id) + envs.extend(Keepalived.get_container_envs()) + container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW']) + elif daemon_type == CephNvmeof.daemon_type: + name = '%s.%s' % (daemon_type, daemon_id) + container_args.extend(['--ulimit', 'memlock=-1:-1']) + container_args.extend(['--ulimit', 'nofile=10240']) + container_args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE']) + elif daemon_type == CephIscsi.daemon_type: + entrypoint = CephIscsi.entrypoint + name = '%s.%s' % (daemon_type, daemon_id) + # So the container can modprobe iscsi_target_mod and have write perms + # to configfs we need to make this a privileged container. + privileged = True + elif daemon_type == CustomContainer.daemon_type: + cc = CustomContainer.init(ctx, fsid, daemon_id) + entrypoint = cc.entrypoint + host_network = False + envs.extend(cc.get_container_envs()) + container_args.extend(cc.get_container_args()) + + if daemon_type in Monitoring.components: + uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) + monitoring_args = [ + '--user', + str(uid), + # FIXME: disable cpu/memory limits for the time being (not supported + # by ubuntu 18.04 kernel!) 
+ ] + container_args.extend(monitoring_args) + if daemon_type == 'node-exporter': + # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys', + # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation + # between the node-exporter container and the host to avoid selinux denials + container_args.extend(['--security-opt', 'label=disable']) + elif daemon_type == 'crash': + ceph_args = ['-n', name] + elif daemon_type in Ceph.daemons: + ceph_args = ['-n', name, '-f'] + elif daemon_type == SNMPGateway.daemon_type: + sg = SNMPGateway.init(ctx, fsid, daemon_id) + container_args.append( + f'--env-file={sg.conf_file_path}' + ) + + # if using podman, set -d, --conmon-pidfile & --cidfile flags + # so service can have Type=Forking + if isinstance(ctx.container_engine, Podman): + runtime_dir = '/run' + container_args.extend([ + '-d', '--log-driver', 'journald', + '--conmon-pidfile', + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id), + '--cidfile', + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id), + ]) + if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split: + container_args.append('--cgroups=split') + # if /etc/hosts doesn't exist, we can be confident + # users aren't using it for host name resolution + # and adding --no-hosts avoids bugs created in certain daemons + # by modifications podman makes to /etc/hosts + # https://tracker.ceph.com/issues/58532 + # https://tracker.ceph.com/issues/57018 + if not os.path.exists('/etc/hosts'): + container_args.extend(['--no-hosts']) + + return CephContainer.for_daemon( + ctx, + fsid=fsid, + daemon_type=daemon_type, + daemon_id=str(daemon_id), + entrypoint=entrypoint, + args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id), + container_args=container_args, + volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(ctx, fsid, 
daemon_type, daemon_id), + envs=envs, + privileged=privileged, + ptrace=ptrace, + host_network=host_network, + ) + + +def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'): + # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int] + + if not img: + img = ctx.image + + if isinstance(file_path, str): + paths = [file_path] + else: + paths = file_path + + ex: Optional[Tuple[str, RuntimeError]] = None + + for fp in paths: + try: + out = CephContainer( + ctx, + image=img, + entrypoint='stat', + args=['-c', '%u %g', fp] + ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + uid, gid = out.split(' ') + return int(uid), int(gid) + except RuntimeError as e: + ex = (fp, e) + if ex: + raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}') + + raise RuntimeError('uid/gid not found') + + +def deploy_daemon(ctx: CephadmContext, fsid: str, daemon_type: str, + daemon_id: Union[int, str], c: Optional['CephContainer'], + uid: int, gid: int, config: Optional[str] = None, + keyring: Optional[str] = None, osd_fsid: Optional[str] = None, + deployment_type: DeploymentType = DeploymentType.DEFAULT, + endpoints: Optional[List[EndPoint]] = None) -> None: + + endpoints = endpoints or [] + # only check port in use if fresh deployment since service + # we are redeploying/reconfiguring will already be using the port + if deployment_type == DeploymentType.DEFAULT: + if any([port_in_use(ctx, e) for e in endpoints]): + if daemon_type == 'mgr': + # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't + # tell whether that is the case here. 
+ logger.warning( + f"ceph-mgr TCP port(s) {','.join(map(str, endpoints))} already in use" + ) + else: + raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, endpoints)), daemon_type)) + + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + if deployment_type == DeploymentType.RECONFIG and not os.path.exists(data_dir): + raise Error('cannot reconfig, data path %s does not exist' % data_dir) + if daemon_type == 'mon' and not os.path.exists(data_dir): + assert config + assert keyring + # tmp keyring file + tmp_keyring = write_tmp(keyring, uid, gid) + + # tmp config file + tmp_config = write_tmp(config, uid, gid) + + # --mkfs + create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid) + mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id) + log_dir = get_log_dir(fsid, ctx.log_dir) + CephContainer( + ctx, + image=ctx.image, + entrypoint='/usr/bin/ceph-mon', + args=[ + '--mkfs', + '-i', str(daemon_id), + '--fsid', fsid, + '-c', '/tmp/config', + '--keyring', '/tmp/keyring', + ] + get_daemon_args(ctx, fsid, 'mon', daemon_id), + volume_mounts={ + log_dir: '/var/log/ceph:z', + mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id), + tmp_keyring.name: '/tmp/keyring:z', + tmp_config.name: '/tmp/config:z', + }, + ).run() + + # write conf + with write_new(mon_dir + '/config', owner=(uid, gid)) as f: + f.write(config) + else: + # dirs, conf, keyring + create_daemon_dirs( + ctx, + fsid, daemon_type, daemon_id, + uid, gid, + config, keyring) + + # only write out unit files and start daemon + # with systemd if this is not a reconfig + if deployment_type != DeploymentType.RECONFIG: + if daemon_type == CephadmAgent.daemon_type: + config_js = fetch_configs(ctx) + assert isinstance(config_js, dict) + + cephadm_agent = CephadmAgent(ctx, fsid, daemon_id) + cephadm_agent.deploy_daemon_unit(config_js) + else: + if c: + deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, + c, osd_fsid=osd_fsid, 
endpoints=endpoints) + else: + raise RuntimeError('attempting to deploy a daemon without a container image') + + if not os.path.exists(data_dir + '/unit.created'): + with write_new(data_dir + '/unit.created', owner=(uid, gid)) as f: + f.write('mtime is time the daemon deployment was created\n') + + with write_new(data_dir + '/unit.configured', owner=(uid, gid)) as f: + f.write('mtime is time we were last configured\n') + + update_firewalld(ctx, daemon_type) + + # Open ports explicitly required for the daemon + if endpoints: + fw = Firewalld(ctx) + fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, [])) + fw.apply_rules() + + # If this was a reconfig and the daemon is not a Ceph daemon, restart it + # so it can pick up potential changes to its configuration files + if deployment_type == DeploymentType.RECONFIG and daemon_type not in Ceph.daemons: + # ceph daemons do not need a restart; others (presumably) do to pick + # up the new config + call_throws(ctx, ['systemctl', 'reset-failed', + get_unit_name(fsid, daemon_type, daemon_id)]) + call_throws(ctx, ['systemctl', 'restart', + get_unit_name(fsid, daemon_type, daemon_id)]) + + +def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False): + # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None + if comment: + # Sometimes adding a comment, especially if there are multiple containers in one + # unit file, makes it easier to read and grok. + file_obj.write('# ' + comment + '\n') + # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually + file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n') + file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n') + # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage` + if isinstance(ctx.container_engine, Podman): + file_obj.write( + '! 
' + + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)]) + + ' 2> /dev/null\n') + file_obj.write( + '! ' + + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)]) + + ' 2> /dev/null\n') + + # container run command + file_obj.write( + ' '.join([shlex.quote(a) for a in container.run_cmd()]) + + (' &' if background else '') + '\n') + + +def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None: + # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail. + # see https://tracker.ceph.com/issues/50998 + + CGROUPV2_PATH = Path('/sys/fs/cgroup') + if not (CGROUPV2_PATH / 'system.slice').exists(): + # Only unified cgroup is affected, skip if not the case + return + + slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d')) + cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service' + if not cg_path.exists(): + return + + def cg_trim(path: Path) -> None: + for p in path.iterdir(): + if p.is_dir(): + cg_trim(p) + path.rmdir() + try: + cg_trim(cg_path) + except OSError: + logger.warning(f'Failed to trim old cgroups {cg_path}') + + +def deploy_daemon_units( + ctx: CephadmContext, + fsid: str, + uid: int, + gid: int, + daemon_type: str, + daemon_id: Union[int, str], + c: 'CephContainer', + enable: bool = True, + start: bool = True, + osd_fsid: Optional[str] = None, + endpoints: Optional[List[EndPoint]] = None, +) -> None: + # cmd + + def add_stop_actions(f: TextIO, timeout: Optional[int]) -> None: + # following generated script basically checks if the container exists + # before stopping it. Exit code will be success either if it doesn't + # exist or if it exists and is stopped successfully. + container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null' + f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True, timeout=timeout))} \n') + f.write(f'! 
{container_exists % c.cname} || {" ".join(c.stop_cmd(timeout=timeout))} \n') + + data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + run_file_path = data_dir + '/unit.run' + meta_file_path = data_dir + '/unit.meta' + with write_new(run_file_path) as f, write_new(meta_file_path) as metaf: + + f.write('set -e\n') + + if daemon_type in Ceph.daemons: + install_path = find_program('install') + f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid)) + + # pre-start cmd(s) + if daemon_type == 'osd': + # osds have a pre-start step + assert osd_fsid + simple_fn = os.path.join('/etc/ceph/osd', + '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid)) + if os.path.exists(simple_fn): + f.write('# Simple OSDs need chown on startup:\n') + for n in ['block', 'block.db', 'block.wal']: + p = os.path.join(data_dir, n) + f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid)) + else: + # if ceph-volume does not support 'ceph-volume activate', we must + # do 'ceph-volume lvm activate'. 
+ test_cv = get_ceph_volume_container( + ctx, + args=['activate', '--bad-option'], + volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id), + ) + out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT) + # bad: ceph-volume: error: unrecognized arguments: activate --bad-option + # good: ceph-volume: error: unrecognized arguments: --bad-option + if 'unrecognized arguments: activate' in err: + # older ceph-volume without top-level activate or --no-tmpfs + cmd = [ + 'lvm', 'activate', + str(daemon_id), osd_fsid, + '--no-systemd', + ] + else: + cmd = [ + 'activate', + '--osd-id', str(daemon_id), + '--osd-uuid', osd_fsid, + '--no-systemd', + '--no-tmpfs', + ] + + prestart = get_ceph_volume_container( + ctx, + args=cmd, + volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id), + ) + _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate') + elif daemon_type == CephIscsi.daemon_type: + f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n') + ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + tcmu_container = ceph_iscsi.get_tcmu_runner_container() + _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True) + + _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id))) + + # some metadata about the deploy + meta: Dict[str, Any] = fetch_meta(ctx) + meta.update({ + 'memory_request': int(ctx.memory_request) if ctx.memory_request else None, + 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None, + }) + if not meta.get('ports'): + if endpoints: + meta['ports'] = [e.port for e in endpoints] + else: + meta['ports'] = [] + 
metaf.write(json.dumps(meta, indent=4) + '\n') + + timeout = 30 if daemon_type == 'osd' else None + # post-stop command(s) + with write_new(data_dir + '/unit.poststop') as f: + # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop, + # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242. + add_stop_actions(cast(TextIO, f), timeout) + if daemon_type == 'osd': + assert osd_fsid + poststop = get_ceph_volume_container( + ctx, + args=[ + 'lvm', 'deactivate', + str(daemon_id), osd_fsid, + ], + volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type, + daemon_id), + ) + _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd') + elif daemon_type == CephIscsi.daemon_type: + # make sure we also stop the tcmu container + runtime_dir = '/run' + ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + tcmu_container = ceph_iscsi.get_tcmu_runner_container() + f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n') + f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n') + f.write('! 
' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, str(daemon_id) + '.tcmu') + '\n') + f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n') + + # post-stop command(s) + with write_new(data_dir + '/unit.stop') as f: + add_stop_actions(cast(TextIO, f), timeout) + + if c: + with write_new(data_dir + '/unit.image') as f: + f.write(c.image + '\n') + + # sysctl + install_sysctl(ctx, fsid, daemon_type) + + # systemd + install_base_units(ctx, fsid) + unit = get_unit_file(ctx, fsid) + unit_file = 'ceph-%s@.service' % (fsid) + with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f: + f.write(unit) + call_throws(ctx, ['systemctl', 'daemon-reload']) + + unit_name = get_unit_name(fsid, daemon_type, daemon_id) + call(ctx, ['systemctl', 'stop', unit_name], + verbosity=CallVerbosity.DEBUG) + call(ctx, ['systemctl', 'reset-failed', unit_name], + verbosity=CallVerbosity.DEBUG) + if enable: + call_throws(ctx, ['systemctl', 'enable', unit_name]) + if start: + clean_cgroup(ctx, fsid, unit_name) + call_throws(ctx, ['systemctl', 'start', unit_name]) + + +class Firewalld(object): + + # for specifying ports we should always open when opening + # ports for a daemon of that type. Main use case is for ports + # that we should open when deploying the daemon type but that + # the daemon itself may not necessarily need to bind to the port. + # This needs to be handed differently as we don't want to fail + # deployment if the port cannot be bound to but we still want to + # open the port in the firewall. 
+ external_ports: Dict[str, List[int]] = { + 'iscsi': [3260] # 3260 is the well known iSCSI port + } + + def __init__(self, ctx): + # type: (CephadmContext) -> None + self.ctx = ctx + self.available = self.check() + + def check(self): + # type: () -> bool + self.cmd = find_executable('firewall-cmd') + if not self.cmd: + logger.debug('firewalld does not appear to be present') + return False + (enabled, state, _) = check_unit(self.ctx, 'firewalld.service') + if not enabled: + logger.debug('firewalld.service is not enabled') + return False + if state != 'running': + logger.debug('firewalld.service is not running') + return False + + logger.info('firewalld ready') + return True + + def enable_service_for(self, daemon_type): + # type: (str) -> None + if not self.available: + logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type) + return + + if daemon_type == 'mon': + svc = 'ceph-mon' + elif daemon_type in ['mgr', 'mds', 'osd']: + svc = 'ceph' + elif daemon_type == NFSGanesha.daemon_type: + svc = 'nfs' + else: + return + + if not self.cmd: + raise RuntimeError('command not defined') + + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG) + if ret: + logger.info('Enabling firewalld service %s in current zone...' % svc) + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc]) + if ret: + raise RuntimeError( + 'unable to add service %s to current zone: %s' % (svc, err)) + else: + logger.debug('firewalld service %s is enabled in current zone' % svc) + + def open_ports(self, fw_ports): + # type: (List[int]) -> None + if not self.available: + logger.debug('Not possible to open ports <%s>. 
firewalld.service is not available' % fw_ports) + return + + if not self.cmd: + raise RuntimeError('command not defined') + + for port in fw_ports: + tcp_port = str(port) + '/tcp' + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) + if ret: + logger.info('Enabling firewalld port %s in current zone...' % tcp_port) + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port]) + if ret: + raise RuntimeError('unable to add port %s to current zone: %s' % + (tcp_port, err)) + else: + logger.debug('firewalld port %s is enabled in current zone' % tcp_port) + + def close_ports(self, fw_ports): + # type: (List[int]) -> None + if not self.available: + logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports) + return + + if not self.cmd: + raise RuntimeError('command not defined') + + for port in fw_ports: + tcp_port = str(port) + '/tcp' + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) + if not ret: + logger.info('Disabling port %s in current zone...' 
% tcp_port) + out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port]) + if ret: + raise RuntimeError('unable to remove port %s from current zone: %s' % + (tcp_port, err)) + else: + logger.info(f'Port {tcp_port} disabled') + else: + logger.info(f'firewalld port {tcp_port} already closed') + + def apply_rules(self): + # type: () -> None + if not self.available: + return + + if not self.cmd: + raise RuntimeError('command not defined') + + call_throws(self.ctx, [self.cmd, '--reload']) + + +def update_firewalld(ctx, daemon_type): + # type: (CephadmContext, str) -> None + if not ('skip_firewalld' in ctx and ctx.skip_firewalld): + firewall = Firewalld(ctx) + firewall.enable_service_for(daemon_type) + firewall.apply_rules() + + +def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None: + """ + Set up sysctl settings + """ + def _write(conf: Path, lines: List[str]) -> None: + lines = [ + '# created by cephadm', + '', + *lines, + '', + ] + with write_new(conf, owner=None, perms=None) as f: + f.write('\n'.join(lines)) + + conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf') + lines: List = [] + + if daemon_type == 'osd': + lines = OSD.get_sysctl_settings() + elif daemon_type == 'haproxy': + lines = HAproxy.get_sysctl_settings() + elif daemon_type == 'keepalived': + lines = Keepalived.get_sysctl_settings() + elif daemon_type == CephNvmeof.daemon_type: + lines = CephNvmeof.get_sysctl_settings() + lines = filter_sysctl_settings(ctx, lines) + + # apply the sysctl settings + if lines: + Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True) + _write(conf, lines) + call_throws(ctx, ['sysctl', '--system']) + + +def sysctl_get(ctx: CephadmContext, variable: str) -> Union[str, None]: + """ + Read a sysctl setting by executing 'sysctl -b {variable}' + """ + out, err, code = call(ctx, ['sysctl', '-b', variable]) + return out or None + + +def filter_sysctl_settings(ctx: CephadmContext, lines: List[str]) -> List[str]: + 
""" + Given a list of sysctl settings, examine the system's current configuration + and return those which are not currently set as described. + """ + def test_setting(desired_line: str) -> bool: + # Remove any comments + comment_start = desired_line.find('#') + if comment_start != -1: + desired_line = desired_line[:comment_start] + desired_line = desired_line.strip() + if not desired_line or desired_line.isspace(): + return False + setting, desired_value = map(lambda s: s.strip(), desired_line.split('=')) + if not setting or not desired_value: + return False + actual_value = sysctl_get(ctx, setting) + return desired_value != actual_value + return list(filter(test_setting, lines)) + + +def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None: + """ + Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration. + This moves it to '/etc/sysctl.d'. + """ + deprecated_location: str = '/usr/lib/sysctl.d' + deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf') + if not deprecated_confs: + return + + file_count: int = len(deprecated_confs) + logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.') + for conf in deprecated_confs: + try: + shutil.move(conf, ctx.sysctl_dir) + file_count -= 1 + except shutil.Error as err: + if str(err).endswith('already exists'): + logger.warning(f'Destination file already exists. 
Deleting {conf}.') + try: + os.unlink(conf) + file_count -= 1 + except OSError as del_err: + logger.warning(f'Could not remove {conf}: {del_err}.') + else: + logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}') + + # Log successful migration + if file_count == 0: + logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.') + return + + # Log partially successful / unsuccessful migration + files_processed: int = len(deprecated_confs) + if file_count < files_processed: + status: str = f'partially successful (failed {file_count}/{files_processed})' + elif file_count == files_processed: + status = 'unsuccessful' + logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.') + + +def install_base_units(ctx, fsid): + # type: (CephadmContext, str) -> None + """ + Set up ceph.target and ceph-$fsid.target units. + """ + # global unit + existed = os.path.exists(ctx.unit_dir + '/ceph.target') + with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f: + f.write('[Unit]\n' + 'Description=All Ceph clusters and services\n' + '\n' + '[Install]\n' + 'WantedBy=multi-user.target\n') + if not existed: + # we disable before enable in case a different ceph.target + # (from the traditional package) is present; while newer + # systemd is smart enough to disable the old + # (/lib/systemd/...) and enable the new (/etc/systemd/...), + # some older versions of systemd error out with EEXIST. 
+ call_throws(ctx, ['systemctl', 'disable', 'ceph.target']) + call_throws(ctx, ['systemctl', 'enable', 'ceph.target']) + call_throws(ctx, ['systemctl', 'start', 'ceph.target']) + + # cluster unit + existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid) + with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f: + f.write( + '[Unit]\n' + 'Description=Ceph cluster {fsid}\n' + 'PartOf=ceph.target\n' + 'Before=ceph.target\n' + '\n' + '[Install]\n' + 'WantedBy=multi-user.target ceph.target\n'.format( + fsid=fsid) + ) + if not existed: + call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid]) + call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid]) + + # don't overwrite file in order to allow users to manipulate it + if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'): + return + + # logrotate for the cluster + with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f: + """ + This is a bit sloppy in that the killall/pkill will touch all ceph daemons + in all containers, but I don't see an elegant way to send SIGHUP *just* to + the daemons for this cluster. (1) systemd kill -s will get the signal to + podman, but podman will exit. (2) podman kill will get the signal to the + first child (bash), but that isn't the ceph daemon. This is simpler and + should be harmless. 
+ """ + targets: List[str] = [ + 'ceph-mon', + 'ceph-mgr', + 'ceph-mds', + 'ceph-osd', + 'ceph-fuse', + 'radosgw', + 'rbd-mirror', + 'cephfs-mirror', + 'tcmu-runner' + ] + + f.write("""# created by cephadm +/var/log/ceph/%s/*.log { + rotate 7 + daily + compress + sharedscripts + postrotate + killall -q -1 %s || pkill -1 -x '%s' || true + endscript + missingok + notifempty + su root root +} +""" % (fsid, ' '.join(targets), '|'.join(targets))) + + +def get_unit_file(ctx, fsid): + # type: (CephadmContext, str) -> str + extra_args = '' + if isinstance(ctx.container_engine, Podman): + extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n' + 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n' + 'Type=forking\n' + 'PIDFile=%t/%n-pid\n') + if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION: + extra_args += 'Delegate=yes\n' + + docker = isinstance(ctx.container_engine, Docker) + u = """# generated by cephadm +[Unit] +Description=Ceph %i for {fsid} + +# According to: +# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget +# these can be removed once ceph-mon will dynamically change network +# configuration. 
+After=network-online.target local-fs.target time-sync.target{docker_after} +Wants=network-online.target local-fs.target time-sync.target +{docker_requires} + +PartOf=ceph-{fsid}.target +Before=ceph-{fsid}.target + +[Service] +LimitNOFILE=1048576 +LimitNPROC=1048576 +EnvironmentFile=-/etc/environment +ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run +ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop' +ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop +KillMode=none +Restart=on-failure +RestartSec=10s +TimeoutStartSec=200 +TimeoutStopSec=120 +StartLimitInterval=30min +StartLimitBurst=5 +{extra_args} +[Install] +WantedBy=ceph-{fsid}.target +""".format(fsid=fsid, + data_dir=ctx.data_dir, + extra_args=extra_args, + # if docker, we depend on docker.service + docker_after=' docker.service' if docker else '', + docker_requires='Requires=docker.service\n' if docker else '') + + return u + +################################## + + +class CephContainer: + def __init__(self, + ctx: CephadmContext, + image: str, + entrypoint: str, + args: List[str] = [], + volume_mounts: Dict[str, str] = {}, + cname: str = '', + container_args: List[str] = [], + envs: Optional[List[str]] = None, + privileged: bool = False, + ptrace: bool = False, + bind_mounts: Optional[List[List[str]]] = None, + init: Optional[bool] = None, + host_network: bool = True, + memory_request: Optional[str] = None, + memory_limit: Optional[str] = None, + ) -> None: + self.ctx = ctx + self.image = image + self.entrypoint = entrypoint + self.args = args + self.volume_mounts = volume_mounts + self._cname = cname + self.container_args = container_args + self.envs = envs + self.privileged = privileged + self.ptrace = ptrace + self.bind_mounts = bind_mounts if bind_mounts else [] + self.init = init if init else ctx.container_init + self.host_network = host_network + self.memory_request = memory_request + self.memory_limit = memory_limit + + @classmethod + def for_daemon(cls, + ctx: CephadmContext, + 
fsid: str, + daemon_type: str, + daemon_id: str, + entrypoint: str, + args: List[str] = [], + volume_mounts: Dict[str, str] = {}, + container_args: List[str] = [], + envs: Optional[List[str]] = None, + privileged: bool = False, + ptrace: bool = False, + bind_mounts: Optional[List[List[str]]] = None, + init: Optional[bool] = None, + host_network: bool = True, + memory_request: Optional[str] = None, + memory_limit: Optional[str] = None, + ) -> 'CephContainer': + return cls( + ctx, + image=ctx.image, + entrypoint=entrypoint, + args=args, + volume_mounts=volume_mounts, + cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id), + container_args=container_args, + envs=envs, + privileged=privileged, + ptrace=ptrace, + bind_mounts=bind_mounts, + init=init, + host_network=host_network, + memory_request=memory_request, + memory_limit=memory_limit, + ) + + @property + def cname(self) -> str: + """ + podman adds the current container name to the /etc/hosts + file. Turns out, python's `socket.getfqdn()` differs from + `hostname -f`, when we have the container names containing + dots in it.: + + # podman run --name foo.bar.baz.com ceph/ceph /bin/bash + [root@sebastians-laptop /]# cat /etc/hosts + 127.0.0.1 localhost + ::1 localhost + 127.0.1.1 sebastians-laptop foo.bar.baz.com + [root@sebastians-laptop /]# hostname -f + sebastians-laptop + [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())' + foo.bar.baz.com + + Fascinatingly, this doesn't happen when using dashes. + """ + return self._cname.replace('.', '-') + + @cname.setter + def cname(self, val: str) -> None: + self._cname = val + + @property + def old_cname(self) -> str: + return self._cname + + def run_cmd(self) -> List[str]: + cmd_args: List[str] = [ + str(self.ctx.container_engine.path), + 'run', + '--rm', + '--ipc=host', + # some containers (ahem, haproxy) override this, but we want a fast + # shutdown always (and, more importantly, a successful exit even if we + # fall back to SIGKILL). 
+ '--stop-signal=SIGTERM', + ] + + if isinstance(self.ctx.container_engine, Podman): + if os.path.exists('/etc/ceph/podman-auth.json'): + cmd_args.append('--authfile=/etc/ceph/podman-auth.json') + + if isinstance(self.ctx.container_engine, Docker): + cmd_args.extend(['--ulimit', 'nofile=1048576']) + + envs: List[str] = [ + '-e', 'CONTAINER_IMAGE=%s' % self.image, + '-e', 'NODE_NAME=%s' % get_hostname(), + ] + vols: List[str] = [] + binds: List[str] = [] + + if self.memory_request: + cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)]) + if self.memory_limit: + cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)]) + cmd_args.extend(['--memory', str(self.memory_limit)]) + + if self.host_network: + cmd_args.append('--net=host') + if self.entrypoint: + cmd_args.extend(['--entrypoint', self.entrypoint]) + if self.privileged: + cmd_args.extend([ + '--privileged', + # let OSD etc read block devs that haven't been chowned + '--group-add=disk']) + if self.ptrace and not self.privileged: + # if privileged, the SYS_PTRACE cap is already added + # in addition, --cap-add and --privileged are mutually + # exclusive since podman >= 2.0 + cmd_args.append('--cap-add=SYS_PTRACE') + if self.init: + cmd_args.append('--init') + envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1'] + if self.cname: + cmd_args.extend(['--name', self.cname]) + if self.envs: + for env in self.envs: + envs.extend(['-e', env]) + + vols = sum( + [['-v', '%s:%s' % (host_dir, container_dir)] + for host_dir, container_dir in self.volume_mounts.items()], []) + binds = sum([['--mount', '{}'.format(','.join(bind))] + for bind in self.bind_mounts], []) + + return \ + cmd_args + self.container_args + \ + envs + vols + binds + \ + [self.image] + self.args # type: ignore + + def shell_cmd(self, cmd: List[str]) -> List[str]: + cmd_args: List[str] = [ + str(self.ctx.container_engine.path), + 'run', + '--rm', + '--ipc=host', + ] + envs: List[str] = [ + '-e', 'CONTAINER_IMAGE=%s' % self.image, + '-e', 
'NODE_NAME=%s' % get_hostname(), + ] + vols: List[str] = [] + binds: List[str] = [] + + if self.host_network: + cmd_args.append('--net=host') + if self.ctx.no_hosts: + cmd_args.append('--no-hosts') + if self.privileged: + cmd_args.extend([ + '--privileged', + # let OSD etc read block devs that haven't been chowned + '--group-add=disk', + ]) + if self.init: + cmd_args.append('--init') + envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1'] + if self.envs: + for env in self.envs: + envs.extend(['-e', env]) + + vols = sum( + [['-v', '%s:%s' % (host_dir, container_dir)] + for host_dir, container_dir in self.volume_mounts.items()], []) + binds = sum([['--mount', '{}'.format(','.join(bind))] + for bind in self.bind_mounts], []) + + return cmd_args + self.container_args + envs + vols + binds + [ + '--entrypoint', cmd[0], + self.image, + ] + cmd[1:] + + def exec_cmd(self, cmd): + # type: (List[str]) -> List[str] + cname = get_running_container_name(self.ctx, self) + if not cname: + raise Error('unable to find container "{}"'.format(self.cname)) + return [ + str(self.ctx.container_engine.path), + 'exec', + ] + self.container_args + [ + self.cname, + ] + cmd + + def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]: + ret = [ + str(self.ctx.container_engine.path), + 'rm', '-f', + ] + if storage: + ret.append('--storage') + if old_cname: + ret.append(self.old_cname) + else: + ret.append(self.cname) + return ret + + def stop_cmd(self, old_cname: bool = False, timeout: Optional[int] = None) -> List[str]: + if timeout is None: + ret = [ + str(self.ctx.container_engine.path), + 'stop', self.old_cname if old_cname else self.cname, + ] + else: + ret = [ + str(self.ctx.container_engine.path), + 'stop', '-t', f'{timeout}', + self.old_cname if old_cname else self.cname, + ] + return ret + + def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE): + # type: (Optional[int], CallVerbosity) -> str + out, _, _ = call_throws(self.ctx, self.run_cmd(), + 
desc=self.entrypoint, timeout=timeout, verbosity=verbosity) + return out + + +##################################### + +class MgrListener(Thread): + def __init__(self, agent: 'CephadmAgent') -> None: + self.agent = agent + self.stop = False + super(MgrListener, self).__init__(target=self.run) + + def run(self) -> None: + listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + listenSocket.bind(('0.0.0.0', int(self.agent.listener_port))) + listenSocket.settimeout(60) + listenSocket.listen(1) + ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + ssl_ctx.verify_mode = ssl.CERT_REQUIRED + ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path) + ssl_ctx.load_verify_locations(self.agent.ca_path) + secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True) + while not self.stop: + try: + try: + conn, _ = secureListenSocket.accept() + except socket.timeout: + continue + try: + length: int = int(conn.recv(10).decode()) + except Exception as e: + err_str = f'Failed to extract length of payload from message: {e}' + conn.send(err_str.encode()) + logger.error(err_str) + continue + while True: + payload = conn.recv(length).decode() + if not payload: + break + try: + data: Dict[Any, Any] = json.loads(payload) + self.handle_json_payload(data) + except Exception as e: + err_str = f'Failed to extract json payload from message: {e}' + conn.send(err_str.encode()) + logger.error(err_str) + else: + conn.send(b'ACK') + if 'config' in data: + self.agent.wakeup() + self.agent.ls_gatherer.wakeup() + self.agent.volume_gatherer.wakeup() + logger.debug(f'Got mgr message {data}') + except Exception as e: + logger.error(f'Mgr Listener encountered exception: {e}') + + def shutdown(self) -> None: + self.stop = True + + def handle_json_payload(self, data: Dict[Any, Any]) -> None: + self.agent.ack = int(data['counter']) + if 'config' in data: + logger.info('Received new config from mgr') + config = data['config'] + for filename in 
config: + if filename in self.agent.required_files: + file_path = os.path.join(self.agent.daemon_dir, filename) + with write_new(file_path) as f: + f.write(config[filename]) + self.agent.pull_conf_settings() + self.agent.wakeup() + + +class CephadmAgent(): + + daemon_type = 'agent' + default_port = 8498 + loop_interval = 30 + stop = False + + required_files = [ + 'agent.json', + 'keyring', + 'root_cert.pem', + 'listener.crt', + 'listener.key', + ] + + def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''): + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.starting_port = 14873 + self.target_ip = '' + self.target_port = '' + self.host = '' + self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}') + self.config_path = os.path.join(self.daemon_dir, 'agent.json') + self.keyring_path = os.path.join(self.daemon_dir, 'keyring') + self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem') + self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt') + self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key') + self.listener_port = '' + self.ack = 1 + self.event = Event() + self.mgr_listener = MgrListener(self) + self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls') + self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume') + self.device_enhanced_scan = False + self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0] + self.recent_iteration_index: int = 0 + self.cached_ls_values: Dict[str, Dict[str, str]] = {} + + def validate(self, config: Dict[str, str] = {}) -> None: + # check for the required files + for fname in self.required_files: + if fname not in config: + raise Error('required file missing from config: %s' % fname) + + def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None: + if not config: + raise Error('Agent needs a config') + assert isinstance(config, dict) + 
self.validate(config) + + # Create the required config files in the daemons dir, with restricted permissions + for filename in config: + if filename in self.required_files: + file_path = os.path.join(self.daemon_dir, filename) + with write_new(file_path) as f: + f.write(config[filename]) + + unit_run_path = os.path.join(self.daemon_dir, 'unit.run') + with write_new(unit_run_path) as f: + f.write(self.unit_run()) + + meta: Dict[str, Any] = fetch_meta(self.ctx) + meta_file_path = os.path.join(self.daemon_dir, 'unit.meta') + with write_new(meta_file_path) as f: + f.write(json.dumps(meta, indent=4) + '\n') + + unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name()) + with write_new(unit_file_path) as f: + f.write(self.unit_file()) + + call_throws(self.ctx, ['systemctl', 'daemon-reload']) + call(self.ctx, ['systemctl', 'stop', self.unit_name()], + verbosity=CallVerbosity.DEBUG) + call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()], + verbosity=CallVerbosity.DEBUG) + call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()]) + + def unit_name(self) -> str: + return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id)) + + def unit_run(self) -> str: + py3 = shutil.which('python3') + binary_path = os.path.realpath(sys.argv[0]) + return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n') + + def unit_file(self) -> str: + return """#generated by cephadm +[Unit] +Description=cephadm agent for cluster {fsid} + +PartOf=ceph-{fsid}.target +Before=ceph-{fsid}.target + +[Service] +Type=forking +ExecStart=/bin/bash {data_dir}/unit.run +Restart=on-failure +RestartSec=10s + +[Install] +WantedBy=ceph-{fsid}.target +""".format( + fsid=self.fsid, + data_dir=self.daemon_dir + ) + + def shutdown(self) -> None: + self.stop = True + if self.mgr_listener.is_alive(): + self.mgr_listener.shutdown() + if self.ls_gatherer.is_alive(): + self.ls_gatherer.shutdown() + if 
self.volume_gatherer.is_alive(): + self.volume_gatherer.shutdown() + + def wakeup(self) -> None: + self.event.set() + + def pull_conf_settings(self) -> None: + try: + with open(self.config_path, 'r') as f: + config = json.load(f) + self.target_ip = config['target_ip'] + self.target_port = config['target_port'] + self.loop_interval = int(config['refresh_period']) + self.starting_port = int(config['listener_port']) + self.host = config['host'] + use_lsm = config['device_enhanced_scan'] + except Exception as e: + self.shutdown() + raise Error(f'Failed to get agent target ip and port from config: {e}') + + try: + with open(self.keyring_path, 'r') as f: + self.keyring = f.read() + except Exception as e: + self.shutdown() + raise Error(f'Failed to get agent keyring: {e}') + + assert self.target_ip and self.target_port + + self.device_enhanced_scan = False + if use_lsm.lower() == 'true': + self.device_enhanced_scan = True + self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan)) + + def run(self) -> None: + self.pull_conf_settings() + + try: + for _ in range(1001): + if not port_in_use(self.ctx, EndPoint('0.0.0.0', self.starting_port)): + self.listener_port = str(self.starting_port) + break + self.starting_port += 1 + if not self.listener_port: + raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.') + except Exception as e: + raise Error(f'Failed to pick port for agent to listen on: {e}') + + if not self.mgr_listener.is_alive(): + self.mgr_listener.start() + + if not self.ls_gatherer.is_alive(): + self.ls_gatherer.start() + + if not self.volume_gatherer.is_alive(): + self.volume_gatherer.start() + + ssl_ctx = ssl.create_default_context() + ssl_ctx.check_hostname = True + ssl_ctx.verify_mode = ssl.CERT_REQUIRED + ssl_ctx.load_verify_locations(self.ca_path) + + while not self.stop: + start_time = time.monotonic() + ack = self.ack + + # part of the networks info is returned as a set which is not JSON + # 
serializable. The set must be converted to a list + networks = list_networks(self.ctx) + networks_list: Dict[str, Dict[str, List[str]]] = {} + for key in networks.keys(): + networks_list[key] = {} + for k, v in networks[key].items(): + networks_list[key][k] = list(v) + + data = json.dumps({'host': self.host, + 'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack + and self.ls_gatherer.data is not None else []), + 'networks': networks_list, + 'facts': HostFacts(self.ctx).dump(), + 'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack + and self.volume_gatherer.data is not None else ''), + 'ack': str(ack), + 'keyring': self.keyring, + 'port': self.listener_port}) + data = data.encode('ascii') + + url = f'https://{self.target_ip}:{self.target_port}/data/' + try: + req = Request(url, data, {'Content-Type': 'application/json'}) + send_time = time.monotonic() + with urlopen(req, context=ssl_ctx) as response: + response_str = response.read() + response_json = json.loads(response_str) + total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds() + logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.') + except Exception as e: + logger.error(f'Failed to send metadata to mgr: {e}') + + end_time = time.monotonic() + run_time = datetime.timedelta(seconds=(end_time - start_time)) + self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds() + self.recent_iteration_index = (self.recent_iteration_index + 1) % 3 + run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t]) + + self.event.wait(max(self.loop_interval - int(run_time_average), 0)) + self.event.clear() + + def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]: + self.ctx.command = 'inventory --format=json'.split() + if enhanced: + self.ctx.command.append('--with-lsm') + self.ctx.fsid = 
self.fsid + + stream = io.StringIO() + with redirect_stdout(stream): + command_ceph_volume(self.ctx) + + stdout = stream.getvalue() + + if stdout: + return (stdout, False) + else: + raise Exception('ceph-volume returned empty value') + + def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]: + # gets a subset of ls info quickly. The results of this will tell us if our + # cached info is still good or if we need to run the full ls again. + # for legacy containers, we just grab the full info. For cephadmv1 containers, + # we only grab enabled, state, mem_usage and container id. If container id has + # not changed for any daemon, we assume our cached info is good. + daemons: Dict[str, Dict[str, Any]] = {} + data_dir = self.ctx.data_dir + seen_memusage = {} # type: Dict[str, int] + out, err, code = call( + self.ctx, + [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'], + verbosity=CallVerbosity.DEBUG + ) + seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out) + # we need a mapping from container names to ids. 
Later we will convert daemon + # names to container names to get daemons container id to see if it has changed + out, err, code = call( + self.ctx, + [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'], + verbosity=CallVerbosity.DEBUG + ) + name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out) + for i in os.listdir(data_dir): + if i in ['mon', 'osd', 'mds', 'mgr']: + daemon_type = i + for j in os.listdir(os.path.join(data_dir, i)): + if '-' not in j: + continue + (cluster, daemon_id) = j.split('-', 1) + legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id) + (enabled, state, _) = check_unit(self.ctx, legacy_unit_name) + daemons[f'{daemon_type}.{daemon_id}'] = { + 'style': 'legacy', + 'name': '%s.%s' % (daemon_type, daemon_id), + 'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown', + 'systemd_unit': legacy_unit_name, + 'enabled': 'true' if enabled else 'false', + 'state': state, + } + elif is_fsid(i): + fsid = str(i) # convince mypy that fsid is a str here + for j in os.listdir(os.path.join(data_dir, i)): + if '.' 
in j and os.path.isdir(os.path.join(data_dir, fsid, j)): + (daemon_type, daemon_id) = j.split('.', 1) + unit_name = get_unit_name(fsid, daemon_type, daemon_id) + (enabled, state, _) = check_unit(self.ctx, unit_name) + daemons[j] = { + 'style': 'cephadm:v1', + 'systemd_unit': unit_name, + 'enabled': 'true' if enabled else 'false', + 'state': state, + } + c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash') + container_id: Optional[str] = None + for name in (c.cname, c.old_cname): + if name in name_id_mapping: + container_id = name_id_mapping[name] + break + daemons[j]['container_id'] = container_id + if container_id: + daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len]) + return daemons + + def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]: + # map container names to ids from ps output + name_id_mapping = {} # type: Dict[str, str] + if not code: + for line in out.splitlines(): + id, name = line.split(',') + name_id_mapping[name] = id + return name_id_mapping + + def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]: + if not self.cached_ls_values: + logger.info('No cached ls output. Running full daemon ls') + ls = list_daemons(self.ctx) + for d in ls: + self.cached_ls_values[d['name']] = d + return (ls, True) + else: + ls_subset = self._daemon_ls_subset() + need_full_ls = False + state_change = False + if set(self.cached_ls_values.keys()) != set(ls_subset.keys()): + # case for a new daemon in ls or an old daemon no longer appearing. + # If that happens we need a full ls + logger.info('Change detected in state of daemons. 
Running full daemon ls') + self.cached_ls_values = {} + ls = list_daemons(self.ctx) + for d in ls: + self.cached_ls_values[d['name']] = d + return (ls, True) + for daemon, info in self.cached_ls_values.items(): + if info['style'] == 'legacy': + # for legacy containers, ls_subset just grabs all the info + self.cached_ls_values[daemon] = ls_subset[daemon] + else: + if info['container_id'] != ls_subset[daemon]['container_id']: + # case for container id having changed. We need full ls as + # info we didn't grab like version and start time could have changed + need_full_ls = True + break + + # want to know if a daemons state change because in those cases we want + # to report back quicker + if ( + self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled'] + or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state'] + ): + state_change = True + # if we reach here, container id matched. Update the few values we do track + # from ls subset: state, enabled, memory_usage. + self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled'] + self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state'] + if 'memory_usage' in ls_subset[daemon]: + self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage'] + if need_full_ls: + logger.info('Change detected in state of daemons. 
Running full daemon ls') + ls = list_daemons(self.ctx) + self.cached_ls_values = {} + for d in ls: + self.cached_ls_values[d['name']] = d + return (ls, True) + else: + ls = [info for daemon, info in self.cached_ls_values.items()] + return (ls, state_change) + + +class AgentGatherer(Thread): + def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None: + self.agent = agent + self.func = func + self.gatherer_type = gatherer_type + self.ack = initial_ack + self.event = Event() + self.data: Any = None + self.stop = False + self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0] + self.recent_iteration_index: int = 0 + super(AgentGatherer, self).__init__(target=self.run) + + def run(self) -> None: + while not self.stop: + try: + start_time = time.monotonic() + + ack = self.agent.ack + change = False + try: + self.data, change = self.func() + except Exception as e: + logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}') + self.data = None + if ack != self.ack or change: + self.ack = ack + self.agent.wakeup() + + end_time = time.monotonic() + run_time = datetime.timedelta(seconds=(end_time - start_time)) + self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds() + self.recent_iteration_index = (self.recent_iteration_index + 1) % 3 + run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t]) + + self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0)) + self.event.clear() + except Exception as e: + logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}') + + def shutdown(self) -> None: + self.stop = True + + def wakeup(self) -> None: + self.event.set() + + def update_func(self, func: Callable) -> None: + self.func = func + + +def command_agent(ctx: CephadmContext) -> None: + agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id) + + if not 
os.path.isdir(agent.daemon_dir): + raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?') + + agent.run() + + +################################## + +@executes_early +def command_version(ctx): + # type: (CephadmContext) -> int + import importlib + + try: + vmod = importlib.import_module('_version') + except ImportError: + print('cephadm version UNKNOWN') + return 1 + _unset = '<UNSET>' + print('cephadm version {0} ({1}) {2} ({3})'.format( + getattr(vmod, 'CEPH_GIT_NICE_VER', _unset), + getattr(vmod, 'CEPH_GIT_VER', _unset), + getattr(vmod, 'CEPH_RELEASE_NAME', _unset), + getattr(vmod, 'CEPH_RELEASE_TYPE', _unset), + )) + return 0 + +################################## + + +@default_image +def command_pull(ctx): + # type: (CephadmContext) -> int + + try: + _pull_image(ctx, ctx.image, ctx.insecure) + except UnauthorizedRegistryError: + err_str = 'Failed to pull container image. Check that host(s) are logged into the registry' + logger.debug(f'Pulling image for `command_pull` failed: {err_str}') + raise Error(err_str) + return command_inspect_image(ctx) + + +def _pull_image(ctx, image, insecure=False): + # type: (CephadmContext, str, bool) -> None + logger.info('Pulling container image %s...' 
% image) + + ignorelist = [ + 'error creating read-write layer with ID', + 'net/http: TLS handshake timeout', + 'Digest did not match, expected', + ] + + cmd = [ctx.container_engine.path, 'pull', image] + if isinstance(ctx.container_engine, Podman): + if insecure: + cmd.append('--tls-verify=false') + + if os.path.exists('/etc/ceph/podman-auth.json'): + cmd.append('--authfile=/etc/ceph/podman-auth.json') + cmd_str = ' '.join(cmd) + + for sleep_secs in [1, 4, 25]: + out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + if not ret: + return + + if 'unauthorized' in err: + raise UnauthorizedRegistryError() + + if not any(pattern in err for pattern in ignorelist): + raise Error('Failed command: %s' % cmd_str) + + logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs)) + time.sleep(sleep_secs) + + raise Error('Failed command: %s: maximum retries reached' % cmd_str) + +################################## + + +@require_image +@infer_image +def command_inspect_image(ctx): + # type: (CephadmContext) -> int + out, err, ret = call_throws(ctx, [ + ctx.container_engine.path, 'inspect', + '--format', '{{.ID}},{{.RepoDigests}}', + ctx.image]) + if ret: + return errno.ENOENT + info_from = get_image_info_from_inspect(out.strip(), ctx.image) + + ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip() + info_from['ceph_version'] = ver + + print(json.dumps(info_from, indent=4, sort_keys=True)) + return 0 + + +def normalize_image_digest(digest: str) -> str: + """ + Normal case: + >>> normalize_image_digest('ceph/ceph', 'docker.io') + 'docker.io/ceph/ceph' + + No change: + >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') + 'quay.ceph.io/ceph/ceph' + + >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') + 'docker.io/ubuntu' + + >>> normalize_image_digest('localhost/ceph', 'docker.io') + 'localhost/ceph' + """ + known_shortnames = [ + 'ceph/ceph', + 'ceph/daemon', + 'ceph/daemon-base', + 
] + for image in known_shortnames: + if digest.startswith(image): + return f'{DEFAULT_REGISTRY}/{digest}' + return digest + + +def get_image_info_from_inspect(out, image): + # type: (str, str) -> Dict[str, Union[str,List[str]]] + image_id, digests = out.split(',', 1) + if not out: + raise Error('inspect {}: empty result'.format(image)) + r = { + 'image_id': normalize_container_id(image_id) + } # type: Dict[str, Union[str,List[str]]] + if digests: + r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' '))) + return r + +################################## + + +def check_subnet(subnets: str) -> Tuple[int, List[int], str]: + """Determine whether the given string is a valid subnet + + :param subnets: subnet string, a single definition or comma separated list of CIDR subnets + :returns: return code, IP version list of the subnets and msg describing any errors validation errors + """ + + rc = 0 + versions = set() + errors = [] + subnet_list = subnets.split(',') + for subnet in subnet_list: + # ensure the format of the string is as expected address/netmask + subnet = subnet.strip() + if not re.search(r'\/\d+$', subnet): + rc = 1 + errors.append(f'{subnet} is not in CIDR format (address/netmask)') + continue + try: + v = ipaddress.ip_network(subnet).version + versions.add(v) + except ValueError as e: + rc = 1 + errors.append(f'{subnet} invalid: {str(e)}') + + return rc, list(versions), ', '.join(errors) + + +def unwrap_ipv6(address): + # type: (str) -> str + if address.startswith('[') and address.endswith(']'): + return address[1: -1] + return address + + +def wrap_ipv6(address): + # type: (str) -> str + + # We cannot assume it's already wrapped or even an IPv6 address if + # it's already wrapped it'll not pass (like if it's a hostname) and trigger + # the ValueError + try: + if ipaddress.ip_address(address).version == 6: + return f'[{address}]' + except ValueError: + pass + + return address + + +def is_ipv6(address): + # type: (str) -> bool + address 
= unwrap_ipv6(address) + try: + return ipaddress.ip_address(address).version == 6 + except ValueError: + logger.warning('Address: {} is not a valid IP address'.format(address)) + return False + + +def ip_in_subnets(ip_addr: str, subnets: str) -> bool: + """Determine if the ip_addr belongs to any of the subnets list.""" + subnet_list = [x.strip() for x in subnets.split(',')] + for subnet in subnet_list: + ip_address = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr + if ipaddress.ip_address(ip_address) in ipaddress.ip_network(subnet): + return True + return False + + +def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]: + """Parse mon-addrv param into a list of mon end points.""" + r = re.compile(r':(\d+)$') + addrv_args = [] + addr_arg = addrv_arg + if addr_arg[0] != '[' or addr_arg[-1] != ']': + raise Error(f'--mon-addrv value {addr_arg} must use square brackets') + + for addr in addr_arg[1: -1].split(','): + hasport = r.findall(addr) + if not hasport: + raise Error(f'--mon-addrv value {addr_arg} must include port number') + port_str = hasport[0] + addr = re.sub(r'^v\d+:', '', addr) # strip off v1: or v2: prefix + base_ip = addr[0:-(len(port_str)) - 1] + addrv_args.append(EndPoint(base_ip, int(port_str))) + + return addrv_args + + +def parse_mon_ip(mon_ip: str) -> List[EndPoint]: + """Parse mon-ip param into a list of mon end points.""" + r = re.compile(r':(\d+)$') + addrv_args = [] + hasport = r.findall(mon_ip) + if hasport: + port_str = hasport[0] + base_ip = mon_ip[0:-(len(port_str)) - 1] + addrv_args.append(EndPoint(base_ip, int(port_str))) + else: + # No port provided: use fixed ports for ceph monitor + addrv_args.append(EndPoint(mon_ip, 3300)) + addrv_args.append(EndPoint(mon_ip, 6789)) + + return addrv_args + + +def build_addrv_params(addrv: List[EndPoint]) -> str: + """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]""" + if len(addrv) > 2: + raise Error('Detected a local mon-addrv list with more than 2 entries.') + port_to_ver: 
Dict[int, str] = {6789: 'v1', 3300: 'v2'} + addr_arg_list: List[str] = [] + for ep in addrv: + if ep.port in port_to_ver: + ver = port_to_ver[ep.port] + else: + ver = 'v2' # default mon protocol version if port is not provided + logger.warning(f'Using msgr2 protocol for unrecognized port {ep}') + addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}') + + addr_arg = '[{0}]'.format(','.join(addr_arg_list)) + return addr_arg + + +def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]: + """Get mon public network from configuration file.""" + cp = read_config(ctx.config) + if not cp.has_option('global', 'public_network'): + return None + + # Ensure all public CIDR networks are valid + public_network = cp.get('global', 'public_network').strip('"').strip("'") + rc, _, err_msg = check_subnet(public_network) + if rc: + raise Error(f'Invalid public_network {public_network} parameter: {err_msg}') + + # Ensure all public CIDR networks are configured locally + configured_subnets = set([x.strip() for x in public_network.split(',')]) + local_subnets = set([x[0] for x in list_networks(ctx).items()]) + valid_public_net = False + for net in configured_subnets: + if net in local_subnets: + valid_public_net = True + else: + logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.') + if not valid_public_net: + raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.') + + # Ensure public_network is compatible with the provided mon-ip (or mon-addrv) + if ctx.mon_ip: + if not ip_in_subnets(ctx.mon_ip, public_network): + raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}') + elif ctx.mon_addrv: + addrv_args = parse_mon_addrv(ctx.mon_addrv) + for addrv in addrv_args: + if not ip_in_subnets(addrv.ip, public_network): + raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}') + + 
# NOTE(review): this span opens inside get_public_net_from_cfg(); the two
# statements below are the end of that function. Nesting in this span was
# reconstructed from a whitespace-collapsed rendering -- confirm before merging.
    logger.debug(f'Using mon public network from configuration file {public_network}')
    return public_network


def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
    """Infer mon public network from local network.

    Returns a comma separated string of every local network that has one of
    the mon IPs configured on it; raises Error when none matches.
    """
    # Make sure IP is configured locally, and then figure out the CIDR network
    mon_networks = []
    for net, ifaces in list_networks(ctx).items():
        # build local_ips list for the specified network
        local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
        for _, ls in ifaces.items():
            local_ips.extend([ipaddress.ip_address(ip) for ip in ls])

        # check if any of mon ips belong to this net
        for mon_ep in mon_eps:
            try:
                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
                    mon_networks.append(net)
                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
            except ValueError as e:
                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')

    if not mon_networks:
        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
    else:
        logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')

    mon_networks = list(set(mon_networks))  # remove duplicates
    return ','.join(mon_networks)


def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
    """Get mon public network configuration.

    Returns (mon_addrv, ipv6, mon_network); mon_network stays None when
    ctx.skip_mon_network is set. Raises Error if neither ctx.mon_ip nor
    ctx.mon_addrv is given.
    """
    ipv6 = False
    addrv_args: List[EndPoint] = []
    mon_addrv: str = ''  # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        addrv_args = parse_mon_ip(ctx.mon_ip)
        mon_addrv = build_addrv_params(addrv_args)
    elif ctx.mon_addrv:
        # more than one '[' means an address inside the outer brackets is
        # itself bracketed
        ipv6 = ctx.mon_addrv.count('[') > 1
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        mon_addrv = ctx.mon_addrv
    else:
        raise Error('must specify --mon-ip or --mon-addrv')

    if addrv_args:
        for end_point in addrv_args:
            check_ip_port(ctx, end_point)

    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
    mon_network = None
    if not ctx.skip_mon_network:
        # the config file value wins; fall back to inferring from local networks
        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)

    return (mon_addrv, ipv6, mon_network)


def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    # the cluster network may not exist on this node, so all we can do is
    # validate that the address given is valid ipv4 or ipv6 subnet
    ipv6_cluster_network = False
    cp = read_config(ctx.config)
    # the command line value takes precedence over the config file
    cluster_network = ctx.cluster_network
    if cluster_network is None and cp.has_option('global', 'cluster_network'):
        cluster_network = cp.get('global', 'cluster_network').strip('"').strip("'")

    if cluster_network:
        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
        local_subnets = set([x[0] for x in list_networks(ctx).items()])
        for net in cluster_nets:
            if net not in local_subnets:
                logger.warning(f'The cluster CIDR network {net} is not configured locally.')

        rc, versions, err_msg = check_subnet(cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        ipv6_cluster_network = True if 6 in versions else False
    else:
        logger.info('Internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network


def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate mon, admin and mgr keys and write the bootstrap keyrings.

    Returns (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring);
    the last two are whatever write_tmp() returns.
    """

    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)


def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    logger.info('Creating initial monmap...')
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id,
# NOTE(review): this span opens inside the `args=[...]` list of
# create_initial_monmap(); the statements up to `return monmap` are the end of
# that function. Nesting in this span was reconstructed from a
# whitespace-collapsed rendering -- confirm before merging.
            mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap


def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    """Create the mon's directories and run `ceph-mon --mkfs` in a container.

    Returns (mon_dir, log_dir).
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)


def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the mon daemon via deploy_daemon(), passing no config or keyring."""
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_properties = {'service_name': 'mon'}
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)


def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    """Hand is_available() a probe that runs `ceph status` in a container."""
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout,
                             verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)


def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the mgr daemon and hand is_available() a probe that reads the mgrmap.

    `clifunc` is called with a ceph CLI argument list and must return the
    command's output as JSON text.
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_properties = {'service_name': 'mgr'}
    endpoints = [EndPoint('0.0.0.0', 9283), EndPoint('0.0.0.0', 8765)]
    if not ctx.skip_monitoring_stack:
        endpoints.append(EndPoint('0.0.0.0', 8443))
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, endpoints=endpoints)

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'],
                          timeout=timeout,
                          verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False

    is_available(ctx, 'mgr', is_mgr_available)


def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
+ pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z', + pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z' + } + cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts) + cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts) + ssh_pub = cli(['cephadm', 'get-pub-key']) + authorize_ssh_key(ssh_pub, ctx.ssh_user) + elif ctx.ssh_private_key and ctx.ssh_signed_cert: + logger.info('Using provided ssh private key and signed cert ...') + mounts = { + pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z', + pathify(ctx.ssh_signed_cert.name): '/tmp/cephadm-ssh-key-cert.pub:z' + } + cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts) + cli(['cephadm', 'set-signed-cert', '-i', '/tmp/cephadm-ssh-key-cert.pub'], extra_mounts=mounts) + else: + logger.info('Generating ssh key...') + cli(['cephadm', 'generate-key']) + ssh_pub = cli(['cephadm', 'get-pub-key']) + with open(ctx.output_pub_ssh_key, 'w') as f: + f.write(ssh_pub) + logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key) + authorize_ssh_key(ssh_pub, ctx.ssh_user) + + host = get_hostname() + logger.info('Adding host %s...' % host) + try: + args = ['orch', 'host', 'add', host] + if ctx.mon_ip: + args.append(unwrap_ipv6(ctx.mon_ip)) + elif ctx.mon_addrv: + addrv_args = parse_mon_addrv(ctx.mon_addrv) + args.append(unwrap_ipv6(addrv_args[0].ip)) + cli(args) + except RuntimeError as e: + raise Error('Failed to add host <%s>: %s' % (host, e)) + + for t in ['mon', 'mgr']: + if not ctx.orphan_initial_daemons: + logger.info('Deploying %s service with default placement...' % t) + cli(['orch', 'apply', t]) + else: + logger.info('Deploying unmanaged %s service...' 
% t) + cli(['orch', 'apply', t, '--unmanaged']) + + if not ctx.orphan_initial_daemons: + logger.info('Deploying crash service with default placement...') + cli(['orch', 'apply', 'crash']) + + if not ctx.skip_monitoring_stack: + for t in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']: + logger.info('Deploying %s service with default placement...' % t) + try: + cli(['orch', 'apply', t]) + except RuntimeError: + ctx.error_code = -errno.EINVAL + logger.error(f'Failed to apply service type {t}. ' + 'Perhaps the ceph version being bootstrapped does not support it') + + if ctx.with_centralized_logging: + for t in ['loki', 'promtail']: + logger.info('Deploying %s service with default placement...' % t) + try: + cli(['orch', 'apply', t]) + except RuntimeError: + ctx.error_code = -errno.EINVAL + logger.error(f'Failed to apply service type {t}. ' + 'Perhaps the ceph version being bootstrapped does not support it') + + +def enable_cephadm_mgr_module( + cli: Callable, wait_for_mgr_restart: Callable +) -> None: + + logger.info('Enabling cephadm module...') + cli(['mgr', 'module', 'enable', 'cephadm']) + wait_for_mgr_restart() + logger.info('Setting orchestrator backend to cephadm...') + cli(['orch', 'set', 'backend', 'cephadm']) + + +def prepare_dashboard( + ctx: CephadmContext, + uid: int, gid: int, + cli: Callable, wait_for_mgr_restart: Callable +) -> None: + + # Configure SSL port (cephadm only allows to configure dashboard SSL port) + # if the user does not want to use SSL he can change this setting once the cluster is up + cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)]) + + # configuring dashboard parameters + logger.info('Enabling the dashboard module...') + cli(['mgr', 'module', 'enable', 'dashboard']) + wait_for_mgr_restart() + + # dashboard crt and key + if ctx.dashboard_key and ctx.dashboard_crt: + logger.info('Using provided dashboard certificate...') + mounts = { + 
pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z', + pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z' + } + cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts) + cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts) + else: + logger.info('Generating a dashboard self-signed certificate...') + cli(['dashboard', 'create-self-signed-cert']) + + logger.info('Creating initial admin user...') + password = ctx.initial_dashboard_password or generate_password() + tmp_password_file = write_tmp(password, uid, gid) + cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password'] + if not ctx.dashboard_password_noupdate: + cmd.append('--pwd-update-required') + cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'}) + logger.info('Fetching dashboard port number...') + out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port']) + port = int(out) + + # Open dashboard port + if not ('skip_firewalld' in ctx and ctx.skip_firewalld): + fw = Firewalld(ctx) + fw.open_ports([port]) + fw.apply_rules() + + logger.info('Ceph Dashboard is now available at:\n\n' + '\t URL: https://%s:%s/\n' + '\t User: %s\n' + '\tPassword: %s\n' % ( + get_fqdn(), port, + ctx.initial_dashboard_user, + password)) + + +def prepare_bootstrap_config( + ctx: CephadmContext, + fsid: str, mon_addr: str, image: str + +) -> str: + + cp = read_config(ctx.config) + if not cp.has_section('global'): + cp.add_section('global') + cp.set('global', 'fsid', fsid) + cp.set('global', 'mon_host', mon_addr) + cp.set('global', 'container_image', image) + + if not cp.has_section('mon'): + cp.add_section('mon') + if ( + not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim') + and not cp.has_option('mon', 'auth allow insecure global id reclaim') + ): + cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false') + + if 
ctx.single_host_defaults: + logger.info('Adjusting default settings to suit single-host cluster...') + # replicate across osds, not hosts + if ( + not cp.has_option('global', 'osd_crush_chooseleaf_type') + and not cp.has_option('global', 'osd crush chooseleaf type') + ): + cp.set('global', 'osd_crush_chooseleaf_type', '0') + # replica 2x + if ( + not cp.has_option('global', 'osd_pool_default_size') + and not cp.has_option('global', 'osd pool default size') + ): + cp.set('global', 'osd_pool_default_size', '2') + # disable mgr standby modules (so we can colocate multiple mgrs on one host) + if not cp.has_section('mgr'): + cp.add_section('mgr') + if ( + not cp.has_option('mgr', 'mgr_standby_modules') + and not cp.has_option('mgr', 'mgr standby modules') + ): + cp.set('mgr', 'mgr_standby_modules', 'false') + if ctx.log_to_file: + cp.set('global', 'log_to_file', 'true') + cp.set('global', 'log_to_stderr', 'false') + cp.set('global', 'log_to_journald', 'false') + cp.set('global', 'mon_cluster_log_to_file', 'true') + cp.set('global', 'mon_cluster_log_to_stderr', 'false') + cp.set('global', 'mon_cluster_log_to_journald', 'false') + + cpf = StringIO() + cp.write(cpf) + config = cpf.getvalue() + + if ctx.registry_json or ctx.registry_url: + command_registry_login(ctx) + + return config + + +def finish_bootstrap_config( + ctx: CephadmContext, + fsid: str, + config: str, + mon_id: str, mon_dir: str, + mon_network: Optional[str], ipv6: bool, + cli: Callable, + cluster_network: Optional[str], ipv6_cluster_network: bool + +) -> None: + if not ctx.no_minimize_config: + logger.info('Assimilating anything we can from ceph.conf...') + cli([ + 'config', 'assimilate-conf', + '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id + ], { + mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id + }) + logger.info('Generating new minimal ceph.conf...') + cli([ + 'config', 'generate-minimal-conf', + '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id + ], { + mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % 
mon_id + }) + # re-read our minimized config + with open(mon_dir + '/config', 'r') as f: + config = f.read() + logger.info('Restarting the monitor...') + call_throws(ctx, [ + 'systemctl', + 'restart', + get_unit_name(fsid, 'mon', mon_id) + ]) + elif 'image' in ctx and ctx.image: + # we still want to assimilate the given container image if provided + cli(['config', 'set', 'global', 'container_image', f'{ctx.image}']) + + if mon_network: + cp = read_config(ctx.config) + cfg_section = 'global' if cp.has_option('global', 'public_network') else 'mon' + logger.info(f'Setting public_network to {mon_network} in {cfg_section} config section') + cli(['config', 'set', cfg_section, 'public_network', mon_network]) + + if cluster_network: + logger.info(f'Setting cluster_network to {cluster_network}') + cli(['config', 'set', 'global', 'cluster_network', cluster_network]) + + if ipv6 or ipv6_cluster_network: + logger.info('Enabling IPv6 (ms_bind_ipv6) binding') + cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true']) + + with open(ctx.output_config, 'w') as f: + f.write(config) + logger.info('Wrote config to %s' % ctx.output_config) + pass + + +def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]: + # overall goal of this function is to go through an applied spec and find + # the hostname (and addr is provided) for each host spec in the applied spec. + # Generally, we should be able to just pass the spec to the mgr module where + # proper yaml parsing can happen, but for host specs in particular we want to + # be able to distribute ssh keys, which requires finding the hostname (and addr + # if possible) for each potential host spec in the applied spec. 
+ + specs: List[List[str]] = [] + current_spec: List[str] = [] + for line in f: + if re.search(r'^---\s+', line): + if current_spec: + specs.append(current_spec) + current_spec = [] + else: + line = line.strip() + if line: + current_spec.append(line) + if current_spec: + specs.append(current_spec) + + host_specs: List[List[str]] = [] + for spec in specs: + for line in spec: + if 'service_type' in line: + try: + _, type = line.split(':') + type = type.strip() + if type == 'host': + host_specs.append(spec) + except ValueError as e: + spec_str = '\n'.join(spec) + logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}') + break + spec_str = '\n'.join(spec) + logger.error(f'Failed to find service_type within spec:\n{spec_str}') + + host_dicts = [] + for s in host_specs: + host_dict = _extract_host_info_from_spec(s) + # if host_dict is empty here, we failed to pull the hostname + # for the host from the spec. This should have already been logged + # so at this point we just don't want to include it in our output + if host_dict: + host_dicts.append(host_dict) + + return host_dicts + + +def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]: + # note:for our purposes here, we only really want the hostname + # and address of the host from each of these specs in order to + # be able to distribute ssh keys. 
We will later apply the spec + # through the mgr module where proper yaml parsing can be done + # The returned dicts from this function should only contain + # one or two entries, one (required) for hostname, one (optional) for addr + # { + # hostname: <hostname> + # addr: <ip-addr> + # } + # if we fail to find the hostname, an empty dict is returned + + host_dict = {} # type: Dict[str, str] + for line in host_spec: + for field in ['hostname', 'addr']: + if field in line: + try: + _, field_value = line.split(':') + field_value = field_value.strip() + host_dict[field] = field_value + except ValueError as e: + spec_str = '\n'.join(host_spec) + logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}') + + if 'hostname' not in host_dict: + spec_str = '\n'.join(host_spec) + logger.error(f'Could not find hostname in host spec:\n{spec_str}') + return {} + return host_dict + + +def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int: + # copy ssh key to hosts in host spec (used for apply spec) + ssh_key = CEPH_DEFAULT_PUBKEY + if ctx.ssh_public_key: + ssh_key = ctx.ssh_public_key.name + + if bootstrap_hostname != host_info['hostname']: + if 'addr' in host_info: + addr = host_info['addr'] + else: + addr = host_info['hostname'] + out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)]) + if code: + logger.error('\nCopying ssh key to host %s at address %s failed!\n' % (host_info['hostname'], addr)) + return 1 + else: + logger.info('Added ssh key to host %s at address %s' % (host_info['hostname'], addr)) + return 0 + + +def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None: + """Save cluster configuration to the per fsid directory """ + def copy_file(src: str, dst: str) -> None: + if src: + shutil.copyfile(src, dst) + + conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}' 
+ makedirs(conf_dir, uid, gid, DATA_DIR_MODE) + if os.path.exists(conf_dir): + logger.info(f'Saving cluster configuration to {conf_dir} directory') + copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF)) + copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING)) + # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys + if (os.path.exists(ctx.output_pub_ssh_key)): + copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY)) + else: + logger.warning(f'Cannot create cluster configuration directory {conf_dir}') + + +def rollback(func: FuncT) -> FuncT: + """ + """ + @wraps(func) + def _rollback(ctx: CephadmContext) -> Any: + try: + return func(ctx) + except ClusterAlreadyExists: + # another cluster with the provided fsid already exists: don't remove. + raise + except (KeyboardInterrupt, Exception) as e: + logger.error(f'{type(e).__name__}: {e}') + if ctx.cleanup_on_failure: + logger.info('\n\n' + '\t***************\n' + '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically,\n' + '\tto disable this behaviour do not pass the --cleanup-on-failure flag. In case of any previous\n' + '\tbroken installation user must use the following command to completely delete the broken cluster:\n\n' + '\t> cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n' + '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n' + '\t***************\n\n') + _rm_cluster(ctx, keep_logs=False, zap_osds=False) + else: + logger.info('\n\n' + '\t***************\n' + '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change\n' + '\tthis behaviour you can pass the --cleanup-on-failure. 
def rollback(func: FuncT) -> FuncT:
    """Decorator that reports, and optionally cleans up after, a failed install.

    The wrapped function is called with ``ctx``. ``ClusterAlreadyExists`` is
    re-raised untouched so an existing cluster is never removed. Any other
    exception (including ``KeyboardInterrupt``) is logged, followed by
    guidance for the user; when ``ctx.cleanup_on_failure`` is set the
    cluster files are removed through ``_rm_cluster``. The exception is
    always re-raised.
    """
    @wraps(func)
    def _rollback(ctx: CephadmContext) -> Any:
        try:
            return func(ctx)
        except ClusterAlreadyExists:
            # another cluster with the provided fsid already exists: don't remove.
            raise
        except (KeyboardInterrupt, Exception) as e:
            logger.error(f'{type(e).__name__}: {e}')
            if ctx.cleanup_on_failure:
                logger.info('\n\n'
                            '\t***************\n'
                            '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically,\n'
                            '\tto disable this behaviour do not pass the --cleanup-on-failure flag. In case of any previous\n'
                            '\tbroken installation user must use the following command to completely delete the broken cluster:\n\n'
                            '\t> cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
                            '\t***************\n\n')
                _rm_cluster(ctx, keep_logs=False, zap_osds=False)
            else:
                logger.info('\n\n'
                            '\t***************\n'
                            '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change\n'
                            '\tthis behaviour you can pass the --cleanup-on-failure. To remove this broken cluster manually please run:\n\n'
                            f'\t > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
                            '\tin case of any previous broken installation user must use the rm-cluster command to delete the broken cluster:\n\n'
                            '\t > cephadm rm-cluster --force --zap-osds --fsid <fsid>\n\n'
                            '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
                            '\t***************\n\n')
            raise
    return cast(FuncT, _rollback)


@rollback
@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new cluster on this host.

    Validates the options, creates the first mon and mgr, enables the
    cephadm mgr module and then, depending on the options, sets up ssh,
    the dashboard, the ``_admin`` label and an applied spec.

    :returns: ``ctx.error_code`` - 0, or ``-errno.EINVAL`` when applying a
        service or the user's spec failed without aborting the bootstrap.
    :raises Error: on invalid option combinations or failed steps.
    :raises ClusterAlreadyExists: when the fsid directory or an output file
        already exists.
    """
    ctx.error_code = 0

    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    if (
        (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key))
        and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))
    ):
        raise Error('--ssh-private-key must be passed with either --ssh-public-key in the case of standard pubkey '
                    'authentication or with --ssh-signed-cert in the case of CA signed signed keys or not provided at all.')

    if (bool(ctx.ssh_public_key) and bool(ctx.ssh_signed_cert)):
        raise Error('--ssh-public-key and --ssh-signed-cert are mututally exclusive. --ssh-public-key is intended '
                    'for standard pubkey encryption where the public key is set as an authorized key on cluster hosts. '
                    '--ssh-signed-cert is intended for the CA signed keys use case where cluster hosts are configured to trust '
                    'a CA pub key and authentication during SSH is done by authenticating the signed cert, requiring no '
                    'public key to be installed on the cluster hosts.')

    if ctx.fsid:
        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
        if os.path.exists(data_dir_base):
            raise ClusterAlreadyExists(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
        else:
            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # initial vars
    ctx.fsid = ctx.fsid or make_fsid()
    fsid = ctx.fsid
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring, ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise ClusterAlreadyExists('%s already exists; delete or pass --allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    (user_conf, _) = get_config_and_keyring(ctx)

    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    logger.info('Cluster fsid: %s' % fsid)
    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or get_short_hostname()
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        # the release name is taken from the fifth whitespace-separated
        # field of the version string; fail with a readable message
        # instead of an IndexError when the string is shorter than that
        ver_fields = image_ver.split()
        if len(ver_fields) < 5:
            raise Error(
                f'Unable to determine the Ceph release from container version string {image_ver!r};'
                ' pass --allow-mismatched-release to continue anyway'
            )
        image_release = ver_fields[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
            )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    with write_new(mon_dir + '/config', owner=(uid, gid)) as f:
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts=None, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (List[str], Optional[Dict[str, str]], Optional[int], CallVerbosity) -> str
        # run `ceph <cmd>` in a container that has the admin keyring,
        # the temporary config and the log dir mounted
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        # None rather than a shared mutable {} default
        mounts.update(extra_mounts or {})
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout, verbosity=verbosity)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with write_new(ctx.output_keyring) as f:
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        try:
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            host_dicts = _extract_host_info_from_applied_spec(f)
            for h in host_dicts:
                if ctx.ssh_signed_cert:
                    logger.info('Key distribution is not supported for signed CA key setups. Skipping ...')
                else:
                    _distribute_ssh_keys(ctx, h, hostname)

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
        try:
            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
            logger.info(out)
        except Exception:
            # a bad spec does not abort the bootstrap; it is reported
            # through the return code instead
            ctx.error_code = -errno.EINVAL
            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/en/latest/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return ctx.error_code
+ for h in host_dicts: + if ctx.ssh_signed_cert: + logger.info('Key distribution is not supported for signed CA key setups. Skipping ...') + else: + _distribute_ssh_keys(ctx, h, hostname) + + mounts = {} + mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro' + try: + out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts) + logger.info(out) + except Exception: + ctx.error_code = -errno.EINVAL + logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec) + + save_cluster_config(ctx, uid, gid, fsid) + + # enable autotune for osd_memory_target + logger.info('Enabling autotune for osd_memory_target') + cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true']) + + # Notify the Dashboard to show the 'Expand cluster' page on first log in. + cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED']) + + logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n' + '\tsudo %s shell --fsid %s -c %s -k %s\n' % ( + sys.argv[0], + fsid, + ctx.output_config, + ctx.output_keyring)) + + logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0])) + + logger.info('Please consider enabling telemetry to help improve Ceph:\n\n' + '\tceph telemetry on\n\n' + 'For more information see:\n\n' + '\thttps://docs.ceph.com/en/latest/mgr/telemetry/\n') + logger.info('Bootstrap complete.') + return ctx.error_code + +################################## + + +def command_registry_login(ctx: CephadmContext) -> int: + if ctx.registry_json: + logger.info('Pulling custom registry login info from %s.' 
def command_registry_login(ctx: CephadmContext) -> int:
    """Log into a custom registry using the credentials found on ``ctx``.

    Credentials come from the json file named by ``ctx.registry_json``
    (which must provide ``url``, ``username`` and ``password``; they are
    also stored back on ``ctx``), or otherwise from ``ctx.registry_url``,
    ``ctx.registry_username`` and ``ctx.registry_password``.

    :returns: 0 on success.
    :raises Error: when the credentials are incomplete or the login fails.
    """
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0


def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    """Run ``<engine> login`` against ``url`` with the given credentials.

    For Podman the credentials are written to
    ``/etc/ceph/podman-auth.json``, whose mode is then set to ``DEFAULT_MODE``.

    :raises Error: when the login command (or the chmod) fails for any reason.
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        # NOTE(review): the password is passed on the command line and may
        # be visible in the process list while the login runs; confirm
        # whether call_throws can feed stdin before switching to
        # --password-stdin.
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', DEFAULT_MODE)
    except Exception:
        # report the url/username this call actually used, not whatever
        # happens to be stored on ctx
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (url, username))

##################################


def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon's files should be owned by.

    ``node-exporter`` uses the fixed pair 65534/65534; for the other known
    daemon types the ids are obtained from ``extract_uid_gid`` with a
    daemon-specific ``file_path``.

    :raises Error: for a daemon type that is not handled here.
    """
    if daemon_type == 'node-exporter':
        return 65534, 65534

    # file_path handed to extract_uid_gid for each daemon type
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'loki': '/etc/loki',
        'promtail': '/etc/promtail',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_paths:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=probe_paths[daemon_type])
    return uid, gid


def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Wrapper for get_container for containers made during `cephadm deploy`.

    On top of ``get_container`` this appends ``ctx.extra_container_args``
    and ``ctx.extra_entrypoint_args`` (when set) and adds a volume mount
    for every custom config file that has both ``mount_path`` and
    ``content``.
    """
    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    if 'extra_container_args' in ctx and ctx.extra_container_args:
        c.container_args.extend(ctx.extra_container_args)
    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
        c.args.extend(ctx.extra_entrypoint_args)
    ccfiles = fetch_custom_config_files(ctx)
    if ccfiles:
        mandatory_keys = ['mount_path', 'content']
        for conf in ccfiles:
            if all(k in conf for k in mandatory_keys):
                mount_path = conf['mount_path']
                # host-side path is keyed by the basename of the mount path
                file_path = os.path.join(
                    ctx.data_dir,
                    fsid,
                    'custom_config_files',
                    f'{daemon_type}.{daemon_id}',
                    os.path.basename(mount_path)
                )
                c.volume_mounts[file_path] = mount_path
    return c


def get_deployment_type(ctx: CephadmContext, daemon_type: str, daemon_id: str) -> DeploymentType:
    """Classify a deploy request as DEFAULT, RECONFIG or REDEPLOY.

    ``ctx.reconfig`` selects RECONFIG. Otherwise, when the daemon's unit is
    'running' or its container is running, the deployment is a REDEPLOY.
    The chosen type is logged and returned.
    """
    deployment_type: DeploymentType = DeploymentType.DEFAULT
    if ctx.reconfig:
        deployment_type = DeploymentType.RECONFIG
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        # if reconfig was set, that takes priority over redeploy. If
        # this is considered a fresh deployment at this stage,
        # mark it as a redeploy to avoid port checking
        if deployment_type == DeploymentType.DEFAULT:
            deployment_type = DeploymentType.REDEPLOY

    logger.info(f'{deployment_type.value} daemon {ctx.name} ...')

    return deployment_type
daemon_id, 'bash')): + # if reconfig was set, that takes priority over redeploy. If + # this is considered a fresh deployment at this stage, + # mark it as a redeploy to avoid port checking + if deployment_type == DeploymentType.DEFAULT: + deployment_type = DeploymentType.REDEPLOY + + logger.info(f'{deployment_type.value} daemon {ctx.name} ...') + + return deployment_type + + +@default_image +@deprecated_command +def command_deploy(ctx): + # type: (CephadmContext) -> None + _common_deploy(ctx) + + +def read_configuration_source(ctx: CephadmContext) -> Dict[str, Any]: + """Read a JSON configuration based on the `ctx.source` value.""" + source = '-' + if 'source' in ctx and ctx.source: + source = ctx.source + if source == '-': + config_data = json.load(sys.stdin) + else: + with open(source, 'rb') as fh: + config_data = json.load(fh) + logger.debug('Loaded deploy configuration: %r', config_data) + return config_data + + +def apply_deploy_config_to_ctx( + config_data: Dict[str, Any], + ctx: CephadmContext, +) -> None: + """Bind properties taken from the config_data dictionary to our ctx, + similar to how cli options on `deploy` are bound to the context. + """ + ctx.name = config_data['name'] + image = config_data.get('image', '') + if image: + ctx.image = image + if 'fsid' in config_data: + ctx.fsid = config_data['fsid'] + if 'meta' in config_data: + ctx.meta_properties = config_data['meta'] + if 'config_blobs' in config_data: + ctx.config_blobs = config_data['config_blobs'] + + # many functions don't check that an attribute is set on the ctx + # (with getattr or the '__contains__' func on ctx). + # This reuses the defaults from the CLI options so we don't + # have to repeat things and they can stay in sync. 
+ facade = ArgumentFacade() + _add_deploy_parser_args(facade) + facade.apply(ctx) + for key, value in config_data.get('params', {}).items(): + if key not in facade.defaults: + logger.warning('unexpected parameter: %r=%r', key, value) + setattr(ctx, key, value) + update_default_image(ctx) + logger.debug('Determined image: %r', ctx.image) + + +def command_deploy_from(ctx: CephadmContext) -> None: + """The deploy-from command is similar to deploy but sources nearly all + configuration parameters from an input JSON configuration file. + """ + config_data = read_configuration_source(ctx) + apply_deploy_config_to_ctx(config_data, ctx) + _common_deploy(ctx) + + +def _common_deploy(ctx: CephadmContext) -> None: + daemon_type, daemon_id = ctx.name.split('.', 1) + if daemon_type not in get_supported_daemons(): + raise Error('daemon type %s not recognized' % daemon_type) + + lock = FileLock(ctx, ctx.fsid) + lock.acquire() + + deployment_type = get_deployment_type(ctx, daemon_type, daemon_id) + + # Migrate sysctl conf files from /usr/lib to /etc + migrate_sysctl_dir(ctx, ctx.fsid) + + # Get and check ports explicitly required to be opened + endpoints = fetch_tcp_ports(ctx) + _dispatch_deploy(ctx, daemon_type, daemon_id, endpoints, deployment_type) + + +def _dispatch_deploy( + ctx: CephadmContext, + daemon_type: str, + daemon_id: str, + daemon_endpoints: List[EndPoint], + deployment_type: DeploymentType, +) -> None: + if daemon_type in Ceph.daemons: + config, keyring = get_config_and_keyring(ctx) + uid, gid = extract_uid_gid(ctx) + make_var_run(ctx, ctx.fsid, uid, gid) + + config_json = fetch_configs(ctx) + + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id, + ptrace=ctx.allow_ptrace) + + if daemon_type == 'mon' and config_json is not None: + if 'crush_location' in config_json: + c_loc = config_json['crush_location'] + # was originally "c.args.extend(['--set-crush-location', c_loc])" + # but that doesn't seem to persist in the object after it's passed + # in 
further function calls + c.args = c.args + ['--set-crush-location', c_loc] + + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + config=config, keyring=keyring, + osd_fsid=ctx.osd_fsid, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + + elif daemon_type in Monitoring.components: + # monitoring daemon - prometheus, grafana, alertmanager, node-exporter + # Default Checks + # make sure provided config-json is sufficient + config = fetch_configs(ctx) # type: ignore + required_files = Monitoring.components[daemon_type].get('config-json-files', list()) + required_args = Monitoring.components[daemon_type].get('config-json-args', list()) + if required_files: + if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore + raise Error('{} deployment requires config-json which must ' + 'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files))) + if required_args: + if not config or not all(c in config.keys() for c in required_args): # type: ignore + raise Error('{} deployment requires config-json which must ' + 'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args))) + + uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + + elif daemon_type == NFSGanesha.daemon_type: + # only check ports if this is a fresh deployment + if deployment_type == DeploymentType.DEFAULT and not daemon_endpoints: + nfs_ports = list(NFSGanesha.port_map.values()) + daemon_endpoints = [EndPoint('0.0.0.0', p) for p in nfs_ports] + + config, keyring = get_config_and_keyring(ctx) + # TODO: extract ganesha uid/gid (997, 994) ? 
+ uid, gid = extract_uid_gid(ctx) + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + config=config, keyring=keyring, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + + elif daemon_type == CephIscsi.daemon_type: + config, keyring = get_config_and_keyring(ctx) + uid, gid = extract_uid_gid(ctx) + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + config=config, keyring=keyring, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + elif daemon_type == CephNvmeof.daemon_type: + config, keyring = get_config_and_keyring(ctx) + uid, gid = 167, 167 # TODO: need to get properly the uid/gid + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + config=config, keyring=keyring, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + elif daemon_type in Tracing.components: + uid, gid = 65534, 65534 + c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + elif daemon_type == HAproxy.daemon_type: + haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id) + uid, gid = haproxy.extract_uid_gid_haproxy() + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + + elif daemon_type == Keepalived.daemon_type: + keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id) + uid, gid = keepalived.extract_uid_gid_keepalived() + c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + deployment_type=deployment_type, + endpoints=daemon_endpoints) + + 
@infer_fsid
@infer_config
@infer_image
@validate_fsid
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Run a command, or an interactive ``bash`` when none is given, inside a
    privileged container built from ``ctx.image``.

    Config, keyring, user supplied ``--mount``/``--volume`` entries and (for
    the interactive case with an fsid) a per-cluster root home directory are
    added to the container's volume mounts.

    :returns: 0 for ``ctx.dry_run`` (the command line is only printed),
              otherwise whatever ``call_timeout`` returns
    :raises Error: if ceph.conf names a different fsid, or a daemon id is
                   given without an fsid
    """
    conf = read_config(ctx.config)
    if conf.has_option('global', 'fsid'):
        if conf.get('global', 'fsid') != ctx.fsid:
            raise Error('fsid does not match ceph.conf')

    # without a name fall back to 'osd', which gets the most mounts
    daemon_type, daemon_id = 'osd', None
    if ctx.name:
        daemon_type, dot, rest = ctx.name.partition('.')
        if dot:
            daemon_id = rest

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # If a dedicated keyring for the specified fsid is found we use it.
    # Otherwise, use the /etc/ceph files by default, if present. This is done
    # here rather than as arg parser defaults because we don't want an error
    # if they don't exist.
    if not ctx.keyring:
        fsid_keyring = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
        if os.path.exists(fsid_keyring):
            ctx.keyring = fsid_keyring
        elif os.path.exists(CEPH_DEFAULT_KEYRING):
            ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=bool(ctx.config))
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        for spec in ctx.mount:
            # spec is "src", "src:dst" or "src:dst:options"
            parts = spec.split(':')
            src = pathify(parts[0])
            if len(parts) == 1:
                mounts[src] = '/mnt/{}'.format(os.path.basename(parts[0]))
                continue
            target = parts[1]
            if len(parts) == 3:
                target = '{}:{}'.format(target, parts[2])
            mounts[src] = target

    command = ctx.command
    if not command:
        # interactive shell: allocate a tty and set up a prompt and home dir
        command = ['bash']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
        if ctx.fsid:
            home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    for entry in os.listdir('/etc/skel'):
                        if not entry.startswith('.bash'):
                            continue
                        shutil.copyfile(os.path.join('/etc/skel', entry),
                                        os.path.join(home, entry))
            mounts[home] = '/root'

    for vol in ctx.volume:
        vol_src, vol_dst = vol.split(':', 1)
        mounts[vol_src] = vol_dst

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    if ctx.dry_run:
        print(' '.join(shlex.quote(arg) for arg in command))
        return 0

    return call_timeout(ctx, command, ctx.timeout)
@infer_fsid
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show the journal of the systemd unit backing daemon ``ctx.name``.

    Any extra arguments in ``ctx.command`` are appended to the
    ``journalctl -u <unit>`` command line.

    :raises Error: if no fsid is available on the context
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    journal_cmd = [find_program('journalctl'), '-u', unit]
    if ctx.command:
        journal_cmd += ctx.command

    # journalctl is invoked directly instead of through our call() wrapper so
    # that its stdout is passed through untouched (presumably: without the
    # logger prefixing the wrapper would add).
    logger.debug('Running command: %s' % ' '.join(journal_cmd))
    subprocess.call(journal_cmd, env=os.environ.copy())  # type: ignore
+ # + # out, _, _ = call_throws(['ip', '-j', 'route', 'ls']) + # j = json.loads(out) + # for x in j: + res = _list_ipv4_networks(ctx) + res.update(_list_ipv6_networks(ctx)) + return res + + +def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]: + execstr: Optional[str] = find_executable('ip') + if not execstr: + raise FileNotFoundError("unable to find 'ip' command") + out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + return _parse_ipv4_route(out) + + +def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]: + r = {} # type: Dict[str, Dict[str, Set[str]]] + p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)') + for line in out.splitlines(): + m = p.findall(line) + if not m: + continue + net = m[0][0] + if '/' not in net: # aggregate /32 mask for single host sub-networks + net += '/32' + iface = m[0][1] + ip = m[0][4] + if net not in r: + r[net] = {} + if iface not in r[net]: + r[net][iface] = set() + r[net][iface].add(ip) + return r + + +def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]: + execstr: Optional[str] = find_executable('ip') + if not execstr: + raise FileNotFoundError("unable to find 'ip' command") + routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + return _parse_ipv6_route(routes, ips) + + +def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]: + r = {} # type: Dict[str, Dict[str, Set[str]]] + route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$') + ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$') + iface_p = re.compile(r'^(\d+): (\S+): (.*)$') + for line in routes.splitlines(): + m = route_p.findall(line) + if not m or m[0][0].lower() == 'default': + continue + net = m[0][0] + if '/' not in net: 
def with_units_to_int(v: str) -> int:
    """Convert a size string such as ``'1.5GiB'``, ``'512kB'`` or ``'42'``
    into a number of bytes.

    A trailing ``iB`` or ``B`` is dropped first. A remaining ``K``, ``M``,
    ``G`` or ``T`` suffix (either case) is then applied as a power of 1024,
    and the result is truncated to an integer.

    :param v: the size string
    :returns: the size in bytes
    :raises ValueError: if nothing numeric is left to parse (including empty
        input or a bare unit such as ``'B'``)
    """
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]

    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }
    # v[-1:] (not v[-1]) so an empty remainder ends up as a ValueError below
    # instead of an IndexError that callers do not catch
    mult = multipliers.get(v[-1:].upper())
    if mult is None:
        mult = 1
    else:
        v = v[:-1]
    if not v:
        raise ValueError('no numeric value in size string')
    return int(float(v) * mult)
seen_digests = {} # type: Dict[str, List[str]] + + # keep track of memory and cpu usage we've seen + seen_memusage = {} # type: Dict[str, int] + seen_cpuperc = {} # type: Dict[str, str] + out, err, code = call( + ctx, + [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'], + verbosity=CallVerbosity.QUIET + ) + seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out) + + out, err, code = call( + ctx, + [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'], + verbosity=CallVerbosity.QUIET + ) + seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out) + + # /var/lib/ceph + if os.path.exists(data_dir): + for i in os.listdir(data_dir): + if i in ['mon', 'osd', 'mds', 'mgr']: + daemon_type = i + for j in os.listdir(os.path.join(data_dir, i)): + if '-' not in j: + continue + (cluster, daemon_id) = j.split('-', 1) + fsid = get_legacy_daemon_fsid(ctx, + cluster, daemon_type, daemon_id, + legacy_dir=legacy_dir) + legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id) + val: Dict[str, Any] = { + 'style': 'legacy', + 'name': '%s.%s' % (daemon_type, daemon_id), + 'fsid': fsid if fsid is not None else 'unknown', + 'systemd_unit': legacy_unit_name, + } + if detail: + (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name) + if not host_version: + try: + out, err, code = call(ctx, + ['ceph', '-v'], + verbosity=CallVerbosity.QUIET) + if not code and out.startswith('ceph version '): + host_version = out.split(' ')[2] + except Exception: + pass + val['host_version'] = host_version + ls.append(val) + elif is_fsid(i): + fsid = str(i) # convince mypy that fsid is a str here + for j in os.listdir(os.path.join(data_dir, i)): + if '.' 
in j and os.path.isdir(os.path.join(data_dir, fsid, j)): + name = j + (daemon_type, daemon_id) = j.split('.', 1) + unit_name = get_unit_name(fsid, + daemon_type, + daemon_id) + else: + continue + val = { + 'style': 'cephadm:v1', + 'name': name, + 'fsid': fsid, + 'systemd_unit': unit_name, + } + if detail: + # get container id + (val['enabled'], val['state'], _) = check_unit(ctx, unit_name) + container_id = None + image_name = None + image_id = None + image_digests = None + version = None + start_stamp = None + + out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id) + if not code: + (container_id, image_name, image_id, start, + version) = out.strip().split(',') + image_id = normalize_container_id(image_id) + daemon_type = name.split('.', 1)[0] + start_stamp = try_convert_datetime(start) + + # collect digests for this image id + image_digests = seen_digests.get(image_id) + if not image_digests: + out, err, code = call( + ctx, + [ + container_path, 'image', 'inspect', image_id, + '--format', '{{.RepoDigests}}', + ], + verbosity=CallVerbosity.QUIET) + if not code: + image_digests = list(set(map( + normalize_image_digest, + out.strip()[1:-1].split(' ')))) + seen_digests[image_id] = image_digests + + # identify software version inside the container (if we can) + if not version or '.' 
not in version: + version = seen_versions.get(image_id, None) + if daemon_type == NFSGanesha.daemon_type: + version = NFSGanesha.get_version(ctx, container_id) + if daemon_type == CephIscsi.daemon_type: + version = CephIscsi.get_version(ctx, container_id) + if daemon_type == CephNvmeof.daemon_type: + version = CephNvmeof.get_version(ctx, container_id) + elif not version: + if daemon_type in Ceph.daemons: + out, err, code = call(ctx, + [container_path, 'exec', container_id, + 'ceph', '-v'], + verbosity=CallVerbosity.QUIET) + if not code and \ + out.startswith('ceph version '): + version = out.split(' ')[2] + seen_versions[image_id] = version + elif daemon_type == 'grafana': + out, err, code = call(ctx, + [container_path, 'exec', container_id, + 'grafana-server', '-v'], + verbosity=CallVerbosity.QUIET) + if not code and \ + out.startswith('Version '): + version = out.split(' ')[1] + seen_versions[image_id] = version + elif daemon_type in ['prometheus', + 'alertmanager', + 'node-exporter', + 'loki', + 'promtail']: + version = Monitoring.get_version(ctx, container_id, daemon_type) + seen_versions[image_id] = version + elif daemon_type == 'haproxy': + out, err, code = call(ctx, + [container_path, 'exec', container_id, + 'haproxy', '-v'], + verbosity=CallVerbosity.QUIET) + if not code and \ + out.startswith('HA-Proxy version ') or \ + out.startswith('HAProxy version '): + version = out.split(' ')[2] + seen_versions[image_id] = version + elif daemon_type == 'keepalived': + out, err, code = call(ctx, + [container_path, 'exec', container_id, + 'keepalived', '--version'], + verbosity=CallVerbosity.QUIET) + if not code and \ + err.startswith('Keepalived '): + version = err.split(' ')[1] + if version[0] == 'v': + version = version[1:] + seen_versions[image_id] = version + elif daemon_type == CustomContainer.daemon_type: + # Because a custom container can contain + # everything, we do not know which command + # to execute to get the version. 
+ pass + elif daemon_type == SNMPGateway.daemon_type: + version = SNMPGateway.get_version(ctx, fsid, daemon_id) + seen_versions[image_id] = version + else: + logger.warning('version for unknown daemon type %s' % daemon_type) + else: + vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore + try: + with open(vfile, 'r') as f: + image_name = f.read().strip() or None + except IOError: + pass + + # unit.meta? + mfile = os.path.join(data_dir, fsid, j, 'unit.meta') # type: ignore + try: + with open(mfile, 'r') as f: + meta = json.loads(f.read()) + val.update(meta) + except IOError: + pass + + val['container_id'] = container_id + val['container_image_name'] = image_name + val['container_image_id'] = image_id + val['container_image_digests'] = image_digests + if container_id: + val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len]) + val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len]) + val['version'] = version + val['started'] = start_stamp + val['created'] = get_file_timestamp( + os.path.join(data_dir, fsid, j, 'unit.created') + ) + val['deployed'] = get_file_timestamp( + os.path.join(data_dir, fsid, j, 'unit.image')) + val['configured'] = get_file_timestamp( + os.path.join(data_dir, fsid, j, 'unit.configured')) + ls.append(val) + + return ls + + +def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]: + # keep track of memory usage we've seen + seen_memusage = {} # type: Dict[str, int] + seen_memusage_cid_len = 0 + if not code: + for line in out.splitlines(): + (cid, usage) = line.split(',') + (used, limit) = usage.split(' / ') + try: + seen_memusage[cid] = with_units_to_int(used) + if not seen_memusage_cid_len: + seen_memusage_cid_len = len(cid) + except ValueError: + logger.info('unable to parse memory usage line\n>{}'.format(line)) + pass + return seen_memusage_cid_len, seen_memusage + + +def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]: + seen_cpuperc = {} + 
seen_cpuperc_cid_len = 0 + if not code: + for line in out.splitlines(): + (cid, cpuperc) = line.split(',') + try: + seen_cpuperc[cid] = cpuperc + if not seen_cpuperc_cid_len: + seen_cpuperc_cid_len = len(cid) + except ValueError: + logger.info('unable to parse cpu percentage line\n>{}'.format(line)) + pass + return seen_cpuperc_cid_len, seen_cpuperc + + +def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None): + # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str] + + for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir): + if d['fsid'] != fsid: + continue + if d['name'] != name: + continue + return d + raise Error('Daemon not found: {}. See `cephadm ls`'.format(name)) + + +def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]: + c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash') + out, err, code = '', '', -1 + for name in (c.cname, c.old_cname): + cmd = [ + container_path, 'inspect', + '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}', + name + ] + out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET) + if not code: + break + return out, err, code + +################################## + + +@default_image +def command_adopt(ctx): + # type: (CephadmContext) -> None + + if not ctx.skip_pull: + try: + _pull_image(ctx, ctx.image) + except UnauthorizedRegistryError: + err_str = 'Failed to pull container image. Host may not be logged into container registry. 
Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`' + logger.debug(f'Pulling image for `command_adopt` failed: {err_str}') + raise Error(err_str) + + (daemon_type, daemon_id) = ctx.name.split('.', 1) + + # legacy check + if ctx.style != 'legacy': + raise Error('adoption of style %s not implemented' % ctx.style) + + # lock + fsid = get_legacy_daemon_fsid(ctx, + ctx.cluster, + daemon_type, + daemon_id, + legacy_dir=ctx.legacy_dir) + if not fsid: + raise Error('could not detect legacy fsid; set fsid in ceph.conf') + lock = FileLock(ctx, fsid) + lock.acquire() + + # call correct adoption + if daemon_type in Ceph.daemons: + command_adopt_ceph(ctx, daemon_type, daemon_id, fsid) + elif daemon_type == 'prometheus': + command_adopt_prometheus(ctx, daemon_id, fsid) + elif daemon_type == 'grafana': + command_adopt_grafana(ctx, daemon_id, fsid) + elif daemon_type == 'node-exporter': + raise Error('adoption of node-exporter not implemented') + elif daemon_type == 'alertmanager': + command_adopt_alertmanager(ctx, daemon_id, fsid) + else: + raise Error('daemon type %s not recognized' % daemon_type) + + +class AdoptOsd(object): + def __init__(self, ctx, osd_data_dir, osd_id): + # type: (CephadmContext, str, str) -> None + self.ctx = ctx + self.osd_data_dir = osd_data_dir + self.osd_id = osd_id + + def check_online_osd(self): + # type: () -> Tuple[Optional[str], Optional[str]] + + osd_fsid, osd_type = None, None + + path = os.path.join(self.osd_data_dir, 'fsid') + try: + with open(path, 'r') as f: + osd_fsid = f.read().strip() + logger.info('Found online OSD at %s' % path) + except IOError: + logger.info('Unable to read OSD fsid from %s' % path) + if os.path.exists(os.path.join(self.osd_data_dir, 'type')): + with open(os.path.join(self.osd_data_dir, 'type')) as f: + osd_type = f.read().strip() + else: + logger.info('"type" file 
def check_offline_simple_osd(self):
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Look for a ceph-volume 'simple' JSON description of this OSD.

    Exactly one file matching ``/etc/ceph/osd/<osd_id>-*.json`` must exist.
    For a non-filestore OSD the data path named in that file is mounted on
    ``self.osd_data_dir``, since adoption moves files out of that directory.

    :returns: ``(osd_fsid, osd_type)``, or ``(None, None)`` when no usable
        description was found
    """
    matches = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
    if len(matches) != 1:
        return None, None
    path = matches[0]

    try:
        with open(path, 'r') as f:
            js = json.loads(f.read())
        osd_fsid = js['fsid']
        osd_type = js['type']
        # only non-filestore OSDs need their data path mounted
        data_path = None if osd_type == 'filestore' else js['data']['path']
    except ValueError as e:
        # report the file that failed, not the whole glob result list
        logger.info('Invalid JSON in {}: {}'.format(path, e))
        return None, None
    except (KeyError, TypeError) as e:
        # valid JSON that lacks the fields we need is treated as "not found"
        logger.info('Unexpected content in {}: {!r}'.format(path, e))
        return None, None

    logger.info('Found offline simple OSD {}'.format(self.osd_id))
    if data_path is not None:
        # need this to be mounted for the adopt to work, as it
        # needs to move files from this directory
        call_throws(self.ctx, ['mount', data_path, self.osd_data_dir])

    return osd_fsid, osd_type
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy ceph daemon into a cephadm-managed layout.

    Stops and disables the old systemd unit, moves the daemon's data and
    logs into the directories for ``fsid``, copies the cluster config, then
    creates the new units. The new unit is started if the old one was
    running or ``ctx.force_start`` is set.

    :param ctx: context; ``cluster``, ``legacy_dir`` and ``force_start`` are
        read from it
    :param daemon_type: daemon type, e.g. ``mon`` or ``osd``
    :param daemon_id: id of the daemon within its type
    :param fsid: fsid of the cluster the daemon is adopted into
    :raises Error: if the legacy data directory does not exist, the OSD
        cannot be found, or the OSD uses FileStore
    """
    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try the three ways an OSD may be described, cheapest first
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        elif ctx.cluster != 'ceph':
            adopt_osd.change_cluster_name()
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    # swap only the suffix; str.replace would also rewrite
                    # a '.ldb' occurring earlier in the name
                    newf = oldf[:-len('.ldb')] + '.sst'
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # count the rename so the summary below is reported
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy (package-installed) grafana into a cephadm deployment.

    Stops and disables the ``grafana-server`` unit, copies the legacy
    configuration, provisioning tree, optional TLS cert/key pair and data
    directory from under ``ctx.legacy_dir`` into the new daemon data dir,
    then redeploys the daemon and updates firewalld.

    :param ctx: cephadm context; ``ctx.legacy_dir`` is the root the legacy
        paths are resolved against
    :param daemon_id: id of the daemon being adopted
    :param fsid: fsid of the cluster the daemon will belong to
    """

    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
    # should try to set the ports we know cephadm defaults
    # to for these services in the firewall.
    ports = Monitoring.port_map['grafana']
    endpoints = [EndPoint('0.0.0.0', p) for p in ports]

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    # Resolve both files under legacy_dir *before* testing for them, so the
    # existence check looks at exactly the paths that get copied below.
    cert_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.crt')
    key_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.key')
    if os.path.exists(cert_src) and os.path.exists(key_src):
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # point the copied grafana.ini at the relocated cert/key
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert_src, key_src))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  deployment_type=DeploymentType.REDEPLOY, endpoints=endpoints)
    update_firewalld(ctx, daemon_type)
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Stop, disable and remove the daemon named by ``ctx.name``.

    The data directory of mon, osd and prometheus daemons is renamed into
    ``<data_dir>/<fsid>/removed`` unless ``ctx.force_delete_data`` is set;
    for every other daemon type it is deleted. Afterwards the TCP ports
    recorded for the daemon are closed in firewalld on a best-effort basis.

    :raises Error: when a mon or osd is targeted without ``ctx.force``
    """
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # best effort: none of these is fatal if the unit is already gone
    for action in ('stop', 'reset-failed', 'disable'):
        call(ctx, ['systemctl', action, unit_name],
             verbosity=CallVerbosity.DEBUG)

    # force remove rgw admin socket file if leftover
    if daemon_type in ['rgw']:
        rgw_asok_pattern = f'/var/run/ceph/{ctx.fsid}/ceph-client.{ctx.name}.*.asok'
        # Expand the wildcard here: an argv element containing '*' is handed
        # to rm literally and matches nothing.
        # NOTE(review): assumes call() executes argv without a shell -- confirm.
        for rgw_asok_path in glob(rgw_asok_pattern):
            call(ctx, ['rm', '-rf', rgw_asok_path],
                 verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    endpoints = fetch_tcp_ports(ctx)
    ports: List[int] = [e.port for e in endpoints]
    if ports:
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
            fw.apply_rules()
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')
def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count the entries of ``ctx.data_dir`` whose name is a cluster fsid.

    A missing data directory is reported as zero clusters instead of
    raising, so callers that only ask "is this the last cluster?" keep
    working on a host whose data directory was already removed.
    """
    try:
        entries = os.listdir(ctx.data_dir)
    except FileNotFoundError:
        return 0
    return sum(1 for name in entries if is_fsid(name))
+ if zap_osds: + _zap_osds(ctx) + + # rm units + call_throws(ctx, ['rm', '-f', ctx.unit_dir + + '/ceph-%s@.service' % ctx.fsid]) + call_throws(ctx, ['rm', '-f', ctx.unit_dir + + '/ceph-%s.target' % ctx.fsid]) + call_throws(ctx, ['rm', '-rf', + ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid]) + # rm data + call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid]) + + if not keep_logs: + # rm logs + call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid]) + call_throws(ctx, ['rm', '-rf', ctx.log_dir + + '/*.wants/ceph-%s@*' % ctx.fsid]) + + # rm logrotate config + call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid]) + + # if last cluster on host remove shared files + if get_ceph_cluster_count(ctx) == 0: + disable_systemd_service('ceph.target') + + # rm shared ceph target files + call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target']) + call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target']) + + # rm cephadm logrotate config + call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm']) + + if not keep_logs: + # remove all cephadm logs + for fname in glob(f'{ctx.log_dir}/cephadm.log*'): + os.remove(fname) + + # rm sysctl settings + sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')] + + for sysctl_dir in sysctl_dirs: + for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'): + p.unlink() + + # cleanup remaining ceph directories + ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/cephadm-{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}'] + for dd in ceph_dirs: + shutil.rmtree(dd, ignore_errors=True) + + # clean up config, keyring, and pub key files + files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING] + if os.path.exists(files[0]): + valid_fsid = False + with open(files[0]) as f: + if ctx.fsid in f.read(): + valid_fsid = True + if valid_fsid: + # rm configuration files on /etc/ceph + for n in range(0, len(files)): + if os.path.exists(files[n]): + os.remove(files[n]) + 
def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Look up the uid, gid and ``~/.ssh`` directory of ``ssh_user``.

    :param ssh_user: login name to resolve through the passwd database
    :returns: ``(uid, gid, ssh_dir)`` where ``ssh_dir`` is ``.ssh`` below
        the user's home directory
    :raises Error: when the user has no passwd entry
    """
    try:
        entry = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))

    return entry.pw_uid, entry.pw_gid, os.path.join(entry.pw_dir, '.ssh')
def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Revoke the public key authorization for the ssh user

    Rewrites ``authorized_keys`` of ``ssh_user`` without the lines whose
    stripped content equals the stripped ``key``. When no such line exists
    (or the file itself is missing) the file is left untouched and a
    warning is logged.

    :param key: public key line to remove
    :param ssh_user: user whose ``authorized_keys`` file is edited
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if not os.path.exists(auth_keys_file):
        logger.warning('Cannot find the ssh key to be deleted')
        return

    with open(auth_keys_file, 'r') as f:
        lines = f.readlines()

    wanted = key.strip()
    deleted = False
    fd, filename = tempfile.mkstemp()
    try:
        # Wrap the descriptor mkstemp() already opened rather than opening
        # the path a second time, so it is closed together with the file.
        with os.fdopen(fd, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), DEFAULT_MODE)  # secure access to the keys file
            for line in lines:
                if line.strip() == wanted:
                    deleted = True
                else:
                    f.write(line)

        if deleted:
            shutil.move(filename, auth_keys_file)
        else:
            logger.warning('Cannot find the ssh key to be deleted')
    finally:
        # After a successful move the temp path is gone; in every other
        # case remove it so no stray copy of the keys file is left behind.
        if os.path.exists(filename):
            os.unlink(filename)
In case the user has provided + # some already existing key then we don't alter authorized_keys file + if new_key: + revoke_ssh_key(key, ctx.ssh_user) + + pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else '' + prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else '' + ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else '' + err_msg = f""" +** Please verify your user's ssh configuration and make sure: +- User {ctx.ssh_user} must have passwordless sudo access +{pub_key_msg}{prv_key_msg}{ssh_cfg_msg} +""" + if code != 0: + raise Error(err_msg) + + +def command_prepare_host(ctx: CephadmContext) -> None: + logger.info('Verifying podman|docker is present...') + pkg = None + try: + check_container_engine(ctx) + except Error as e: + logger.warning(str(e)) + if not pkg: + pkg = create_packager(ctx) + pkg.install_podman() + + logger.info('Verifying lvm2 is present...') + if not find_executable('lvcreate'): + if not pkg: + pkg = create_packager(ctx) + pkg.install(['lvm2']) + + logger.info('Verifying time synchronization is in place...') + if not check_time_sync(ctx): + if not pkg: + pkg = create_packager(ctx) + pkg.install(['chrony']) + # check again, and this time try to enable + # the service + check_time_sync(ctx, enabler=pkg) + + if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname(): + logger.warning('Adjusting hostname from %s -> %s...' 
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Read the distro identification from ``/etc/os-release``.

    :returns: ``(distro, distro_version, distro_codename)`` taken from the
        ``ID``, ``VERSION_ID`` and ``VERSION_CODENAME`` fields, lower-cased.
        A field that does not appear in the file is returned as ``None``;
        a field present with an empty value is returned as ``''``.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # Strip one matching pair of surrounding quotes (double or
            # single). The length guard keeps an empty value such as
            # ``VERSION_CODENAME=`` from raising IndexError.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"):
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
(not stable and version and not branch and not commit) or \ + (not stable and not version and branch) or \ + (not stable and not version and not branch and not commit) + self.ctx = ctx + self.stable = stable + self.version = version + self.branch = branch + self.commit = commit + + def validate(self) -> None: + """Validate parameters before writing any state to disk.""" + pass + + def add_repo(self) -> None: + raise NotImplementedError + + def rm_repo(self) -> None: + raise NotImplementedError + + def install(self, ls: List[str]) -> None: + raise NotImplementedError + + def install_podman(self) -> None: + raise NotImplementedError + + def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str: + # query shaman + logger.info('Fetching repo metadata from shaman and chacra...') + shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format( + distro=distro, + distro_version=distro_version, + branch=branch, + sha1=commit or 'latest', + arch=get_arch() + ) + try: + shaman_response = urlopen(shaman_url) + except HTTPError as err: + logger.error('repository not found in shaman (might not be available yet)') + raise Error('%s, failed to fetch %s' % (err, shaman_url)) + chacra_url = '' + try: + chacra_url = shaman_response.geturl() + chacra_response = urlopen(chacra_url) + except HTTPError as err: + logger.error('repository not found in chacra (might not be available yet)') + raise Error('%s, failed to fetch %s' % (err, chacra_url)) + return chacra_response.read().decode('utf-8') + + def repo_gpgkey(self) -> Tuple[str, str]: + if self.ctx.gpg_url: + return self.ctx.gpg_url, 'manual' + if self.stable or self.version: + return 'https://download.ceph.com/keys/release.gpg', 'release' + else: + return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild' + + def enable_service(self, service: str) -> None: + """ + Start and enable the service (typically using 
systemd). + """ + call_throws(self.ctx, ['systemctl', 'enable', '--now', service]) + + +class Apt(Packager): + DISTRO_NAMES = { + 'ubuntu': 'ubuntu', + 'debian': 'debian', + } + + def __init__(self, ctx: CephadmContext, + stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str], + distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None: + super(Apt, self).__init__(ctx, stable=stable, version=version, + branch=branch, commit=commit) + assert distro + self.ctx = ctx + self.distro = self.DISTRO_NAMES[distro] + self.distro_codename = distro_codename + self.distro_version = distro_version + + def repo_path(self) -> str: + return '/etc/apt/sources.list.d/ceph.list' + + def add_repo(self) -> None: + + url, name = self.repo_gpgkey() + logger.info('Installing repo GPG key from %s...' % url) + try: + response = urlopen(url) + except HTTPError as err: + logger.error('failed to fetch GPG repo key from %s: %s' % ( + url, err)) + raise Error('failed to fetch GPG key') + key = response.read() + with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f: + f.write(key) + + if self.version: + content = 'deb %s/debian-%s/ %s main\n' % ( + self.ctx.repo_url, self.version, self.distro_codename) + elif self.stable: + content = 'deb %s/debian-%s/ %s main\n' % ( + self.ctx.repo_url, self.stable, self.distro_codename) + else: + content = self.query_shaman(self.distro, self.distro_codename, self.branch, + self.commit) + + logger.info('Installing repo file at %s...' % self.repo_path()) + with open(self.repo_path(), 'w') as f: + f.write(content) + + self.update() + + def rm_repo(self) -> None: + for name in ['autobuild', 'release', 'manual']: + p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name + if os.path.exists(p): + logger.info('Removing repo GPG key %s...' % p) + os.unlink(p) + if os.path.exists(self.repo_path()): + logger.info('Removing repo at %s...' 
% self.repo_path()) + os.unlink(self.repo_path()) + + if self.distro == 'ubuntu': + self.rm_kubic_repo() + + def install(self, ls: List[str]) -> None: + logger.info('Installing packages %s...' % ls) + call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls) + + def update(self) -> None: + logger.info('Updating package list...') + call_throws(self.ctx, ['apt-get', 'update']) + + def install_podman(self) -> None: + if self.distro == 'ubuntu': + logger.info('Setting up repo for podman...') + self.add_kubic_repo() + self.update() + + logger.info('Attempting podman install...') + try: + self.install(['podman']) + except Error: + logger.info('Podman did not work. Falling back to docker...') + self.install(['docker.io']) + + def kubic_repo_url(self) -> str: + return 'https://download.opensuse.org/repositories/devel:/kubic:/' \ + 'libcontainers:/stable/xUbuntu_%s/' % self.distro_version + + def kubic_repo_path(self) -> str: + return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list' + + def kubic_repo_gpgkey_url(self) -> str: + return '%s/Release.key' % self.kubic_repo_url() + + def kubic_repo_gpgkey_path(self) -> str: + return '/etc/apt/trusted.gpg.d/kubic.release.gpg' + + def add_kubic_repo(self) -> None: + url = self.kubic_repo_gpgkey_url() + logger.info('Installing repo GPG key from %s...' % url) + try: + response = urlopen(url) + except HTTPError as err: + logger.error('failed to fetch GPG repo key from %s: %s' % ( + url, err)) + raise Error('failed to fetch GPG key') + key = response.read().decode('utf-8') + tmp_key = write_tmp(key, 0, 0) + keyring = self.kubic_repo_gpgkey_path() + call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name]) + + logger.info('Installing repo file at %s...' 
% self.kubic_repo_path()) + content = 'deb %s /\n' % self.kubic_repo_url() + with open(self.kubic_repo_path(), 'w') as f: + f.write(content) + + def rm_kubic_repo(self) -> None: + keyring = self.kubic_repo_gpgkey_path() + if os.path.exists(keyring): + logger.info('Removing repo GPG key %s...' % keyring) + os.unlink(keyring) + + p = self.kubic_repo_path() + if os.path.exists(p): + logger.info('Removing repo at %s...' % p) + os.unlink(p) + + +class YumDnf(Packager): + DISTRO_NAMES = { + 'centos': ('centos', 'el'), + 'rhel': ('centos', 'el'), + 'scientific': ('centos', 'el'), + 'rocky': ('centos', 'el'), + 'almalinux': ('centos', 'el'), + 'ol': ('centos', 'el'), + 'fedora': ('fedora', 'fc'), + 'mariner': ('mariner', 'cm'), + } + + def __init__(self, ctx: CephadmContext, + stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str], + distro: Optional[str], distro_version: Optional[str]) -> None: + super(YumDnf, self).__init__(ctx, stable=stable, version=version, + branch=branch, commit=commit) + assert distro + assert distro_version + self.ctx = ctx + self.major = int(distro_version.split('.')[0]) + self.distro_normalized = self.DISTRO_NAMES[distro][0] + self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major) + if (self.distro_code == 'fc' and self.major >= 30) or \ + (self.distro_code == 'el' and self.major >= 8): + self.tool = 'dnf' + elif (self.distro_code == 'cm'): + self.tool = 'tdnf' + else: + self.tool = 'yum' + + def custom_repo(self, **kw: Any) -> str: + """ + Repo files need special care in that a whole line should not be present + if there is no value for it. Because we were using `format()` we could + not conditionally add a line for a repo file. So the end result would + contain a key with a missing value (say if we were passing `None`). + + For example, it could look like:: + + [ceph repo] + name= ceph repo + proxy= + gpgcheck= + + Which breaks. 
def validate(self) -> None:
    """Check that a repo can be added for this distro/release combination.

    :raises Error: for Fedora (``fc*``) distro codes, for ``el7`` combined
        with a stable release >= 'pacific' or a version whose major number
        is >= 16, for a version whose major component is not an integer,
        and when the repository metadata cannot be fetched.
    """
    if self.distro_code.startswith('fc'):
        raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
    if self.distro_code == 'el7':
        if self.stable and self.stable >= 'pacific':
            raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
        if self.version:
            # Compare the major version numerically: as strings '9' >= '16'
            # is true and '100' >= '16' is false, both of which are wrong.
            try:
                major = int(self.version.split('.')[0])
            except ValueError:
                raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
            if major >= 16:
                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

    if self.stable or self.version:
        # we know that yum & dnf require there to be a
        # $base_url/$arch/repodata/repomd.xml so we can test if this URL
        # is gettable in order to validate the inputs
        test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
        try:
            # only reachability matters; close the response straight away
            with urlopen(test_url):
                pass
        except HTTPError as err:
            logger.error('unable to fetch repo metadata: %r', err)
            raise Error('failed to fetch repository metadata. please check'
                        ' the provided parameters are correct and try again')
class Zypper(Packager):
    """Packager implementation that drives ``zypper``."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        """Record the zypper settings used to build repo URLs.

        ``self.distro`` is always set to 'opensuse'. ``self.distro_version``
        defaults to '15.1' and is replaced by ``distro_version`` only when
        the detected distro is not a tumbleweed variant and a version was
        supplied.
        """
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """Render the supplied repo settings as ``key=value`` lines.

        Only keys present in ``kw`` whose value is neither None nor an empty
        string are emitted, in the fixed order defined by ``tmpl``.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for tmpl_key, tmpl_value in tmpl:
            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Return the path of the repo file this packager writes and removes."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Return ``<repo_url>/rpm-<release>/<distro>``.

        An explicit ``version`` takes precedence over the ``stable`` release
        name. Previously ``self.stable`` was used in both cases, which
        produced an ``rpm-None`` URL when only a version was given.
        """
        assert self.stable or self.version
        release = self.version if self.version else self.stable
        return '%s/rpm-%s/%s' % (self.ctx.repo_url, release, self.distro)

    def add_repo(self) -> None:
        """Write the ceph repo file.

        With a stable release or version, three sections (binary, noarch,
        source) are generated from ``repo_baseurl()``; otherwise the content
        comes from ``self.query_shaman(...)`` for the branch/commit.
        """
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        """Delete the repo file if it exists."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the packages named in ``ls`` with ``zypper in -y``."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        """Install the ``podman`` package."""
        self.install(['podman'])
def command_install(ctx: CephadmContext) -> None:
    """Install the packages listed in ``ctx.packages``.

    The packager is obtained from ``create_packager(ctx)``, which selects
    the implementation matching the detected distro.
    """
    create_packager(ctx).install(ctx.packages)
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of an interface in ``address/prefixlen`` form.

    :param ifname: interface name; only the first 15 characters are used
    :returns: e.g. '192.168.1.10/24', or '' when the ioctl calls raise
        OSError (the interface does not have an ipv4 address)
    """
    def _extract(sock: socket.socket, offset: int) -> str:
        # issue the ioctl and render bytes 20..23 of the reply as a
        # dotted-quad string
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # The socket is only a handle for the ioctl calls; the context manager
    # guarantees it is closed on every path (it was previously leaked).
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # prefix length = number of set bits across the four netmask octets
    dec_mask = sum(bin(int(octet)).count('1') for octet in dq_mask.split('.'))
    return '{}/{}'.format(addr, dec_mask)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the stripped text of the first existing file in `path_list`.

    :param path_list: candidate paths, tried in order
    :param file_name: optional name joined onto each candidate path
    :returns: the file content decoded as utf-8 (undecodable bytes are
        ignored) and stripped, or 'Unknown' when no candidate exists or the
        read raises OSError
    """
    for base in path_list:
        candidate = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'rb') as handle:
            try:
                return handle.read().decode('utf-8', 'ignore').strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
    return 'Unknown'
def _probe(self) -> None:
    """Read enclosure details from sysfs and populate this object.

    Sets vendor/model from the device directory, components from the
    enclosure directory, then records status/fault/locate/serial for each
    ``*/slot`` entry found under the enclosure path.
    """
    dev_dir = self._dev_path
    enc_dir = self._enc_path

    self.vendor = read_file([os.path.join(dev_dir, 'vendor')])
    self.model = read_file([os.path.join(dev_dir, 'model')])
    self.components = read_file([os.path.join(enc_dir, 'components')])

    for slot_file in glob(os.path.join(enc_dir, '*', 'slot')):
        slot_dir = os.path.dirname(slot_file)
        slot = read_file([slot_file])

        serial = ''
        vpd_path = os.path.join(slot_dir, 'device', 'vpd_pg80')
        if os.path.exists(vpd_path):
            # keep only printable characters from the raw page content
            raw = read_file([vpd_path])
            serial = ''.join(ch for ch in raw if ch in string.printable).strip()
            self.device_lookup[serial] = slot

        self.slot_map[slot] = {
            'status': read_file([os.path.join(slot_dir, 'status')]),
            'fault': read_file([os.path.join(slot_dir, 'fault')]),
            'locate': read_file([os.path.join(slot_dir, 'locate')]),
            'serial': serial,
        }

    self.device_count = len(self.device_lookup)
    # register the sg device this enclosure was discovered through
    self.update(os.path.basename(self._path))
sg2 + """ + self.ses_paths.append(dev_id) + self.path_count = len(self.ses_paths) + + def _dump(self) -> Dict[str, Any]: + """Return a dict representation of the object""" + return {k: v for k, v in self.__dict__.items() if not k.startswith('_')} + + def __str__(self) -> str: + """Return a formatted json representation of the object as a string""" + return json.dumps(self._dump(), indent=2) + + def __repr__(self) -> str: + """Return a json representation of the object as a string""" + return json.dumps(self._dump()) + + def as_json(self) -> Dict[str, Any]: + """Return a dict representing the object""" + return self._dump() + + +class HostFacts(): + _dmi_path_list = ['/sys/class/dmi/id'] + _nic_path_list = ['/sys/class/net'] + _apparmor_path_list = ['/etc/apparmor'] + _disk_vendor_workarounds = { + '0x1af4': 'Virtio Block Device' + } + _excluded_block_devices = ('sr', 'zram', 'dm-', 'loop', 'md') + _sg_generic_glob = '/sys/class/scsi_generic/*' + + def __init__(self, ctx: CephadmContext): + self.ctx: CephadmContext = ctx + self.cpu_model: str = 'Unknown' + self.sysctl_options: Dict[str, str] = self._populate_sysctl_options() + self.cpu_count: int = 0 + self.cpu_cores: int = 0 + self.cpu_threads: int = 0 + self.interfaces: Dict[str, Any] = {} + + self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines() + self._get_cpuinfo() + self._process_nics() + self.arch: str = platform.processor() + self.kernel: str = platform.release() + self._enclosures = self._discover_enclosures() + self._block_devices = self._get_block_devs() + self._device_list = self._get_device_info() + + def _populate_sysctl_options(self) -> Dict[str, str]: + sysctl_options = {} + out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR) + if out: + for line in out.splitlines(): + option, value = line.split('=') + sysctl_options[option.strip()] = value.strip() + return sysctl_options + + def _discover_enclosures(self) -> Dict[str, Enclosure]: + """Build 
a dictionary of discovered scsi enclosures + + Enclosures are detected by walking the scsi generic sysfs hierarchy. + Any device tree that holds an 'enclosure' subdirectory is interpreted as + an enclosure. Once identified the enclosure directory is analysis to + identify key descriptors that will help relate disks to enclosures and + disks to enclosure slots. + + :return: Dict[str, Enclosure]: a map of enclosure id (hex) to enclosure object + """ + sg_paths: List[str] = glob(HostFacts._sg_generic_glob) + enclosures: Dict[str, Enclosure] = {} + + for sg_path in sg_paths: + enc_path = os.path.join(sg_path, 'device', 'enclosure') + if os.path.exists(enc_path): + enc_dirs = glob(os.path.join(enc_path, '*')) + if len(enc_dirs) != 1: + # incomplete enclosure spec - expecting ONE dir in the format + # host(adapter):bus:target:lun e.g. 16:0:0:0 + continue + enc_path = enc_dirs[0] + enc_id = read_file([os.path.join(enc_path, 'id')]) + if enc_id in enclosures: + enclosures[enc_id].update(os.path.basename(sg_path)) + continue + + enclosure = Enclosure(enc_id, enc_path, sg_path) + enclosures[enc_id] = enclosure + + return enclosures + + @property + def enclosures(self) -> Dict[str, Dict[str, Any]]: + """Dump the enclosure objects as dicts""" + return {k: v._dump() for k, v in self._enclosures.items()} + + @property + def enclosure_count(self) -> int: + """Return the number of enclosures detected""" + return len(self._enclosures.keys()) + + def _get_cpuinfo(self): + # type: () -> None + """Determine cpu information via /proc/cpuinfo""" + raw = read_file(['/proc/cpuinfo']) + output = raw.splitlines() + cpu_set = set() + + for line in output: + field = [f.strip() for f in line.split(':')] + if 'model name' in line: + self.cpu_model = field[1] + if 'physical id' in line: + cpu_set.add(field[1]) + if 'siblings' in line: + self.cpu_threads = int(field[1].strip()) + if 'cpu cores' in line: + self.cpu_cores = int(field[1].strip()) + pass + self.cpu_count = len(cpu_set) + + def 
@property
def operating_system(self):
    # type: () -> str
    """Return '<NAME> <VERSION>' from /etc/os-release, or 'Unknown'.

    'Unknown' is returned when either NAME or VERSION is missing from the
    file. Lines are split on their first '=' only, so a value that itself
    contains '=' (for example a URL with a query string) no longer raises
    while unpacking.
    """
    raw_info = read_file(['/etc/os-release'])
    os_release = raw_info.splitlines()
    rel_str = 'Unknown'
    rel_dict = dict()

    for line in os_release:
        if '=' in line:
            var_name, var_value = line.split('=', 1)
            rel_dict[var_name] = var_value.strip('"')

    # Would normally use PRETTY_NAME, but NAME and VERSION are more
    # consistent
    if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
        rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
    return rel_str
def _get_device_info(self):
    # type: () -> List[Dict[str, object]]
    """Return one descriptive dict per unique disk in ``self._block_devices``.

    Devices that report the same non-empty serial number are treated as
    multiple paths to one physical disk: only the first is kept and the
    others are listed, comma separated, in its 'alt_dev_name'. Devices with
    no usable serial are always kept and are never matched against an
    enclosure slot.
    """
    disk_list = list()

    # serial number -> every device name that reported that serial
    serial_num_lookup: Dict[str, List[str]] = {}

    # make a map of devname -> disk path. this path name may indicate the physical slot
    # of a drive (phyXX)
    disk_path_map: Dict[str, str] = {}
    for path in glob('/dev/disk/by-path/*'):
        tgt = os.path.basename(str(Path(path).resolve()))
        disk_path_map[tgt] = path

    # make a map of holder (dm-XX) -> full mpath name
    dm_device_map: Dict[str, str] = {}
    for mapper_path in glob('/dev/mapper/mpath*'):
        tgt = os.path.basename(str(Path(mapper_path).resolve()))
        dm_device_map[tgt] = mapper_path

    # main loop to process all eligible block devices
    for dev in self._block_devices:
        enclosure_id = ''
        enclosure_slot = ''
        scsi_addr = ''
        mpath = ''

        disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
        disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
        disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
        vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
        rotational = read_file(['/sys/block/{}/queue/rotational'.format(dev)])
        holders_raw = glob('/sys/block/{}/holders/*'.format(dev))
        if len(holders_raw) == 1:
            # mpath will have 1 holder entry
            holder = os.path.basename(holders_raw[0])
            mpath = dm_device_map.get(holder, '')

        disk_type = 'hdd' if rotational == '1' else 'flash'
        scsi_addr_path = glob('/sys/block/{}/device/bsg/*'.format(dev))
        if len(scsi_addr_path) == 1:
            scsi_addr = os.path.basename(scsi_addr_path[0])

        # vpd_pg80 isn't guaranteed (libvirt, vmware for example)
        serial_raw = read_file(['/sys/block/{}/device/vpd_pg80'.format(dev)])
        serial = (''.join(i for i in serial_raw if i in string.printable)).strip()
        if not serial or serial.lower() == 'unknown':
            # No usable serial. An empty serial must not be used as a key:
            # it would group unrelated disks together and could match an
            # enclosure slot that was itself recorded with an empty serial.
            serial = ''
        else:
            serial_num_lookup.setdefault(serial, []).append(dev)
            for enc_id, enclosure in self._enclosures.items():
                if serial in enclosure.device_lookup:
                    enclosure_id = enc_id
                    enclosure_slot = enclosure.device_lookup[serial]

        disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
        disk_size_bytes = self._get_capacity(dev)
        disk_list.append({
            'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
            'vendor': disk_vendor,
            'model': disk_model,
            'rev': disk_rev,
            'wwid': disk_wwid,
            'dev_name': dev,
            'disk_size_bytes': disk_size_bytes,
            'disk_type': disk_type,
            'serial': serial,
            'alt_dev_name': '',
            'scsi_addr': scsi_addr,
            'enclosure_id': enclosure_id,
            'enclosure_slot': enclosure_slot,
            'path_id': disk_path_map.get(dev, ''),
            'mpath': mpath,
        })

    # process the devices to drop duplicate physical devs based on matching
    # the unique serial number
    disk_list_unique: List[Dict[str, Any]] = []
    serials_seen = set()
    for disk in disk_list:
        serial = str(disk['serial'])
        if serial:
            if serial in serials_seen:
                continue
            serials_seen.add(serial)
            # every other device name sharing this serial is an alternate path
            devs = serial_num_lookup[serial].copy()
            devs.remove(str(disk['dev_name']))
            disk['alt_dev_name'] = ','.join(devs)
        disk_list_unique.append(disk)

    return disk_list_unique
@property
def nic_count(self):
    # type: () -> int
    """Return how many entries in ``self.interfaces`` have iftype 'physical'."""
    return sum(
        1 for settings in self.interfaces.values()
        if settings['iftype'] == 'physical'
    )
@property
def kernel_security(self):
    # type: () -> Dict[str, str]
    """Determine the security features enabled in the kernel - SELinux, AppArmor

    The result always carries 'type' and 'description'. For AppArmor the
    per-mode profile counts are merged in as additional keys.
    """
    def _fetch_selinux() -> Dict[str, str]:
        """Get the selinux status by parsing the output of ``sestatus``"""
        security = {}
        try:
            out, err, code = call(self.ctx, ['sestatus'],
                                  verbosity=CallVerbosity.QUIET)
            security['type'] = 'SELinux'
            status, mode, policy = '', '', ''
            for line in out.split('\n'):
                if line.startswith('SELinux status:'):
                    k, v = line.split(':')
                    status = v.strip()
                elif line.startswith('Current mode:'):
                    k, v = line.split(':')
                    mode = v.strip()
                elif line.startswith('Loaded policy name:'):
                    k, v = line.split(':')
                    policy = v.strip()
            if status == 'disabled':
                security['description'] = 'SELinux: Disabled'
            else:
                security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
        except Exception as e:
            logger.info('unable to get selinux status: %s' % e)
        return security

    def _fetch_apparmor() -> Dict[str, str]:
        """Read the apparmor profiles directly, returning an overview of AppArmor status"""
        security = {}
        for apparmor_path in HostFacts._apparmor_path_list:
            if os.path.exists(apparmor_path):
                security['type'] = 'AppArmor'
                security['description'] = 'AppArmor: Enabled'
                try:
                    profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    if len(profiles) == 0:
                        return {}
                except OSError:
                    pass
                else:
                    summary = {}  # type: Dict[str, int]
                    for line in profiles.split('\n'):
                        # Split on the last space so a profile name that
                        # contains spaces cannot break the unpacking.
                        item, sep, mode = line.rpartition(' ')
                        if not sep:
                            # not a "<profile> (<mode>)" entry, e.g. the
                            # 'Unknown' placeholder returned by read_file
                            continue
                        mode = mode.strip('()')
                        # count every profile, including the first one seen
                        # for a mode (it was previously counted as 0)
                        summary[mode] = summary.get(mode, 0) + 1
                    if summary:
                        summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += '({})'.format(summary_str)

                return security
        return {}

    ret = {}
    if os.path.exists('/sys/kernel/security/lsm'):
        lsm = read_file(['/sys/kernel/security/lsm']).strip()
        if 'selinux' in lsm:
            ret = _fetch_selinux()
        elif 'apparmor' in lsm:
            ret = _fetch_apparmor()
        else:
            return {
                'type': 'Unknown',
                'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
            }

    if ret:
        return ret

    return {
        'type': 'None',
        'description': 'Linux Security Module framework is not available'
    }
test with netcat -ul <port>) + listening_state = { + 'tcp': '0A', + 'udp': '07' + } + + if protocol not in listening_state.keys(): + return [] + + if os.path.exists(tcp_file): + with open(tcp_file) as f: + tcp_data = f.readlines()[1:] + + for con in tcp_data: + con_info = con.strip().split() + if con_info[3] == listening_state[protocol]: + local_port = int(con_info[1].split(':')[1], 16) + listening_ports.append(local_port) + + return listening_ports + + @property + def tcp_ports_used(self) -> List[int]: + return HostFacts._process_net_data('/proc/net/tcp') + + @property + def tcp6_ports_used(self) -> List[int]: + return HostFacts._process_net_data('/proc/net/tcp6') + + @property + def udp_ports_used(self) -> List[int]: + return HostFacts._process_net_data('/proc/net/udp', 'udp') + + @property + def udp6_ports_used(self) -> List[int]: + return HostFacts._process_net_data('/proc/net/udp6', 'udp') + + def dump(self): + # type: () -> str + """Return the attributes of this HostFacts object as json""" + data = { + k: getattr(self, k) for k in dir(self) + if not k.startswith('_') + and isinstance(getattr(self, k), (float, int, str, list, dict, tuple)) + } + return json.dumps(data, indent=2, sort_keys=True) + +################################## + + +def command_gather_facts(ctx: CephadmContext) -> None: + """gather_facts is intended to provide host related metadata to the caller""" + host = HostFacts(ctx) + print(host.dump()) + + +################################## + + +def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool: + # TODO: UNITTEST + return os.path.exists( + os.path.join( + ctx.unit_dir, + f'{subsystem}.target.wants', + target_name + ) + ) + + +def target_exists(ctx: CephadmContext) -> bool: + return os.path.exists(ctx.unit_dir + '/ceph.target') + + +@infer_fsid +def command_maintenance(ctx: CephadmContext) -> str: + if not ctx.fsid: + raise Error('failed - must pass --fsid to specify cluster') + + target = 
f'ceph-{ctx.fsid}.target' + + if ctx.maintenance_action.lower() == 'enter': + logger.info('Requested to place host into maintenance') + if systemd_target_state(ctx, target): + _out, _err, code = call(ctx, + ['systemctl', 'disable', target], + verbosity=CallVerbosity.DEBUG) + if code: + logger.error(f'Failed to disable the {target} target') + return 'failed - to disable the target' + else: + # stopping a target waits by default + _out, _err, code = call(ctx, + ['systemctl', 'stop', target], + verbosity=CallVerbosity.DEBUG) + if code: + logger.error(f'Failed to stop the {target} target') + return 'failed - to disable the target' + else: + return f'success - systemd target {target} disabled' + + else: + return 'skipped - target already disabled' + + else: + logger.info('Requested to exit maintenance state') + # if we've never deployed a daemon on this host there will be no systemd + # target to disable so attempting a disable will fail. We still need to + # return success here or host will be permanently stuck in maintenance mode + # as no daemons can be deployed so no systemd target will ever exist to disable. + if not target_exists(ctx): + return 'skipped - systemd target not present on this host. Host removed from maintenance mode.' 
+ # exit maintenance request + if not systemd_target_state(ctx, target): + _out, _err, code = call(ctx, + ['systemctl', 'enable', target], + verbosity=CallVerbosity.DEBUG) + if code: + logger.error(f'Failed to enable the {target} target') + return 'failed - unable to enable the target' + else: + # starting a target waits by default + _out, _err, code = call(ctx, + ['systemctl', 'start', target], + verbosity=CallVerbosity.DEBUG) + if code: + logger.error(f'Failed to start the {target} target') + return 'failed - unable to start the target' + else: + return f'success - systemd target {target} enabled and started' + return f'success - systemd target {target} enabled and started' + +################################## + + +class ArgumentFacade: + def __init__(self) -> None: + self.defaults: Dict[str, Any] = {} + + def add_argument(self, *args: Any, **kwargs: Any) -> None: + if not args: + raise ValueError('expected at least one argument') + name = args[0] + if not name.startswith('--'): + raise ValueError(f'expected long option, got: {name!r}') + name = name[2:].replace('-', '_') + value = kwargs.pop('default', None) + self.defaults[name] = value + + def apply(self, ctx: CephadmContext) -> None: + for key, value in self.defaults.items(): + setattr(ctx, key, value) + + +def _add_deploy_parser_args( + parser_deploy: Union[argparse.ArgumentParser, ArgumentFacade], +) -> None: + parser_deploy.add_argument( + '--config', '-c', + help='config file for new daemon') + parser_deploy.add_argument( + '--config-json', + help='Additional configuration information in JSON format') + parser_deploy.add_argument( + '--keyring', + help='keyring for new daemon') + parser_deploy.add_argument( + '--key', + help='key for new daemon') + parser_deploy.add_argument( + '--osd-fsid', + help='OSD uuid, if creating an OSD container') + parser_deploy.add_argument( + '--skip-firewalld', + action='store_true', + help='Do not configure firewalld') + parser_deploy.add_argument( + '--tcp-ports', + 
help='List of tcp ports to open in the host firewall') + parser_deploy.add_argument( + '--port-ips', + help='JSON dict mapping ports to IPs they need to be bound on' + ) + parser_deploy.add_argument( + '--reconfig', + action='store_true', + help='Reconfigure a previously deployed daemon') + parser_deploy.add_argument( + '--allow-ptrace', + action='store_true', + help='Allow SYS_PTRACE on daemon container') + parser_deploy.add_argument( + '--container-init', + action='store_true', + default=CONTAINER_INIT, + help=argparse.SUPPRESS) + parser_deploy.add_argument( + '--memory-request', + help='Container memory request/target' + ) + parser_deploy.add_argument( + '--memory-limit', + help='Container memory hard limit' + ) + parser_deploy.add_argument( + '--meta-json', + help='JSON dict of additional metadata' + ) + parser_deploy.add_argument( + '--extra-container-args', + action='append', + default=[], + help='Additional container arguments to apply to daemon' + ) + parser_deploy.add_argument( + '--extra-entrypoint-args', + action='append', + default=[], + help='Additional entrypoint arguments to apply to deamon' + ) + + +def _get_parser(): + # type: () -> argparse.ArgumentParser + parser = argparse.ArgumentParser( + description='Bootstrap Ceph daemons with systemd and containers.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--image', + help='container image. 
Can also be set via the "CEPHADM_IMAGE" ' + 'env var') + parser.add_argument( + '--docker', + action='store_true', + help='use docker instead of podman') + parser.add_argument( + '--data-dir', + default=DATA_DIR, + help='base directory for daemon data') + parser.add_argument( + '--log-dir', + default=LOG_DIR, + help='base directory for daemon logs') + parser.add_argument( + '--logrotate-dir', + default=LOGROTATE_DIR, + help='location of logrotate configuration files') + parser.add_argument( + '--sysctl-dir', + default=SYSCTL_DIR, + help='location of sysctl configuration files') + parser.add_argument( + '--unit-dir', + default=UNIT_DIR, + help='base directory for systemd units') + parser.add_argument( + '--verbose', '-v', + action='store_true', + help='Show debug-level log messages') + parser.add_argument( + '--timeout', + type=int, + default=DEFAULT_TIMEOUT, + help='timeout in seconds') + parser.add_argument( + '--retry', + type=int, + default=DEFAULT_RETRY, + help='max number of retries') + parser.add_argument( + '--env', '-e', + action='append', + default=[], + help='set environment variable') + parser.add_argument( + '--no-container-init', + action='store_true', + default=not CONTAINER_INIT, + help='Do not run podman/docker with `--init`') + parser.add_argument( + '--no-cgroups-split', + action='store_true', + default=False, + help='Do not run containers with --cgroups=split (currently only relevant when using podman)') + + subparsers = parser.add_subparsers(help='sub-command') + + parser_version = subparsers.add_parser( + 'version', help='get cephadm version') + parser_version.set_defaults(func=command_version) + + parser_pull = subparsers.add_parser( + 'pull', help='pull the default container image') + parser_pull.set_defaults(func=command_pull) + parser_pull.add_argument( + '--insecure', + action='store_true', + help=argparse.SUPPRESS, + ) + + parser_inspect_image = subparsers.add_parser( + 'inspect-image', help='inspect local container image') + 
parser_inspect_image.set_defaults(func=command_inspect_image) + + parser_ls = subparsers.add_parser( + 'ls', help='list daemon instances on this host') + parser_ls.set_defaults(func=command_ls) + parser_ls.add_argument( + '--no-detail', + action='store_true', + help='Do not include daemon status') + parser_ls.add_argument( + '--legacy-dir', + default='/', + help='base directory for legacy daemon data') + + parser_list_networks = subparsers.add_parser( + 'list-networks', help='list IP networks') + parser_list_networks.set_defaults(func=command_list_networks) + + parser_adopt = subparsers.add_parser( + 'adopt', help='adopt daemon deployed with a different tool') + parser_adopt.set_defaults(func=command_adopt) + parser_adopt.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') + parser_adopt.add_argument( + '--style', + required=True, + help='deployment style (legacy, ...)') + parser_adopt.add_argument( + '--cluster', + default='ceph', + help='cluster name') + parser_adopt.add_argument( + '--legacy-dir', + default='/', + help='base directory for legacy daemon data') + parser_adopt.add_argument( + '--config-json', + help='Additional configuration information in JSON format') + parser_adopt.add_argument( + '--skip-firewalld', + action='store_true', + help='Do not configure firewalld') + parser_adopt.add_argument( + '--skip-pull', + action='store_true', + help='do not pull the default image before adopting') + parser_adopt.add_argument( + '--force-start', + action='store_true', + help='start newly adopted daemon, even if it was not running previously') + parser_adopt.add_argument( + '--container-init', + action='store_true', + default=CONTAINER_INIT, + help=argparse.SUPPRESS) + + parser_rm_daemon = subparsers.add_parser( + 'rm-daemon', help='remove daemon instance') + parser_rm_daemon.set_defaults(func=command_rm_daemon) + parser_rm_daemon.add_argument( + '--name', '-n', + required=True, + action=CustomValidation, + help='daemon name 
(type.id)') + parser_rm_daemon.add_argument( + '--tcp-ports', + help='List of tcp ports to close in the host firewall') + parser_rm_daemon.add_argument( + '--fsid', + required=True, + help='cluster FSID') + parser_rm_daemon.add_argument( + '--force', + action='store_true', + help='proceed, even though this may destroy valuable data') + parser_rm_daemon.add_argument( + '--force-delete-data', + action='store_true', + help='delete valuable daemon data instead of making a backup') + + parser_rm_cluster = subparsers.add_parser( + 'rm-cluster', help='remove all daemons for a cluster') + parser_rm_cluster.set_defaults(func=command_rm_cluster) + parser_rm_cluster.add_argument( + '--fsid', + required=True, + help='cluster FSID') + parser_rm_cluster.add_argument( + '--force', + action='store_true', + help='proceed, even though this may destroy valuable data') + parser_rm_cluster.add_argument( + '--keep-logs', + action='store_true', + help='do not remove log files') + parser_rm_cluster.add_argument( + '--zap-osds', + action='store_true', + help='zap OSD devices for this cluster') + + parser_run = subparsers.add_parser( + 'run', help='run a ceph daemon, in a container, in the foreground') + parser_run.set_defaults(func=command_run) + parser_run.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') + parser_run.add_argument( + '--fsid', + required=True, + help='cluster FSID') + + parser_shell = subparsers.add_parser( + 'shell', help='run an interactive shell inside a daemon container') + parser_shell.set_defaults(func=command_shell) + parser_shell.add_argument( + '--shared_ceph_folder', + metavar='CEPH_SOURCE_FOLDER', + help='Development mode. 
Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') + parser_shell.add_argument( + '--fsid', + help='cluster FSID') + parser_shell.add_argument( + '--name', '-n', + help='daemon name (type.id)') + parser_shell.add_argument( + '--config', '-c', + help='ceph.conf to pass through to the container') + parser_shell.add_argument( + '--keyring', '-k', + help='ceph.keyring to pass through to the container') + parser_shell.add_argument( + '--mount', '-m', + help=('mount a file or directory in the container. ' + 'Support multiple mounts. ' + 'ie: `--mount /foo /bar:/bar`. ' + 'When no destination is passed, default is /mnt'), + nargs='+') + parser_shell.add_argument( + '--env', '-e', + action='append', + default=[], + help='set environment variable') + parser_shell.add_argument( + '--volume', '-v', + action='append', + default=[], + help='set environment variable') + parser_shell.add_argument( + 'command', nargs=argparse.REMAINDER, + help='command (optional)') + parser_shell.add_argument( + '--no-hosts', + action='store_true', + help='dont pass /etc/hosts through to the container') + parser_shell.add_argument( + '--dry-run', + action='store_true', + help='print, but do not execute, the container command to start the shell') + + parser_enter = subparsers.add_parser( + 'enter', help='run an interactive shell inside a running daemon container') + parser_enter.set_defaults(func=command_enter) + parser_enter.add_argument( + '--fsid', + help='cluster FSID') + parser_enter.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') + parser_enter.add_argument( + 'command', nargs=argparse.REMAINDER, + help='command') + + parser_ceph_volume = subparsers.add_parser( + 'ceph-volume', help='run ceph-volume inside a container') + parser_ceph_volume.set_defaults(func=command_ceph_volume) + parser_ceph_volume.add_argument( + '--shared_ceph_folder', + metavar='CEPH_SOURCE_FOLDER', + help='Development mode. 
Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') + parser_ceph_volume.add_argument( + '--fsid', + help='cluster FSID') + parser_ceph_volume.add_argument( + '--config-json', + help='JSON file with config and (client.bootstrap-osd) key') + parser_ceph_volume.add_argument( + '--config', '-c', + help='ceph conf file') + parser_ceph_volume.add_argument( + '--keyring', '-k', + help='ceph.keyring to pass through to the container') + parser_ceph_volume.add_argument( + 'command', nargs=argparse.REMAINDER, + help='command') + + parser_zap_osds = subparsers.add_parser( + 'zap-osds', help='zap all OSDs associated with a particular fsid') + parser_zap_osds.set_defaults(func=command_zap_osds) + parser_zap_osds.add_argument( + '--fsid', + required=True, + help='cluster FSID') + parser_zap_osds.add_argument( + '--force', + action='store_true', + help='proceed, even though this may destroy valuable data') + + parser_unit = subparsers.add_parser( + 'unit', help="operate on the daemon's systemd unit") + parser_unit.set_defaults(func=command_unit) + parser_unit.add_argument( + 'command', + help='systemd command (start, stop, restart, enable, disable, ...)') + parser_unit.add_argument( + '--fsid', + help='cluster FSID') + parser_unit.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') + + parser_logs = subparsers.add_parser( + 'logs', help='print journald logs for a daemon container') + parser_logs.set_defaults(func=command_logs) + parser_logs.add_argument( + '--fsid', + help='cluster FSID') + parser_logs.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') + parser_logs.add_argument( + 'command', nargs='*', + help='additional journalctl args') + + parser_bootstrap = subparsers.add_parser( + 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)') + parser_bootstrap.set_defaults(func=command_bootstrap) + parser_bootstrap.add_argument( + '--config', '-c', + help='ceph conf 
file to incorporate') + parser_bootstrap.add_argument( + '--mon-id', + required=False, + help='mon id (default: local hostname)') + group = parser_bootstrap.add_mutually_exclusive_group() + group.add_argument( + '--mon-addrv', + help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])') + group.add_argument( + '--mon-ip', + help='mon IP') + parser_bootstrap.add_argument( + '--mgr-id', + required=False, + help='mgr id (default: randomly generated)') + parser_bootstrap.add_argument( + '--fsid', + help='cluster FSID') + parser_bootstrap.add_argument( + '--output-dir', + default='/etc/ceph', + help='directory to write config, keyring, and pub key files') + parser_bootstrap.add_argument( + '--output-keyring', + help='location to write keyring file with new cluster admin and mon keys') + parser_bootstrap.add_argument( + '--output-config', + help='location to write conf file to connect to new cluster') + parser_bootstrap.add_argument( + '--output-pub-ssh-key', + help="location to write the cluster's public SSH key") + parser_bootstrap.add_argument( + '--skip-admin-label', + action='store_true', + help='do not create admin label for ceph.conf and client.admin keyring distribution') + parser_bootstrap.add_argument( + '--skip-ssh', + action='store_true', + help='skip setup of ssh key on local host') + parser_bootstrap.add_argument( + '--initial-dashboard-user', + default='admin', + help='Initial user for the dashboard') + parser_bootstrap.add_argument( + '--initial-dashboard-password', + help='Initial password for the initial dashboard user') + parser_bootstrap.add_argument( + '--ssl-dashboard-port', + type=int, + default=8443, + help='Port number used to connect with dashboard using SSL') + parser_bootstrap.add_argument( + '--dashboard-key', + type=argparse.FileType('r'), + help='Dashboard key') + parser_bootstrap.add_argument( + '--dashboard-crt', + type=argparse.FileType('r'), + help='Dashboard certificate') + + parser_bootstrap.add_argument( + '--ssh-config', + 
type=argparse.FileType('r'), + help='SSH config') + parser_bootstrap.add_argument( + '--ssh-private-key', + type=argparse.FileType('r'), + help='SSH private key') + parser_bootstrap.add_argument( + '--ssh-public-key', + type=argparse.FileType('r'), + help='SSH public key') + parser_bootstrap.add_argument( + '--ssh-signed-cert', + type=argparse.FileType('r'), + help='Signed cert for setups using CA signed SSH keys') + parser_bootstrap.add_argument( + '--ssh-user', + default='root', + help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users') + parser_bootstrap.add_argument( + '--skip-mon-network', + action='store_true', + help='set mon public_network based on bootstrap mon ip') + parser_bootstrap.add_argument( + '--skip-dashboard', + action='store_true', + help='do not enable the Ceph Dashboard') + parser_bootstrap.add_argument( + '--dashboard-password-noupdate', + action='store_true', + help='stop forced dashboard password change') + parser_bootstrap.add_argument( + '--no-minimize-config', + action='store_true', + help='do not assimilate and minimize the config file') + parser_bootstrap.add_argument( + '--skip-ping-check', + action='store_true', + help='do not verify that mon IP is pingable') + parser_bootstrap.add_argument( + '--skip-pull', + action='store_true', + help='do not pull the default image before bootstrapping') + parser_bootstrap.add_argument( + '--skip-firewalld', + action='store_true', + help='Do not configure firewalld') + parser_bootstrap.add_argument( + '--allow-overwrite', + action='store_true', + help='allow overwrite of existing --output-* config/keyring/ssh files') + parser_bootstrap.add_argument( + '--cleanup-on-failure', + action='store_true', + default=False, + help='Delete cluster files in case of a failed installation') + parser_bootstrap.add_argument( + '--allow-fqdn-hostname', + action='store_true', + help='allow hostname that is fully-qualified (contains ".")') + parser_bootstrap.add_argument( + 
'--allow-mismatched-release', + action='store_true', + help="allow bootstrap of ceph that doesn't match this version of cephadm") + parser_bootstrap.add_argument( + '--skip-prepare-host', + action='store_true', + help='Do not prepare host') + parser_bootstrap.add_argument( + '--orphan-initial-daemons', + action='store_true', + help='Set mon and mgr service to `unmanaged`, Do not create the crash service') + parser_bootstrap.add_argument( + '--skip-monitoring-stack', + action='store_true', + help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)') + parser_bootstrap.add_argument( + '--with-centralized-logging', + action='store_true', + help='Automatically provision centralized logging (promtail, loki)') + parser_bootstrap.add_argument( + '--apply-spec', + help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)') + parser_bootstrap.add_argument( + '--shared_ceph_folder', + metavar='CEPH_SOURCE_FOLDER', + help='Development mode. 
Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') + + parser_bootstrap.add_argument( + '--registry-url', + help='url for custom registry') + parser_bootstrap.add_argument( + '--registry-username', + help='username for custom registry') + parser_bootstrap.add_argument( + '--registry-password', + help='password for custom registry') + parser_bootstrap.add_argument( + '--registry-json', + help='json file with custom registry login info (URL, Username, Password)') + parser_bootstrap.add_argument( + '--container-init', + action='store_true', + default=CONTAINER_INIT, + help=argparse.SUPPRESS) + parser_bootstrap.add_argument( + '--cluster-network', + help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)') + parser_bootstrap.add_argument( + '--single-host-defaults', + action='store_true', + help='adjust configuration defaults to suit a single-host cluster') + parser_bootstrap.add_argument( + '--log-to-file', + action='store_true', + help='configure cluster to log to traditional log files in /var/log/ceph/$fsid') + + parser_deploy = subparsers.add_parser( + 'deploy', help='deploy a daemon') + parser_deploy.set_defaults(func=command_deploy) + parser_deploy.add_argument( + '--name', + required=True, + action=CustomValidation, + help='daemon name (type.id)') + parser_deploy.add_argument( + '--fsid', + required=True, + help='cluster FSID') + _add_deploy_parser_args(parser_deploy) + + parser_orch = subparsers.add_parser( + '_orch', + ) + subparsers_orch = parser_orch.add_subparsers( + title='Orchestrator Driven Commands', + description='Commands that are typically only run by cephadm mgr module', + ) + + parser_deploy_from = subparsers_orch.add_parser( + 'deploy', help='deploy a daemon') + parser_deploy_from.set_defaults(func=command_deploy_from) + # currently cephadm mgr module passes an fsid option on the CLI too + # TODO: remove this and always source fsid from the JSON? 
+ parser_deploy_from.add_argument( + '--fsid', + help='cluster FSID') + parser_deploy_from.add_argument( + 'source', + default='-', + nargs='?', + help='Configuration input source file', + ) + + parser_check_host = subparsers.add_parser( + 'check-host', help='check host configuration') + parser_check_host.set_defaults(func=command_check_host) + parser_check_host.add_argument( + '--expect-hostname', + help='Check that hostname matches an expected value') + + parser_prepare_host = subparsers.add_parser( + 'prepare-host', help='prepare a host for cephadm use') + parser_prepare_host.set_defaults(func=command_prepare_host) + parser_prepare_host.add_argument( + '--expect-hostname', + help='Set hostname') + + parser_add_repo = subparsers.add_parser( + 'add-repo', help='configure package repository') + parser_add_repo.set_defaults(func=command_add_repo) + parser_add_repo.add_argument( + '--release', + help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE)) + parser_add_repo.add_argument( + '--version', + help='use specific upstream version (x.y.z)') + parser_add_repo.add_argument( + '--dev', + help='use specified bleeding edge build from git branch or tag') + parser_add_repo.add_argument( + '--dev-commit', + help='use specified bleeding edge build from git commit') + parser_add_repo.add_argument( + '--gpg-url', + help='specify alternative GPG key location') + parser_add_repo.add_argument( + '--repo-url', + default='https://download.ceph.com', + help='specify alternative repo location') + # TODO: proxy? 
+ + parser_rm_repo = subparsers.add_parser( + 'rm-repo', help='remove package repository configuration') + parser_rm_repo.set_defaults(func=command_rm_repo) + + parser_install = subparsers.add_parser( + 'install', help='install ceph package(s)') + parser_install.set_defaults(func=command_install) + parser_install.add_argument( + 'packages', nargs='*', + default=['cephadm'], + help='packages') + + parser_registry_login = subparsers.add_parser( + 'registry-login', help='log host into authenticated registry') + parser_registry_login.set_defaults(func=command_registry_login) + parser_registry_login.add_argument( + '--registry-url', + help='url for custom registry') + parser_registry_login.add_argument( + '--registry-username', + help='username for custom registry') + parser_registry_login.add_argument( + '--registry-password', + help='password for custom registry') + parser_registry_login.add_argument( + '--registry-json', + help='json file with custom registry login info (URL, Username, Password)') + parser_registry_login.add_argument( + '--fsid', + help='cluster FSID') + + parser_gather_facts = subparsers.add_parser( + 'gather-facts', help='gather and return host related information (JSON format)') + parser_gather_facts.set_defaults(func=command_gather_facts) + + parser_maintenance = subparsers.add_parser( + 'host-maintenance', help='Manage the maintenance state of a host') + parser_maintenance.add_argument( + '--fsid', + help='cluster FSID') + parser_maintenance.add_argument( + 'maintenance_action', + type=str, + choices=['enter', 'exit'], + help='Maintenance action - enter maintenance, or exit maintenance') + parser_maintenance.set_defaults(func=command_maintenance) + + parser_agent = subparsers.add_parser( + 'agent', help='start cephadm agent') + parser_agent.set_defaults(func=command_agent) + parser_agent.add_argument( + '--fsid', + required=True, + help='cluster FSID') + parser_agent.add_argument( + '--daemon-id', + help='daemon id for agent') + + 
parser_disk_rescan = subparsers.add_parser( + 'disk-rescan', help='rescan all HBAs to detect new/removed devices') + parser_disk_rescan.set_defaults(func=command_rescan_disks) + + return parser + + +def _parse_args(av: List[str]) -> argparse.Namespace: + parser = _get_parser() + + args = parser.parse_args(av) + if 'command' in args and args.command and args.command[0] == '--': + args.command.pop(0) + + # workaround argparse to deprecate the subparser `--container-init` flag + # container_init and no_container_init must always be mutually exclusive + container_init_args = ('--container-init', '--no-container-init') + if set(container_init_args).issubset(av): + parser.error('argument %s: not allowed with argument %s' % (container_init_args)) + elif '--container-init' in av: + args.no_container_init = not args.container_init + else: + args.container_init = not args.no_container_init + assert args.container_init is not args.no_container_init + + return args + + +def cephadm_init_ctx(args: List[str]) -> CephadmContext: + ctx = CephadmContext() + ctx.set_args(_parse_args(args)) + return ctx + + +def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None: + """Configure the logging for cephadm as well as updating the system + to have the expected log dir and logrotate configuration. 
+ """ + logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET') + global logger + if not os.path.exists(LOG_DIR): + os.makedirs(LOG_DIR) + operations = ['bootstrap', 'rm-cluster'] + if any(op in args for op in operations): + dictConfig(interactive_logging_config) + else: + dictConfig(logging_config) + + logger = logging.getLogger() + logger.setLevel(QUIET_LOG_LEVEL) + + if not os.path.exists(ctx.logrotate_dir + '/cephadm'): + with open(ctx.logrotate_dir + '/cephadm', 'w') as f: + f.write("""# created by cephadm +/var/log/ceph/cephadm.log { + rotate 7 + daily + compress + missingok + notifempty + su root root +} +""") + + if ctx.verbose: + for handler in logger.handlers: + if handler.name in ['console', 'log_file', 'console_stdout']: + handler.setLevel(QUIET_LOG_LEVEL) + logger.debug('%s\ncephadm %s' % ('-' * 80, args)) + + +def cephadm_require_root() -> None: + """Exit if the process is not running as root.""" + if os.geteuid() != 0: + sys.stderr.write('ERROR: cephadm should be run as root\n') + sys.exit(1) + + +def main() -> None: + av: List[str] = [] + av = sys.argv[1:] + + ctx = cephadm_init_ctx(av) + if not ctx.has_function(): + sys.stderr.write('No command specified; pass -h or --help for usage\n') + sys.exit(1) + + if ctx.has_function() and getattr(ctx.func, '_execute_early', False): + try: + sys.exit(ctx.func(ctx)) + except Error as e: + if ctx.verbose: + raise + logger.error('ERROR: %s' % e) + sys.exit(1) + + cephadm_require_root() + cephadm_init_logging(ctx, av) + try: + # podman or docker? 
+ ctx.container_engine = find_container_engine(ctx) + if ctx.func not in \ + [ + command_check_host, + command_prepare_host, + command_add_repo, + command_rm_repo, + command_install + ]: + check_container_engine(ctx) + # command handler + r = ctx.func(ctx) + except (Error, ClusterAlreadyExists) as e: + if ctx.verbose: + raise + logger.error('ERROR: %s' % e) + sys.exit(1) + if not r: + r = 0 + sys.exit(r) + + +if __name__ == '__main__': + main() diff --git a/src/cephadm/containers/keepalived/Dockerfile b/src/cephadm/containers/keepalived/Dockerfile new file mode 100644 index 000000000..ac305f72b --- /dev/null +++ b/src/cephadm/containers/keepalived/Dockerfile @@ -0,0 +1,24 @@ +FROM registry.access.redhat.com/ubi8/ubi-minimal:latest + +RUN microdnf install --nodocs \ + bash \ + curl \ + iproute \ + keepalived-2.1.5 \ + && rm /etc/keepalived/keepalived.conf && microdnf clean all + +COPY /skel / + +RUN chmod +x init.sh + +CMD ["./init.sh"] + +# Build specific labels +LABEL maintainer="Guillaume Abrioux <gabrioux@redhat.com>" +LABEL com.redhat.component="keepalived-container" +LABEL version=2.1.5 +LABEL name="keepalived" +LABEL description="keepalived for Ceph" +LABEL summary="Provides keepalived on RHEL 8 for Ceph." 
+LABEL io.k8s.display-name="Keepalived on RHEL 8" +LABEL io.openshift.tags="Ceph keepalived" diff --git a/src/cephadm/containers/keepalived/LICENSE b/src/cephadm/containers/keepalived/LICENSE new file mode 100644 index 000000000..74b10b143 --- /dev/null +++ b/src/cephadm/containers/keepalived/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 University of Michigan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/cephadm/containers/keepalived/README.md b/src/cephadm/containers/keepalived/README.md new file mode 100644 index 000000000..bd7b605ac --- /dev/null +++ b/src/cephadm/containers/keepalived/README.md @@ -0,0 +1,233 @@ +# quay.io/ceph/keepalived + +A small [ubi8-minimal](https://catalog.redhat.com/software/containers/registry/registry.access.redhat.com/repository/ubi8/ubi-minimal) based Docker container that provides a method of IP high availability via [keepalived](http://www.keepalived.org/) (VRRP failover), and optional Kubernetes API Server monitoring. 
If allowed to auto configure (default behaviour) it will automatically generate a unicast based failover configuration with a minimal amount of user supplied information. + +For specific information on Keepalived, please see the man page on [keepalived.conf](http://linux.die.net/man/5/keepalived.conf) or the [Keepalived User Guide](http://www.keepalived.org/pdf/UserGuide.pdf). + + +## Index +- [quay.io/ceph/keepalived](#quayiocephkeepalived) + - [Index](#index) + - [Prerequisites](#prerequisites) + - [Configuration](#configuration) + - [Execution Control](#execution-control) + - [Autoconfiguration Options](#autoconfiguration-options) + - [Kubernetes Options](#kubernetes-options) + - [Suggested Kubernetes Settings](#suggested-kubernetes-settings) + - [Example Keepalived Configs](#example-keepalived-configs) + - [Example Autogenerated Keepalived Master Config](#example-autogenerated-keepalived-master-config) + - [Example Autogenerated Keepalived Backup Config](#example-autogenerated-keepalived-backup-config) + - [Example Run Commands](#example-run-commands) + - [Example Master Run Command](#example-master-run-command) + - [Example Backup Run Command](#example-backup-run-command) + + +## Prerequisites + +Before attempting to deploy the keepalived container, the host must allow non-local binding of IPv4 addresses. To do this, configure the sysctl tunable `net.ipv4.ip_nonlocal_bind=1`. + +In addition to enabling the nonlocal binds, the container must be run with both host networking (`--net=host`) and the CAP_NET_ADMIN capability (`--cap-add NET_ADMIN`). These allow the container to manage the host's networking configuration, and this is essential to the function of keepalived. 
+ + +## Configuration +### Execution Control + +| Variable | Default | +|:---------------------:|:------------------------------------------------:| +| `KEEPALIVED_AUTOCONF` | `true` | +| `KEEPALIVED_CONF` | `/etc/keepalived/keepalived.conf` | +| `KEEPALIVED_CMD` | `/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF` | +| `KEEPALIVED_DEBUG` | `false` | + +* `KEEPALIVED_AUTOCONF` - Enables or disables the auto-configuration of keepalived. + +* `KEEPALIVED_CONF` - The path to the keepalived configuration file. + +* `KEEPALIVED_CMD` - The command called to execute keepalived. + +* `KEEPALIVED_DEBUG` - Enables or disables debug level logging for keepalived (adds `-D` to `KEEPALIVED_CMD`). + + +### Autoconfiguration Options + +| Variable | Default | +|:-------------------------------------------:|:----------------------------------:| +| `KEEPALIVED_ADVERT_INT` | `1` | +| `KEEPALIVED_AUTH_PASS` | `pwd$KEEPALIVED_VIRTUAL_ROUTER_ID` | +| `KEEPALIVED_INTERFACE` | `eth0` | +| `KEEPALIVED_PRIORITY` | `200` | +| `KEEPALIVED_STATE` | `MASTER` | +| `KEEPALIVED_TRACK_INTERFACE_###` | | +| `KEEPALIVED_UNICAST_SRC_IP` | | +| `KEEPALIVED_UNICAST_PEER_###` | | +| `KEEPALIVED_VIRTUAL_IPADDRESS_###` | | +| `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` | | +| `KEEPALIVED_VIRTUAL_ROUTER_ID` | `1` | +| `KEEPALIVED_KUBE_APISERVER_CHECK` | `false` | + +* `KEEPALIVED_ADVERT_INT` - The VRRP advertisement interval (in seconds). + +* `KEEPALIVED_AUTH_PASS` - A shared password used to authenticate each node in a VRRP group (**Note:** If password is longer than 8 characters, only the first 8 characters are used). + +* `KEEPALIVED_INTERFACE` - The host interface that keepalived will monitor and use for VRRP traffic. + +* `KEEPALIVED_PRIORITY` - Election value, the server configured with the highest priority will become the Master. + +* `KEEPALIVED_STATE` - Defines the server role as Master or Backup. (**Options:** `MASTER` or `BACKUP`). 
+ +* `KEEPALIVED_TRACK_INTERFACE_###` - An interface whose state should be monitored (e.g. eth0). More than one can be supplied as long as the variable name ends in a number from 0-999. + +* `KEEPALIVED_UNICAST_SRC_IP` - The IP on the host that the keepalived daemon should bind to. **Note:** If not specified, it will be the first IP bound to the interface specified in `KEEPALIVED_INTERFACE`. + +* `KEEPALIVED_UNICAST_PEER_###` - An IP of a peer participating in the VRRP group. More than one can be supplied as long as the variable name ends in a number from 0-999. + +* `KEEPALIVED_VIRTUAL_IPADDRESS_###` - An instance of an address that will be monitored and failed over from one host to another. These should be a quoted string in the form of: `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>` At a minimum the ip address, mask and device should be specified e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.2/24 dev eth0"`. More than one can be supplied as long as the variable name ends in a number from 0-999. **Note:** Keepalived has a hard limit of **20** addresses that can be monitored. More can be failed over with the monitored addresses via `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###`. + + +* `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_###` - An instance of an address that will be failed over with the monitored addresses supplied via `KEEPALIVED_VIRTUAL_IPADDRESS_###`. These should be a quoted string in the form of: `<IPADDRESS>/<MASK> brd <BROADCAST_IP> dev <DEVICE> scope <SCOPE> label <LABEL>` At a minimum the ip address, mask and device should be specified e.g. `KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1"`. More than one can be supplied as long as the variable name ends in a number from 0-999. + +* `KEEPALIVED_VIRTUAL_ROUTER_ID` - A unique number from 0 to 255 that should identify the VRRP group. Master and Backup should have the same value. 
Multiple instances of keepalived can be run on the same host, but each pair **MUST** have a unique virtual router id. + +* `KEEPALIVED_KUBE_APISERVER_CHECK` - If enabled it configures a simple check script for the Kubernetes API-Server. For more information on this feature, please see the [Kubernetes Options](#kubernetes-options) section. + + +### Kubernetes Options + + +| **Variable** | **Default** | +|:-----------------------------:|:----------------------------------------------:| +| `KUBE_APISERVER_ADDRESS` | parsed from `KEEPALIVED_VIRTUAL_IPADDRESS_###` | +| `KUBE_APISERVER_PORT` | `6443` | +| `KUBE_APISERVER_CHK_INTERVAL` | `3` | +| `KUBE_APISERVER_CHK_FALL` | `10` | +| `KUBE_APISERVER_CHK_RISE` | `2` | +| `KUBE_APISERVER_CHK_WEIGHT` | `-50` | + + + +* `KUBE_APISERVER_ADDRESS` - The Virtual IP being used for the Kube API Server. If none is supplied, it is assumed to be the lowest numbered entry in the `KEEPALIVED_VIRTUAL_IPADDRESS_###` variables. + +* `KUBE_APISERVER_PORT` - The port to use in conjunction with the `KUBE_APISERVER_ADDRESS`. + +* `KUBE_APISERVER_CHK_INTERVAL` - The interval in seconds between calling the script. + +* `KUBE_APISERVER_CHK_FALL` - The number of consecutive non-zero script exits before setting the state to `FAULT`. + +* `KUBE_APISERVER_CHK_RISE` - The number of consecutive zero script exits before exiting the `FAULT` state. + +* `KUBE_APISERVER_CHK_WEIGHT` - The weight to apply to the priority when the service enters the `FAULT` state. + + + +--- + +### Suggested Kubernetes Settings + +Assuming there are three nodes running the kube-apiserver, you cannot rely on setting just the `KEEPALIVED_STATE` parameter to manage failover across the nodes. + +To manage kube-apiserver failover, enable the healthcheck option with `KEEPALIVED_KUBE_APISERVER_CHECK`, and set the `KEEPALIVED_PRIORITY` manually for the three instances. 
+ +| **Node** | **Priority** | +|:--------:|:------------:| +| node-01 | 200 | +| node-02 | 190 | +| node-03 | 180 | + +With the default weight of `-50`, if `node-01` has an issue, its priority will drop to `150` and allow `node-02` to take over; the same is repeated if `node-02` has a failure, dropping its priority to `140`, and `node-03` takes over. + +Recovery occurs in the same order with the system with the highest priority being promoted to master. + +### Example Keepalived Configs + +##### Example Autogenerated Keepalived Master Config +``` +vrrp_instance MAIN { + state MASTER + interface eth0 + virtual_router_id 2 + priority 200 + advert_int 1 + unicast_src_ip 10.10.0.21 + unicast_peer { + 10.10.0.22 + } + authentication { + auth_type PASS + auth_pass pwd1 + } + virtual_ipaddress { + 10.10.0.2/24 dev eth0 + } + virtual_ipaddress_excluded { + 172.16.1.20/24 dev eth1 + } + track_interface { + eth0 + eth1 + } +} +``` + +##### Example Autogenerated Keepalived Backup Config +``` +vrrp_instance MAIN { + state BACKUP + interface eth0 + virtual_router_id 2 + priority 100 + advert_int 1 + unicast_src_ip 10.10.0.22 + unicast_peer { + 10.10.0.21 + } + authentication { + auth_type PASS + auth_pass pwd1 + } + virtual_ipaddress { + 10.10.0.2/24 dev eth0 + } + virtual_ipaddress_excluded { + 172.16.1.20/24 dev eth1 + } + track_interface { + eth0 + eth1 + } +} + +``` + + +## Example Run Commands +##### Example Master Run Command +```bash +docker run -d --net=host --cap-add NET_ADMIN \ +-e KEEPALIVED_AUTOCONF=true \ +-e KEEPALIVED_STATE=MASTER \ +-e KEEPALIVED_INTERFACE=eth0 \ +-e KEEPALIVED_VIRTUAL_ROUTER_ID=2 \ +-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.21 \ +-e KEEPALIVED_UNICAST_PEER_0=10.10.0.22 \ +-e KEEPALIVED_TRACK_INTERFACE_1=eth0 \ +-e KEEPALIVED_TRACK_INTERFACE_2=eth1 \ +-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \ +-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \ +quay.io/ceph/keepalived +``` + +##### Example Backup Run Command 
+```bash +docker run -d --net=host --cap-add NET_ADMIN \ +-e KEEPALIVED_AUTOCONF=true \ +-e KEEPALIVED_STATE=BACKUP \ +-e KEEPALIVED_INTERFACE=eth0 \ +-e KEEPALIVED_VIRTUAL_ROUTER_ID=2 \ +-e KEEPALIVED_UNICAST_SRC_IP=10.10.0.22 \ +-e KEEPALIVED_UNICAST_PEER_0=10.10.0.21 \ +-e KEEPALIVED_TRACK_INTERFACE_1=eth0 \ +-e KEEPALIVED_TRACK_INTERFACE_2=eth1 \ +-e KEEPALIVED_VIRTUAL_IPADDRESS_1="10.10.0.3/24 dev eth0" \ +-e KEEPALIVED_VIRTUAL_IPADDRESS_EXCLUDED_1="172.16.1.20/24 dev eth1" \ +quay.io/ceph/keepalived +``` diff --git a/src/cephadm/containers/keepalived/skel/init.sh b/src/cephadm/containers/keepalived/skel/init.sh new file mode 100755 index 000000000..9c86cfad2 --- /dev/null +++ b/src/cephadm/containers/keepalived/skel/init.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -e +set -o pipefail + + +KEEPALIVED_DEBUG=${KEEPALIVED_DEBUG:-false} +KEEPALIVED_KUBE_APISERVER_CHECK=${KEEPALIVED_KUBE_APISERVER_CHECK:-false} +KEEPALIVED_CONF=${KEEPALIVED_CONF:-/etc/keepalived/keepalived.conf} +KEEPALIVED_VAR_RUN=${KEEPALIVED_VAR_RUN:-/var/run/keepalived} + +if [[ ${KEEPALIVED_DEBUG,,} == 'true' ]]; then + kd_cmd="/usr/sbin/keepalived -n -l -D -f $KEEPALIVED_CONF" +else + kd_cmd="/usr/sbin/keepalived -n -l -f $KEEPALIVED_CONF" +fi + +KEEPALIVED_CMD=${KEEPALIVED_CMD:-"$kd_cmd"} + +rm -fr "$KEEPALIVED_VAR_RUN" + +exec $KEEPALIVED_CMD
\ No newline at end of file diff --git a/src/cephadm/samples/alertmanager.json b/src/cephadm/samples/alertmanager.json new file mode 100644 index 000000000..bacbad300 --- /dev/null +++ b/src/cephadm/samples/alertmanager.json @@ -0,0 +1,27 @@ +{ + "files": { + "alertmanager.yml": [ + "global:", + " resolve_timeout: 5m", + "", + "route:", + " group_by: ['alertname']", + " group_wait: 10s", + " group_interval: 10s", + " repeat_interval: 1h", + " receiver: 'web.hook'", + "receivers:", + "- name: 'web.hook'", + " webhook_configs:", + " - url: 'http://127.0.0.1:5001/'", + "inhibit_rules:", + " - source_match:", + " severity: 'critical'", + " target_match:", + " severity: 'warning'", + " equal: ['alertname', 'dev', 'instance']" + ] + }, + "peers": [] +} + diff --git a/src/cephadm/samples/custom_container.json b/src/cephadm/samples/custom_container.json new file mode 100644 index 000000000..194a44d2a --- /dev/null +++ b/src/cephadm/samples/custom_container.json @@ -0,0 +1,35 @@ +{ + "image": "docker.io/prom/alertmanager:v0.20.0", + "ports": [9093, 9094], + "args": [ + "-p", "9093:9093", + "-p", "9094:9094" + ], + "dirs": ["etc/alertmanager"], + "files": { + "etc/alertmanager/alertmanager.yml": [ + "global:", + " resolve_timeout: 5m", + "", + "route:", + " group_by: ['alertname']", + " group_wait: 10s", + " group_interval: 10s", + " repeat_interval: 1h", + " receiver: 'web.hook'", + "receivers:", + "- name: 'web.hook'", + " webhook_configs:", + " - url: 'http://127.0.0.1:5001/'", + "inhibit_rules:", + " - source_match:", + " severity: 'critical'", + " target_match:", + " severity: 'warning'", + " equal: ['alertname', 'dev', 'instance']" + ] + }, + "volume_mounts": { + "etc/alertmanager": "/etc/alertmanager" + } +} diff --git a/src/cephadm/samples/grafana.json b/src/cephadm/samples/grafana.json new file mode 100644 index 000000000..0e0689b7e --- /dev/null +++ b/src/cephadm/samples/grafana.json @@ -0,0 +1,90 @@ +{ + "files": { + "grafana.ini": [ + "[users]", + " default_theme 
= light", + "[auth.anonymous]", + " enabled = true", + " org_name = 'Main Org.'", + " org_role = 'Viewer'", + "[server]", + " domain = 'bootstrap.storage.lab'", + " protocol = https", + " cert_file = /etc/grafana/certs/cert_file", + " cert_key = /etc/grafana/certs/cert_key", + " http_port = 3000", + " http_addr = localhost", + "[security]", + " admin_user = admin", + " admin_password = admin", + " allow_embedding = true" + ], + "provisioning/datasources/ceph-dashboard.yml": [ + "deleteDatasources:", + " - name: 'Dashboard'", + " orgId: 1", + " ", + "datasources:", + " - name: 'Dashboard'", + " type: 'prometheus'", + " access: 'proxy'", + " orgId: 1", + " url: 'http://localhost:9095'", + " basicAuth: false", + " isDefault: true", + " editable: false" + ], + "certs/cert_file": [ + "-----BEGIN CERTIFICATE-----", + "MIIDLTCCAhWgAwIBAgIUEH0mq6u93LKsWlNXst5pxWcuqkQwDQYJKoZIhvcNAQEL", + "BQAwJjELMAkGA1UECgwCSVQxFzAVBgNVBAMMDmNlcGgtZGFzaGJvYXJkMB4XDTIw", + "MDEwNTIyNDYyMFoXDTMwMDEwMjIyNDYyMFowJjELMAkGA1UECgwCSVQxFzAVBgNV", + "BAMMDmNlcGgtZGFzaGJvYXJkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC", + "AQEAqxh6eO0NTZJe+DoKZG/kozJCf+83eB3gWzwXoNinRmV/49f5WPR20DIxAe0R", + "saO6XynJXTrhvXT1bsARUq+LSmjWNFoYXopFuOJhGdWn4dmpuHwtpcFv2kjzNOKj", + "U2EG8j6bsRp1jFAzn7kdbSWT0UHySRXp9DPAjDiF3LjykMXiJMReccFXrB1pRi93", + "nJxED8d6oT5GazGB44svb+Zi6ABamZu5SDJC1Fr/O5rWFNQkH4hQEqDPj1817H9O", + "sm0mZiNy77ZQuAzOgZN153L3QOsyJismwNHfAMGMH9mzPKOjyhc13VlZyeEzml8p", + "ZpWQ2gi8P2r/FAr8bFL3MFnHKwIDAQABo1MwUTAdBgNVHQ4EFgQUZg3v7MX4J+hx", + "w3HENCrUkMK8tbwwHwYDVR0jBBgwFoAUZg3v7MX4J+hxw3HENCrUkMK8tbwwDwYD", + "VR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAaR/XPGKwUgVwH3KXAb6+", + "s9NTAt6lCmFdQz1ngoqFSizW7KGSXnOgd6xTiUCR0Tjjo2zKCwhIINaI6mwqMbrg", + "BOjb7diaqwFaitRs27AtdmaqMGndUqEBUn/k64Ld3VPGL4p0W2W+tXsyzZg1qQIn", + "JXb7c4+oWzXny7gHFheYQTwnHzDcNOf9vJiMGyYYvU1xTOGucu6dwtOVDDe1Z4Nq", + "AyIYWDScRr2FeAOXyx4aW2v5bjpTxvP+79/OOBbQ+p4y5F4PDrPeOSweGoo6huTR", + 
"+T+YI9Jfw2XCgV7NHWhfdt3fHHwUQzO6WszWU557pmCODLvXWsQ8P+GRiG7Nywm3", + "uA==", + "-----END CERTIFICATE-----" + ], + "certs/cert_key": [ + "-----BEGIN PRIVATE KEY-----", + "MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCrGHp47Q1Nkl74", + "Ogpkb+SjMkJ/7zd4HeBbPBeg2KdGZX/j1/lY9HbQMjEB7RGxo7pfKcldOuG9dPVu", + "wBFSr4tKaNY0WhheikW44mEZ1afh2am4fC2lwW/aSPM04qNTYQbyPpuxGnWMUDOf", + "uR1tJZPRQfJJFen0M8CMOIXcuPKQxeIkxF5xwVesHWlGL3ecnEQPx3qhPkZrMYHj", + "iy9v5mLoAFqZm7lIMkLUWv87mtYU1CQfiFASoM+PXzXsf06ybSZmI3LvtlC4DM6B", + "k3XncvdA6zImKybA0d8AwYwf2bM8o6PKFzXdWVnJ4TOaXylmlZDaCLw/av8UCvxs", + "UvcwWccrAgMBAAECggEAeBv0BiYrm5QwdUORfhaKxAIJavRM1Vbr5EBYOgM90o54", + "bEN2ePsM2XUSsE5ziGfu8tVL1dX7GNwdW8UbpBc1ymO0VAYXa27YKUVKcy9o7oS1", + "v5v1E5Kq6esiSLL9gw/vJ2nKNFblxD2dL/hs7u1dSp5n7uSiW1tlRUp8toljRzts", + "1Cenp0J/a82HwWDE8j/H9NvitTOZ2cdwJ76V8GkBynlvr2ARjRfZGx0WXEJmoZYD", + "YUQVU303DB6Q2tkFco4LbPofkuhhMPhXsz3fZ/blHj/c78tqP9L5sQ29oqoPE1pS", + "DBOwKC/eoi5FY34RdLNL0dKq9MzbuYqEcCfZOJgxoQKBgQDf+5XF+aXQz2OmSaj6", + "1Yr+3KAKdfX/AYp22X1Wy4zWcZlgujgwQ1FG0zay8HVBM0/xn4UgOtcKCoXibePh", + "ag1t8aZINdRE1JcMzKmZoSvU9Xk30CNvygizuJVEKsJFPDbPzCpauDSplzcQb4pZ", + "wepucPuowkPMBx0iU3x0qSThWwKBgQDDjYs7d30xxSqWWXyCOZshy7UtHMNfqP15", + "kDfTXIZzuHvDf6ZNci10VY1eDZbpZfHgc6x1ElbKv2H4dYsgkENJZUi1YQDpVPKq", + "4N5teNykgAuagiR7dRFltSju3S7hIE6HInTv3hShaFPymlEE7zuBMuEUcuvYz5YN", + "RjxsvypKcQKBgCuuV+Y1KqZPW8K5SNAqRyIvCrMfkCr8NPG6tpvvtHa5zsyzZHPd", + "HQOv+1HoXSWrCSM5FfBUKU3XAYdIIRH76cSQRPp+LPiDcTXY0Baa/P5aJRrCZ7bM", + "cugBznJt2FdCR/o8eeIZXIPabq2w4w1gKQUC2cFuqWQn2wGvwGzL89pTAoGAAfpx", + "mSVpT9KVzrWTC+I3To04BP/QfixAfDVYSzwZZBxOrDijXw8zpISlDHmIuE2+t62T", + "5g9Mb3qmLBRMVwT+mUR8CtGzZ6jjV5U0yti5KrTc6TA93D3f8i51/oygR8jC4p0X", + "n8GYZdWfW8nx3eHpsTHpkwJinmvjMbkvLU51yBECgYAnUAMyhNOWjbYS5QWd8i1W", + "SFQansVDeeT98RebrzmGwlgrCImHItJz0Tz8gkNB3+S2B2balqT0WHaDxQ8vCtwX", + "xB4wd+gMomgdYtHGRnRwj1UyRXDk0c1TgGdRjOn3URaezBMibHTQSbFgPciJgAuU", + "mEl75h1ToBX9yvnH39o50g==", + "-----END PRIVATE KEY-----" + ] + } +} diff --git 
a/src/cephadm/samples/nfs.json b/src/cephadm/samples/nfs.json new file mode 100644 index 000000000..2e6625101 --- /dev/null +++ b/src/cephadm/samples/nfs.json @@ -0,0 +1,14 @@ +{ + "pool" : "nfs-ganesha", + "namespace" : "nfs-ns", + "files": { + "ganesha.conf": [ + "RADOS_URLS {", + " userid = admin;", + "}", + "", + "%url rados://nfs-ganesha/nfs-ns/conf-nfs.a", + "" + ] + } +} diff --git a/src/cephadm/samples/prometheus.json b/src/cephadm/samples/prometheus.json new file mode 100644 index 000000000..64727fb59 --- /dev/null +++ b/src/cephadm/samples/prometheus.json @@ -0,0 +1,17 @@ +{ + "files": { + "prometheus.yml": [ + "global:", + " scrape_interval: 5s", + " evaluation_interval: 10s", + "", + "rule_files: ", + " - '/etc/prometheus/alerting/*'", + "", + "scrape_configs:", + " - job_name: 'prometheus'", + " static_configs:", + " - targets: ['localhost:9095']" + ] + } +} diff --git a/src/cephadm/samples/rgw_ssl.json b/src/cephadm/samples/rgw_ssl.json new file mode 100644 index 000000000..3fe6fea1c --- /dev/null +++ b/src/cephadm/samples/rgw_ssl.json @@ -0,0 +1,101 @@ +{ + "rgw_realm": "default", + "rgw_zone": "default", + "service_type": "rgw", + "placement": { + "hosts": [{ + "hostname": "ironic-moliver", + "name": "", + "network": "" + }], + "count": 1 + }, + "ssl": true, + "rgw_frontend_port": 4343, + "rgw_frontend_ssl_certificate": [ + "-----BEGIN CERTIFICATE-----", + "MIIFmjCCA4KgAwIBAgIJAIZ2n35bmwXTMA0GCSqGSIb3DQEBCwUAMGIxCzAJBgNV", + "BAYTAkFVMQwwCgYDVQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBp", + "bmMuMSYwJAYDVQQDDB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTAeFw0y", + "MDAyMDcwMDEzNTFaFw0zMDAyMDQwMDEzNTFaMGIxCzAJBgNVBAYTAkFVMQwwCgYD", + "VQQIDANOU1cxHTAbBgNVBAoMFEV4YW1wbGUgUkdXIFNTTCBpbmMuMSYwJAYDVQQD", + "DB1yZ3ctZW5kcG9pbnQuZXhhbXBsZS1jZXBoLmNvbTCCAiIwDQYJKoZIhvcNAQEB", + "BQADggIPADCCAgoCggIBAMptGJ523QkEbc37za8iuCTahj0Zr6hy+ToSX/Vfdzxj", + "iYHuD2PiZZyJB7t2eOqiA8sQ5N513EUtf2ZIBwtnnqFIzD5TqI3BxRajUTlOyXUX", + 
"onMwQwXu2ifDUy3LCmuQfzanOTWvVLac1NmkWbJHpJCXYbUnPb1Nvd0QjTTEH1jt", + "5bDHhfxwCIYK6PY+MqC72a09wB2ZF+EKsSdqghOKmibfJHtoJdsqGeLrysBLrzUJ", + "e/5ZW3V4Z85T2lja5KZnWgRofrUy5TmJV10HO4Hht92xvWvEi/rmjg2AVYZFUQQx", + "xKXpUBbF5T46eSVmaT7IH88Yp5ytgBTaigym7ETCjohp/DfCaK1DUehh0ce7iUq2", + "yCLviZsX4WdPYxzkoLflNrqm4YZP6iKcZSUR/A+qPKCzCXgMXFNA1JxilDwEq35F", + "zGN++ehJqdNmOQ1eQScsLwZQa6mC97d+upWdCvyntf1+S6vNcXhtRQpjNM4W37oW", + "r5nicsGA3/0rpDEHZW85KlkdWO1uCS/6ftgt8UUMaf5ew3PigzusqymBWTlMOjtW", + "uAQXxgZZvkRp+xdspn/uTCAP+bNShGD6Q+TO3U6IjTqHk83sGKCvg2dyU/dqgPr9", + "2IIzgQBFGk0W0nM/E83E8hUSwX17COLL3drhPZb4VRMChQ8PAa6u9nIymkX2wSVv", + "AgMBAAGjUzBRMB0GA1UdDgQWBBSsZHuY7KK80RrZHp+Gx+k16skuRDAfBgNVHSME", + "GDAWgBSsZHuY7KK80RrZHp+Gx+k16skuRDAPBgNVHRMBAf8EBTADAQH/MA0GCSqG", + "SIb3DQEBCwUAA4ICAQAE+BLtnu0p8FtK7vrBCRcCdvycWaSFGJUt7r5Nm8TD7sKw", + "bWeDLgXrRouyA7n6yt/JqQbXYcxt4MLAM0P6NQd5BlNrrnDk4rBnJiJgejppNE+S", + "BazR7Dv0uYcs8kPT4DPpwzv4aJ2aXCBaxYrq8Rx2xOqANCPVOrtPUk9yGpaQ5adU", + "GfxkVbpgIEz1c71PeQuK1KUU/Wpk7cpm+FQCizl9ftP2lHWsGhSLCuyWoMTjt68P", + "gYEWoV54eo/bzwj2ei6TcfNo+uHyzEiiG2qEvMh/cnYUFzs8O1t0mN19WPB1pSh1", + "faci5lGdtkRbLgP0g5RvpagE7Lw3mCc5Om8jmHs4mPfuVkssBVV23CrFpqLLrDX3", + "Acwb/zRGvA7T4WESBTJMYFOLgm0W0Y+AN8RcYNU9QbDhe++Te0uz/3Sy3GN2Xg5z", + "MxfD1+34x6KvMfCh8NjII2mFQ9ukcfrhcfO3oWDLlwsqlVbhkZxNiUOEIx9nzHcF", + "kWpZ2ypBDH45h2o3LyqvGjsu/BFkeG6JpEDCWbClKWcjKxOrLVDufhSDduffDjja", + "zOsgQJg0Yf//Ubb5p0c54GjHM/XDXEcV3m3sEtbmMYz6xGwuag4bx8P2E/QY8sFp", + "JxgIdS8vdl6YhDCjKJ2XzI30JwCdftgDIAiWSE0ivoDc+8+gG1nb11GT52HFzA==", + "-----END CERTIFICATE-----", + "-----BEGIN PRIVATE KEY-----", + "MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDKbRiedt0JBG3N", + "+82vIrgk2oY9Ga+ocvk6El/1X3c8Y4mB7g9j4mWciQe7dnjqogPLEOTeddxFLX9m", + "SAcLZ56hSMw+U6iNwcUWo1E5Tsl1F6JzMEMF7tonw1MtywprkH82pzk1r1S2nNTZ", + "pFmyR6SQl2G1Jz29Tb3dEI00xB9Y7eWwx4X8cAiGCuj2PjKgu9mtPcAdmRfhCrEn", + "aoITipom3yR7aCXbKhni68rAS681CXv+WVt1eGfOU9pY2uSmZ1oEaH61MuU5iVdd", + 
"BzuB4bfdsb1rxIv65o4NgFWGRVEEMcSl6VAWxeU+OnklZmk+yB/PGKecrYAU2ooM", + "puxEwo6Iafw3wmitQ1HoYdHHu4lKtsgi74mbF+FnT2Mc5KC35Ta6puGGT+oinGUl", + "EfwPqjygswl4DFxTQNScYpQ8BKt+RcxjfvnoSanTZjkNXkEnLC8GUGupgve3frqV", + "nQr8p7X9fkurzXF4bUUKYzTOFt+6Fq+Z4nLBgN/9K6QxB2VvOSpZHVjtbgkv+n7Y", + "LfFFDGn+XsNz4oM7rKspgVk5TDo7VrgEF8YGWb5EafsXbKZ/7kwgD/mzUoRg+kPk", + "zt1OiI06h5PN7Bigr4NnclP3aoD6/diCM4EARRpNFtJzPxPNxPIVEsF9ewjiy93a", + "4T2W+FUTAoUPDwGurvZyMppF9sElbwIDAQABAoICAQC4sATwP563pXTRpNYq3lCI", + "P2COyqq70/qUA0PNygYt8Nr60srz5RG0WknVvefgm2U+lvFaDsqjyzkbhsf2ndnb", + "aWH/07BLdeluGB/5W2rvDFtJIVVlSmF8OffgJgohzbpjkPrfglKWMkz5LbwwrrD0", + "w0mAUIdB+nYqBfnvlKjNKHCSc9hJU6ZTNg0K7gCfKgUWzOpFlvJ0fp7XSZPYZHL0", + "2E6e0Y0Ig0cPBPb9r4/xoe+hRsHtUafUVik3PK+1K0K0FurUQ9VkQ2yUEg83F0v8", + "Vzht5OuaRVSB+P8O/JtIamfywAY0YOYhepQhjWikwU5UUzhJ+PqNDD87/+g9bA1B", + "xC25eoDxThiQlgDmRoH18ZsWDVf9TuJnm4cpxnZYX6ip+BLm/aidT39auZo0Fl+r", + "cJxRn0Qlm0Vm4Tc/6ZG6PQWB+Q6CjVFdoxeOvEQcTSuKA6VZBStLmqX++5In1Lmj", + "hVr3/aueHiZvXS5bNIdd2IfzatR+nP+uxzM/ryJRvGO2B2XTS00Cvv/lH84BDJYV", + "yt1PJIBoM9Dh7aUAHmKNVfRt83xzvcSPZx9VmSzA6wwqCQcO1GJk6keAuxOuligu", + "YdSFcfChOg90WvBcl+NzMblLkwrFSBQR7kgG0+dedv+Wkm4xO4T7B4W2G5+VIJKG", + "mrEAq6XQMFnfEJzNVg7JUQKCAQEA91eMvphoVVz+cxa4Ew7OokNXk5kSlvmQ8+Ij", + "ngFBvniXPZecxVzFEJglSthH5KI2ZqxwF3GJhKjxUihwf6K13Hx54EM7S/qV57ie", + "kVeKdAs+SGv+hRk1gQOoPBInbtKGKTni1V8T7iNginLueC/YikFugzv6IxiliBSG", + "3R7zjRepOW69aEoCPecx9amU4CkAwgeLJgBloBoqWD8sKM+bl7p5juQCU2sQ9D4/", + "kLnpG9+zPRUNjI4sog3L1wql3zthI6/4gf0TNuDhJTZ68vpMSi02pOUkVa0MmVOA", + "ex16luIp0BhxG/sUAeoevFL4KqR0CBbyAstbt2E/oPYOWMJ4MwKCAQEA0YMNXY7I", + "RNFOmiZ2Wn8kENCyJguqbOMd/li2+ercgp3MaSgTjC5KDFvZqTwXXlrURPu9hcyv", + "sJBSjp45g1T1LsUUq8UJgOIQgxykurIstGg33TAR+TN9VUu/xonLQF23GD8M6Vzd", + "EcZEVlBY33hgNXw4mRcBPnaoG5FZCBfHOgdBCExoYKW/RNKcmu0q+h9uhDBCbopv", + "04ROzw+HW1qc5qvNPR47buZ9+5QdonVK8s2bguMJ0phXwdSxL21wsjIsXyAO9m7w", + "qLHOq/hVokM0Fki09Exg4ppB8cLHC2ITpsVSgn4Dcz5zRtyvhozSKX4R9kMC64a0", + 
"AgMPVMllmGlR1QKCAQBIIGCrh7gNBIQyvXJKJGw/RxH3uZCBNB9/7vbh8Y3hZDr+", + "PAL8TpQsxaFCVRWJ53+jqy84tuQaKkXM5nv/zEvqEuZbbl+RRW6HVv/udC+srUap", + "Scy7tWEz0QQzGDwlhgCXbwjlnccrD2fsl51QsOsdTf1TCZ9ksqA6sXmua4MsJrUz", + "SUa0bbh/oraf46bFQ0+0RQzftQftixPEDg/rirbdpQQjlfvTpYoZHzncE0qV1ULo", + "UgZUcXU0gH9rovBBy4gFJyB5j3oV67fb6SorRrAOhWbE6QkSbtcYsw/pVuxTqXn1", + "89qwBSSNdl8mHa++h5xKa56BEBobvKEYaAhA+9yfAoIBAQDPFEE5n87Cdj7CjhGd", + "EN2M4Tmz8QPZ7AHRS85O5pxdXaqUpU/s1jPKU16nrwVJ9WypYkjI3q8oTP3MiQc/", + "j9FnENSFkpL6GHdJoB4Rido11myg6spZDVNr4xsCGWATlo1KIceZQHghAV66EWBG", + "QKyXMNigN+S64Hz4AomFPjtkV5cnpJ3mKO0MET9IwfIglsCdVzXSHHK7FaLvdeHL", + "oZxDQrvxFNiZnKgY6SUBVf1mT2LN06n5xSm4I4md3wXsmzrQKtefK7gihNxJjYLW", + "hqYNAIAalwOL9fwIAQTLc30I8S/EWtj+J1O5TpcO3lE7QahvR3yzXsi81Flq7ETG", + "iBKhAoIBAGHGpnjrLlCarNk9axh4Dw1OjgEvwPlEqsiWXt2tylLeab0OGC47MmJx", + "RmKwgVukMuxkQb8v4ANSRtih7R+E+qXfexjEFYtzh/uaRP1Z7ZrO/oqq0oLbPpsx", + "yTSRDL1i5/fgdIlKVH3N4IF7E8Pc3REgYIwLQxYjTdgVHEAM65XegQ2Lkpr4iae3", + "hm4IsD2PrsVITrlsLg65XnfcbsCs/OfQ5GuUp+xUBw5e0bQBmsWEiCaCjrq/EHJa", + "/oeJRqS7lyGYDC+wiSsE70x4dvu1um2F+V1Jw4LWjhu8Z8dNSXPSf8vLqXGkWAlk", + "805lq+iy7Mkhb+dlr4R9WhMWDyGwgYs=", + "-----END PRIVATE KEY-----" + ] +} diff --git a/src/cephadm/tests/__init__.py b/src/cephadm/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/cephadm/tests/__init__.py diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py new file mode 100644 index 000000000..76ac0b44c --- /dev/null +++ b/src/cephadm/tests/fixtures.py @@ -0,0 +1,162 @@ +import mock +import os +import pytest +import time + +from contextlib import contextmanager +from pyfakefs import fake_filesystem + +from typing import Dict, List, Optional + + +def import_cephadm(): + """Import cephadm as a module.""" + import cephadm as _cephadm + + return _cephadm + + +def mock_docker(): + _cephadm = import_cephadm() + docker = mock.Mock(_cephadm.Docker) + docker.path = '/usr/bin/docker' + return docker + + +def 
mock_podman(): + _cephadm = import_cephadm() + podman = mock.Mock(_cephadm.Podman) + podman.path = '/usr/bin/podman' + podman.version = (2, 1, 0) + return podman + + +def _daemon_path(): + return os.getcwd() + + +def mock_bad_firewalld(): + def raise_bad_firewalld(): + raise Exception('Called bad firewalld') + + _cephadm = import_cephadm() + f = mock.Mock(_cephadm.Firewalld) + f.enable_service_for = lambda _: raise_bad_firewalld() + f.apply_rules = lambda: raise_bad_firewalld() + f.open_ports = lambda _: raise_bad_firewalld() + + +def _mock_scrape_host(obj, interval): + try: + raise ValueError("wah") + except Exception as e: + obj._handle_thread_exception(e, 'host') + + +def _mock_run(obj): + t = obj._create_thread(obj._scrape_host_facts, 'host', 5) + time.sleep(1) + if not t.is_alive(): + obj.cephadm_cache.update_health('host', "inactive", "host thread stopped") + + +@pytest.fixture() +def cephadm_fs( + fs: fake_filesystem.FakeFilesystem, +): + """ + use pyfakefs to stub filesystem calls + """ + uid = os.getuid() + gid = os.getgid() + + def fchown(fd, _uid, _gid): + """pyfakefs doesn't provide a working fchown or fchmod. + In order to get permissions working generally across renames + we need to provide our own implemenation. 
+ """ + file_obj = fs.get_open_file(fd).get_object() + file_obj.st_uid = _uid + file_obj.st_gid = _gid + + _cephadm = import_cephadm() + with mock.patch('os.fchown', side_effect=fchown), \ + mock.patch('os.fchmod'), \ + mock.patch('platform.processor', return_value='x86_64'), \ + mock.patch('cephadm.extract_uid_gid', return_value=(uid, gid)): + + try: + if not fake_filesystem.is_root(): + fake_filesystem.set_uid(0) + except AttributeError: + pass + + fs.create_dir(_cephadm.DATA_DIR) + fs.create_dir(_cephadm.LOG_DIR) + fs.create_dir(_cephadm.LOCK_DIR) + fs.create_dir(_cephadm.LOGROTATE_DIR) + fs.create_dir(_cephadm.UNIT_DIR) + fs.create_dir('/sys/block') + + yield fs + + +@pytest.fixture() +def host_sysfs(fs: fake_filesystem.FakeFilesystem): + """Create a fake filesystem to represent sysfs""" + enc_path = '/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0' + dev_path = '/sys/class/scsi_generic/sg2/device' + slot_count = 12 + fs.create_dir(dev_path) + fs.create_file(os.path.join(dev_path, 'vendor'), contents="EnclosuresInc") + fs.create_file(os.path.join(dev_path, 'model'), contents="D12") + fs.create_file(os.path.join(enc_path, 'id'), contents='1') + fs.create_file(os.path.join(enc_path, 'components'), contents=str(slot_count)) + for slot_num in range(slot_count): + slot_dir = os.path.join(enc_path, str(slot_num)) + fs.create_file(os.path.join(slot_dir, 'locate'), contents='0') + fs.create_file(os.path.join(slot_dir, 'fault'), contents='0') + fs.create_file(os.path.join(slot_dir, 'slot'), contents=str(slot_num)) + if slot_num < 6: + fs.create_file(os.path.join(slot_dir, 'status'), contents='Ok') + slot_dev = os.path.join(slot_dir, 'device') + fs.create_dir(slot_dev) + fs.create_file(os.path.join(slot_dev, 'vpd_pg80'), contents=f'fake{slot_num:0>3}') + else: + fs.create_file(os.path.join(slot_dir, 'status'), contents='not installed') + + yield fs + + +@contextmanager +def with_cephadm_ctx( + cmd: List[str], + list_networks: Optional[Dict[str, Dict[str, 
List[str]]]] = None, + hostname: Optional[str] = None, +): + """ + :param cmd: cephadm command argv + :param list_networks: mock 'list-networks' return + :param hostname: mock 'socket.gethostname' return + """ + if not hostname: + hostname = 'host1' + + _cephadm = import_cephadm() + with mock.patch('cephadm.attempt_bind'), \ + mock.patch('cephadm.call', return_value=('', '', 0)), \ + mock.patch('cephadm.call_timeout', return_value=0), \ + mock.patch('cephadm.find_executable', return_value='foo'), \ + mock.patch('cephadm.get_container_info', return_value=None), \ + mock.patch('cephadm.is_available', return_value=True), \ + mock.patch('cephadm.json_loads_retry', return_value={'epoch' : 1}), \ + mock.patch('cephadm.logger'), \ + mock.patch('socket.gethostname', return_value=hostname): + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(cmd) + ctx.container_engine = mock_podman() + if list_networks is not None: + with mock.patch('cephadm.list_networks', return_value=list_networks): + yield ctx + else: + yield ctx + diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py new file mode 100644 index 000000000..f9cf201e2 --- /dev/null +++ b/src/cephadm/tests/test_agent.py @@ -0,0 +1,800 @@ +from unittest import mock +import copy, datetime, json, os, socket, threading + +import pytest + +from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm + +from typing import Optional + +_cephadm = import_cephadm() + + +FSID = "beefbeef-beef-beef-1234-beefbeefbeef" +AGENT_ID = 'host1' +AGENT_DIR = f'/var/lib/ceph/{FSID}/agent.{AGENT_ID}' + + +def test_agent_validate(): + required_files = _cephadm.CephadmAgent.required_files + with with_cephadm_ctx([]) as ctx: + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + for i in range(len(required_files)): + incomplete_files = {s: 'text' for s in [f for j, f in enumerate(required_files) if j != i]} + with pytest.raises(_cephadm.Error, match=f'required file missing from config: 
{required_files[i]}'): + agent.validate(incomplete_files) + all_files = {s: 'text' for s in required_files} + agent.validate(all_files) + + +def _check_file(path, content): + assert os.path.exists(path) + with open(path) as f: + fcontent = f.read() + assert fcontent == content + + +@mock.patch('cephadm.call_throws') +def test_agent_deploy_daemon_unit(_call_throws, cephadm_fs): + _call_throws.return_value = ('', '', 0) + agent_id = AGENT_ID + + with with_cephadm_ctx([]) as ctx: + ctx.meta_json = json.dumps({'meta': 'data'}) + agent = _cephadm.CephadmAgent(ctx, FSID, agent_id) + cephadm_fs.create_dir(AGENT_DIR) + + with pytest.raises(_cephadm.Error, match='Agent needs a config'): + agent.deploy_daemon_unit() + + config = {s: f'text for {s}' for s in _cephadm.CephadmAgent.required_files} + config['not-required-file.txt'] = 'don\'t write me' + + agent.deploy_daemon_unit(config) + + # check required config file were all created + for fname in _cephadm.CephadmAgent.required_files: + _check_file(f'{AGENT_DIR}/{fname}', f'text for {fname}') + + # assert non-required file was not written + assert not os.path.exists(f'{AGENT_DIR}/not-required-file.txt') + + # check unit.run file was created correctly + _check_file(f'{AGENT_DIR}/unit.run', agent.unit_run()) + + # check unit.meta file created correctly + _check_file(f'{AGENT_DIR}/unit.meta', json.dumps({'meta': 'data'}, indent=4) + '\n') + + # check unit file was created correctly + _check_file(f'{ctx.unit_dir}/{agent.unit_name()}', agent.unit_file()) + + expected_call_throws_calls = [ + mock.call(ctx, ['systemctl', 'daemon-reload']), + mock.call(ctx, ['systemctl', 'enable', '--now', agent.unit_name()]), + ] + _call_throws.assert_has_calls(expected_call_throws_calls) + + expected_call_calls = [ + mock.call(ctx, ['systemctl', 'stop', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG), + mock.call(ctx, ['systemctl', 'reset-failed', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG), + ] + 
_cephadm.call.assert_has_calls(expected_call_calls) + + +@mock.patch('threading.Thread.is_alive') +def test_agent_shutdown(_is_alive): + with with_cephadm_ctx([]) as ctx: + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + _is_alive.return_value = True + assert agent.stop == False + assert agent.mgr_listener.stop == False + assert agent.ls_gatherer.stop == False + assert agent.volume_gatherer.stop == False + agent.shutdown() + assert agent.stop == True + assert agent.mgr_listener.stop == True + assert agent.ls_gatherer.stop == True + assert agent.volume_gatherer.stop == True + + +def test_agent_wakeup(): + with with_cephadm_ctx([]) as ctx: + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + assert agent.event.is_set() == False + agent.wakeup() + assert agent.event.is_set() == True + + +@mock.patch("cephadm.CephadmAgent.shutdown") +@mock.patch("cephadm.AgentGatherer.update_func") +def test_pull_conf_settings(_update_func, _shutdown, cephadm_fs): + target_ip = '192.168.0.0' + target_port = 9876 + refresh_period = 20 + listener_port = 5678 + host = AGENT_ID + device_enhanced_scan = 'True' + with with_cephadm_ctx([]) as ctx: + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + full_config = { + 'target_ip': target_ip, + 'target_port': target_port, + 'refresh_period': refresh_period, + 'listener_port': listener_port, + 'host': host, + 'device_enhanced_scan': device_enhanced_scan + } + cephadm_fs.create_dir(AGENT_DIR) + with open(agent.config_path, 'w') as f: + f.write(json.dumps(full_config)) + + with pytest.raises(_cephadm.Error, match="Failed to get agent keyring:"): + agent.pull_conf_settings() + _shutdown.assert_called() + with open(agent.keyring_path, 'w') as f: + f.write('keyring') + + assert agent.device_enhanced_scan == False + agent.pull_conf_settings() + assert agent.host == host + assert agent.target_ip == target_ip + assert agent.target_port == target_port + assert agent.loop_interval == refresh_period + assert agent.starting_port == listener_port + 
assert agent.device_enhanced_scan == True + assert agent.keyring == 'keyring' + _update_func.assert_called() + + full_config.pop('target_ip') + with open(agent.config_path, 'w') as f: + f.write(json.dumps(full_config)) + with pytest.raises(_cephadm.Error, match="Failed to get agent target ip and port from config:"): + agent.pull_conf_settings() + + +@mock.patch("cephadm.command_ceph_volume") +def test_agent_ceph_volume(_ceph_volume): + + def _ceph_volume_outputter(_): + print("ceph-volume output") + + def _ceph_volume_empty(_): + pass + + with with_cephadm_ctx([]) as ctx: + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + + _ceph_volume.side_effect = _ceph_volume_outputter + out, _ = agent._ceph_volume(False) + assert ctx.command == ['inventory', '--format=json'] + assert out == "ceph-volume output\n" + + out, _ = agent._ceph_volume(True) + assert ctx.command == ['inventory', '--format=json', '--with-lsm'] + assert out == "ceph-volume output\n" + + _ceph_volume.side_effect = _ceph_volume_empty + with pytest.raises(Exception, match='ceph-volume returned empty value'): + out, _ = agent._ceph_volume(False) + + +def test_agent_daemon_ls_subset(cephadm_fs): + # Basing part of this test on some actual sample output + + # Some sample "podman stats --format '{{.ID}},{{.MemUsage}}' --no-stream" output + # 3f2b31d19ecd,456.4MB / 41.96GB + # 5aca2499e0f8,7.082MB / 41.96GB + # fe0cef07d5f7,35.91MB / 41.96GB + + # Sample "podman ps --format '{{.ID}},{{.Names}}' --no-trunc" output with the same containers + # fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mon-host1 + # 3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-mgr-host1-pntmho + # 5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a,ceph-4434fa7c-5602-11ed-b719-5254006ef86b-crash-host1 + + # Some of the components from that output + mgr_cid = 
'3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f' + mon_cid = 'fe0cef07d5f71c5c604f7d1b4a4ac2e27873c96089d015014524e803361b4a30' + crash_cid = '5aca2499e0f8fb903788ff90eb03fe6ed58c7ed177caf278fed199936aff7b4a' + mgr_short_cid = mgr_cid[0:12] + mon_short_cid = mon_cid[0:12] + crash_short_cid = crash_cid[0:12] + + #Rebuilding the output but with our testing FSID and components (to allow alteration later for whatever reason) + mem_out = f"""{mgr_short_cid},456.4MB / 41.96GB +{crash_short_cid},7.082MB / 41.96GB +{mon_short_cid},35.91MB / 41.96GB""" + + ps_out = f"""{mon_cid},ceph-{FSID}-mon-host1 +{mgr_cid},ceph-{FSID}-mgr-host1-pntmho +{crash_cid},ceph-{FSID}-crash-host1""" + + def _fake_call(ctx, cmd, desc=None, verbosity=_cephadm.CallVerbosity.VERBOSE_ON_FAILURE, timeout=_cephadm.DEFAULT_TIMEOUT, **kwargs): + if 'stats' in cmd: + return (mem_out, '', 0) + elif 'ps' in cmd: + return (ps_out, '', 0) + return ('out', 'err', 0) + + cephadm_fs.create_dir(AGENT_DIR) + cephadm_fs.create_dir(f'/var/lib/ceph/mon/ceph-host1') # legacy daemon + cephadm_fs.create_dir(f'/var/lib/ceph/osd/nothing') # improper directory, should be skipped + cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/mgr.host1.pntmho') # cephadm daemon + cephadm_fs.create_dir(f'/var/lib/ceph/{FSID}/crash.host1') # cephadm daemon + + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + _cephadm.call.side_effect = _fake_call + daemons = agent._daemon_ls_subset() + + assert 'agent.host1' in daemons + assert 'mgr.host1.pntmho' in daemons + assert 'crash.host1' in daemons + assert 'mon.host1' in daemons + + assert daemons['mon.host1']['style'] == 'legacy' + assert daemons['mgr.host1.pntmho']['style'] == 'cephadm:v1' + assert daemons['crash.host1']['style'] == 'cephadm:v1' + assert daemons['agent.host1']['style'] == 'cephadm:v1' + + assert daemons['mgr.host1.pntmho']['systemd_unit'] == f'ceph-{FSID}@mgr.host1.pntmho' + assert 
daemons['agent.host1']['systemd_unit'] == f'ceph-{FSID}@agent.host1' + assert daemons['crash.host1']['systemd_unit'] == f'ceph-{FSID}@crash.host1' + + assert daemons['mgr.host1.pntmho']['container_id'] == mgr_cid + assert daemons['crash.host1']['container_id'] == crash_cid + + assert daemons['mgr.host1.pntmho']['memory_usage'] == 478570086 # 456.4 MB + assert daemons['crash.host1']['memory_usage'] == 7426015 # 7.082 MB + + +@mock.patch("cephadm.list_daemons") +@mock.patch("cephadm.CephadmAgent._daemon_ls_subset") +def test_agent_get_ls(_ls_subset, _ls, cephadm_fs): + ls_out = [{ + "style": "cephadm:v1", + "name": "mgr.host1.pntmho", + "fsid": FSID, + "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho", + "enabled": True, + "state": "running", + "service_name": "mgr", + "memory_request": None, + "memory_limit": None, + "ports": [ + 9283, + 8765 + ], + "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f", + "container_image_name": "quay.io/ceph/ceph:testing", + "container_image_id": "3300e39269f0c13ae45026cf233d8b3fff1303d52f2598a69c7fba0bb8405164", + "container_image_digests": [ + "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30" + ], + "memory_usage": 507510784, + "cpu_percentage": "5.95%", + "version": "18.0.0-556-gb4d1a199", + "started": "2022-10-27T14:19:36.086664Z", + "created": "2022-10-27T14:19:36.282281Z", + "deployed": "2022-10-27T14:19:35.377275Z", + "configured": "2022-10-27T14:22:40.316912Z" + },{ + "style": "cephadm:v1", + "name": "agent.host1", + "fsid": FSID, + "systemd_unit": f"ceph-{FSID}@agent.host1", + "enabled": True, + "state": "running", + "service_name": "agent", + "ports": [], + "ip": None, + "deployed_by": [ + "quay.io/ceph/ceph@sha256:d4f3522528ee79904f9e530bdce438acac30a039e9a0b3cf31d8b614f9f96a30" + ], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "container_id": None, + "container_image_name": None, + "container_image_id": None, + 
"container_image_digests": None, + "version": None, + "started": None, + "created": "2022-10-27T19:46:49.751594Z", + "deployed": None, + "configured": "2022-10-27T19:46:49.751594Z" + }, { + "style": "legacy", + "name": "mon.host1", + "fsid": FSID, + "systemd_unit": "ceph-mon@host1", + "enabled": False, + "state": "stopped", + "host_version": None + }] + + ls_subset_out = { + 'mgr.host1.pntmho': { + "style": "cephadm:v1", + "fsid": FSID, + "systemd_unit": f"ceph-{FSID}@mgr.host1.pntmho", + "enabled": True, + "state": "running", + "container_id": "3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f", + "memory_usage": 507510784, + }, + 'agent.host1': { + "style": "cephadm:v1", + "fsid": FSID, + "systemd_unit": f"ceph-{FSID}@agent.host1", + "enabled": True, + "state": "running", + "container_id": None + }, 'mon.host1': { + "style": "legacy", + "name": "mon.host1", + "fsid": FSID, + "systemd_unit": "ceph-mon@host1", + "enabled": False, + "state": "stopped", + "host_version": None + }} + + _ls.return_value = ls_out + _ls_subset.return_value = ls_subset_out + + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + + # first pass, no cached daemon metadata + daemons, changed = agent._get_ls() + assert daemons == ls_out + assert changed + + # second pass, should recognize that daemons have not changed and just keep cached values + daemons, changed = agent._get_ls() + assert daemons == daemons + assert not changed + + # change a container id so it needs to get more info + ls_subset_out2 = copy.deepcopy(ls_subset_out) + ls_out2 = copy.deepcopy(ls_out) + ls_subset_out2['mgr.host1.pntmho']['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa' + ls_out2[0]['container_id'] = '3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e7066034aaaaa' + _ls.return_value = ls_out2 + _ls_subset.return_value = ls_subset_out2 + assert agent.cached_ls_values['mgr.host1.pntmho']['container_id'] == 
"3f2b31d19ecdd586640cc9c6ef7c0fe62157a3f7a71fcb60c91e70660340cd1f" + daemons, changed = agent._get_ls() + assert daemons == ls_out2 + assert changed + + # run again with the same data so it should use cached values + daemons, changed = agent._get_ls() + assert daemons == ls_out2 + assert not changed + + # change the state of a container so new daemon metadata is needed + ls_subset_out3 = copy.deepcopy(ls_subset_out2) + ls_out3 = copy.deepcopy(ls_out2) + ls_subset_out3['mgr.host1.pntmho']['enabled'] = False + ls_out3[0]['enabled'] = False + _ls.return_value = ls_out3 + _ls_subset.return_value = ls_subset_out3 + assert agent.cached_ls_values['mgr.host1.pntmho']['enabled'] == True + daemons, changed = agent._get_ls() + assert daemons == ls_out3 + assert changed + + # run again with the same data so it should use cached values + daemons, changed = agent._get_ls() + assert daemons == ls_out3 + assert not changed + + # remove a daemon so new metadats is needed + ls_subset_out4 = copy.deepcopy(ls_subset_out3) + ls_out4 = copy.deepcopy(ls_out3) + ls_subset_out4.pop('mon.host1') + ls_out4.pop() + _ls.return_value = ls_out4 + _ls_subset.return_value = ls_subset_out4 + assert 'mon.host1' in agent.cached_ls_values + daemons, changed = agent._get_ls() + assert daemons == ls_out4 + assert changed + + # run again with the same data so it should use cached values + daemons, changed = agent._get_ls() + assert daemons == ls_out4 + assert not changed + + +@mock.patch("threading.Event.clear") +@mock.patch("threading.Event.wait") +@mock.patch("urllib.request.Request.__init__") +@mock.patch("cephadm.urlopen") +@mock.patch("cephadm.list_networks") +@mock.patch("cephadm.HostFacts.dump") +@mock.patch("cephadm.HostFacts.__init__", lambda _, __: None) +@mock.patch("ssl.SSLContext.load_verify_locations") +@mock.patch("threading.Thread.is_alive") +@mock.patch("cephadm.MgrListener.start") +@mock.patch("cephadm.AgentGatherer.start") +@mock.patch("cephadm.port_in_use") 
+@mock.patch("cephadm.CephadmAgent.pull_conf_settings") +def test_agent_run(_pull_conf_settings, _port_in_use, _gatherer_start, + _listener_start, _is_alive, _load_verify_locations, + _HF_dump, _list_networks, _urlopen, _RQ_init, _wait, _clear): + target_ip = '192.168.0.0' + target_port = '9999' + refresh_period = 20 + listener_port = 7770 + open_listener_port = 7777 + host = AGENT_ID + device_enhanced_scan = False + + def _fake_port_in_use(ctx, endpoint): + if endpoint.port == open_listener_port: + return False + return True + + network_data: Dict[str, Dict[str, Set[str]]] = { + "10.2.1.0/24": { + "eth1": set(["10.2.1.122"]) + }, + "192.168.122.0/24": { + "eth0": set(["192.168.122.221"]) + }, + "fe80::/64": { + "eth0": set(["fe80::5054:ff:fe3f:d94e"]), + "eth1": set(["fe80::5054:ff:fe3f:aa4a"]), + } + } + + # the json serializable version of the networks data + # we expect the agent to actually send + network_data_no_sets: Dict[str, Dict[str, List[str]]] = { + "10.2.1.0/24": { + "eth1": ["10.2.1.122"] + }, + "192.168.122.0/24": { + "eth0": ["192.168.122.221"] + }, + "fe80::/64": { + "eth0": ["fe80::5054:ff:fe3f:d94e"], + "eth1": ["fe80::5054:ff:fe3f:aa4a"], + } + } + + class FakeHTTPResponse(): + def __init__(self): + pass + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + pass + + def read(self): + return json.dumps({'valid': 'output', 'result': '400'}) + + _port_in_use.side_effect = _fake_port_in_use + _is_alive.return_value = False + _HF_dump.return_value = 'Host Facts' + _list_networks.return_value = network_data + _urlopen.side_effect = lambda *args, **kwargs: FakeHTTPResponse() + _RQ_init.side_effect = lambda *args, **kwargs: None + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + agent.keyring = 'agent keyring' + agent.ack = 7 + agent.volume_gatherer.ack = 7 + agent.volume_gatherer.data = 'ceph-volume inventory data' + agent.ls_gatherer.ack = 7 + agent.ls_gatherer.data 
= [{'valid_daemon': 'valid_metadata'}] + + def _set_conf(): + agent.target_ip = target_ip + agent.target_port = target_port + agent.loop_interval = refresh_period + agent.starting_port = listener_port + agent.host = host + agent.device_enhanced_scan = device_enhanced_scan + _pull_conf_settings.side_effect = _set_conf + + # technically the run function loops forever unless the agent + # is told to stop. To get around that we're going to have the + # event.wait() (which happens at the end of the loop) to throw + # a special exception type. If we catch this exception we can + # consider it as being a "success" run + class EventCleared(Exception): + pass + + _clear.side_effect = EventCleared('SUCCESS') + with pytest.raises(EventCleared, match='SUCCESS'): + agent.run() + + expected_data = { + 'host': host, + 'ls': [{'valid_daemon': 'valid_metadata'}], + 'networks': network_data_no_sets, + 'facts': 'Host Facts', + 'volume': 'ceph-volume inventory data', + 'ack': str(7), + 'keyring': 'agent keyring', + 'port': str(open_listener_port) + } + _RQ_init.assert_called_with( + f'https://{target_ip}:{target_port}/data/', + json.dumps(expected_data).encode('ascii'), + {'Content-Type': 'application/json'} + ) + _listener_start.assert_called() + _gatherer_start.assert_called() + _urlopen.assert_called() + + # agent should not go down if connections fail + _urlopen.side_effect = Exception() + with pytest.raises(EventCleared, match='SUCCESS'): + agent.run() + + # should fail if no ports are open for listener + _port_in_use.side_effect = lambda _, __: True + agent.listener_port = None + with pytest.raises(Exception, match='Failed to pick port for agent to listen on: All 1000 ports starting at 7770 taken.'): + agent.run() + + +@mock.patch("cephadm.CephadmAgent.pull_conf_settings") +@mock.patch("cephadm.CephadmAgent.wakeup") +def test_mgr_listener_handle_json_payload(_agent_wakeup, _pull_conf_settings, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = 
_cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + cephadm_fs.create_dir(AGENT_DIR) + + data_no_config = { + 'counter': 7 + } + agent.mgr_listener.handle_json_payload(data_no_config) + _agent_wakeup.assert_not_called() + _pull_conf_settings.assert_not_called() + assert not any(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files) + + data_with_config = { + 'counter': 7, + 'config': { + 'unrequired-file': 'unrequired-text' + } + } + data_with_config['config'].update({s: f'{s} text' for s in agent.required_files if s != agent.required_files[2]}) + agent.mgr_listener.handle_json_payload(data_with_config) + _agent_wakeup.assert_called() + _pull_conf_settings.assert_called() + assert all(os.path.exists(os.path.join(AGENT_DIR, s)) for s in agent.required_files if s != agent.required_files[2]) + assert not os.path.exists(os.path.join(AGENT_DIR, agent.required_files[2])) + assert not os.path.exists(os.path.join(AGENT_DIR, 'unrequired-file')) + + +@mock.patch("socket.socket") +@mock.patch("ssl.SSLContext.wrap_socket") +@mock.patch("cephadm.MgrListener.handle_json_payload") +@mock.patch("ssl.SSLContext.load_verify_locations") +@mock.patch("ssl.SSLContext.load_cert_chain") +def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json_payload, + _wrap_context, _socket, cephadm_fs): + + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + cephadm_fs.create_dir(AGENT_DIR) + + payload = json.dumps({'counter': 3, + 'config': {s: f'{s} text' for s in agent.required_files if s != agent.required_files[1]}}) + + class FakeSocket: + + def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, fileno=None): + self.family = family + self.type = type + + def bind(*args, **kwargs): + return + + def settimeout(*args, **kwargs): + return + + def listen(*args, **kwargs): + return + + class FakeSecureSocket: + + def __init__(self, pload): + self.payload = pload + self._conn = 
FakeConn(self.payload) + self.accepted = False + + def accept(self): + # to make mgr listener run loop stop running, + # set it to stop after accepting a "connection" + # on our fake socket so only one iteration of the loop + # actually happens + agent.mgr_listener.stop = True + accepted = True + return self._conn, None + + def load_cert_chain(*args, **kwargs): + return + + def load_verify_locations(*args, **kwargs): + return + + class FakeConn: + + def __init__(self, payload: str = ''): + payload_len_str = str(len(payload.encode('utf-8'))) + while len(payload_len_str.encode('utf-8')) < 10: + payload_len_str = '0' + payload_len_str + self.payload = (payload_len_str + payload).encode('utf-8') + self.buffer_len = len(self.payload) + + def recv(self, len: Optional[int] = None): + if not len or len >= self.buffer_len: + ret = self.payload + self.payload = b'' + self.buffer_len = 0 + return ret + else: + ret = self.payload[:len] + self.payload = self.payload[len:] + self.buffer_len = self.buffer_len - len + return ret + + FSS_good_data = FakeSecureSocket(payload) + FSS_bad_json = FakeSecureSocket('bad json') + _socket = FakeSocket + agent.listener_port = 7777 + + # first run, should successfully receive properly structured json payload + _wrap_context.side_effect = [FSS_good_data] + agent.mgr_listener.stop = False + FakeConn.send = mock.Mock(return_value=None) + agent.mgr_listener.run() + + # verify payload was correctly extracted + assert _handle_json_payload.called_with(json.loads(payload)) + FakeConn.send.assert_called_once_with(b'ACK') + + # second run, with bad json data received + _wrap_context.side_effect = [FSS_bad_json] + agent.mgr_listener.stop = False + FakeConn.send = mock.Mock(return_value=None) + agent.mgr_listener.run() + FakeConn.send.assert_called_once_with(b'Failed to extract json payload from message: Expecting value: line 1 column 1 (char 0)') + + # third run, no proper length as beginning og payload + FSS_no_length = FakeSecureSocket(payload) + 
FSS_no_length.payload = FSS_no_length.payload[10:] + FSS_no_length._conn.payload = FSS_no_length._conn.payload[10:] + FSS_no_length._conn.buffer_len -= 10 + _wrap_context.side_effect = [FSS_no_length] + agent.mgr_listener.stop = False + FakeConn.send = mock.Mock(return_value=None) + agent.mgr_listener.run() + FakeConn.send.assert_called_once_with(b'Failed to extract length of payload from message: invalid literal for int() with base 10: \'{"counter"\'') + + # some exception handling for full coverage + FSS_exc_testing = FakeSecureSocket(payload) + FSS_exc_testing.accept = mock.MagicMock() + + def _accept(*args, **kwargs): + if not FSS_exc_testing.accepted: + FSS_exc_testing.accepted = True + raise socket.timeout() + else: + agent.mgr_listener.stop = True + raise Exception() + + FSS_exc_testing.accept.side_effect = _accept + _wrap_context.side_effect = [FSS_exc_testing] + agent.mgr_listener.stop = False + FakeConn.send = mock.Mock(return_value=None) + agent.mgr_listener.run() + FakeConn.send.assert_not_called() + FSS_exc_testing.accept.call_count == 3 + + +@mock.patch("cephadm.CephadmAgent._get_ls") +def test_gatherer_update_func(_get_ls, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + cephadm_fs.create_dir(AGENT_DIR) + + def _sample_func(): + return 7 + + agent.ls_gatherer.func() + _get_ls.assert_called() + + _get_ls = mock.MagicMock() + agent.ls_gatherer.update_func(_sample_func) + out = agent.ls_gatherer.func() + assert out == 7 + _get_ls.assert_not_called() + + +@mock.patch("cephadm.CephadmAgent.wakeup") +@mock.patch("time.monotonic") +@mock.patch("threading.Event.wait") +def test_gatherer_run(_wait, _time, _agent_wakeup, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + agent = _cephadm.CephadmAgent(ctx, FSID, AGENT_ID) + cephadm_fs.create_dir(AGENT_DIR) + agent.loop_interval = 30 + agent.ack = 23 + + _sample_func = lambda *args, **kwargs: ('sample out', True) + 
agent.ls_gatherer.update_func(_sample_func) + agent.ls_gatherer.ack = 20 + agent.ls_gatherer.stop = False + + def _fake_clear(*args, **kwargs): + agent.ls_gatherer.stop = True + + _time.side_effect = [0, 20, 0, 20, 0, 20] # start at time 0, complete at time 20 + _wait.return_value = None + + with mock.patch("threading.Event.clear") as _clear: + _clear.side_effect = _fake_clear + agent.ls_gatherer.run() + + _wait.assert_called_with(10) # agent loop_interval - run time + assert agent.ls_gatherer.data == 'sample out' + assert agent.ls_gatherer.ack == 23 + _agent_wakeup.assert_called_once() + _clear.assert_called_once() + + _exc_func = lambda *args, **kwargs: Exception() + agent.ls_gatherer.update_func(_exc_func) + agent.ls_gatherer.ack = 20 + agent.ls_gatherer.stop = False + + with mock.patch("threading.Event.clear") as _clear: + _clear.side_effect = _fake_clear + agent.ls_gatherer.run() + assert agent.ls_gatherer.data is None + assert agent.ls_gatherer.ack == agent.ack + # should have run full loop despite exception + _clear.assert_called_once() + + # test general exception for full coverage + _agent_wakeup.side_effect = [Exception()] + agent.ls_gatherer.update_func(_sample_func) + agent.ls_gatherer.stop = False + # just to force only one iteration + _time.side_effect = _fake_clear + with mock.patch("threading.Event.clear") as _clear: + _clear.side_effect = Exception() + agent.ls_gatherer.run() + assert agent.ls_gatherer.data == 'sample out' + assert agent.ls_gatherer.ack == agent.ack + # should not have gotten to end of loop + _clear.assert_not_called() + + +@mock.patch("cephadm.CephadmAgent.run") +def test_command_agent(_agent_run, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + ctx.fsid = FSID + ctx.daemon_id = AGENT_ID + + with pytest.raises(Exception, match=f"Agent daemon directory {AGENT_DIR} does not exist. 
Perhaps agent was never deployed?"): + _cephadm.command_agent(ctx) + + cephadm_fs.create_dir(AGENT_DIR) + _cephadm.command_agent(ctx) + _agent_run.assert_called() diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py new file mode 100644 index 000000000..d310215f6 --- /dev/null +++ b/src/cephadm/tests/test_cephadm.py @@ -0,0 +1,2708 @@ +# type: ignore + +import errno +import json +import mock +import os +import pytest +import socket +import unittest +from textwrap import dedent + +from .fixtures import ( + cephadm_fs, + mock_docker, + mock_podman, + with_cephadm_ctx, + mock_bad_firewalld, + import_cephadm, +) + +from pyfakefs import fake_filesystem +from pyfakefs import fake_filesystem_unittest + +_cephadm = import_cephadm() + + +def get_ceph_conf( + fsid='00000000-0000-0000-0000-0000deadbeef', + mon_host='[v2:192.168.1.1:3300/0,v1:192.168.1.1:6789/0]'): + return f''' +# minimal ceph.conf for {fsid} +[global] + fsid = {fsid} + mon_host = {mon_host} +''' + +class TestCephAdm(object): + + def test_docker_unit_file(self): + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_docker() + r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') + assert 'Requires=docker.service' in r + ctx.container_engine = mock_podman() + r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') + assert 'Requires=docker.service' not in r + + @mock.patch('cephadm.logger') + def test_attempt_bind(self, _logger): + ctx = None + address = None + port = 0 + + def os_error(errno): + _os_error = OSError() + _os_error.errno = errno + return _os_error + + for side_effect, expected_exception in ( + (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError), + (os_error(errno.EAFNOSUPPORT), OSError), + (os_error(errno.EADDRNOTAVAIL), OSError), + (None, None), + ): + _socket = mock.Mock() + _socket.bind.side_effect = side_effect + try: + _cephadm.attempt_bind(ctx, _socket, address, port) + except Exception as e: + assert 
isinstance(e, expected_exception) + else: + if expected_exception is not None: + assert False + + @mock.patch('cephadm.attempt_bind') + @mock.patch('cephadm.logger') + def test_port_in_use(self, _logger, _attempt_bind): + empty_ctx = None + + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False + + _attempt_bind.side_effect = _cephadm.PortOccupiedError('msg') + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == True + + os_error = OSError() + os_error.errno = errno.EADDRNOTAVAIL + _attempt_bind.side_effect = os_error + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False + + os_error = OSError() + os_error.errno = errno.EAFNOSUPPORT + _attempt_bind.side_effect = os_error + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('0.0.0.0', 9100)) == False + + @mock.patch('cephadm.socket.socket.bind') + @mock.patch('cephadm.logger') + def test_port_in_use_special_cases(self, _logger, _bind): + # port_in_use has special handling for + # EAFNOSUPPORT and EADDRNOTAVAIL errno OSErrors. 
+ # If we get those specific errors when attempting + # to bind to the ip:port we should not say the + # port is in use + + def os_error(errno): + _os_error = OSError() + _os_error.errno = errno + return _os_error + + _bind.side_effect = os_error(errno.EADDRNOTAVAIL) + in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000)) + assert in_use == False + + _bind.side_effect = os_error(errno.EAFNOSUPPORT) + in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000)) + assert in_use == False + + # this time, have it raise the actual port taken error + # so it should report the port is in use + _bind.side_effect = os_error(errno.EADDRINUSE) + in_use = _cephadm.port_in_use(None, _cephadm.EndPoint('1.2.3.4', 10000)) + assert in_use == True + + @mock.patch('cephadm.attempt_bind') + @mock.patch('cephadm.logger') + def test_port_in_use_with_specific_ips(self, _logger, _attempt_bind): + empty_ctx = None + + def _fake_attempt_bind(ctx, s: socket.socket, addr: str, port: int) -> None: + occupied_error = _cephadm.PortOccupiedError('msg') + if addr.startswith('200'): + raise occupied_error + if addr.startswith('100'): + if port == 4567: + raise occupied_error + + _attempt_bind.side_effect = _fake_attempt_bind + + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('200.0.0.0', 9100)) == True + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 9100)) == False + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('100.0.0.0', 4567)) == True + assert _cephadm.port_in_use(empty_ctx, _cephadm.EndPoint('155.0.0.0', 4567)) == False + + @mock.patch('socket.socket') + @mock.patch('cephadm.logger') + def test_check_ip_port_success(self, _logger, _socket): + ctx = _cephadm.CephadmContext() + ctx.skip_ping_check = False # enables executing port check with `check_ip_port` + + for address, address_family in ( + ('0.0.0.0', socket.AF_INET), + ('::', socket.AF_INET6), + ): + try: + _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 
9100)) + except: + assert False + else: + assert _socket.call_args == mock.call(address_family, socket.SOCK_STREAM) + + @mock.patch('socket.socket') + @mock.patch('cephadm.logger') + def test_check_ip_port_failure(self, _logger, _socket): + ctx = _cephadm.CephadmContext() + ctx.skip_ping_check = False # enables executing port check with `check_ip_port` + + def os_error(errno): + _os_error = OSError() + _os_error.errno = errno + return _os_error + + for address, address_family in ( + ('0.0.0.0', socket.AF_INET), + ('::', socket.AF_INET6), + ): + for side_effect, expected_exception in ( + (os_error(errno.EADDRINUSE), _cephadm.PortOccupiedError), + (os_error(errno.EADDRNOTAVAIL), OSError), + (os_error(errno.EAFNOSUPPORT), OSError), + (None, None), + ): + mock_socket_obj = mock.Mock() + mock_socket_obj.bind.side_effect = side_effect + _socket.return_value = mock_socket_obj + try: + _cephadm.check_ip_port(ctx, _cephadm.EndPoint(address, 9100)) + except Exception as e: + assert isinstance(e, expected_exception) + else: + if side_effect is not None: + assert False + + + def test_is_not_fsid(self): + assert not _cephadm.is_fsid('no-uuid') + + def test_is_fsid(self): + assert _cephadm.is_fsid('e863154d-33c7-4350-bca5-921e0467e55b') + + def test__get_parser_image(self): + args = _cephadm._parse_args(['--image', 'foo', 'version']) + assert args.image == 'foo' + + def test_check_required_global_args(self): + ctx = _cephadm.CephadmContext() + mock_fn = mock.Mock() + mock_fn.return_value = 0 + require_image = _cephadm.require_image(mock_fn) + + with pytest.raises(_cephadm.Error, match='This command requires the global --image option to be set'): + require_image(ctx) + + ctx.image = 'sample-image' + require_image(ctx) + + @mock.patch('cephadm.logger') + def test_parse_mem_usage(self, _logger): + len, summary = _cephadm._parse_mem_usage(0, 'c6290e3f1489,-- / --') + assert summary == {} + + def test_CustomValidation(self): + assert _cephadm._parse_args(['deploy', '--name', 'mon.a', 
'--fsid', 'fsid']) + + with pytest.raises(SystemExit): + _cephadm._parse_args(['deploy', '--name', 'wrong', '--fsid', 'fsid']) + + @pytest.mark.parametrize("test_input, expected", [ + ("1.6.2", (1,6,2)), + ("1.6.2-stable2", (1,6,2)), + ]) + def test_parse_podman_version(self, test_input, expected): + assert _cephadm._parse_podman_version(test_input) == expected + + def test_parse_podman_version_invalid(self): + with pytest.raises(ValueError) as res: + _cephadm._parse_podman_version('inval.id') + assert 'inval' in str(res.value) + + @mock.patch('cephadm.logger') + def test_is_ipv6(self, _logger): + for good in ("[::1]", "::1", + "fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"): + assert _cephadm.is_ipv6(good) + for bad in ("127.0.0.1", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg", + "1:2:3:4:5:6:7:8:9", "fd00::1::1", "[fg::1]"): + assert not _cephadm.is_ipv6(bad) + + def test_unwrap_ipv6(self): + def unwrap_test(address, expected): + assert _cephadm.unwrap_ipv6(address) == expected + + tests = [ + ('::1', '::1'), ('[::1]', '::1'), + ('[fde4:8dba:82e1:0:5054:ff:fe6a:357]', 'fde4:8dba:82e1:0:5054:ff:fe6a:357'), + ('can actually be any string', 'can actually be any string'), + ('[but needs to be stripped] ', '[but needs to be stripped] ')] + for address, expected in tests: + unwrap_test(address, expected) + + def test_wrap_ipv6(self): + def wrap_test(address, expected): + assert _cephadm.wrap_ipv6(address) == expected + + tests = [ + ('::1', '[::1]'), ('[::1]', '[::1]'), + ('fde4:8dba:82e1:0:5054:ff:fe6a:357', + '[fde4:8dba:82e1:0:5054:ff:fe6a:357]'), + ('myhost.example.com', 'myhost.example.com'), + ('192.168.0.1', '192.168.0.1'), + ('', ''), ('fd00::1::1', 'fd00::1::1')] + for address, expected in tests: + wrap_test(address, expected) + + @mock.patch('cephadm.Firewalld', mock_bad_firewalld) + @mock.patch('cephadm.logger') + def test_skip_firewalld(self, _logger, cephadm_fs): + """ + test --skip-firewalld actually skips changing firewall + """ + + ctx = 
_cephadm.CephadmContext() + with pytest.raises(Exception): + _cephadm.update_firewalld(ctx, 'mon') + + ctx.skip_firewalld = True + _cephadm.update_firewalld(ctx, 'mon') + + ctx.skip_firewalld = False + with pytest.raises(Exception): + _cephadm.update_firewalld(ctx, 'mon') + + ctx = _cephadm.CephadmContext() + ctx.ssl_dashboard_port = 8888 + ctx.dashboard_key = None + ctx.dashboard_password_noupdate = True + ctx.initial_dashboard_password = 'password' + ctx.initial_dashboard_user = 'User' + with pytest.raises(Exception): + _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None) + + ctx.skip_firewalld = True + _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None) + + ctx.skip_firewalld = False + with pytest.raises(Exception): + _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None) + + @mock.patch('cephadm.logger') + @mock.patch('cephadm.fetch_custom_config_files') + @mock.patch('cephadm.get_container') + def test_get_deployment_container(self, _get_container, _get_config, _logger): + """ + test get_deployment_container properly makes use of extra container args and custom conf files + """ + + ctx = _cephadm.CephadmContext() + ctx.config_json = '-' + ctx.extra_container_args = [ + '--pids-limit=12345', + '--something', + ] + ctx.data_dir = 'data' + _get_config.return_value = [ + { + 'mount_path': '/etc/testing.str', + 'content': 'this\nis\na\nstring', + } + ] + _get_container.return_value = _cephadm.CephContainer.for_daemon( + ctx, + fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9', + daemon_type='grafana', + daemon_id='host1', + entrypoint='', + args=[], + container_args=[], + volume_mounts={}, + bind_mounts=[], + envs=[], + privileged=False, + ptrace=False, + host_network=True, + ) + c = _cephadm.get_deployment_container(ctx, + '9b9d7609-f4d5-4aba-94c8-effa764d96c9', + 'grafana', + 'host1',) + + assert '--pids-limit=12345' in c.container_args 
+ assert '--something' in c.container_args + assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts + assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str' + + @mock.patch('cephadm.logger') + @mock.patch('cephadm.FileLock') + @mock.patch('cephadm.deploy_daemon') + @mock.patch('cephadm.fetch_configs') + @mock.patch('cephadm.make_var_run') + @mock.patch('cephadm.migrate_sysctl_dir') + @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None)) + @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name') + @mock.patch('cephadm.get_deployment_container') + @mock.patch('cephadm.read_configuration_source', lambda c: {}) + @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None) + @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: ('ceph', 'ceph')) + def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _fetch_configs, _deploy_daemon, _file_lock, _logger): + """ + test that crush location for mon is set if it is included in config_json + """ + + ctx = _cephadm.CephadmContext() + ctx.name = 'mon.test' + ctx.fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9' + ctx.reconfig = False + ctx.container_engine = mock_docker() + ctx.allow_ptrace = True + ctx.config_json = '-' + ctx.osd_fsid = '0' + ctx.tcp_ports = '3300 6789' + _fetch_configs.return_value = { + 'crush_location': 'database=a' + } + + _get_deployment_container.return_value = _cephadm.CephContainer.for_daemon( + ctx, + fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9', + daemon_type='mon', + daemon_id='test', + entrypoint='', + args=[], + container_args=[], + volume_mounts={}, + bind_mounts=[], + envs=[], + privileged=False, + ptrace=False, + host_network=True, + ) + + def _crush_location_checker(ctx, fsid, daemon_type, daemon_id, 
container, uid, gid, **kwargs): + print(container.args) + raise Exception(' '.join(container.args)) + + _deploy_daemon.side_effect = _crush_location_checker + + with pytest.raises(Exception, match='--set-crush-location database=a'): + _cephadm.command_deploy_from(ctx) + + @mock.patch('cephadm.logger') + @mock.patch('cephadm.fetch_custom_config_files') + def test_write_custom_conf_files(self, _get_config, _logger, cephadm_fs): + """ + test _write_custom_conf_files writes the conf files correctly + """ + + ctx = _cephadm.CephadmContext() + ctx.config_json = '-' + ctx.data_dir = _cephadm.DATA_DIR + _get_config.return_value = [ + { + 'mount_path': '/etc/testing.str', + 'content': 'this\nis\na\nstring', + }, + { + 'mount_path': '/etc/testing.conf', + 'content': 'very_cool_conf_setting: very_cool_conf_value\nx: y', + }, + { + 'mount_path': '/etc/no-content.conf', + }, + ] + _cephadm._write_custom_conf_files(ctx, 'mon', 'host1', 'fsid', 0, 0) + with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.str'), 'r') as f: + assert 'this\nis\na\nstring' == f.read() + with open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'testing.conf'), 'r') as f: + assert 'very_cool_conf_setting: very_cool_conf_value\nx: y' == f.read() + with pytest.raises(FileNotFoundError): + open(os.path.join(_cephadm.DATA_DIR, 'fsid', 'custom_config_files', 'mon.host1', 'no-content.conf'), 'r') + + @mock.patch('cephadm.call_throws') + @mock.patch('cephadm.get_parm') + @mock.patch('cephadm.logger') + def test_registry_login(self, _logger, _get_parm, _call_throws): + # test normal valid login with url, username and password specified + _call_throws.return_value = '', '', 0 + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['registry-login', '--registry-url', 'sample-url', + '--registry-username', 'sample-user', '--registry-password', + 'sample-pass']) + ctx.container_engine = mock_docker() + retval = 
_cephadm.command_registry_login(ctx) + assert retval == 0 + + # test bad login attempt with invalid arguments given + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['registry-login', '--registry-url', 'bad-args-url']) + with pytest.raises(Exception) as e: + assert _cephadm.command_registry_login(ctx) + assert str(e.value) == ('Invalid custom registry arguments received. To login to a custom registry include ' + '--registry-url, --registry-username and --registry-password options or --registry-json option') + + # test normal valid login with json file + _get_parm.return_value = {"url": "sample-url", "username": "sample-username", "password": "sample-password"} + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['registry-login', '--registry-json', 'sample-json']) + ctx.container_engine = mock_docker() + retval = _cephadm.command_registry_login(ctx) + assert retval == 0 + + # test bad login attempt with bad json file + _get_parm.return_value = {"bad-json": "bad-json"} + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['registry-login', '--registry-json', 'sample-json']) + with pytest.raises(Exception) as e: + assert _cephadm.command_registry_login(ctx) + assert str(e.value) == ("json provided for custom registry login did not include all necessary fields. 
" + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + + # test login attempt with valid arguments where login command fails + _call_throws.side_effect = Exception + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['registry-login', '--registry-url', 'sample-url', + '--registry-username', 'sample-user', '--registry-password', + 'sample-pass']) + with pytest.raises(Exception) as e: + _cephadm.command_registry_login(ctx) + assert str(e.value) == "Failed to login to custom registry @ sample-url as sample-user with given password" + + def test_get_image_info_from_inspect(self): + # podman + out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]""" + r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest') + print(r) + assert r == { + 'image_id': '204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1', + 'repo_digests': ['docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992'] + } + + # docker + out = """sha256:16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552,[quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f]""" + r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest') + assert r == { + 'image_id': '16f4549cf7a8f112bbebf7946749e961fbbd1b0838627fe619aab16bc17ce552', + 'repo_digests': ['quay.ceph.io/ceph-ci/ceph@sha256:4e13da36c1bd6780b312a985410ae678984c37e6a9493a74c87e4a50b9bda41f'] + } + + # multiple digests (podman) + out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 
docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]""" + r = _cephadm.get_image_info_from_inspect(out, 'registry/prom/prometheus:latest') + assert r == { + 'image_id': 'e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42', + 'repo_digests': [ + 'docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4', + 'docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a', + ] + } + + + def test_dict_get(self): + result = _cephadm.dict_get({'a': 1}, 'a', require=True) + assert result == 1 + result = _cephadm.dict_get({'a': 1}, 'b') + assert result is None + result = _cephadm.dict_get({'a': 1}, 'b', default=2) + assert result == 2 + + def test_dict_get_error(self): + with pytest.raises(_cephadm.Error): + _cephadm.dict_get({'a': 1}, 'b', require=True) + + def test_dict_get_join(self): + result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo') + assert result == 'a\nb' + result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo') + assert result == '1\n2' + result = _cephadm.dict_get_join({'bar': 'a'}, 'bar') + assert result == 'a' + result = _cephadm.dict_get_join({'a': 1}, 'a') + assert result == 1 + + @mock.patch('os.listdir', return_value=[]) + @mock.patch('cephadm.logger') + def test_infer_local_ceph_image(self, _logger, _listdir): + ctx = _cephadm.CephadmContext() + ctx.fsid = '00000000-0000-0000-0000-0000deadbeez' + ctx.container_engine = mock_podman() + + # make sure the right image is selected when container is found + cinfo = _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972', + 'registry.hub.docker.com/rkachach/ceph:custom-v0.5', + '514e6a882f6e74806a5856468489eeff8d7106095557578da96935e4d0ba4d9d', + '2022-04-19 13:45:20.97146228 +0000 UTC', + '') + out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 
16:29:19 +0000 UTC + quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC + docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + with mock.patch('cephadm.call_throws', return_value=(out, '', '')): + with mock.patch('cephadm.get_container_info', return_value=cinfo): + image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine) + assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e' + + # make sure first valid image is used when no container_info is found + out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC + quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC + docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + with mock.patch('cephadm.call_throws', return_value=(out, '', '')): + with mock.patch('cephadm.get_container_info', return_value=None): + image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine) + assert image == 'quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185' + + # make sure images without digest are discarded (no container_info is found) + out = '''quay.ceph.io/ceph-ci/ceph@||| + docker.io/ceph/ceph@||| + docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + with mock.patch('cephadm.call_throws', return_value=(out, '', '')): + with mock.patch('cephadm.get_container_info', return_value=None): + image = _cephadm.infer_local_ceph_image(ctx, 
ctx.container_engine) + assert image == 'docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508' + + + + @pytest.mark.parametrize('daemon_filter, by_name, daemon_list, container_stats, output', + [ + # get container info by type ('mon') + ( + 'mon', + False, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972', + 'registry.hub.docker.com/rkachach/ceph:custom-v0.5', + '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4', + '2022-04-19 13:45:20.97146228 +0000 UTC', + '') + ), + # get container info by name ('mon.ceph-node-0') + ( + 'mon.ceph-node-0', + True, + [ + {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972', + 'registry.hub.docker.com/rkachach/ceph:custom-v0.5', + '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4', + '2022-04-19 13:45:20.97146228 +0000 UTC', + '') + ), + # get container info by name (same daemon but two different fsids) + ( + 'mon.ceph-node-0', + True, + [ + {'name': 'mon.ceph-node-0', 'fsid': '10000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, 
+ ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + _cephadm.ContainerInfo('935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972', + 'registry.hub.docker.com/rkachach/ceph:custom-v0.5', + '666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4', + '2022-04-19 13:45:20.97146228 +0000 UTC', + '') + ), + # get container info by type (bad container stats: 127 code) + ( + 'mon', + False, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-FFFF-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("", + "", + 127), + None + ), + # get container info by name (bad container stats: 127 code) + ( + 'mon.ceph-node-0', + True, + [ + {'name': 'mgr.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("", + "", + 127), + None + ), + # get container info by invalid name (doens't contain '.') + ( + 'mon-ceph-node-0', + True, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + None + ), + # get container info by invalid name (empty) + ( + '', + True, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + 
("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + None + ), + # get container info by invalid type (empty) + ( + '', + False, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + None + ), + # get container info by name: no match (invalid fsid) + ( + 'mon', + False, + [ + {'name': 'mon.ceph-node-0', 'fsid': '00000000-1111-0000-0000-0000deadbeef'}, + {'name': 'mon.ceph-node-0', 'fsid': '00000000-2222-0000-0000-0000deadbeef'}, + ], + ("935b549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972,registry.hub.docker.com/rkachach/ceph:custom-v0.5,666bbfa87e8df05702d6172cae11dd7bc48efb1d94f1b9e492952f19647199a4,2022-04-19 13:45:20.97146228 +0000 UTC,", + "", + 0), + None + ), + # get container info by name: no match + ( + 'mon.ceph-node-0', + True, + [], + None, + None + ), + # get container info by type: no match + ( + 'mgr', + False, + [], + None, + None + ), + ]) + @mock.patch('cephadm.logger') + def test_get_container_info(self, _logger, daemon_filter, by_name, daemon_list, container_stats, output): + ctx = _cephadm.CephadmContext() + ctx.fsid = '00000000-0000-0000-0000-0000deadbeef' + ctx.container_engine = mock_podman() + with mock.patch('cephadm.list_daemons', return_value=daemon_list): + with mock.patch('cephadm.get_container_stats', return_value=container_stats): + assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output + + def test_should_log_to_journald(self): + ctx = _cephadm.CephadmContext() + # explicit 
+ ctx.log_to_journald = True + assert _cephadm.should_log_to_journald(ctx) + + ctx.log_to_journald = None + # enable if podman support --cgroup=split + ctx.container_engine = mock_podman() + ctx.container_engine.version = (2, 1, 0) + assert _cephadm.should_log_to_journald(ctx) + + # disable on old podman + ctx.container_engine.version = (2, 0, 0) + assert not _cephadm.should_log_to_journald(ctx) + + # disable on docker + ctx.container_engine = mock_docker() + assert not _cephadm.should_log_to_journald(ctx) + + def test_normalize_image_digest(self): + s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1' + assert _cephadm.normalize_image_digest(s) == s + + s = 'ceph/ceph:latest' + assert _cephadm.normalize_image_digest(s) == f'{_cephadm.DEFAULT_REGISTRY}/{s}' + + @pytest.mark.parametrize('fsid, ceph_conf, list_daemons, result, err, ', + [ + ( + None, + None, + [], + None, + None, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + [], + '00000000-0000-0000-0000-0000deadbeef', + None, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + [ + {'fsid': '10000000-0000-0000-0000-0000deadbeef'}, + {'fsid': '20000000-0000-0000-0000-0000deadbeef'}, + ], + '00000000-0000-0000-0000-0000deadbeef', + None, + ), + ( + None, + None, + [ + {'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + '00000000-0000-0000-0000-0000deadbeef', + None, + ), + ( + None, + None, + [ + {'fsid': '10000000-0000-0000-0000-0000deadbeef'}, + {'fsid': '20000000-0000-0000-0000-0000deadbeef'}, + ], + None, + r'Cannot infer an fsid', + ), + ( + None, + get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'), + [], + '00000000-0000-0000-0000-0000deadbeef', + None, + ), + ( + None, + get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'), + [ + {'fsid': '00000000-0000-0000-0000-0000deadbeef'}, + ], + '00000000-0000-0000-0000-0000deadbeef', + None, + ), + ( + None, + get_ceph_conf(fsid='00000000-0000-0000-0000-0000deadbeef'), + [ + 
{'fsid': '10000000-0000-0000-0000-0000deadbeef'}, + {'fsid': '20000000-0000-0000-0000-0000deadbeef'}, + ], + None, + r'Cannot infer an fsid', + ), + ]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + def test_infer_fsid(self, _logger, _call, fsid, ceph_conf, list_daemons, result, err, cephadm_fs): + # build the context + ctx = _cephadm.CephadmContext() + ctx.fsid = fsid + + # mock the decorator + mock_fn = mock.Mock() + mock_fn.return_value = 0 + infer_fsid = _cephadm.infer_fsid(mock_fn) + + # mock the ceph.conf file content + if ceph_conf: + f = cephadm_fs.create_file('ceph.conf', contents=ceph_conf) + ctx.config = f.path + + # test + with mock.patch('cephadm.list_daemons', return_value=list_daemons): + if err: + with pytest.raises(_cephadm.Error, match=err): + infer_fsid(ctx) + else: + infer_fsid(ctx) + assert ctx.fsid == result + + @pytest.mark.parametrize('fsid, other_conf_files, config, name, list_daemons, result, ', + [ + # per cluster conf has more precedence than default conf + ( + '00000000-0000-0000-0000-0000deadbeef', + [_cephadm.CEPH_DEFAULT_CONF], + None, + None, + [], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf', + ), + # mon daemon conf has more precedence than cluster conf and default conf + ( + '00000000-0000-0000-0000-0000deadbeef', + ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf', + _cephadm.CEPH_DEFAULT_CONF], + None, + None, + [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config', + ), + # daemon conf (--name option) has more precedence than cluster, default and mon conf + ( + '00000000-0000-0000-0000-0000deadbeef', + ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf', + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config', + _cephadm.CEPH_DEFAULT_CONF], + None, + 'osd.0', + [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 
'style': 'cephadm:v1'}, + {'name': 'osd.0', 'fsid': '00000000-0000-0000-0000-0000deadbeef'}], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config', + ), + # user provided conf ('/foo/ceph.conf') more precedence than any other conf + ( + '00000000-0000-0000-0000-0000deadbeef', + ['/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf', + _cephadm.CEPH_DEFAULT_CONF, + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config'], + '/foo/ceph.conf', + None, + [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}], + '/foo/ceph.conf', + ), + ]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + def test_infer_config_precedence(self, _logger, _call, other_conf_files, fsid, config, name, list_daemons, result, cephadm_fs): + # build the context + ctx = _cephadm.CephadmContext() + ctx.fsid = fsid + ctx.config = config + ctx.name = name + + # mock the decorator + mock_fn = mock.Mock() + mock_fn.return_value = 0 + infer_config = _cephadm.infer_config(mock_fn) + + # mock the config file + cephadm_fs.create_file(result) + + # mock other potential config files + for f in other_conf_files: + cephadm_fs.create_file(f) + + # test + with mock.patch('cephadm.list_daemons', return_value=list_daemons): + infer_config(ctx) + assert ctx.config == result + + @pytest.mark.parametrize('fsid, config, name, list_daemons, result, ', + [ + ( + None, + '/foo/bar.conf', + None, + [], + '/foo/bar.conf', + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + None, + [], + _cephadm.CEPH_DEFAULT_CONF, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + None, + [], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/config/ceph.conf', + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + None, + [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'cephadm:v1'}], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config', + ), + ( + 
'00000000-0000-0000-0000-0000deadbeef', + None, + None, + [{'name': 'mon.a', 'fsid': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'style': 'cephadm:v1'}], + _cephadm.CEPH_DEFAULT_CONF, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + None, + [{'name': 'mon.a', 'fsid': '00000000-0000-0000-0000-0000deadbeef', 'style': 'legacy'}], + _cephadm.CEPH_DEFAULT_CONF, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + None, + [{'name': 'osd.0'}], + _cephadm.CEPH_DEFAULT_CONF, + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + '/foo/bar.conf', + 'mon.a', + [{'name': 'mon.a', 'style': 'cephadm:v1'}], + '/foo/bar.conf', + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + 'mon.a', + [], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/mon.a/config', + ), + ( + '00000000-0000-0000-0000-0000deadbeef', + None, + 'osd.0', + [], + '/var/lib/ceph/00000000-0000-0000-0000-0000deadbeef/osd.0/config', + ), + ( + None, + None, + None, + [], + _cephadm.CEPH_DEFAULT_CONF, + ), + ]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + def test_infer_config(self, _logger, _call, fsid, config, name, list_daemons, result, cephadm_fs): + # build the context + ctx = _cephadm.CephadmContext() + ctx.fsid = fsid + ctx.config = config + ctx.name = name + + # mock the decorator + mock_fn = mock.Mock() + mock_fn.return_value = 0 + infer_config = _cephadm.infer_config(mock_fn) + + # mock the config file + cephadm_fs.create_file(result) + + # test + with mock.patch('cephadm.list_daemons', return_value=list_daemons): + infer_config(ctx) + assert ctx.config == result + + @mock.patch('cephadm.call') + def test_extract_uid_gid_fail(self, _call): + err = """Error: container_linux.go:370: starting container process caused: process_linux.go:459: container init caused: process_linux.go:422: setting cgroup config for procHooks process caused: Unit libpod-056038e1126191fba41d8a037275136f2d7aeec9710b9ee +ff792c06d8544b983.scope not found.: OCI runtime error""" + 
_call.return_value = ('', err, 127) + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_podman() + with pytest.raises(_cephadm.Error, match='OCI'): + _cephadm.extract_uid_gid(ctx) + + @pytest.mark.parametrize('test_input, expected', [ + ([_cephadm.make_fsid(), _cephadm.make_fsid(), _cephadm.make_fsid()], 3), + ([_cephadm.make_fsid(), 'invalid-fsid', _cephadm.make_fsid(), '0b87e50c-8e77-11ec-b890-'], 2), + (['f6860ec2-8e76-11ec-', '0b87e50c-8e77-11ec-b890-', ''], 0), + ([], 0), + ]) + def test_get_ceph_cluster_count(self, test_input, expected): + ctx = _cephadm.CephadmContext() + with mock.patch('os.listdir', return_value=test_input): + assert _cephadm.get_ceph_cluster_count(ctx) == expected + + def test_set_image_minimize_config(self): + def throw_cmd(cmd): + raise _cephadm.Error(' '.join(cmd)) + ctx = _cephadm.CephadmContext() + ctx.image = 'test_image' + ctx.no_minimize_config = True + fake_cli = lambda cmd, __=None, ___=None: throw_cmd(cmd) + with pytest.raises(_cephadm.Error, match='config set global container_image test_image'): + _cephadm.finish_bootstrap_config( + ctx=ctx, + fsid=_cephadm.make_fsid(), + config='', + mon_id='a', mon_dir='mon_dir', + mon_network=None, ipv6=False, + cli=fake_cli, + cluster_network=None, + ipv6_cluster_network=False + ) + + +class TestCustomContainer(unittest.TestCase): + cc: _cephadm.CustomContainer + + def setUp(self): + self.cc = _cephadm.CustomContainer( + 'e863154d-33c7-4350-bca5-921e0467e55b', + 'container', + config_json={ + 'entrypoint': 'bash', + 'gid': 1000, + 'args': [ + '--no-healthcheck', + '-p 6800:6800' + ], + 'envs': ['SECRET=password'], + 'ports': [8080, 8443], + 'volume_mounts': { + '/CONFIG_DIR': '/foo/conf', + 'bar/config': '/bar:ro' + }, + 'bind_mounts': [ + [ + 'type=bind', + 'source=/CONFIG_DIR', + 'destination=/foo/conf', + '' + ], + [ + 'type=bind', + 'source=bar/config', + 'destination=/bar:ro', + 'ro=true' + ] + ] + }, + image='docker.io/library/hello-world:latest' + ) + + def 
test_entrypoint(self): + self.assertEqual(self.cc.entrypoint, 'bash') + + def test_uid_gid(self): + self.assertEqual(self.cc.uid, 65534) + self.assertEqual(self.cc.gid, 1000) + + def test_ports(self): + self.assertEqual(self.cc.ports, [8080, 8443]) + + def test_get_container_args(self): + result = self.cc.get_container_args() + self.assertEqual(result, [ + '--no-healthcheck', + '-p 6800:6800' + ]) + + def test_get_container_envs(self): + result = self.cc.get_container_envs() + self.assertEqual(result, ['SECRET=password']) + + def test_get_container_mounts(self): + result = self.cc.get_container_mounts('/xyz') + self.assertDictEqual(result, { + '/CONFIG_DIR': '/foo/conf', + '/xyz/bar/config': '/bar:ro' + }) + + def test_get_container_binds(self): + result = self.cc.get_container_binds('/xyz') + self.assertEqual(result, [ + [ + 'type=bind', + 'source=/CONFIG_DIR', + 'destination=/foo/conf', + '' + ], + [ + 'type=bind', + 'source=/xyz/bar/config', + 'destination=/bar:ro', + 'ro=true' + ] + ]) + + +class TestMaintenance: + systemd_target = "ceph.00000000-0000-0000-0000-000000c0ffee.target" + fsid = '0ea8cdd0-1bbf-11ec-a9c7-5254002763fa' + + def test_systemd_target_OK(self, tmp_path): + base = tmp_path + wants = base / "ceph.target.wants" + wants.mkdir() + target = wants / TestMaintenance.systemd_target + target.touch() + ctx = _cephadm.CephadmContext() + ctx.unit_dir = str(base) + + assert _cephadm.systemd_target_state(ctx, target.name) + + def test_systemd_target_NOTOK(self, tmp_path): + base = tmp_path + ctx = _cephadm.CephadmContext() + ctx.unit_dir = str(base) + assert not _cephadm.systemd_target_state(ctx, TestMaintenance.systemd_target) + + def test_parser_OK(self): + args = _cephadm._parse_args(['host-maintenance', 'enter']) + assert args.maintenance_action == 'enter' + + def test_parser_BAD(self): + with pytest.raises(SystemExit): + _cephadm._parse_args(['host-maintenance', 'wah']) + + @mock.patch('os.listdir', return_value=[]) + @mock.patch('cephadm.call') + 
@mock.patch('cephadm.logger') + @mock.patch('cephadm.systemd_target_state') + def test_enter_failure_1(self, _target_state, _logger, _call, _listdir): + _call.return_value = '', '', 999 + _target_state.return_value = True + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid]) + ctx.container_engine = mock_podman() + retval = _cephadm.command_maintenance(ctx) + assert retval.startswith('failed') + + @mock.patch('os.listdir', return_value=[]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + @mock.patch('cephadm.systemd_target_state') + def test_enter_failure_2(self, _target_state, _logger, _call, _listdir): + _call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)] + _target_state.return_value = True + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid]) + ctx.container_engine = mock_podman() + retval = _cephadm.command_maintenance(ctx) + assert retval.startswith('failed') + + @mock.patch('os.listdir', return_value=[]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + @mock.patch('cephadm.systemd_target_state') + @mock.patch('cephadm.target_exists') + def test_exit_failure_1(self, _target_exists, _target_state, _logger, _call, _listdir): + _call.return_value = '', '', 999 + _target_state.return_value = False + _target_exists.return_value = True + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid]) + ctx.container_engine = mock_podman() + retval = _cephadm.command_maintenance(ctx) + assert retval.startswith('failed') + + @mock.patch('os.listdir', return_value=[]) + @mock.patch('cephadm.call') + @mock.patch('cephadm.logger') + @mock.patch('cephadm.systemd_target_state') + @mock.patch('cephadm.target_exists') + def test_exit_failure_2(self, _target_exists, _target_state, _logger, _call, _listdir): + 
_call.side_effect = [('', '', 0), ('', '', 999), ('', '', 0), ('', '', 999)] + _target_state.return_value = False + _target_exists.return_value = True + ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx( + ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid]) + ctx.container_engine = mock_podman() + retval = _cephadm.command_maintenance(ctx) + assert retval.startswith('failed') + + +class TestMonitoring(object): + @mock.patch('cephadm.call') + def test_get_version_alertmanager(self, _call): + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_podman() + daemon_type = 'alertmanager' + + # binary `prometheus` + _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0 + version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + assert version == '0.16.1' + + # binary `prometheus-alertmanager` + _call.side_effect = ( + ('', '', 1), + ('', '{}, version 0.16.1'.format(daemon_type), 0), + ) + version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + assert version == '0.16.1' + + @mock.patch('cephadm.call') + def test_get_version_prometheus(self, _call): + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_podman() + daemon_type = 'prometheus' + _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0 + version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + assert version == '0.16.1' + + def test_prometheus_external_url(self): + ctx = _cephadm.CephadmContext() + ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'}) + daemon_type = 'prometheus' + daemon_id = 'home' + fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704' + args = _cephadm.get_daemon_args(ctx, fsid, daemon_type, daemon_id) + assert any([x.startswith('--web.external-url=http://') for x in args]) + + @mock.patch('cephadm.call') + def test_get_version_node_exporter(self, _call): + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_podman() + daemon_type = 'node-exporter' + 
_call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0 + version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + assert version == '0.16.1' + + def test_create_daemon_dirs_prometheus(self, cephadm_fs): + """ + Ensures the required and optional files given in the configuration are + created and mapped correctly inside the container. Tests absolute and + relative file paths given in the configuration. + """ + + fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704' + daemon_type = 'prometheus' + uid, gid = 50, 50 + daemon_id = 'home' + ctx = _cephadm.CephadmContext() + ctx.data_dir = '/somedir' + ctx.config_json = json.dumps({ + 'files': { + 'prometheus.yml': 'foo', + '/etc/prometheus/alerting/ceph_alerts.yml': 'bar' + } + }) + + _cephadm.create_daemon_dirs(ctx, + fsid, + daemon_type, + daemon_id, + uid, + gid, + config=None, + keyring=None) + + prefix = '{data_dir}/{fsid}/{daemon_type}.{daemon_id}'.format( + data_dir=ctx.data_dir, + fsid=fsid, + daemon_type=daemon_type, + daemon_id=daemon_id + ) + + expected = { + 'etc/prometheus/prometheus.yml': 'foo', + 'etc/prometheus/alerting/ceph_alerts.yml': 'bar', + } + + for file,content in expected.items(): + file = os.path.join(prefix, file) + assert os.path.exists(file) + with open(file) as f: + assert f.read() == content + + # assert uid/gid after redeploy + new_uid = uid+1 + new_gid = gid+1 + _cephadm.create_daemon_dirs(ctx, + fsid, + daemon_type, + daemon_id, + new_uid, + new_gid, + config=None, + keyring=None) + for file,content in expected.items(): + file = os.path.join(prefix, file) + assert os.stat(file).st_uid == new_uid + assert os.stat(file).st_gid == new_gid + + +class TestBootstrap(object): + + @staticmethod + def _get_cmd(*args): + return [ + 'bootstrap', + '--allow-mismatched-release', + '--skip-prepare-host', + '--skip-dashboard', + *args, + ] + + +###############################################3 + + def test_config(self, cephadm_fs): + conf_file = 'foo' + cmd = 
self._get_cmd( + '--mon-ip', '192.168.1.1', + '--skip-mon-network', + '--config', conf_file, + ) + + with with_cephadm_ctx(cmd) as ctx: + msg = r'No such file or directory' + with pytest.raises(_cephadm.Error, match=msg): + _cephadm.command_bootstrap(ctx) + + cephadm_fs.create_file(conf_file) + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + def test_no_mon_addr(self, cephadm_fs): + cmd = self._get_cmd() + with with_cephadm_ctx(cmd) as ctx: + msg = r'must specify --mon-ip or --mon-addrv' + with pytest.raises(_cephadm.Error, match=msg): + _cephadm.command_bootstrap(ctx) + + def test_skip_mon_network(self, cephadm_fs): + cmd = self._get_cmd('--mon-ip', '192.168.1.1') + + with with_cephadm_ctx(cmd, list_networks={}) as ctx: + msg = r'--skip-mon-network' + with pytest.raises(_cephadm.Error, match=msg): + _cephadm.command_bootstrap(ctx) + + cmd += ['--skip-mon-network'] + with with_cephadm_ctx(cmd, list_networks={}) as ctx: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + @pytest.mark.parametrize('mon_ip, list_networks, result', + [ + # IPv4 + ( + 'eth0', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + False, + ), + ( + '0.0.0.0', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + False, + ), + ( + '192.168.1.0', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + False, + ), + ( + '192.168.1.1', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + True, + ), + ( + '192.168.1.1:1234', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + True, + ), + ( + '192.168.1.1:0123', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + True, + ), + # IPv6 + ( + '::', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + False, + ), + ( + '::ffff:192.168.1.0', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + False, + ), + ( + '::ffff:192.168.1.1', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + True, + ), + ( + '::ffff:c0a8:101', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + True, + ), + ( + 
'[::ffff:c0a8:101]:1234', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + True, + ), + ( + '[::ffff:c0a8:101]:0123', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + True, + ), + ( + '0000:0000:0000:0000:0000:FFFF:C0A8:0101', + {"ffff::/64": {"eth0": ["::ffff:c0a8:101"]}}, + True, + ), + ]) + def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs): + cmd = self._get_cmd('--mon-ip', mon_ip) + if not result: + with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + msg = r'--skip-mon-network' + with pytest.raises(_cephadm.Error, match=msg): + _cephadm.command_bootstrap(ctx) + else: + with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + @pytest.mark.parametrize('mon_addrv, list_networks, err', + [ + # IPv4 + ( + '192.168.1.1', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + r'must use square brackets', + ), + ( + '[192.168.1.1]', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + r'must include port number', + ), + ( + '[192.168.1.1:1234]', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + None, + ), + ( + '[192.168.1.1:0123]', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + None, + ), + ( + '[v2:192.168.1.1:3300,v1:192.168.1.1:6789]', + {'192.168.1.0/24': {'eth0': ['192.168.1.1']}}, + None, + ), + # IPv6 + ( + '[::ffff:192.168.1.1:1234]', + {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}}, + None, + ), + ( + '[::ffff:192.168.1.1:0123]', + {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}}, + None, + ), + ( + '[0000:0000:0000:0000:0000:FFFF:C0A8:0101:1234]', + {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}}, + None, + ), + ( + '[v2:0000:0000:0000:0000:0000:FFFF:C0A8:0101:3300,v1:0000:0000:0000:0000:0000:FFFF:C0A8:0101:6789]', + {'ffff::/64': {'eth0': ['::ffff:c0a8:101']}}, + None, + ), + ]) + def test_mon_addrv(self, mon_addrv, list_networks, err, cephadm_fs): + cmd = self._get_cmd('--mon-addrv', mon_addrv) + if err: + with with_cephadm_ctx(cmd, list_networks=list_networks) 
as ctx: + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_bootstrap(ctx) + else: + with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + def test_allow_fqdn_hostname(self, cephadm_fs): + hostname = 'foo.bar' + cmd = self._get_cmd( + '--mon-ip', '192.168.1.1', + '--skip-mon-network', + ) + + with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + msg = r'--allow-fqdn-hostname' + with pytest.raises(_cephadm.Error, match=msg): + _cephadm.command_bootstrap(ctx) + + cmd += ['--allow-fqdn-hostname'] + with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + @pytest.mark.parametrize('fsid, err', + [ + ('', None), + ('00000000-0000-0000-0000-0000deadbeef', None), + ('00000000-0000-0000-0000-0000deadbeez', 'not an fsid'), + ]) + def test_fsid(self, fsid, err, cephadm_fs): + cmd = self._get_cmd( + '--mon-ip', '192.168.1.1', + '--skip-mon-network', + '--fsid', fsid, + ) + + with with_cephadm_ctx(cmd) as ctx: + if err: + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_bootstrap(ctx) + else: + retval = _cephadm.command_bootstrap(ctx) + assert retval == 0 + + +class TestShell(object): + + def test_fsid(self, cephadm_fs): + fsid = '00000000-0000-0000-0000-0000deadbeef' + + cmd = ['shell', '--fsid', fsid] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.fsid == fsid + + cmd = ['shell', '--fsid', '00000000-0000-0000-0000-0000deadbeez'] + with with_cephadm_ctx(cmd) as ctx: + err = 'not an fsid' + with pytest.raises(_cephadm.Error, match=err): + retval = _cephadm.command_shell(ctx) + assert retval == 1 + assert ctx.fsid == None + + s = get_ceph_conf(fsid=fsid) + f = cephadm_fs.create_file('ceph.conf', contents=s) + + cmd = ['shell', '--fsid', fsid, '--config', f.path] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + 
assert retval == 0 + assert ctx.fsid == fsid + + cmd = ['shell', '--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path] + with with_cephadm_ctx(cmd) as ctx: + err = 'fsid does not match ceph.conf' + with pytest.raises(_cephadm.Error, match=err): + retval = _cephadm.command_shell(ctx) + assert retval == 1 + assert ctx.fsid == None + + def test_name(self, cephadm_fs): + cmd = ['shell', '--name', 'foo'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + + cmd = ['shell', '--name', 'foo.bar'] + with with_cephadm_ctx(cmd) as ctx: + err = r'must pass --fsid' + with pytest.raises(_cephadm.Error, match=err): + retval = _cephadm.command_shell(ctx) + assert retval == 1 + + fsid = '00000000-0000-0000-0000-0000deadbeef' + cmd = ['shell', '--name', 'foo.bar', '--fsid', fsid] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + + def test_config(self, cephadm_fs): + cmd = ['shell'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.config == None + + cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_CONF) + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.config == _cephadm.CEPH_DEFAULT_CONF + + cmd = ['shell', '--config', 'foo'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.config == 'foo' + + def test_keyring(self, cephadm_fs): + cmd = ['shell'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.keyring == None + + cephadm_fs.create_file(_cephadm.CEPH_DEFAULT_KEYRING) + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert ctx.keyring == _cephadm.CEPH_DEFAULT_KEYRING + + cmd = ['shell', '--keyring', 'foo'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert 
retval == 0 + assert ctx.keyring == 'foo' + + @mock.patch('cephadm.CephContainer') + def test_mount_no_dst(self, _ceph_container, cephadm_fs): + cmd = ['shell', '--mount', '/etc/foo'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/mnt/foo' + + @mock.patch('cephadm.CephContainer') + def test_mount_with_dst_no_opt(self, _ceph_container, cephadm_fs): + cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar' + + @mock.patch('cephadm.CephContainer') + def test_mount_with_dst_and_opt(self, _ceph_container, cephadm_fs): + cmd = ['shell', '--mount', '/etc/foo:/opt/foo/bar:Z'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_shell(ctx) + assert retval == 0 + assert _ceph_container.call_args.kwargs['volume_mounts']['/etc/foo'] == '/opt/foo/bar:Z' + +class TestCephVolume(object): + + @staticmethod + def _get_cmd(*args): + return [ + 'ceph-volume', + *args, + '--', 'inventory', '--format', 'json' + ] + + def test_noop(self, cephadm_fs): + cmd = self._get_cmd() + with with_cephadm_ctx(cmd) as ctx: + _cephadm.command_ceph_volume(ctx) + assert ctx.fsid == None + assert ctx.config == None + assert ctx.keyring == None + assert ctx.config_json == None + + def test_fsid(self, cephadm_fs): + fsid = '00000000-0000-0000-0000-0000deadbeef' + + cmd = self._get_cmd('--fsid', fsid) + with with_cephadm_ctx(cmd) as ctx: + _cephadm.command_ceph_volume(ctx) + assert ctx.fsid == fsid + + cmd = self._get_cmd('--fsid', '00000000-0000-0000-0000-0000deadbeez') + with with_cephadm_ctx(cmd) as ctx: + err = 'not an fsid' + with pytest.raises(_cephadm.Error, match=err): + retval = _cephadm.command_shell(ctx) + assert retval == 1 + assert ctx.fsid == None + + s = 
get_ceph_conf(fsid=fsid) + f = cephadm_fs.create_file('ceph.conf', contents=s) + + cmd = self._get_cmd('--fsid', fsid, '--config', f.path) + with with_cephadm_ctx(cmd) as ctx: + _cephadm.command_ceph_volume(ctx) + assert ctx.fsid == fsid + + cmd = self._get_cmd('--fsid', '10000000-0000-0000-0000-0000deadbeef', '--config', f.path) + with with_cephadm_ctx(cmd) as ctx: + err = 'fsid does not match ceph.conf' + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_ceph_volume(ctx) + assert ctx.fsid == None + + def test_config(self, cephadm_fs): + cmd = self._get_cmd('--config', 'foo') + with with_cephadm_ctx(cmd) as ctx: + err = r'No such file or directory' + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_ceph_volume(ctx) + + cephadm_fs.create_file('bar') + cmd = self._get_cmd('--config', 'bar') + with with_cephadm_ctx(cmd) as ctx: + _cephadm.command_ceph_volume(ctx) + assert ctx.config == 'bar' + + def test_keyring(self, cephadm_fs): + cmd = self._get_cmd('--keyring', 'foo') + with with_cephadm_ctx(cmd) as ctx: + err = r'No such file or directory' + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_ceph_volume(ctx) + + cephadm_fs.create_file('bar') + cmd = self._get_cmd('--keyring', 'bar') + with with_cephadm_ctx(cmd) as ctx: + _cephadm.command_ceph_volume(ctx) + assert ctx.keyring == 'bar' + + +class TestIscsi: + def test_unit_run(self, cephadm_fs): + fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9' + config_json = { + 'files': {'iscsi-gateway.cfg': ''} + } + with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx: + import json + ctx.container_engine = mock_docker() + ctx.config_json = json.dumps(config_json) + ctx.fsid = fsid + _cephadm.get_parm.return_value = config_json + c = _cephadm.get_container(ctx, fsid, 'iscsi', 'daemon_id') + + _cephadm.make_data_dir(ctx, fsid, 'iscsi', 'daemon_id') + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'iscsi', + 'daemon_id', + c, + True, True + ) + + with 
open('/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/unit.run') as f: + assert f.read() == """set -e +if ! grep -qs /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs /proc/mounts; then mount -t configfs none /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs; fi +# iscsi tcmu-runner container +! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id-tcmu 2> /dev/null +! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null +/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph & +# iscsi.daemon_id +! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id 2> /dev/null +! 
/usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id 2> /dev/null +/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph +""" + + def test_get_container(self): + """ + Due to a combination of socket.getfqdn() and podman's behavior to + add the container name into the /etc/hosts file, we cannot use periods + in container names. But we need to be able to detect old existing containers. + Assert this behaviour. 
I think we can remove this in Ceph R + """ + fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9' + with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx: + ctx.fsid = fsid + c = _cephadm.get_container(ctx, fsid, 'iscsi', 'something') + assert c.cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-something' + assert c.old_cname == 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.something' + + +class TestCheckHost: + + @mock.patch('cephadm.find_executable', return_value='foo') + @mock.patch('cephadm.check_time_sync', return_value=True) + @mock.patch('cephadm.logger') + def test_container_engine(self, _logger, _find_executable, _check_time_sync): + ctx = _cephadm.CephadmContext() + + ctx.container_engine = None + err = r'No container engine binary found' + with pytest.raises(_cephadm.Error, match=err): + _cephadm.command_check_host(ctx) + + ctx.container_engine = mock_podman() + _cephadm.command_check_host(ctx) + + ctx.container_engine = mock_docker() + _cephadm.command_check_host(ctx) + + +class TestRmRepo: + + @pytest.mark.parametrize('os_release', + [ + # Apt + dedent(""" + NAME="Ubuntu" + VERSION="20.04 LTS (Focal Fossa)" + ID=ubuntu + ID_LIKE=debian + PRETTY_NAME="Ubuntu 20.04 LTS" + VERSION_ID="20.04" + HOME_URL="https://www.ubuntu.com/" + SUPPORT_URL="https://help.ubuntu.com/" + BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" + PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" + VERSION_CODENAME=focal + UBUNTU_CODENAME=focal + """), + + # YumDnf + dedent(""" + NAME="CentOS Linux" + VERSION="8 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="8" + PLATFORM_ID="platform:el8" + PRETTY_NAME="CentOS Linux 8 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:8" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-8" + CENTOS_MANTISBT_PROJECT_VERSION="8" + REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="8" + """), 
+ + # Zypper + dedent(""" + NAME="openSUSE Tumbleweed" + # VERSION="20210810" + ID="opensuse-tumbleweed" + ID_LIKE="opensuse suse" + VERSION_ID="20210810" + PRETTY_NAME="openSUSE Tumbleweed" + ANSI_COLOR="0;32" + CPE_NAME="cpe:/o:opensuse:tumbleweed:20210810" + BUG_REPORT_URL="https://bugs.opensuse.org" + HOME_URL="https://www.opensuse.org/" + DOCUMENTATION_URL="https://en.opensuse.org/Portal:Tumbleweed" + LOGO="distributor-logo" + """), + ]) + @mock.patch('cephadm.find_executable', return_value='foo') + def test_container_engine(self, _find_executable, os_release, cephadm_fs): + cephadm_fs.create_file('/etc/os-release', contents=os_release) + ctx = _cephadm.CephadmContext() + + ctx.container_engine = None + _cephadm.command_rm_repo(ctx) + + ctx.container_engine = mock_podman() + _cephadm.command_rm_repo(ctx) + + ctx.container_engine = mock_docker() + _cephadm.command_rm_repo(ctx) + + +class TestValidateRepo: + + @pytest.mark.parametrize('values', + [ + # Apt - no checks + dict( + version="", + release="pacific", + err_text="", + os_release=dedent(""" + NAME="Ubuntu" + VERSION="20.04 LTS (Focal Fossa)" + ID=ubuntu + ID_LIKE=debian + PRETTY_NAME="Ubuntu 20.04 LTS" + VERSION_ID="20.04" + HOME_URL="https://www.ubuntu.com/" + SUPPORT_URL="https://help.ubuntu.com/" + BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" + PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" + VERSION_CODENAME=focal + UBUNTU_CODENAME=focal + """)), + + # YumDnf on Centos8 - OK + dict( + version="", + release="pacific", + err_text="", + os_release=dedent(""" + NAME="CentOS Linux" + VERSION="8 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="8" + PLATFORM_ID="platform:el8" + PRETTY_NAME="CentOS Linux 8 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:8" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-8" + CENTOS_MANTISBT_PROJECT_VERSION="8" + 
REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="8" + """)), + + # YumDnf on Fedora - Fedora not supported + dict( + version="", + release="pacific", + err_text="does not build Fedora", + os_release=dedent(""" + NAME="Fedora Linux" + VERSION="35 (Cloud Edition)" + ID=fedora + VERSION_ID=35 + VERSION_CODENAME="" + PLATFORM_ID="platform:f35" + PRETTY_NAME="Fedora Linux 35 (Cloud Edition)" + ANSI_COLOR="0;38;2;60;110;180" + LOGO=fedora-logo-icon + CPE_NAME="cpe:/o:fedoraproject:fedora:35" + HOME_URL="https://fedoraproject.org/" + DOCUMENTATION_URL="https://docs.fedoraproject.org/en-US/fedora/f35/system-administrators-guide/" + SUPPORT_URL="https://ask.fedoraproject.org/" + BUG_REPORT_URL="https://bugzilla.redhat.com/" + REDHAT_BUGZILLA_PRODUCT="Fedora" + REDHAT_BUGZILLA_PRODUCT_VERSION=35 + REDHAT_SUPPORT_PRODUCT="Fedora" + REDHAT_SUPPORT_PRODUCT_VERSION=35 + PRIVACY_POLICY_URL="https://fedoraproject.org/wiki/Legal:PrivacyPolicy" + VARIANT="Cloud Edition" + VARIANT_ID=cloud + """)), + + # YumDnf on Centos 7 - no pacific + dict( + version="", + release="pacific", + err_text="does not support pacific", + os_release=dedent(""" + NAME="CentOS Linux" + VERSION="7 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="7" + PRETTY_NAME="CentOS Linux 7 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:7" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-7" + CENTOS_MANTISBT_PROJECT_VERSION="7" + REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="7" + """)), + + # YumDnf on Centos 7 - nothing after pacific + dict( + version="", + release="zillions", + err_text="does not support pacific", + os_release=dedent(""" + NAME="CentOS Linux" + VERSION="7 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="7" + PRETTY_NAME="CentOS Linux 7 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:7" + HOME_URL="https://www.centos.org/" + 
BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-7" + CENTOS_MANTISBT_PROJECT_VERSION="7" + REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="7" + """)), + + # YumDnf on Centos 7 - nothing v16 or higher + dict( + version="v16.1.3", + release="", + err_text="does not support", + os_release=dedent(""" + NAME="CentOS Linux" + VERSION="7 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="7" + PRETTY_NAME="CentOS Linux 7 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:7" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-7" + CENTOS_MANTISBT_PROJECT_VERSION="7" + REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="7" + """)), + ]) + @mock.patch('cephadm.find_executable', return_value='foo') + def test_distro_validation(self, _find_executable, values, cephadm_fs): + os_release = values['os_release'] + release = values['release'] + version = values['version'] + err_text = values['err_text'] + + cephadm_fs.create_file('/etc/os-release', contents=os_release) + ctx = _cephadm.CephadmContext() + ctx.repo_url = 'http://localhost' + pkg = _cephadm.create_packager(ctx, stable=release, version=version) + + if err_text: + with pytest.raises(_cephadm.Error, match=err_text): + pkg.validate() + else: + with mock.patch('cephadm.urlopen', return_value=None): + pkg.validate() + + @pytest.mark.parametrize('values', + [ + # Apt - not checked + dict( + version="", + release="pacific", + err_text="", + os_release=dedent(""" + NAME="Ubuntu" + VERSION="20.04 LTS (Focal Fossa)" + ID=ubuntu + ID_LIKE=debian + PRETTY_NAME="Ubuntu 20.04 LTS" + VERSION_ID="20.04" + HOME_URL="https://www.ubuntu.com/" + SUPPORT_URL="https://help.ubuntu.com/" + BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" + PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" + VERSION_CODENAME=focal + UBUNTU_CODENAME=focal + """)), + + # YumDnf on 
Centos8 - force failure + dict( + version="", + release="foobar", + err_text="failed to fetch repository metadata", + os_release=dedent(""" + NAME="CentOS Linux" + VERSION="8 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="8" + PLATFORM_ID="platform:el8" + PRETTY_NAME="CentOS Linux 8 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:8" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-8" + CENTOS_MANTISBT_PROJECT_VERSION="8" + REDHAT_SUPPORT_PRODUCT="centos" + REDHAT_SUPPORT_PRODUCT_VERSION="8" + """)), + ]) + @mock.patch('cephadm.find_executable', return_value='foo') + @mock.patch('cephadm.logger') + def test_http_validation(self, _logger, _find_executable, values, cephadm_fs): + from urllib.error import HTTPError + + os_release = values['os_release'] + release = values['release'] + version = values['version'] + err_text = values['err_text'] + + cephadm_fs.create_file('/etc/os-release', contents=os_release) + ctx = _cephadm.CephadmContext() + ctx.repo_url = 'http://localhost' + pkg = _cephadm.create_packager(ctx, stable=release, version=version) + + with mock.patch('cephadm.urlopen') as _urlopen: + _urlopen.side_effect = HTTPError(ctx.repo_url, 404, "not found", None, fp=None) + if err_text: + with pytest.raises(_cephadm.Error, match=err_text): + pkg.validate() + else: + pkg.validate() + + +class TestPull: + + @mock.patch('time.sleep') + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.get_image_info_from_inspect', return_value={}) + @mock.patch('cephadm.logger') + def test_error(self, _logger, _get_image_info_from_inspect, _call, _sleep): + ctx = _cephadm.CephadmContext() + ctx.container_engine = mock_podman() + ctx.insecure = False + + _call.return_value = ('', '', 0) + retval = _cephadm.command_pull(ctx) + assert retval == 0 + + err = 'maximum retries reached' + + _call.return_value = ('', 'foobar', 1) + with pytest.raises(_cephadm.Error) as e: 
+ _cephadm.command_pull(ctx) + assert err not in str(e.value) + + _call.return_value = ('', 'net/http: TLS handshake timeout', 1) + with pytest.raises(_cephadm.Error) as e: + _cephadm.command_pull(ctx) + assert err in str(e.value) + + @mock.patch('cephadm.get_image_info_from_inspect', return_value={}) + @mock.patch('cephadm.infer_local_ceph_image', return_value='last_local_ceph_image') + def test_image(self, _infer_local_ceph_image, _get_image_info_from_inspect): + cmd = ['pull'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_pull(ctx) + assert retval == 0 + assert ctx.image == _cephadm.DEFAULT_IMAGE + + with mock.patch.dict(os.environ, {"CEPHADM_IMAGE": 'cephadm_image_environ'}): + cmd = ['pull'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_pull(ctx) + assert retval == 0 + assert ctx.image == 'cephadm_image_environ' + + cmd = ['--image', 'cephadm_image_param', 'pull'] + with with_cephadm_ctx(cmd) as ctx: + retval = _cephadm.command_pull(ctx) + assert retval == 0 + assert ctx.image == 'cephadm_image_param' + + +class TestApplySpec: + + def test_extract_host_info_from_applied_spec(self, cephadm_fs): + yaml = '''--- +service_type: host +hostname: vm-00 +addr: 192.168.122.44 +labels: + - example1 + - example2 +--- +service_type: host +hostname: vm-01 +addr: 192.168.122.247 +labels: + - grafana +--- +service_type: host +hostname: vm-02 +--- +--- +service_type: rgw +service_id: myrgw +spec: + rgw_frontend_ssl_certificate: | + -----BEGIN PRIVATE KEY----- + V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt + ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15 + IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu + YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg + ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8= + -----END PRIVATE KEY----- + -----BEGIN CERTIFICATE----- + V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt + 
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15 + IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu + YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg + ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8= + -----END CERTIFICATE----- + ssl: true +--- +''' + + cephadm_fs.create_file('spec.yml', contents=yaml) + retdic = [{'hostname': 'vm-00', 'addr': '192.168.122.44'}, + {'hostname': 'vm-01', 'addr': '192.168.122.247'}, + {'hostname': 'vm-02',}] + + with open('spec.yml') as f: + dic = _cephadm._extract_host_info_from_applied_spec(f) + assert dic == retdic + + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_distribute_ssh_keys(self, _logger, _call): + ctx = _cephadm.CephadmContext() + ctx.ssh_public_key = None + ctx.ssh_user = 'root' + + host_spec = {'service_type': 'host', 'hostname': 'vm-02', 'addr': '192.168.122.165'} + + retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname') + + assert retval == 0 + + _call.return_value = ('', '', 1) + + retval = _cephadm._distribute_ssh_keys(ctx, host_spec, 'bootstrap_hostname') + + assert retval == 1 + + +class TestSNMPGateway: + V2c_config = { + 'snmp_community': 'public', + 'destination': '192.168.1.10:162', + 'snmp_version': 'V2c', + } + V3_no_priv_config = { + 'destination': '192.168.1.10:162', + 'snmp_version': 'V3', + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_auth_protocol': 'SHA', + 'snmp_v3_engine_id': '8000C53F00000000', + } + V3_priv_config = { + 'destination': '192.168.1.10:162', + 'snmp_version': 'V3', + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_auth_protocol': 'SHA', + 'snmp_v3_priv_protocol': 'DES', + 'snmp_v3_priv_password': 'mysecret', + 'snmp_v3_engine_id': '8000C53F00000000', + } + no_destination_config = { + 'snmp_version': 'V3', + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 
'snmp_v3_auth_protocol': 'SHA', + 'snmp_v3_priv_protocol': 'DES', + 'snmp_v3_priv_password': 'mysecret', + 'snmp_v3_engine_id': '8000C53F00000000', + } + bad_version_config = { + 'snmp_community': 'public', + 'destination': '192.168.1.10:162', + 'snmp_version': 'V1', + } + + def test_unit_run_V2c(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.V2c_config) + ctx.fsid = fsid + ctx.tcp_ports = '9464' + _cephadm.get_parm.return_value = self.V2c_config + c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id') + + _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id') + + _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f: + conf = f.read().rstrip() + assert conf == 'SNMP_NOTIFIER_COMMUNITY=public' + + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'snmp-gateway', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl') + + def test_unit_run_V3_noPriv(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.V3_no_priv_config) + ctx.fsid = fsid + ctx.tcp_ports = '9465' + _cephadm.get_parm.return_value = self.V3_no_priv_config + c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id') + + _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id') + + 
_cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f: + conf = f.read() + assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\n' + + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'snmp-gateway', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000') + + def test_unit_run_V3_Priv(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.V3_priv_config) + ctx.fsid = fsid + ctx.tcp_ports = '9464' + _cephadm.get_parm.return_value = self.V3_priv_config + c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id') + + _cephadm.make_data_dir(ctx, fsid, 'snmp-gateway', 'daemon_id') + + _cephadm.create_daemon_dirs(ctx, fsid, 'snmp-gateway', 'daemon_id', 0, 0) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/snmp-gateway.conf', 'r') as f: + conf = f.read() + assert conf == 'SNMP_NOTIFIER_AUTH_USERNAME=myuser\nSNMP_NOTIFIER_AUTH_PASSWORD=mypassword\nSNMP_NOTIFIER_PRIV_PASSWORD=mysecret\n' + + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'snmp-gateway', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 
--snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES') + + def test_unit_run_no_dest(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.no_destination_config) + ctx.fsid = fsid + ctx.tcp_ports = '9464' + _cephadm.get_parm.return_value = self.no_destination_config + + with pytest.raises(Exception) as e: + c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id') + assert str(e.value) == "config is missing destination attribute(<ip>:<port>) of the target SNMP listener" + + def test_unit_run_bad_version(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.bad_version_config) + ctx.fsid = fsid + ctx.tcp_ports = '9464' + _cephadm.get_parm.return_value = self.bad_version_config + + with pytest.raises(Exception) as e: + c = _cephadm.get_container(ctx, fsid, 'snmp-gateway', 'daemon_id') + assert str(e.value) == 'not a valid snmp version: V1' + +class TestNetworkValidation: + + def test_ipv4_subnet(self): + rc, v, msg = _cephadm.check_subnet('192.168.1.0/24') + assert rc == 0 and v[0] == 4 + + def test_ipv4_subnet_list(self): + rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,10.90.90.0/24') + assert rc == 0 and not msg + + def test_ipv4_subnet_list_with_spaces(self): + rc, v, msg = _cephadm.check_subnet('192.168.1.0/24, 10.90.90.0/24 ') + assert rc == 0 and not msg + + def test_ipv4_subnet_badlist(self): + rc, v, msg = _cephadm.check_subnet('192.168.1.0/24,192.168.1.1') + assert rc == 1 
and msg + + def test_ipv4_subnet_mixed(self): + rc, v, msg = _cephadm.check_subnet('192.168.100.0/24,fe80::/64') + assert rc == 0 and v == [4,6] + + def test_ipv6_subnet(self): + rc, v, msg = _cephadm.check_subnet('fe80::/64') + assert rc == 0 and v[0] == 6 + + def test_subnet_mask_missing(self): + rc, v, msg = _cephadm.check_subnet('192.168.1.58') + assert rc == 1 and msg + + def test_subnet_mask_junk(self): + rc, v, msg = _cephadm.check_subnet('wah') + assert rc == 1 and msg + + def test_ip_in_subnet(self): + # valid ip and only one valid subnet + rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24') + assert rc is True + + # valid ip and valid subnets list without spaces + rc = _cephadm.ip_in_subnets('192.168.100.1', '192.168.100.0/24,10.90.90.0/24') + assert rc is True + + # valid ip and valid subnets list with spaces + rc = _cephadm.ip_in_subnets('10.90.90.2', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24') + assert rc is True + + # valid ip that doesn't belong to any subnet + rc = _cephadm.ip_in_subnets('192.168.100.2', '192.168.50.0/24, 10.90.90.0/24') + assert rc is False + + # valid ip that doesn't belong to the subnet (only 14 hosts) + rc = _cephadm.ip_in_subnets('192.168.100.20', '192.168.100.0/28') + assert rc is False + + # valid ip and valid IPV6 network + rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', 'fe80::/64') + assert rc is True + + # valid wrapped ip and valid IPV6 network + rc = _cephadm.ip_in_subnets('[fe80::5054:ff:fef4:873a]', 'fe80::/64') + assert rc is True + + # valid ip that doesn't belong to the IPV6 network + rc = _cephadm.ip_in_subnets('fe80::5054:ff:fef4:873a', '2001:db8:85a3::/64') + assert rc is False + + # invalid IPv4 and valid subnets list: ip_in_subnets itself must raise + with pytest.raises(Exception): + rc = _cephadm.ip_in_subnets('10.90.200.', '192.168.1.0/24, 192.168.100.0/24, 10.90.90.0/24') + + # invalid IPv6 and valid subnets list: ip_in_subnets itself must raise + with pytest.raises(Exception): + rc = _cephadm.ip_in_subnets('fe80:2030:31:24', 'fe80::/64') + + 
@pytest.mark.parametrize("conf", [ + """[global] +public_network='1.1.1.0/24,2.2.2.0/24' +cluster_network="3.3.3.0/24, 4.4.4.0/24" +""", + """[global] +public_network=" 1.1.1.0/24,2.2.2.0/24 " +cluster_network=3.3.3.0/24, 4.4.4.0/24 +""", + """[global] + public_network= 1.1.1.0/24, 2.2.2.0/24 + cluster_network='3.3.3.0/24,4.4.4.0/24' +"""]) + @mock.patch('cephadm.list_networks') + @mock.patch('cephadm.logger') + def test_get_networks_from_conf(self, _logger, _list_networks, conf, cephadm_fs): + cephadm_fs.create_file('ceph.conf', contents=conf) + _list_networks.return_value = {'1.1.1.0/24': {'eth0': ['1.1.1.1']}, + '2.2.2.0/24': {'eth1': ['2.2.2.2']}, + '3.3.3.0/24': {'eth2': ['3.3.3.3']}, + '4.4.4.0/24': {'eth3': ['4.4.4.4']}} + ctx = _cephadm.CephadmContext() + ctx.config = 'ceph.conf' + ctx.mon_ip = '1.1.1.1' + ctx.cluster_network = None + # what the cephadm module does with the public network string is + # [x.strip() for x in out.split(',')] + # so we must make sure our output, through that alteration, + # generates correctly formatted networks + def _str_to_networks(s): + return [x.strip() for x in s.split(',')] + public_network = _cephadm.get_public_net_from_cfg(ctx) + assert _str_to_networks(public_network) == ['1.1.1.0/24', '2.2.2.0/24'] + cluster_network, ipv6 = _cephadm.prepare_cluster_network(ctx) + assert not ipv6 + assert _str_to_networks(cluster_network) == ['3.3.3.0/24', '4.4.4.0/24'] + +class TestSysctl: + @mock.patch('cephadm.sysctl_get') + def test_filter_sysctl_settings(self, _sysctl_get): + ctx = _cephadm.CephadmContext() + input = [ + # comment-only lines should be ignored + "# just a comment", + # As should whitespace-only lines", + " \t ", + " = \t ", + # inline comments are stripped when querying + "something = value # inline comment", + "fs.aio-max-nr = 1048576", + "kernel.pid_max = 4194304", + "vm.lowmem_reserve_ratio = 256\t256\t32\t0\t0", + " vm.max_map_count = 65530 ", + " vm.max_map_count = 65530 ", + ] + _sysctl_get.side_effect = [ + 
"value", + "1", + "4194304", + "256\t256\t32\t0\t0", + "65530", + "something else", + ] + result = _cephadm.filter_sysctl_settings(ctx, input) + assert len(_sysctl_get.call_args_list) == 6 + assert _sysctl_get.call_args_list[0].args[1] == "something" + assert _sysctl_get.call_args_list[1].args[1] == "fs.aio-max-nr" + assert _sysctl_get.call_args_list[2].args[1] == "kernel.pid_max" + assert _sysctl_get.call_args_list[3].args[1] == "vm.lowmem_reserve_ratio" + assert _sysctl_get.call_args_list[4].args[1] == "vm.max_map_count" + assert _sysctl_get.call_args_list[5].args[1] == "vm.max_map_count" + assert result == [ + "fs.aio-max-nr = 1048576", + " vm.max_map_count = 65530 ", + ] + +class TestJaeger: + single_es_node_conf = { + 'elasticsearch_nodes': 'http://192.168.0.1:9200'} + multiple_es_nodes_conf = { + 'elasticsearch_nodes': 'http://192.168.0.1:9200,http://192.168.0.2:9300'} + agent_conf = { + 'collector_nodes': 'test:14250'} + + def test_single_es(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.single_es_node_conf) + ctx.fsid = fsid + c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id') + _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0) + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'jaeger-collector', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200 quay.io/jaegertracing/jaeger-collector:1.29') + + def test_multiple_es(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-collector:1.29'], list_networks={}) as ctx: + import json + ctx.config_json = 
json.dumps(self.multiple_es_nodes_conf) + ctx.fsid = fsid + c = _cephadm.get_container(ctx, fsid, 'jaeger-collector', 'daemon_id') + _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-collector', 'daemon_id', 0, 0) + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'jaeger-collector', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/jaeger-collector.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('SPAN_STORAGE_TYPE=elasticsearch -e ES_SERVER_URLS=http://192.168.0.1:9200,http://192.168.0.2:9300 quay.io/jaegertracing/jaeger-collector:1.29') + + def test_jaeger_agent(self, cephadm_fs): + fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' + with with_cephadm_ctx(['--image=quay.io/jaegertracing/jaeger-agent:1.29'], list_networks={}) as ctx: + import json + ctx.config_json = json.dumps(self.agent_conf) + ctx.fsid = fsid + c = _cephadm.get_container(ctx, fsid, 'jaeger-agent', 'daemon_id') + _cephadm.create_daemon_dirs(ctx, fsid, 'jaeger-agent', 'daemon_id', 0, 0) + _cephadm.deploy_daemon_units( + ctx, + fsid, + 0, 0, + 'jaeger-agent', + 'daemon_id', + c, + True, True + ) + with open(f'/var/lib/ceph/{fsid}/jaeger-agent.daemon_id/unit.run', 'r') as f: + run_cmd = f.readlines()[-1].rstrip() + assert run_cmd.endswith('quay.io/jaegertracing/jaeger-agent:1.29 --reporter.grpc.host-port=test:14250 --processor.jaeger-compact.server-host-port=6799') + +class TestRescan(fake_filesystem_unittest.TestCase): + + def setUp(self): + self.setUpPyfakefs() + if not fake_filesystem.is_root(): + fake_filesystem.set_uid(0) + + self.fs.create_dir('/sys/class') + self.ctx = _cephadm.CephadmContext() + self.ctx.func = _cephadm.command_rescan_disks + + @mock.patch('cephadm.logger') + def test_no_hbas(self, _logger): + out = _cephadm.command_rescan_disks(self.ctx) + assert out == 'Ok. 
No compatible HBAs found' + + @mock.patch('cephadm.logger') + def test_success(self, _logger): + self.fs.create_file('/sys/class/scsi_host/host0/scan') + self.fs.create_file('/sys/class/scsi_host/host1/scan') + out = _cephadm.command_rescan_disks(self.ctx) + assert out.startswith('Ok. 2 adapters detected: 2 rescanned, 0 skipped, 0 failed') + + @mock.patch('cephadm.logger') + def test_skip_usb_adapter(self, _logger): + self.fs.create_file('/sys/class/scsi_host/host0/scan') + self.fs.create_file('/sys/class/scsi_host/host1/scan') + self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='usb-storage') + out = _cephadm.command_rescan_disks(self.ctx) + assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed') + + @mock.patch('cephadm.logger') + def test_skip_unknown_adapter(self, _logger): + self.fs.create_file('/sys/class/scsi_host/host0/scan') + self.fs.create_file('/sys/class/scsi_host/host1/scan') + self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='unknown') + out = _cephadm.command_rescan_disks(self.ctx) + assert out.startswith('Ok. 
2 adapters detected: 1 rescanned, 1 skipped, 0 failed') diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py new file mode 100644 index 000000000..433f01270 --- /dev/null +++ b/src/cephadm/tests/test_container_engine.py @@ -0,0 +1,54 @@ +from unittest import mock + +import pytest + +from tests.fixtures import with_cephadm_ctx, import_cephadm + +_cephadm = import_cephadm() + + +def test_container_engine(): + with pytest.raises(NotImplementedError): + _cephadm.ContainerEngine() + + class PhonyContainerEngine(_cephadm.ContainerEngine): + EXE = "true" + + with mock.patch("cephadm.find_program") as find_program: + find_program.return_value = "/usr/bin/true" + pce = PhonyContainerEngine() + assert str(pce) == "true (/usr/bin/true)" + + +def test_podman(): + with mock.patch("cephadm.find_program") as find_program: + find_program.return_value = "/usr/bin/podman" + pm = _cephadm.Podman() + find_program.assert_called() + with pytest.raises(RuntimeError): + pm.version + with mock.patch("cephadm.call_throws") as call_throws: + call_throws.return_value = ("4.9.9", None, None) + with with_cephadm_ctx([]) as ctx: + pm.get_version(ctx) + assert pm.version == (4, 9, 9) + assert str(pm) == "podman (/usr/bin/podman) version 4.9.9" + + +def test_podman_badversion(): + with mock.patch("cephadm.find_program") as find_program: + find_program.return_value = "/usr/bin/podman" + pm = _cephadm.Podman() + find_program.assert_called() + with mock.patch("cephadm.call_throws") as call_throws: + call_throws.return_value = ("4.10.beta2", None, None) + with with_cephadm_ctx([]) as ctx: + with pytest.raises(ValueError): + pm.get_version(ctx) + + +def test_docker(): + with mock.patch("cephadm.find_program") as find_program: + find_program.return_value = "/usr/bin/docker" + docker = _cephadm.Docker() + assert str(docker) == "docker (/usr/bin/docker)" diff --git a/src/cephadm/tests/test_enclosure.py b/src/cephadm/tests/test_enclosure.py new file mode 
100644 index 000000000..1ea419fb3 --- /dev/null +++ b/src/cephadm/tests/test_enclosure.py @@ -0,0 +1,72 @@ +import pytest + +from unittest import mock +from tests.fixtures import host_sysfs, import_cephadm + +_cephadm = import_cephadm() + + +@pytest.fixture +def enclosure(host_sysfs): + e = _cephadm.Enclosure( + enc_id='1', + enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0', + dev_path='/sys/class/scsi_generic/sg2') + yield e + + +class TestEnclosure: + + def test_enc_metadata(self, enclosure): + """Check metadata for the enclosure e.g. vendor and model""" + + assert enclosure.vendor == "EnclosuresInc" + assert enclosure.components == '12' + assert enclosure.model == "D12" + assert enclosure.enc_id == '1' + + assert enclosure.ses_paths == ['sg2'] + assert enclosure.path_count == 1 + + def test_enc_slots(self, enclosure): + """Check slot count""" + + assert len(enclosure.slot_map) == 12 + + def test_enc_slot_format(self, enclosure): + """Check the attributes of a slot are as expected""" + + assert all(k in ['fault', 'locate', 'serial', 'status'] + for k, _v in enclosure.slot_map['0'].items()) + + def test_enc_slot_status(self, enclosure): + """Check the number of occupied slots is correct""" + + occupied_slots = [slot_id for slot_id in enclosure.slot_map + if enclosure.slot_map[slot_id].get('status').upper() == 'OK'] + + assert len(occupied_slots) == 6 + + def test_enc_disk_count(self, enclosure): + """Check the disks found matches the slot info""" + + assert len(enclosure.device_lookup) == 6 + assert enclosure.device_count == 6 + + def test_enc_device_serial(self, enclosure): + """Check the device serial numbers are as expected""" + + assert all(fake_serial in enclosure.device_lookup.keys() + for fake_serial in [ + 'fake000', + 'fake001', + 'fake002', + 'fake003', + 'fake004', + 'fake005']) + + def test_enc_slot_to_serial(self, enclosure): + """Check serial number to slot matches across slot_map and device_lookup""" + + for serial, slot in 
enclosure.device_lookup.items(): + assert enclosure.slot_map[slot].get('serial') == serial diff --git a/src/cephadm/tests/test_ingress.py b/src/cephadm/tests/test_ingress.py new file mode 100644 index 000000000..798c73708 --- /dev/null +++ b/src/cephadm/tests/test_ingress.py @@ -0,0 +1,350 @@ +from unittest import mock +import json + +import pytest + +from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm + +_cephadm = import_cephadm() + +SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae" +SAMPLE_HAPROXY_IMAGE = "registry.example.net/haproxy/haproxy:latest" +SAMPLE_KEEPALIVED_IMAGE = "registry.example.net/keepalive/keepalived:latest" + + +def good_haproxy_json(): + return haproxy_json(files=True) + + +def haproxy_json(**kwargs): + if kwargs.get("files"): + return { + "files": { + "haproxy.cfg": "", + }, + } + return {} + + +def good_keepalived_json(): + return keepalived_json(files=True) + + +def keepalived_json(**kwargs): + if kwargs.get("files"): + return { + "files": { + "keepalived.conf": "", + }, + } + return {} + + +@pytest.mark.parametrize( + "args", + # args: <fsid>, <daemon_id>, <config_json>, <image> + [ + # fail due to: invalid fsid + (["foobar", "wilma", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]), + # fail due to: invalid daemon_id + ([SAMPLE_UUID, "", good_haproxy_json(), SAMPLE_HAPROXY_IMAGE]), + # fail due to: invalid image + ([SAMPLE_UUID, "wilma", good_haproxy_json(), ""]), + # fail due to: no files in config_json + ( + [ + SAMPLE_UUID, + "wilma", + haproxy_json(files=False), + SAMPLE_HAPROXY_IMAGE, + ] + ), + ], +) +def test_haproxy_validation_errors(args): + with pytest.raises(_cephadm.Error): + with with_cephadm_ctx([]) as ctx: + _cephadm.HAproxy(ctx, *args) + + +def test_haproxy_init(): + with with_cephadm_ctx([]) as ctx: + ctx.config_json = json.dumps(good_haproxy_json()) + ctx.image = SAMPLE_HAPROXY_IMAGE + hap = _cephadm.HAproxy.init( + ctx, + SAMPLE_UUID, + "wilma", + ) + assert hap.fsid == SAMPLE_UUID + assert 
hap.daemon_id == "wilma" + assert hap.image == SAMPLE_HAPROXY_IMAGE + + +def test_haproxy_container_mounts(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + cmounts = hap.get_container_mounts("/var/tmp") + assert len(cmounts) == 1 + assert cmounts["/var/tmp/haproxy"] == "/var/lib/haproxy" + + +def test_haproxy_get_daemon_name(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + assert hap.get_daemon_name() == "haproxy.wilma" + + +def test_haproxy_get_container_name(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + name1 = hap.get_container_name() + assert ( + name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma" + ) + name2 = hap.get_container_name(desc="extra") + assert ( + name2 + == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-haproxy.wilma-extra" + ) + + +def test_haproxy_get_daemon_args(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + args = hap.get_daemon_args() + assert args == ["haproxy", "-f", "/var/lib/haproxy/haproxy.cfg"] + + +@mock.patch("cephadm.logger") +def test_haproxy_create_daemon_dirs(_logger, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + with pytest.raises(OSError): + hap.create_daemon_dirs("/var/tmp", 45, 54) + cephadm_fs.create_dir("/var/tmp") + hap.create_daemon_dirs("/var/tmp", 45, 54) + # TODO: make assertions about the dirs created + + +def test_haproxy_extract_uid_gid_haproxy(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + 
) + with mock.patch("cephadm.CephContainer") as cc: + cc.return_value.run.return_value = "500 500" + uid, gid = hap.extract_uid_gid_haproxy() + cc.return_value.run.assert_called() + assert uid == 500 + assert gid == 500 + + +def test_haproxy_get_sysctl_settings(): + with with_cephadm_ctx([]) as ctx: + hap = _cephadm.HAproxy( + ctx, + SAMPLE_UUID, + "wilma", + good_haproxy_json(), + SAMPLE_HAPROXY_IMAGE, + ) + ss = hap.get_sysctl_settings() + assert len(ss) == 3 + + +@pytest.mark.parametrize( + "args", + # args: <fsid>, <daemon_id>, <config_json>, <image> + [ + # fail due to: invalid fsid + ( + [ + "foobar", + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ] + ), + # fail due to: invalid daemon_id + ([SAMPLE_UUID, "", good_keepalived_json(), SAMPLE_KEEPALIVED_IMAGE]), + # fail due to: invalid image + ([SAMPLE_UUID, "barney", good_keepalived_json(), ""]), + # fail due to: no files in config_json + ( + [ + SAMPLE_UUID, + "barney", + keepalived_json(files=False), + SAMPLE_KEEPALIVED_IMAGE, + ] + ), + ], +) +def test_keepalived_validation_errors(args): + with pytest.raises(_cephadm.Error): + with with_cephadm_ctx([]) as ctx: + _cephadm.Keepalived(ctx, *args) + + +def test_keepalived_init(): + with with_cephadm_ctx([]) as ctx: + ctx.config_json = json.dumps(good_keepalived_json()) + ctx.image = SAMPLE_KEEPALIVED_IMAGE + kad = _cephadm.Keepalived.init( + ctx, + SAMPLE_UUID, + "barney", + ) + assert kad.fsid == SAMPLE_UUID + assert kad.daemon_id == "barney" + assert kad.image == SAMPLE_KEEPALIVED_IMAGE + + +def test_keepalived_container_mounts(): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + cmounts = kad.get_container_mounts("/var/tmp") + assert len(cmounts) == 1 + assert ( + cmounts["/var/tmp/keepalived.conf"] + == "/etc/keepalived/keepalived.conf" + ) + + +def test_keepalived_get_daemon_name(): + with with_cephadm_ctx([]) as ctx: + kad = 
_cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + assert kad.get_daemon_name() == "keepalived.barney" + + +def test_keepalived_get_container_name(): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + name1 = kad.get_container_name() + assert ( + name1 + == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney" + ) + name2 = kad.get_container_name(desc="extra") + assert ( + name2 + == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-keepalived.barney-extra" + ) + + +def test_keepalived_get_container_envs(): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + args = kad.get_container_envs() + assert args == [ + "KEEPALIVED_AUTOCONF=false", + "KEEPALIVED_CONF=/etc/keepalived/keepalived.conf", + "KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf", + "KEEPALIVED_DEBUG=false", + ] + + +@mock.patch("cephadm.logger") +def test_keepalived_create_daemon_dirs(_logger, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + with pytest.raises(OSError): + kad.create_daemon_dirs("/var/tmp", 45, 54) + cephadm_fs.create_dir("/var/tmp") + kad.create_daemon_dirs("/var/tmp", 45, 54) + # TODO: make assertions about the dirs created + + +def test_keepalived_extract_uid_gid_keepalived(): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + with mock.patch("cephadm.CephContainer") as cc: + cc.return_value.run.return_value = "500 500" + uid, gid = kad.extract_uid_gid_keepalived() + cc.return_value.run.assert_called() + assert uid == 500 + assert gid == 500 + + +def 
test_keepalived_get_sysctl_settings(): + with with_cephadm_ctx([]) as ctx: + kad = _cephadm.Keepalived( + ctx, + SAMPLE_UUID, + "barney", + good_keepalived_json(), + SAMPLE_KEEPALIVED_IMAGE, + ) + ss = kad.get_sysctl_settings() + assert len(ss) == 3 diff --git a/src/cephadm/tests/test_networks.py b/src/cephadm/tests/test_networks.py new file mode 100644 index 000000000..7c0575046 --- /dev/null +++ b/src/cephadm/tests/test_networks.py @@ -0,0 +1,233 @@ +import json +from textwrap import dedent +from unittest import mock + +import pytest + +from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm + +_cephadm = import_cephadm() + + +class TestCommandListNetworks: + @pytest.mark.parametrize("test_input, expected", [ + ( + dedent(""" + default via 192.168.178.1 dev enxd89ef3f34260 proto dhcp metric 100 + 10.0.0.0/8 via 10.4.0.1 dev tun0 proto static metric 50 + 10.3.0.0/21 via 10.4.0.1 dev tun0 proto static metric 50 + 10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50 + 137.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 + 138.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 + 139.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 + 140.1.0.0/17 via 10.4.0.1 dev tun0 proto static metric 50 + 141.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 + 172.16.100.34 via 172.16.100.34 dev eth1 proto kernel scope link src 172.16.100.34 + 192.168.122.1 dev ens3 proto dhcp scope link src 192.168.122.236 metric 100 + 169.254.0.0/16 dev docker0 scope link metric 1000 + 172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 + 192.168.39.0/24 dev virbr1 proto kernel scope link src 192.168.39.1 linkdown + 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown + 192.168.178.0/24 dev enxd89ef3f34260 proto kernel scope link src 192.168.178.28 metric 100 + 192.168.178.1 dev enxd89ef3f34260 proto static scope link metric 100 + 195.135.221.12 via 192.168.178.1 dev enxd89ef3f34260 proto static metric 100 + 
"""), + { + '172.16.100.34/32': {'eth1': {'172.16.100.34'}}, + '192.168.122.1/32': {'ens3': {'192.168.122.236'}}, + '10.4.0.1/32': {'tun0': {'10.4.0.2'}}, + '172.17.0.0/16': {'docker0': {'172.17.0.1'}}, + '192.168.39.0/24': {'virbr1': {'192.168.39.1'}}, + '192.168.122.0/24': {'virbr0': {'192.168.122.1'}}, + '192.168.178.0/24': {'enxd89ef3f34260': {'192.168.178.28'}} + } + ), ( + dedent(""" + default via 10.3.64.1 dev eno1 proto static metric 100 + 10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.23 metric 100 + 10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.27 metric 100 + 10.88.0.0/16 dev cni-podman0 proto kernel scope link src 10.88.0.1 linkdown + 172.21.0.0/20 via 172.21.3.189 dev tun0 + 172.21.1.0/20 via 172.21.3.189 dev tun0 + 172.21.2.1 via 172.21.3.189 dev tun0 + 172.21.3.1 dev tun0 proto kernel scope link src 172.21.3.2 + 172.21.4.0/24 via 172.21.3.1 dev tun0 + 172.21.5.0/24 via 172.21.3.1 dev tun0 + 172.21.6.0/24 via 172.21.3.1 dev tun0 + 172.21.7.0/24 via 172.21.3.1 dev tun0 + 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown + 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown + 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown + 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown + """), + { + '10.3.64.0/24': {'eno1': {'10.3.64.23', '10.3.64.27'}}, + '10.88.0.0/16': {'cni-podman0': {'10.88.0.1'}}, + '172.21.3.1/32': {'tun0': {'172.21.3.2'}}, + '192.168.122.0/24': {'virbr0': {'192.168.122.1'}} + } + ), + ]) + def test_parse_ipv4_route(self, test_input, expected): + assert _cephadm._parse_ipv4_route(test_input) == expected + + @pytest.mark.parametrize("test_routes, test_ips, expected", [ + ( + dedent(""" + ::1 dev lo proto kernel metric 256 pref medium + fe80::/64 dev eno1 proto kernel metric 100 pref medium + fe80::/64 dev br-3d443496454c proto kernel metric 256 linkdown pref medium + fe80::/64 dev tun0 proto kernel metric 
256 pref medium + fe80::/64 dev br-4355f5dbb528 proto kernel metric 256 pref medium + fe80::/64 dev docker0 proto kernel metric 256 linkdown pref medium + fe80::/64 dev cni-podman0 proto kernel metric 256 linkdown pref medium + fe80::/64 dev veth88ba1e8 proto kernel metric 256 pref medium + fe80::/64 dev vethb6e5fc7 proto kernel metric 256 pref medium + fe80::/64 dev vethaddb245 proto kernel metric 256 pref medium + fe80::/64 dev vethbd14d6b proto kernel metric 256 pref medium + fe80::/64 dev veth13e8fd2 proto kernel metric 256 pref medium + fe80::/64 dev veth1d3aa9e proto kernel metric 256 pref medium + fe80::/64 dev vethe485ca9 proto kernel metric 256 pref medium + """), + dedent(""" + 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000 + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + 2: eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000 + inet6 fe80::225:90ff:fee5:26e8/64 scope link noprefixroute + valid_lft forever preferred_lft forever + 6: br-3d443496454c: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN + inet6 fe80::42:23ff:fe9d:ee4/64 scope link + valid_lft forever preferred_lft forever + 7: br-4355f5dbb528: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::42:6eff:fe35:41fe/64 scope link + valid_lft forever preferred_lft forever + 8: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN + inet6 fe80::42:faff:fee6:40a0/64 scope link + valid_lft forever preferred_lft forever + 11: tun0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 state UNKNOWN qlen 100 + inet6 fe80::98a6:733e:dafd:350/64 scope link stable-privacy + valid_lft forever preferred_lft forever + 28: cni-podman0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 state DOWN qlen 1000 + inet6 fe80::3449:cbff:fe89:b87e/64 scope link + valid_lft forever preferred_lft forever + 31: vethaddb245@if30: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::90f7:3eff:feed:a6bb/64 scope link + valid_lft forever 
preferred_lft forever + 33: veth88ba1e8@if32: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::d:f5ff:fe73:8c82/64 scope link + valid_lft forever preferred_lft forever + 35: vethbd14d6b@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::b44f:8ff:fe6f:813d/64 scope link + valid_lft forever preferred_lft forever + 37: vethb6e5fc7@if36: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::4869:c6ff:feaa:8afe/64 scope link + valid_lft forever preferred_lft forever + 39: veth13e8fd2@if38: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::78f4:71ff:fefe:eb40/64 scope link + valid_lft forever preferred_lft forever + 41: veth1d3aa9e@if40: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::24bd:88ff:fe28:5b18/64 scope link + valid_lft forever preferred_lft forever + 43: vethe485ca9@if42: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP + inet6 fe80::6425:87ff:fe42:b9f0/64 scope link + valid_lft forever preferred_lft forever + """), + { + "fe80::/64": { + "eno1": {"fe80::225:90ff:fee5:26e8"}, + "br-3d443496454c": {"fe80::42:23ff:fe9d:ee4"}, + "tun0": {"fe80::98a6:733e:dafd:350"}, + "br-4355f5dbb528": {"fe80::42:6eff:fe35:41fe"}, + "docker0": {"fe80::42:faff:fee6:40a0"}, + "cni-podman0": {"fe80::3449:cbff:fe89:b87e"}, + "veth88ba1e8": {"fe80::d:f5ff:fe73:8c82"}, + "vethb6e5fc7": {"fe80::4869:c6ff:feaa:8afe"}, + "vethaddb245": {"fe80::90f7:3eff:feed:a6bb"}, + "vethbd14d6b": {"fe80::b44f:8ff:fe6f:813d"}, + "veth13e8fd2": {"fe80::78f4:71ff:fefe:eb40"}, + "veth1d3aa9e": {"fe80::24bd:88ff:fe28:5b18"}, + "vethe485ca9": {"fe80::6425:87ff:fe42:b9f0"}, + } + } + ), + ( + dedent(""" + ::1 dev lo proto kernel metric 256 pref medium + 2001:1458:301:eb::100:1a dev ens20f0 proto kernel metric 100 pref medium + 2001:1458:301:eb::/64 dev ens20f0 proto ra metric 100 pref medium + fd01:1458:304:5e::/64 dev ens20f0 proto ra metric 100 pref medium + fe80::/64 dev ens20f0 proto kernel metric 100 pref medium + 
default proto ra metric 100 + nexthop via fe80::46ec:ce00:b8a0:d3c8 dev ens20f0 weight 1 + nexthop via fe80::46ec:ce00:b8a2:33c8 dev ens20f0 weight 1 pref medium + """), + dedent(""" + 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000 + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + 2: ens20f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000 + inet6 2001:1458:301:eb::100:1a/128 scope global dynamic noprefixroute + valid_lft 590879sec preferred_lft 590879sec + inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute + valid_lft forever preferred_lft forever + inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute + valid_lft forever preferred_lft forever + inet6 fe80::2e60:cff:fef8:da41/64 scope link noprefixroute + valid_lft forever preferred_lft forever + """), + { + '2001:1458:301:eb::100:1a/128': { + 'ens20f0': { + '2001:1458:301:eb::100:1a' + }, + }, + '2001:1458:301:eb::/64': { + 'ens20f0': set(), + }, + 'fe80::/64': { + 'ens20f0': {'fe80::2e60:cff:fef8:da41'}, + }, + 'fd01:1458:304:5e::/64': { + 'ens20f0': set() + }, + } + ), + ( + dedent(""" + ::1 dev lo proto kernel metric 256 pref medium + fe80::/64 dev ceph-brx proto kernel metric 256 pref medium + fe80::/64 dev brx.0 proto kernel metric 256 pref medium + default via fe80::327c:5e00:6487:71e0 dev enp3s0f1 proto ra metric 1024 expires 1790sec hoplimit 64 pref medium """), + dedent(""" + 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 state UNKNOWN qlen 1000 + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + 5: enp3s0f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000 + inet6 fe80::ec4:7aff:fe8f:cb83/64 scope link noprefixroute + valid_lft forever preferred_lft forever + 6: ceph-brx: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000 + inet6 fe80::d8a1:69ff:fede:8f58/64 scope link + valid_lft forever preferred_lft forever + 7: brx.0@eno1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000 + inet6 
fe80::a4cb:54ff:fecc:f2a2/64 scope link + valid_lft forever preferred_lft forever + """), + { + 'fe80::/64': { + 'brx.0': {'fe80::a4cb:54ff:fecc:f2a2'}, + 'ceph-brx': {'fe80::d8a1:69ff:fede:8f58'} + } + } + ), + ]) + def test_parse_ipv6_route(self, test_routes, test_ips, expected): + assert _cephadm._parse_ipv6_route(test_routes, test_ips) == expected + + @mock.patch.object(_cephadm, 'call_throws', return_value=('10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50\n', '', '')) + def test_command_list_networks(self, cephadm_fs, capsys): + with with_cephadm_ctx([]) as ctx: + _cephadm.command_list_networks(ctx) + assert json.loads(capsys.readouterr().out) == { + '10.4.0.1/32': {'tun0': ['10.4.0.2']} + } diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py new file mode 100644 index 000000000..0649ef934 --- /dev/null +++ b/src/cephadm/tests/test_nfs.py @@ -0,0 +1,239 @@ +from unittest import mock +import json + +import pytest + +from tests.fixtures import with_cephadm_ctx, cephadm_fs, import_cephadm + +_cephadm = import_cephadm() + + +SAMPLE_UUID = "2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae" + + +def good_nfs_json(): + return nfs_json( + pool=True, + files=True, + ) + + +def nfs_json(**kwargs): + result = {} + if kwargs.get("pool"): + result["pool"] = "party" + if kwargs.get("files"): + result["files"] = { + "ganesha.conf": "", + } + if kwargs.get("rgw_content"): + result["rgw"] = dict(kwargs["rgw_content"]) + elif kwargs.get("rgw"): + result["rgw"] = { + "keyring": "foobar", + "user": "jsmith", + } + return result + + +@pytest.mark.parametrize( + "args,kwargs", + # args: <fsid>, <daemon_id>, <config_json>; kwargs: <image> + [ + # fail due to: invalid fsid + (["foobar", "fred", good_nfs_json()], {}), + # fail due to: invalid daemon_id + ([SAMPLE_UUID, "", good_nfs_json()], {}), + # fail due to: invalid image + ( + [SAMPLE_UUID, "fred", good_nfs_json()], + {"image": ""}, + ), + # fail due to: no files in config_json + ( + [ + SAMPLE_UUID, + 
"fred", + nfs_json(pool=True), + ], + {}, + ), + # fail due to: no pool in config_json + ( + [ + SAMPLE_UUID, + "fred", + nfs_json(files=True), + ], + {}, + ), + # fail due to: bad rgw content + ( + [ + SAMPLE_UUID, + "fred", + nfs_json(pool=True, files=True, rgw_content={"foo": True}), + ], + {}, + ), + # fail due to: rgw keyring given but no user + ( + [ + SAMPLE_UUID, + "fred", + nfs_json( + pool=True, files=True, rgw_content={"keyring": "foo"} + ), + ], + {}, + ), + ], +) +def test_nfsganesha_validation_errors(args, kwargs): + with pytest.raises(_cephadm.Error): + with with_cephadm_ctx([]) as ctx: + _cephadm.NFSGanesha(ctx, *args, **kwargs) + + +def test_nfsganesha_init(): + with with_cephadm_ctx([]) as ctx: + ctx.config_json = json.dumps(good_nfs_json()) + ctx.image = "test_image" + nfsg = _cephadm.NFSGanesha.init( + ctx, + SAMPLE_UUID, + "fred", + ) + assert nfsg.fsid == SAMPLE_UUID + assert nfsg.daemon_id == "fred" + assert nfsg.pool == "party" + + +def test_nfsganesha_container_mounts(): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + cmounts = nfsg.get_container_mounts("/var/tmp") + assert len(cmounts) == 3 + assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z" + assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z" + assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z" + + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + nfs_json(pool=True, files=True, rgw=True), + ) + cmounts = nfsg.get_container_mounts("/var/tmp") + assert len(cmounts) == 4 + assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z" + assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z" + assert cmounts["/var/tmp/etc/ganesha"] == "/etc/ganesha:z" + assert ( + cmounts["/var/tmp/keyring.rgw"] + == "/var/lib/ceph/radosgw/ceph-jsmith/keyring:z" + ) + + +def test_nfsganesha_container_envs(): + with with_cephadm_ctx([]) as ctx: + nfsg = 
_cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + envs = nfsg.get_container_envs() + assert len(envs) == 1 + assert envs[0] == "CEPH_CONF=/etc/ceph/ceph.conf" + + +def test_nfsganesha_get_version(): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + + with mock.patch("cephadm.call") as _call: + _call.return_value = ("NFS-Ganesha Release = V100", "", 0) + ver = nfsg.get_version(ctx, "fake_version") + _call.assert_called() + assert ver == "100" + + +def test_nfsganesha_get_daemon_name(): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + assert nfsg.get_daemon_name() == "nfs.fred" + + +def test_nfsganesha_get_container_name(): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + name1 = nfsg.get_container_name() + assert name1 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred" + name2 = nfsg.get_container_name(desc="extra") + assert ( + name2 == "ceph-2d018a3f-8a8f-4cb9-a7cf-48bebb2cbaae-nfs.fred-extra" + ) + + +def test_nfsganesha_get_daemon_args(): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + args = nfsg.get_daemon_args() + assert args == ["-F", "-L", "STDERR"] + + +@mock.patch("cephadm.logger") +def test_nfsganesha_create_daemon_dirs(_logger, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + ctx, + SAMPLE_UUID, + "fred", + good_nfs_json(), + ) + with pytest.raises(OSError): + nfsg.create_daemon_dirs("/var/tmp", 45, 54) + cephadm_fs.create_dir("/var/tmp") + nfsg.create_daemon_dirs("/var/tmp", 45, 54) + # TODO: make assertions about the dirs created + + +@mock.patch("cephadm.logger") +def test_nfsganesha_create_daemon_dirs_rgw(_logger, cephadm_fs): + with with_cephadm_ctx([]) as ctx: + nfsg = _cephadm.NFSGanesha( + 
ctx, + SAMPLE_UUID, + "fred", + nfs_json(pool=True, files=True, rgw=True), + ) + cephadm_fs.create_dir("/var/tmp") + nfsg.create_daemon_dirs("/var/tmp", 45, 54) + # TODO: make assertions about the dirs created diff --git a/src/cephadm/tests/test_util_funcs.py b/src/cephadm/tests/test_util_funcs.py new file mode 100644 index 000000000..270753a55 --- /dev/null +++ b/src/cephadm/tests/test_util_funcs.py @@ -0,0 +1,808 @@ +# Tests for various assorted utility functions found within cephadm +# +from unittest import mock + +import functools +import io +import os +import sys + +import pytest + +from tests.fixtures import with_cephadm_ctx, import_cephadm + +_cephadm = import_cephadm() + + +class TestCopyTree: + def _copy_tree(self, *args, **kwargs): + with with_cephadm_ctx([]) as ctx: + with mock.patch("cephadm.extract_uid_gid") as eug: + eug.return_value = (os.getuid(), os.getgid()) + _cephadm.copy_tree(ctx, *args, **kwargs) + + def test_one_dir(self, tmp_path): + """Copy one dir into a non-existing dest dir.""" + src1 = tmp_path / "src1" + dst = tmp_path / "dst" + src1.mkdir(parents=True) + + with (src1 / "foo.txt").open("w") as fh: + fh.write("hello\n") + fh.write("earth\n") + + assert not (dst / "foo.txt").exists() + + self._copy_tree([src1], dst) + assert (dst / "foo.txt").exists() + + def test_one_existing_dir(self, tmp_path): + """Copy one dir into an existing dest dir.""" + src1 = tmp_path / "src1" + dst = tmp_path / "dst" + src1.mkdir(parents=True) + dst.mkdir(parents=True) + + with (src1 / "foo.txt").open("w") as fh: + fh.write("hello\n") + fh.write("earth\n") + + assert not (dst / "src1").exists() + + self._copy_tree([src1], dst) + assert (dst / "src1/foo.txt").exists() + + def test_two_dirs(self, tmp_path): + """Copy two source directories into an existing dest dir.""" + src1 = tmp_path / "src1" + src2 = tmp_path / "src2" + dst = tmp_path / "dst" + src1.mkdir(parents=True) + src2.mkdir(parents=True) + dst.mkdir(parents=True) + + with (src1 / 
"foo.txt").open("w") as fh: + fh.write("hello\n") + fh.write("earth\n") + with (src2 / "bar.txt").open("w") as fh: + fh.write("goodbye\n") + fh.write("mars\n") + + assert not (dst / "src1").exists() + assert not (dst / "src2").exists() + + self._copy_tree([src1, src2], dst) + assert (dst / "src1/foo.txt").exists() + assert (dst / "src2/bar.txt").exists() + + def test_one_dir_set_uid(self, tmp_path): + """Explicity pass uid/gid values and assert these are passed to chown.""" + # Because this test will often be run by non-root users it is necessary + # to mock os.chown or we too easily run into perms issues. + src1 = tmp_path / "src1" + dst = tmp_path / "dst" + src1.mkdir(parents=True) + + with (src1 / "foo.txt").open("w") as fh: + fh.write("hello\n") + fh.write("earth\n") + + assert not (dst / "foo.txt").exists() + + with mock.patch("os.chown") as _chown: + _chown.return_value = None + self._copy_tree([src1], dst, uid=0, gid=0) + assert len(_chown.mock_calls) >= 2 + for c in _chown.mock_calls: + assert c == mock.call(mock.ANY, 0, 0) + assert (dst / "foo.txt").exists() + + +class TestCopyFiles: + def _copy_files(self, *args, **kwargs): + with with_cephadm_ctx([]) as ctx: + with mock.patch("cephadm.extract_uid_gid") as eug: + eug.return_value = (os.getuid(), os.getgid()) + _cephadm.copy_files(ctx, *args, **kwargs) + + def test_one_file(self, tmp_path): + """Copy one file into the dest dir.""" + file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + dst.mkdir(parents=True) + + with file1.open("w") as fh: + fh.write("its test time\n") + + self._copy_files([file1], dst) + assert (dst / "f1.txt").exists() + + def test_one_file_nodest(self, tmp_path): + """Copy one file to the given destination path.""" + file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + + with file1.open("w") as fh: + fh.write("its test time\n") + + self._copy_files([file1], dst) + assert not dst.is_dir() + assert dst.is_file() + assert dst.open("r").read() == "its test time\n" + + def 
test_three_files(self, tmp_path): + """Copy three files into the dest dir.""" + file1 = tmp_path / "f1.txt" + file2 = tmp_path / "f2.txt" + file3 = tmp_path / "f3.txt" + dst = tmp_path / "dst" + dst.mkdir(parents=True) + + with file1.open("w") as fh: + fh.write("its test time\n") + with file2.open("w") as fh: + fh.write("f2\n") + with file3.open("w") as fh: + fh.write("f3\n") + + self._copy_files([file1, file2, file3], dst) + assert (dst / "f1.txt").exists() + assert (dst / "f2.txt").exists() + assert (dst / "f3.txt").exists() + + def test_three_files_nodest(self, tmp_path): + """Copy files to dest path (not a dir). This is not a useful operation.""" + file1 = tmp_path / "f1.txt" + file2 = tmp_path / "f2.txt" + file3 = tmp_path / "f3.txt" + dst = tmp_path / "dst" + + with file1.open("w") as fh: + fh.write("its test time\n") + with file2.open("w") as fh: + fh.write("f2\n") + with file3.open("w") as fh: + fh.write("f3\n") + + self._copy_files([file1, file2, file3], dst) + assert not dst.is_dir() + assert dst.is_file() + assert dst.open("r").read() == "f3\n" + + def test_one_file_set_uid(self, tmp_path): + """Explicitly pass uid/gid values and assert these are passed to chown.""" + # Because this test will often be run by non-root users it is necessary + # to mock os.chown or we too easily run into perms issues.
+ file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + dst.mkdir(parents=True) + + with file1.open("w") as fh: + fh.write("its test time\n") + + assert not (dst / "f1.txt").exists() + + with mock.patch("os.chown") as _chown: + _chown.return_value = None + self._copy_files([file1], dst, uid=0, gid=0) + assert len(_chown.mock_calls) >= 1 + for c in _chown.mock_calls: + assert c == mock.call(mock.ANY, 0, 0) + assert (dst / "f1.txt").exists() + + +class TestMoveFiles: + def _move_files(self, *args, **kwargs): + with with_cephadm_ctx([]) as ctx: + with mock.patch("cephadm.extract_uid_gid") as eug: + eug.return_value = (os.getuid(), os.getgid()) + _cephadm.move_files(ctx, *args, **kwargs) + + def test_one_file(self, tmp_path): + """Move a named file to test dest path.""" + file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + + with file1.open("w") as fh: + fh.write("lets moove\n") + + assert not dst.exists() + assert file1.is_file() + + self._move_files([file1], dst) + assert dst.is_file() + assert not file1.exists() + + def test_one_file_destdir(self, tmp_path): + """Move a file into an existing dest dir.""" + file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + dst.mkdir(parents=True) + + with file1.open("w") as fh: + fh.write("lets moove\n") + + assert not (dst / "f1.txt").exists() + assert file1.is_file() + + self._move_files([file1], dst) + assert (dst / "f1.txt").is_file() + assert not file1.exists() + + def test_one_file_one_link(self, tmp_path): + """Move a file and a symlink to that file to a dest dir.""" + file1 = tmp_path / "f1.txt" + link1 = tmp_path / "lnk" + dst = tmp_path / "dst" + dst.mkdir(parents=True) + + with file1.open("w") as fh: + fh.write("lets moove\n") + os.symlink("f1.txt", link1) + + assert not (dst / "f1.txt").exists() + assert file1.is_file() + assert link1.exists() + + self._move_files([file1, link1], dst) + assert (dst / "f1.txt").is_file() + assert (dst / "lnk").is_symlink() + assert not file1.exists() + assert not link1.exists() + 
assert (dst / "f1.txt").open("r").read() == "lets moove\n" + assert (dst / "lnk").open("r").read() == "lets moove\n" + + def test_one_file_set_uid(self, tmp_path): + """Explicitly pass uid/gid values and assert these are passed to chown.""" + # Because this test will often be run by non-root users it is necessary + # to mock os.chown or we too easily run into perms issues. + file1 = tmp_path / "f1.txt" + dst = tmp_path / "dst" + + with file1.open("w") as fh: + fh.write("lets moove\n") + + assert not dst.exists() + assert file1.is_file() + + with mock.patch("os.chown") as _chown: + _chown.return_value = None + self._move_files([file1], dst, uid=0, gid=0) + assert len(_chown.mock_calls) >= 1 + for c in _chown.mock_calls: + assert c == mock.call(mock.ANY, 0, 0) + assert dst.is_file() + assert not file1.exists() + + +def test_recursive_chown(tmp_path): + d1 = tmp_path / "dir1" + d2 = d1 / "dir2" + f1 = d2 / "file1.txt" + d2.mkdir(parents=True) + + with f1.open("w") as fh: + fh.write("low down\n") + + with mock.patch("os.chown") as _chown: + _chown.return_value = None + _cephadm.recursive_chown(str(d1), uid=500, gid=500) + assert len(_chown.mock_calls) == 3 + assert _chown.mock_calls[0] == mock.call(str(d1), 500, 500) + assert _chown.mock_calls[1] == mock.call(str(d2), 500, 500) + assert _chown.mock_calls[2] == mock.call(str(f1), 500, 500) + + +class TestFindExecutable: + def test_standard_exe(self): + # pretty much every system will have `true` on the path.
It's a safe choice + # for the first assertion + exe = _cephadm.find_executable("true") + assert exe.endswith("true") + + def test_custom_path(self, tmp_path): + foo_sh = tmp_path / "foo.sh" + with open(foo_sh, "w") as fh: + fh.write("#!/bin/sh\n") + fh.write("echo foo\n") + foo_sh.chmod(0o755) + + exe = _cephadm.find_executable(foo_sh) + assert str(exe) == str(foo_sh) + + def test_no_path(self, monkeypatch): + monkeypatch.delenv("PATH") + exe = _cephadm.find_executable("true") + assert exe.endswith("true") + + def test_no_path_no_confstr(self, monkeypatch): + def _fail(_): + raise ValueError("fail") + + monkeypatch.delenv("PATH") + monkeypatch.setattr("os.confstr", _fail) + exe = _cephadm.find_executable("true") + assert exe.endswith("true") + + def test_unset_path(self): + exe = _cephadm.find_executable("true", path="") + assert exe is None + + def test_no_such_exe(self): + exe = _cephadm.find_executable("foo_bar-baz.noway") + assert exe is None + + +def test_find_program(): + exe = _cephadm.find_program("true") + assert exe.endswith("true") + + with pytest.raises(ValueError): + _cephadm.find_program("foo_bar-baz.noway") + + +def _mk_fake_call(enabled, active): + def _fake_call(ctx, cmd, **kwargs): + if "is-enabled" in cmd: + if isinstance(enabled, Exception): + raise enabled + return enabled + if "is-active" in cmd: + if isinstance(active, Exception): + raise active + return active + raise ValueError("should not get here") + + return _fake_call + + +@pytest.mark.parametrize( + "enabled_out, active_out, expected", + [ + ( + # ok, all is well + ("", "", 0), + ("active", "", 0), + (True, "running", True), + ), + ( + # disabled, unknown if active + ("disabled", "", 1), + ("", "", 0), + (False, "unknown", True), + ), + ( + # is-enabled error (not disabled, unknown if active + ("bleh", "", 1), + ("", "", 0), + (False, "unknown", False), + ), + ( + # is-enabled ok, inactive is stopped + ("", "", 0), + ("inactive", "", 0), + (True, "stopped", True), + ), + ( + # 
is-enabled ok, failed is error + ("", "", 0), + ("failed", "", 0), + (True, "error", True), + ), + ( + # is-enabled ok, auto-restart is error + ("", "", 0), + ("auto-restart", "", 0), + (True, "error", True), + ), + ( + # error exec'ing is-enabled cmd + ValueError("bonk"), + ("active", "", 0), + (False, "running", False), + ), + ( + # error exec'ing is-active cmd + ("", "", 0), + ValueError("blat"), + (True, "unknown", True), + ), + ], +) +def test_check_unit(enabled_out, active_out, expected): + with with_cephadm_ctx([]) as ctx: + _cephadm.call.side_effect = _mk_fake_call( + enabled=enabled_out, + active=active_out, + ) + enabled, state, installed = _cephadm.check_unit(ctx, "foobar") + assert (enabled, state, installed) == expected + + +class FakeEnabler: + def __init__(self, should_be_called): + self._should_be_called = should_be_called + self._services = [] + + def enable_service(self, service): + self._services.append(service) + + def check_expected(self): + if not self._should_be_called: + assert not self._services + return + # there are currently seven chron/chrony type services that + # cephadm looks for. Make sure it probed for each of them + # or more in case someone adds to the list. + assert len(self._services) >= 7 + assert "chrony.service" in self._services + assert "ntp.service" in self._services + + +@pytest.mark.parametrize( + "call_fn, enabler, expected", + [ + # Test that time sync services are not enabled + ( + _mk_fake_call( + enabled=("", "", 1), + active=("", "", 1), + ), + None, + False, + ), + # Test that time sync service is enabled + ( + _mk_fake_call( + enabled=("", "", 0), + active=("active", "", 0), + ), + None, + True, + ), + # Test that time sync is not enabled, and try to enable them. + # This one needs to be not running, but installed in order to + # call the enabler. It should call the enabler with every known + # service name.
+ ( + _mk_fake_call( + enabled=("disabled", "", 1), + active=("", "", 1), + ), + FakeEnabler(True), + False, + ), + # Test that time sync is enabled, with an enabler passed which + # will check that the enabler was never called. + ( + _mk_fake_call( + enabled=("", "", 0), + active=("active", "", 0), + ), + FakeEnabler(False), + True, + ), + ], +) +def test_check_time_sync(call_fn, enabler, expected): + """The check_time_sync call actually checks if a time synchronization service + is enabled. It is also the only consumer of check_units. + """ + with with_cephadm_ctx([]) as ctx: + _cephadm.call.side_effect = call_fn + result = _cephadm.check_time_sync(ctx, enabler=enabler) + assert result == expected + if enabler is not None: + enabler.check_expected() + + +@pytest.mark.parametrize( + "content, expected", + [ + ( + """#JUNK + FOO=1 + """, + (None, None, None), + ), + ( + """# A sample from a real centos system +NAME="CentOS Stream" +VERSION="8" +ID="centos" +ID_LIKE="rhel fedora" +VERSION_ID="8" +PLATFORM_ID="platform:el8" +PRETTY_NAME="CentOS Stream 8" +ANSI_COLOR="0;31" +CPE_NAME="cpe:/o:centos:centos:8" +HOME_URL="https://centos.org/" +BUG_REPORT_URL="https://bugzilla.redhat.com/" +REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 8" +REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream" + """, + ("centos", "8", None), + ), + ( + """# Minimal but complete, made up vals +ID="hpec" +VERSION_ID="33" +VERSION_CODENAME="hpec nimda" + """, + ("hpec", "33", "hpec nimda"), + ), + ( + """# Minimal but complete, no quotes +ID=hpec +VERSION_ID=33 +VERSION_CODENAME=hpec nimda + """, + ("hpec", "33", "hpec nimda"), + ), + ], +) +def test_get_distro(monkeypatch, content, expected): + def _fake_open(*args, **kwargs): + return io.StringIO(content) + + monkeypatch.setattr("builtins.open", _fake_open) + assert _cephadm.get_distro() == expected + + +class FakeContext: + """FakeContext is a minimal type for passing as a ctx, when + with_cephadm_ctx is not appropriate (it enables too many 
mocks, etc). + """ + + timeout = 30 + + +def _has_non_zero_exit(clog): + assert any("Non-zero exit" in ll for _, _, ll in clog.record_tuples) + + +def _has_values_somewhere(clog, values, non_zero=True): + if non_zero: + _has_non_zero_exit(clog) + for value in values: + assert any(value in ll for _, _, ll in clog.record_tuples) + + +@pytest.mark.parametrize( + "pyline, expected, call_kwargs, log_check", + [ + pytest.param( + "import time; time.sleep(0.1)", + ("", "", 0), + {}, + None, + id="brief-sleep", + ), + pytest.param( + "import sys; sys.exit(2)", + ("", "", 2), + {}, + _has_non_zero_exit, + id="exit-non-zero", + ), + pytest.param( + "import sys; sys.exit(0)", + ("", "", 0), + {"desc": "success"}, + None, + id="success-with-desc", + ), + pytest.param( + "print('foo'); print('bar')", + ("foo\nbar\n", "", 0), + {"desc": "stdout"}, + None, + id="stdout-print", + ), + pytest.param( + "import sys; sys.stderr.write('la\\nla\\nla\\n')", + ("", "la\nla\nla\n", 0), + {"desc": "stderr"}, + None, + id="stderr-print", + ), + pytest.param( + "for i in range(501): print(i, flush=True)", + lambda r: r[2] == 0 and r[1] == "" and "500" in r[0].splitlines(), + {}, + None, + id="stdout-long", + ), + pytest.param( + "for i in range(1000000): print(i, flush=True)", + lambda r: r[2] == 0 + and r[1] == "" + and len(r[0].splitlines()) == 1000000, + {}, + None, + id="stdout-very-long", + ), + pytest.param( + "import sys; sys.stderr.write('pow\\noof\\nouch\\n'); sys.exit(1)", + ("", "pow\noof\nouch\n", 1), + {"desc": "stderr"}, + functools.partial( + _has_values_somewhere, + values=["pow", "oof", "ouch"], + non_zero=True, + ), + id="stderr-logged-non-zero", + ), + pytest.param( + "import time; time.sleep(4)", + ("", "", 124), + {"timeout": 1}, + None, + id="long-sleep", + ), + pytest.param( + "import time\nfor i in range(100):\n\tprint(i, flush=True); time.sleep(0.01)", + ("", "", 124), + {"timeout": 0.5}, + None, + id="slow-print-timeout", + ), + # Commands that time out collect no 
logs, return empty std{out,err} strings + ], +) +def test_call(caplog, monkeypatch, pyline, expected, call_kwargs, log_check): + import logging + + caplog.set_level(logging.INFO) + monkeypatch.setattr("cephadm.logger", logging.getLogger()) + ctx = FakeContext() + result = _cephadm.call(ctx, [sys.executable, "-c", pyline], **call_kwargs) + if callable(expected): + assert expected(result) + else: + assert result == expected + if callable(log_check): + log_check(caplog) + + +class TestWriteNew: + def test_success(self, tmp_path): + "Test the simple basic feature of writing a file." + dest = tmp_path / "foo.txt" + with _cephadm.write_new(dest) as fh: + fh.write("something\n") + fh.write("something else\n") + + with open(dest, "r") as fh: + assert fh.read() == "something\nsomething else\n" + + def test_write_ower_mode(self, tmp_path): + "Test that the owner and perms options function." + dest = tmp_path / "foo.txt" + + # if this test is run as non-root, we can't really change ownership + uid = os.getuid() + gid = os.getgid() + + with _cephadm.write_new(dest, owner=(uid, gid), perms=0o600) as fh: + fh.write("xomething\n") + fh.write("xomething else\n") + + with open(dest, "r") as fh: + assert fh.read() == "xomething\nxomething else\n" + sr = os.fstat(fh.fileno()) + assert sr.st_uid == uid + assert sr.st_gid == gid + assert (sr.st_mode & 0o777) == 0o600 + + def test_encoding(self, tmp_path): + "Test that the encoding option functions."
+ dest = tmp_path / "foo.txt" + msg = "\u2603\u26C5\n" + with _cephadm.write_new(dest, encoding='utf-8') as fh: + fh.write(msg) + with open(dest, "rb") as fh: + b1 = fh.read() + assert b1.decode('utf-8') == msg + + dest = tmp_path / "foo2.txt" + with _cephadm.write_new(dest, encoding='utf-16le') as fh: + fh.write(msg) + with open(dest, "rb") as fh: + b2 = fh.read() + assert b2.decode('utf-16le') == msg + + # the binary data should differ due to the different encodings + assert b1 != b2 + + def test_cleanup(self, tmp_path): + "Test that an exception during write leaves no file behind." + dest = tmp_path / "foo.txt" + with pytest.raises(ValueError): + with _cephadm.write_new(dest) as fh: + fh.write("hello\n") + raise ValueError("foo") + fh.write("world\n") + assert not dest.exists() + assert not dest.with_name(dest.name+".new").exists() + assert list(dest.parent.iterdir()) == [] + + +class CompareContext1: + cfg_data = { + "name": "mane", + "fsid": "foobar", + "image": "fake.io/noway/nohow:gndn", + "meta": { + "fruit": "banana", + "vegetable": "carrot", + }, + "params": { + "osd_fsid": "robble", + "tcp_ports": [404, 9999], + }, + "config_blobs": { + "alpha": {"sloop": "John B"}, + "beta": {"forest": "birch"}, + "gamma": {"forest": "pine"}, + }, + } + + def check(self, ctx): + assert ctx.name == 'mane' + assert ctx.fsid == 'foobar' + assert ctx.image == 'fake.io/noway/nohow:gndn' + assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"} + assert ctx.config_blobs == { + "alpha": {"sloop": "John B"}, + "beta": {"forest": "birch"}, + "gamma": {"forest": "pine"}, + } + assert ctx.osd_fsid == "robble" + assert ctx.tcp_ports == [404, 9999] + + +class CompareContext2: + cfg_data = { + "name": "cc2", + "fsid": "foobar", + "meta": { + "fruit": "banana", + "vegetable": "carrot", + }, + "params": {}, + "config_blobs": { + "alpha": {"sloop": "John B"}, + "beta": {"forest": "birch"}, + "gamma": {"forest": "pine"}, + }, + } + + def check(self, ctx): + assert 
ctx.name == 'cc2' + assert ctx.fsid == 'foobar' + assert ctx.image == 'quay.io/ceph/ceph:v18' + assert ctx.meta_properties == {"fruit": "banana", "vegetable": "carrot"} + assert ctx.config_blobs == { + "alpha": {"sloop": "John B"}, + "beta": {"forest": "birch"}, + "gamma": {"forest": "pine"}, + } + assert ctx.osd_fsid is None + assert ctx.tcp_ports is None + + +@pytest.mark.parametrize( + "cc", + [ + CompareContext1(), + CompareContext2(), + ], +) +def test_apply_deploy_config_to_ctx(cc, monkeypatch): + import logging + + monkeypatch.setattr("cephadm.logger", logging.getLogger()) + ctx = FakeContext() + _cephadm.apply_deploy_config_to_ctx(cc.cfg_data, ctx) + cc.check(ctx) diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini new file mode 100644 index 000000000..2cbfca70f --- /dev/null +++ b/src/cephadm/tox.ini @@ -0,0 +1,77 @@ +[tox] +envlist = + py3 + mypy + fix + flake8 +skipsdist = true + +[flake8] +max-line-length = 100 +inline-quotes = ' +ignore = + E501, \ + W503, +exclude = + .tox, \ + .vagrant, \ + __pycache__, \ + *.pyc, \ + templates, \ + .eggs +statistics = True + +[autopep8] +addopts = + --max-line-length {[flake8]max-line-length} \ + --ignore "{[flake8]ignore}" \ + --exclude "{[flake8]exclude}" \ + --in-place \ + --recursive \ + --ignore-local-config + +[testenv] +skip_install=true +deps = + pyfakefs == 4.5.6 ; python_version < "3.7" + pyfakefs >= 5, < 6 ; python_version >= "3.7" + mock + pytest +commands=pytest {posargs} + +[testenv:mypy] +basepython = python3 +deps = + mypy + -c{toxinidir}/../mypy-constrains.txt +commands = mypy --config-file ../mypy.ini {posargs:cephadm.py} + +[testenv:fix] +basepython = python3 +deps = + autopep8 +commands = + python --version + autopep8 {[autopep8]addopts} {posargs: cephadm.py} + +[testenv:flake8] +basepython = python3 +allowlist_externals = bash +deps = + flake8 == 5.0.4 + flake8-quotes +commands = + flake8 --config=tox.ini {posargs:cephadm.py} + bash -c "test $(grep -c 'docker.io' cephadm.py) == 11" +# 
Downstream distributions may choose to alter this "docker.io" number,
+# to make sure no new references to docker.io are creeping in unnoticed.
+
+# coverage env is intentionally left out of the envlist. It is here for developers
+# to run locally to generate and review test coverage of cephadm.
+[testenv:coverage]
+skip_install=true
+deps =
+    {[testenv]deps}
+    pytest-cov
+commands =
+    pytest -v --cov=cephadm --cov-report=term-missing --cov-report=html {posargs}
diff --git a/src/cephadm/vstart-cleanup.sh b/src/cephadm/vstart-cleanup.sh
new file mode 100755
index 000000000..facbdd100
--- /dev/null
+++ b/src/cephadm/vstart-cleanup.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -ex
+
+bin/ceph mon rm "$(hostname)"
+for f in $(bin/ceph orch ls | grep -v NAME | awk '{print $1}') ; do  # first column of every non-header row
+    bin/ceph orch rm "$f" --force
+done
diff --git a/src/cephadm/vstart-smoke.sh b/src/cephadm/vstart-smoke.sh
new file mode 100755
index 000000000..ecdb59d18
--- /dev/null
+++ b/src/cephadm/vstart-smoke.sh
@@ -0,0 +1,86 @@
+#!/bin/bash -ex
+
+# this is a smoke test, meant to be run against vstart.sh.
+
+host="$(hostname)"
+
+bin/init-ceph stop || true
+MON=1 OSD=1 MDS=0 MGR=1 ../src/vstart.sh -d -n -x -l --cephadm
+
+export CEPH_DEV=1
+
+bin/ceph orch ls
+bin/ceph orch apply mds foo 1
+bin/ceph orch ls | grep foo
+while ! bin/ceph orch ps | grep mds.foo ; do sleep 1 ; done
+bin/ceph orch ps
+
+bin/ceph orch host ls
+
+bin/ceph orch rm crash
+! bin/ceph orch ls | grep crash  # NOTE: bash -e never exits on a '!'-negated pipeline, so this check is advisory only
+bin/ceph orch apply crash '*'
+bin/ceph orch ls | grep crash
+
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+bin/ceph orch ps | grep crash.$host | grep running
+bin/ceph orch ls | grep crash | grep 1/1
+bin/ceph orch daemon rm crash.$host
+while ! bin/ceph orch ps | grep crash ; do sleep 1 ; done
+
+bin/ceph orch daemon stop crash.$host
+bin/ceph orch daemon start crash.$host
+bin/ceph orch daemon restart crash.$host
+bin/ceph orch daemon reconfig crash.$host
+bin/ceph orch daemon redeploy crash.$host
+
+bin/ceph orch host ls | grep $host
+bin/ceph orch host label add $host fooxyz
+bin/ceph orch host ls | grep $host | grep fooxyz
+bin/ceph orch host label rm $host fooxyz
+! bin/ceph orch host ls | grep $host | grep fooxyz  # NOTE: advisory only under -e (negated pipeline)
+bin/ceph orch host set-addr $host $host
+
+bin/ceph cephadm check-host $host
+#! bin/ceph cephadm check-host $host 1.2.3.4
+#bin/ceph orch host set-addr $host 1.2.3.4
+#! bin/ceph cephadm check-host $host
+bin/ceph orch host set-addr $host $host
+bin/ceph cephadm check-host $host
+
+bin/ceph orch apply mgr 1
+bin/ceph orch rm mgr --force  # we don't want a mgr to take over for ours
+
+bin/ceph orch daemon add mon $host:127.0.0.1
+
+while ! bin/ceph mon dump | grep 'epoch 2' ; do sleep 1 ; done
+
+bin/ceph orch apply rbd-mirror 1
+
+bin/ceph orch apply node-exporter '*'
+bin/ceph orch apply prometheus 1
+bin/ceph orch apply alertmanager 1
+bin/ceph orch apply grafana 1
+
+while ! bin/ceph dashboard get-grafana-api-url | grep $host ; do sleep 1 ; done
+
+bin/ceph orch apply rgw foo --placement=1
+
+bin/ceph orch ps
+bin/ceph orch ls
+
+# clean up
+bin/ceph orch rm mds.foo
+bin/ceph orch rm rgw.foo  # matches the "orch apply rgw foo" above
+bin/ceph orch rm rbd-mirror
+bin/ceph orch rm node-exporter
+bin/ceph orch rm alertmanager
+bin/ceph orch rm grafana
+bin/ceph orch rm prometheus
+bin/ceph orch rm crash
+
+bin/ceph mon rm $host
+! bin/ceph orch daemon rm mon.$host  # NOTE: advisory only under -e; the forced removal below does the actual work
+bin/ceph orch daemon rm mon.$host --force
+
+echo OK