Diffstat (limited to 'src/pybind/mgr/cephadm')
68 files changed, 26980 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/.gitignore b/src/pybind/mgr/cephadm/.gitignore
new file mode 100644
index 000000000..a273f8603
--- /dev/null
+++ b/src/pybind/mgr/cephadm/.gitignore
@@ -0,0 +1,2 @@
+.vagrant
+ssh-config
diff --git a/src/pybind/mgr/cephadm/HACKING.rst b/src/pybind/mgr/cephadm/HACKING.rst
new file mode 100644
index 000000000..fa6ea9e1b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/HACKING.rst
@@ -0,0 +1,272 @@
+Development
+===========
+
+
+There are multiple ways to set up a development environment for the SSH orchestrator.
+In the following, I'll use the `vstart` method.
+
+1) Make sure remoto is installed (0.35 or newer)
+
+2) Use vstart to spin up a cluster
+
+
+::
+
+   # ../src/vstart.sh -n --cephadm
+
+*Note that when you specify `--cephadm` you have to have passwordless ssh access to localhost*
+
+It will add your ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub to `mgr/ssh/ssh_identity_{key, pub}`
+and add your $HOSTNAME to the list of known hosts.
+
+This will also enable the cephadm mgr module and enable it as the orchestrator backend.
+
+*Optional:*
+
+While the above is sufficient for most operations, you may want to add a second host to the mix.
+There is a `Vagrantfile` for creating a minimal cluster in `src/pybind/mgr/cephadm/`.
+
+If you wish to extend the one-node-localhost cluster to, e.g., test more sophisticated OSD
+deployments, you can follow the next steps:
+
+Run the following from within the `src/pybind/mgr/cephadm` directory.
+
+
+1) Spawn VMs
+
+::
+
+   # vagrant up
+
+This will spawn three machines by default:
+mon0, mgr0 and osd0, with 2 additional disks.
+
+You can change that by passing the `MONS` (default: 1), `MGRS` (default: 1), `OSDS` (default: 1) and
+`DISKS` (default: 2) environment variables to override the defaults. To avoid having to set the
+environment variables every time, you can instead create a JSON config file; see
+`./vagrant.config.example.json` for details.
+
+The VMs also come with the necessary packages preinstalled and your ~/.ssh/id_rsa.pub key
+injected (for the users root and vagrant; the cephadm orchestrator currently connects as root).
+
+
+2) Update the ssh-config
+
+The cephadm orchestrator needs to understand how to connect to the new node. Most likely the VM
+isn't reachable with the default settings used:
+
+::
+
+   Host *
+   User root
+   StrictHostKeyChecking no
+
+You want to adjust this by retrieving an adapted ssh_config from Vagrant.
+
+::
+
+   # vagrant ssh-config > ssh-config
+
+
+Now set the newly created config for Ceph.
+
+::
+
+   # ceph cephadm set-ssh-config -i <path_to_ssh_conf>
+
+
+3) Add the new host
+
+Add the newly created host(s) to the inventory.
+
+::
+
+   # ceph orch host add <host>
+
+
+4) Verify the inventory
+
+You should see the hostname in the list.
+
+::
+
+   # ceph orch host ls
+
+
+5) Verify the devices
+
+To verify that all disks are present and in good shape, check that all devices have been
+discovered:
+
+::
+
+   # ceph orch device ls
+
+
+6) Make a snapshot of all your VMs!
+
+To avoid having to set everything up again next time, snapshot your VMs so you can revert
+them once they get dirty.
+
+In `this repository <https://github.com/Devp00l/vagrant-helper-scripts>`_ you can find two
+scripts that will help you with snapshotting and reverting, without having to manually
+snapshot and revert each VM individually.
+
+
+Understanding ``AsyncCompletion``
+=================================
+
+How can I store temporary variables?
+------------------------------------
+
+Let's imagine you want to write code similar to
+
+.. code:: python
+
+    hosts = self.get_hosts()
+    inventory = self.get_inventory(hosts)
+    return self._create_osd(hosts, drive_group, inventory)
+
+That won't work, as ``get_hosts`` and ``get_inventory`` return objects
+of type ``AsyncCompletion``.
+
+Now let's imagine a Python 3 world, where we can use ``async`` and
+``await``. Then we could actually write this like so:
+
+.. code:: python
+
+    hosts = await self.get_hosts()
+    inventory = await self.get_inventory(hosts)
+    return self._create_osd(hosts, drive_group, inventory)
+
+Let's use a simple example to make this clear:
+
+.. code:: python
+
+    val = await func_1()
+    return func_2(val)
+
+As we're not yet in Python 3, we need to write ``await`` manually by
+calling ``orchestrator.Completion.then()``:
+
+.. code:: python
+
+    func_1().then(lambda val: func_2(val))
+
+    # or
+    func_1().then(func_2)
+
+Now let's desugar the original example:
+
+.. code:: python
+
+    hosts = await self.get_hosts()
+    inventory = await self.get_inventory(hosts)
+    return self._create_osd(hosts, drive_group, inventory)
+
+Now let's replace one ``async`` at a time:
+
+.. code:: python
+
+    hosts = await self.get_hosts()
+    return self.get_inventory(hosts).then(lambda inventory:
+        self._create_osd(hosts, drive_group, inventory))
+
+Then finally:
+
+.. code:: python
+
+    self.get_hosts().then(lambda hosts:
+        self.get_inventory(hosts).then(lambda inventory:
+            self._create_osd(hosts,
+                             drive_group, inventory)))
+
+This also works without lambdas:
+
+.. code:: python
+
+    def call_inventory(hosts):
+        def call_create(inventory):
+            return self._create_osd(hosts, drive_group, inventory)
+
+        return self.get_inventory(hosts).then(call_create)
+
+    self.get_hosts().then(call_inventory)
+
+We should add support for ``await`` as soon as we're on Python 3.
+
+I want to call my function for every host!
+------------------------------------------
+
+Imagine you have a function that looks like so:
+
+.. code:: python
+
+    @async_completion
+    def deploy_stuff(name, node):
+        ...
+
+And you want to call ``deploy_stuff`` like so:
+
+.. code:: python
+
+    return [deploy_stuff(name, node) for node in nodes]
+
+This won't work as expected. The number of ``AsyncCompletion`` objects
+created should be ``O(1)``. But there is a solution:
+``@async_map_completion``
+
+.. code:: python
+
+    @async_map_completion
+    def deploy_stuff(name, node):
+        ...
+
+    return deploy_stuff([(name, node) for node in nodes])
+
+This way, we're only creating one ``AsyncCompletion`` object. Note that
+you should not create new ``AsyncCompletion`` objects within ``deploy_stuff``, as
+we would then no longer have ``O(1)`` completions:
+
+.. code:: python
+
+    @async_completion
+    def other_async_function():
+        ...
+
+    @async_map_completion
+    def deploy_stuff(name, node):
+        return other_async_function() # wrong!
+
+Why do we need this?
+--------------------
+
+I've looked into making Completions composable by being able to
+call one completion from another, i.e. making them re-usable
+using Promises. E.g.:
+
+.. code:: python
+
+    >>> return self.get_hosts().then(self._create_osd)
+
+where ``get_hosts`` returns a Completion of a list of hosts and
+``_create_osd`` takes a list of hosts.
+
+The concept behind this is to store the computation steps explicitly and
+then evaluate the chain:
+
+.. code:: python
+
+    p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
+    p.finalize(2)
+    assert p.result == "4"
+
+or graphically:
+
+::
+
+    +---------------+      +-----------------+
+    |               | then |                 |
+    | lambda x: x*2 | +--> | lambda x: str(x)|
+    |               |      |                 |
+    +---------------+      +-----------------+
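To make the ``then``/``finalize`` semantics concrete, here is a minimal, self-contained
sketch of a then-able completion. It illustrates the stored-steps idea only:
``MiniCompletion`` is an invented name, and the real ``orchestrator.Completion`` carries
far more machinery (error handling, progress reporting, nested completions).

.. code:: python

    from typing import Any, Callable, List, Optional

    class MiniCompletion:
        """Toy stand-in for orchestrator.Completion (illustration only)."""

        def __init__(self, on_complete: Optional[Callable[[Any], Any]] = None):
            self._steps: List[Callable[[Any], Any]] = []
            if on_complete:
                self._steps.append(on_complete)
            self.result: Any = None

        def then(self, on_complete: Callable[[Any], Any]) -> "MiniCompletion":
            # store the computation step; nothing is evaluated yet
            self._steps.append(on_complete)
            return self

        def finalize(self, value: Any = None) -> None:
            # explicitly evaluate the stored chain of steps, in order
            for step in self._steps:
                value = step(value)
            self.result = value

    p = MiniCompletion(on_complete=lambda x: x * 2).then(lambda x: str(x))
    p.finalize(2)
    assert p.result == "4"

Deferring all work until ``finalize`` is what lets the orchestrator collect the whole
chain of steps before anything runs.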
diff --git a/src/pybind/mgr/cephadm/Vagrantfile b/src/pybind/mgr/cephadm/Vagrantfile
new file mode 100644
index 000000000..638258c3a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/Vagrantfile
@@ -0,0 +1,66 @@
+# vi: set ft=ruby :
+#
+# In order to reduce the need of recreating all vagrant boxes every time they
+# get dirty, snapshot them and revert the snapshot of them instead.
+# Two helpful scripts to do this easily can be found here:
+# https://github.com/Devp00l/vagrant-helper-scripts
+
+require 'json'
+configFileName = 'vagrant.config.json'
+CONFIG = File.file?(configFileName) && JSON.parse(File.read(File.join(File.dirname(__FILE__), configFileName)))
+
+def getConfig(name, default)
+  down = name.downcase
+  up = name.upcase
+  CONFIG && CONFIG[down] ? CONFIG[down] : (ENV[up] ? ENV[up].to_i : default)
+end
+
+OSDS = getConfig('OSDS', 1)
+MGRS = getConfig('MGRS', 1)
+MONS = getConfig('MONS', 1)
+DISKS = getConfig('DISKS', 2)
+
+# Activate only for test purposes, as it changes the output of each vagrant
+# command, like the one used to get the ssh_config.
+# puts "Your setup:","OSDs: #{OSDS}","MGRs: #{MGRS}","MONs: #{MONS}","Disks per OSD: #{DISKS}"
+
+Vagrant.configure("2") do |config|
+  config.vm.synced_folder ".", "/vagrant", disabled: true
+  config.vm.network "private_network", type: "dhcp"
+  config.vm.box = "centos/stream8"
+
+  (0..MONS - 1).each do |i|
+    config.vm.define "mon#{i}" do |mon|
+      mon.vm.hostname = "mon#{i}"
+    end
+  end
+  (0..MGRS - 1).each do |i|
+    config.vm.define "mgr#{i}" do |mgr|
+      mgr.vm.hostname = "mgr#{i}"
+    end
+  end
+  (0..OSDS - 1).each do |i|
+    config.vm.define "osd#{i}" do |osd|
+      osd.vm.hostname = "osd#{i}"
+      osd.vm.provider :libvirt do |libvirt|
+        (0..DISKS - 1).each do |d|
+          # In Ruby, value.chr converts an integer to the corresponding ASCII character
+          libvirt.storage :file, :size => '20G', :device => "vd#{(98+d).chr}#{i}"
+        end
+      end
+    end
+  end
+
+  config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "~/.ssh/id_rsa.pub"
+  config.vm.provision "shell", inline: <<-SHELL
+    cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys
+    sudo cp -r /home/vagrant/.ssh /root/.ssh
+  SHELL
+
+  config.vm.provision "shell", inline: <<-SHELL
+    sudo yum install -y yum-utils
+    sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
+    sudo rpm --import 'https://download.ceph.com/keys/release.asc'
+    curl -L https://shaman.ceph.com/api/repos/ceph/main/latest/centos/8/repo/ | sudo tee /etc/yum.repos.d/shaman.repo
+    sudo yum install -y python36 podman cephadm libseccomp-devel
+  SHELL
+end
diff --git a/src/pybind/mgr/cephadm/__init__.py b/src/pybind/mgr/cephadm/__init__.py
new file mode 100644
index 000000000..597d883f7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/__init__.py
@@ -0,0 +1,10 @@
+from .module import CephadmOrchestrator
+
+__all__ = [
+    "CephadmOrchestrator",
+]
+
+import os
+if 'UNITTEST' in os.environ:
+    import tests
+    __all__.append(tests.__name__)
diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py
new file mode 100644
index 000000000..93a08cb34
--- /dev/null
+++ b/src/pybind/mgr/cephadm/agent.py
@@ -0,0 +1,471 @@
+try:
+    import cherrypy
+    from cherrypy._cpserver import Server
+except ImportError:
+    # to avoid sphinx
build crash + class Server: # type: ignore + pass + +import json +import logging +import socket +import ssl +import tempfile +import threading +import time + +from orchestrator import DaemonDescriptionStatus +from orchestrator._interface import daemon_type_to_service +from ceph.utils import datetime_now +from ceph.deployment.inventory import Devices +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec +from cephadm.ssl_cert_utils import SSLCerts +from mgr_util import test_port_allocation, PortAlreadyInUse + +from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + + +def cherrypy_filter(record: logging.LogRecord) -> int: + blocked = [ + 'TLSV1_ALERT_DECRYPT_ERROR' + ] + msg = record.getMessage() + return not any([m for m in blocked if m in msg]) + + +logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) +cherrypy.log.access_log.propagate = False + + +class AgentEndpoint: + + KV_STORE_AGENT_ROOT_CERT = 'cephadm_agent/root/cert' + KV_STORE_AGENT_ROOT_KEY = 'cephadm_agent/root/key' + + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr = mgr + self.ssl_certs = SSLCerts() + self.server_port = 7150 + self.server_addr = self.mgr.get_mgr_ip() + + def configure_routes(self) -> None: + d = cherrypy.dispatch.RoutesDispatcher() + d.connect(name='host-data', route='/data/', + controller=self.host_data.POST, + conditions=dict(method=['POST'])) + cherrypy.tree.mount(None, '/', config={'/': {'request.dispatch': d}}) + + def configure_tls(self, server: Server) -> None: + old_cert = self.mgr.get_store(self.KV_STORE_AGENT_ROOT_CERT) + old_key = self.mgr.get_store(self.KV_STORE_AGENT_ROOT_KEY) + if old_cert and old_key: + self.ssl_certs.load_root_credentials(old_cert, old_key) + else: + self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip()) + self.mgr.set_store(self.KV_STORE_AGENT_ROOT_CERT, self.ssl_certs.get_root_cert()) + self.mgr.set_store(self.KV_STORE_AGENT_ROOT_KEY, self.ssl_certs.get_root_key()) + + host = self.mgr.get_hostname() + addr = self.mgr.get_mgr_ip() + server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(host, addr) + + def find_free_port(self) -> None: + max_port = self.server_port + 150 + while self.server_port <= max_port: + try: + test_port_allocation(self.server_addr, self.server_port) + self.host_data.socket_port = self.server_port + self.mgr.log.debug(f'Cephadm agent endpoint using {self.server_port}') + return + except PortAlreadyInUse: + self.server_port += 1 + self.mgr.log.error(f'Cephadm agent could not find free port in range {max_port - 150}-{max_port} and failed to start') + + def configure(self) -> None: + self.host_data = HostData(self.mgr, self.server_port, self.server_addr) + self.configure_tls(self.host_data) + self.configure_routes() + self.find_free_port() + + +class HostData(Server): + exposed = True + + def __init__(self, mgr: "CephadmOrchestrator", port: int, host: str): + self.mgr = mgr + super().__init__() + self.socket_port = port + self.socket_host = host + self.subscribe() + + def stop(self) -> None: + # we must call unsubscribe before stopping the server, + # otherwise the port is not released and we will get + # an exception when trying to restart it + self.unsubscribe() + super().stop() + + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + def POST(self) -> Dict[str, Any]: + data: Dict[str, Any] = cherrypy.request.json + results: 
Dict[str, Any] = {} + try: + self.check_request_fields(data) + except Exception as e: + results['result'] = f'Bad metadata: {e}' + self.mgr.log.warning(f'Received bad metadata from an agent: {e}') + else: + # if we got here, we've already verified the keyring of the agent. If + # host agent is reporting on is marked offline, it shouldn't be any more + self.mgr.offline_hosts_remove(data['host']) + results['result'] = self.handle_metadata(data) + return results + + def check_request_fields(self, data: Dict[str, Any]) -> None: + fields = '{' + ', '.join([key for key in data.keys()]) + '}' + if 'host' not in data: + raise Exception( + f'No host in metadata from agent ("host" field). Only received fields {fields}') + host = data['host'] + if host not in self.mgr.cache.get_hosts(): + raise Exception(f'Received metadata from agent on unknown hostname {host}') + if 'keyring' not in data: + raise Exception( + f'Agent on host {host} not reporting its keyring for validation ("keyring" field). Only received fields {fields}') + if host not in self.mgr.agent_cache.agent_keys: + raise Exception(f'No agent keyring stored for host {host}. Cannot verify agent') + if data['keyring'] != self.mgr.agent_cache.agent_keys[host]: + raise Exception(f'Got wrong keyring from agent on host {host}.') + if 'port' not in data: + raise Exception( + f'Agent on host {host} not reporting its listener port ("port" fields). Only received fields {fields}') + if 'ack' not in data: + raise Exception( + f'Agent on host {host} not reporting its counter value ("ack" field). Only received fields {fields}') + try: + int(data['ack']) + except Exception as e: + raise Exception( + f'Counter value from agent on host {host} could not be converted to an integer: {e}') + metadata_types = ['ls', 'networks', 'facts', 'volume'] + metadata_types_str = '{' + ', '.join(metadata_types) + '}' + if not all(item in data.keys() for item in metadata_types): + self.mgr.log.warning( + f'Agent on host {host} reported incomplete metadata. Not all of {metadata_types_str} were present. Received fields {fields}') + + def handle_metadata(self, data: Dict[str, Any]) -> str: + try: + host = data['host'] + self.mgr.agent_cache.agent_ports[host] = int(data['port']) + if host not in self.mgr.agent_cache.agent_counter: + self.mgr.agent_cache.agent_counter[host] = 1 + self.mgr.agent_helpers._request_agent_acks({host}) + res = f'Got metadata from agent on host {host} with no known counter entry. Starting counter at 1 and requesting new metadata' + self.mgr.log.debug(res) + return res + + # update timestamp of most recent agent update + self.mgr.agent_cache.agent_timestamp[host] = datetime_now() + + error_daemons_old = set([dd.name() for dd in self.mgr.cache.get_error_daemons()]) + daemon_count_old = len(self.mgr.cache.get_daemons_by_host(host)) + + up_to_date = False + + int_ack = int(data['ack']) + if int_ack == self.mgr.agent_cache.agent_counter[host]: + up_to_date = True + else: + # we got old counter value with message, inform agent of new timestamp + if not self.mgr.agent_cache.messaging_agent(host): + self.mgr.agent_helpers._request_agent_acks({host}) + self.mgr.log.debug( + f'Received old metadata from agent on host {host}. 
Requested up-to-date metadata.')
+
+            if 'ls' in data and data['ls']:
+                self.mgr._process_ls_output(host, data['ls'])
+                self.mgr.update_failed_daemon_health_check()
+            if 'networks' in data and data['networks']:
+                self.mgr.cache.update_host_networks(host, data['networks'])
+            if 'facts' in data and data['facts']:
+                self.mgr.cache.update_host_facts(host, json.loads(data['facts']))
+            if 'volume' in data and data['volume']:
+                ret = Devices.from_json(json.loads(data['volume']))
+                self.mgr.cache.update_host_devices(host, ret.devices)
+
+            if (
+                error_daemons_old != set([dd.name() for dd in self.mgr.cache.get_error_daemons()])
+                or daemon_count_old != len(self.mgr.cache.get_daemons_by_host(host))
+            ):
+                self.mgr.log.debug(
+                    f'Change detected in state of daemons from {host} agent metadata. Kicking serve loop')
+                self.mgr._kick_serve_loop()
+
+            if up_to_date and ('ls' in data and data['ls']):
+                was_out_of_date = not self.mgr.cache.all_host_metadata_up_to_date()
+                self.mgr.cache.metadata_up_to_date[host] = True
+                if was_out_of_date and self.mgr.cache.all_host_metadata_up_to_date():
+                    self.mgr.log.debug(
+                        'New metadata from agent has made all hosts up to date. Kicking serve loop')
+                    self.mgr._kick_serve_loop()
+                self.mgr.log.debug(
+                    f'Received up-to-date metadata from agent on host {host}.')
+
+            self.mgr.agent_cache.save_agent(host)
+            return 'Successfully processed metadata.'
+
+        except Exception as e:
+            err_str = f'Failed to update metadata with metadata from agent on host {host}: {e}'
+            self.mgr.log.warning(err_str)
+            return err_str
+
+
+class AgentMessageThread(threading.Thread):
+    def __init__(self, host: str, port: int, data: Dict[Any, Any], mgr: "CephadmOrchestrator", daemon_spec: Optional[CephadmDaemonDeploySpec] = None) -> None:
+        self.mgr = mgr
+        self.agent = mgr.http_server.agent
+        self.host = host
+        self.addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host
+        self.port = port
+        self.data: str = json.dumps(data)
+        self.daemon_spec: Optional[CephadmDaemonDeploySpec] = daemon_spec
+        super().__init__(target=self.run)
+
+    def run(self) -> None:
+        self.mgr.log.debug(f'Sending message to agent on host {self.host}')
+        self.mgr.agent_cache.sending_agent_message[self.host] = True
+        try:
+            assert self.agent
+            root_cert = self.agent.ssl_certs.get_root_cert()
+            root_cert_tmp = tempfile.NamedTemporaryFile()
+            root_cert_tmp.write(root_cert.encode('utf-8'))
+            root_cert_tmp.flush()
+            root_cert_fname = root_cert_tmp.name
+
+            cert, key = self.agent.ssl_certs.generate_cert(
+                self.mgr.get_hostname(), self.mgr.get_mgr_ip())
+
+            cert_tmp = tempfile.NamedTemporaryFile()
+            cert_tmp.write(cert.encode('utf-8'))
+            cert_tmp.flush()
+            cert_fname = cert_tmp.name
+
+            key_tmp = tempfile.NamedTemporaryFile()
+            key_tmp.write(key.encode('utf-8'))
+            key_tmp.flush()
+            key_fname = key_tmp.name
+
+            ssl_ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, cafile=root_cert_fname)
+            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+            ssl_ctx.check_hostname = True
+            ssl_ctx.load_cert_chain(cert_fname, key_fname)
+        except Exception as e:
+            self.mgr.log.error(f'Failed to get certs for connecting to agent: {e}')
+            self.mgr.agent_cache.sending_agent_message[self.host] = False
+            return
+        try:
+            # frame the payload: a fixed-width, zero-padded ASCII byte count
+            bytes_len: str = str(len(self.data.encode('utf-8')))
+            if len(bytes_len.encode('utf-8')) > 10:
+                raise Exception(
+                    f'Message is too big to send to agent. Message size is {bytes_len} bytes!')
+            while len(bytes_len.encode('utf-8')) < 10:
+                bytes_len = '0' + bytes_len
+        except Exception as e:
+            self.mgr.log.error(f'Failed to get length of json payload: {e}')
+            self.mgr.agent_cache.sending_agent_message[self.host] = False
+            return
+        for retry_wait in [3, 5]:
+            try:
+                agent_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                secure_agent_socket = ssl_ctx.wrap_socket(agent_socket, server_hostname=self.addr)
+                secure_agent_socket.connect((self.addr, self.port))
+                msg = (bytes_len + self.data)
+                secure_agent_socket.sendall(msg.encode('utf-8'))
+                agent_response = secure_agent_socket.recv(1024).decode()
+                self.mgr.log.debug(f'Received "{agent_response}" from agent on host {self.host}')
+                if self.daemon_spec:
+                    self.mgr.agent_cache.agent_config_successfully_delivered(self.daemon_spec)
+                self.mgr.agent_cache.sending_agent_message[self.host] = False
+                return
+            except ConnectionError as e:
+                # if it's a connection error, possibly try to connect again.
+                # We could have just deployed the agent and it might not be ready yet
+                self.mgr.log.debug(
+                    f'Retrying connection to agent on {self.host} in {str(retry_wait)} seconds. Connection failed with: {e}')
+                time.sleep(retry_wait)
+            except Exception as e:
+                # if it's not a connection error, something has gone wrong. Give up.
+                self.mgr.log.error(f'Failed to contact agent on host {self.host}: {e}')
+                self.mgr.agent_cache.sending_agent_message[self.host] = False
+                return
+        self.mgr.log.error(f'Could not connect to agent on host {self.host}')
+        self.mgr.agent_cache.sending_agent_message[self.host] = False
+        return
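The wire format used by ``run()`` above is simple: a fixed 10-character, zero-padded
ASCII byte count, followed by the UTF-8 encoded JSON payload. For illustration, the
receiving side could parse that framing roughly as follows — a minimal sketch assuming a
plain blocking socket; ``recv_exact`` and ``read_message`` are invented names, not the
actual agent implementation:

.. code:: python

    import socket

    HEADER_LEN = 10  # zero-padded ASCII byte count, e.g. b'0000000042'

    def recv_exact(sock: socket.socket, n: int) -> bytes:
        # read exactly n bytes, looping over short reads
        buf = b''
        while len(buf) < n:
            chunk = sock.recv(n - len(buf))
            if not chunk:
                raise ConnectionError('socket closed mid-message')
            buf += chunk
        return buf

    def read_message(sock: socket.socket) -> str:
        header = recv_exact(sock, HEADER_LEN)
        payload_len = int(header.decode('utf-8'))
        return recv_exact(sock, payload_len).decode('utf-8')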
+
+
+class CephadmAgentHelpers:
+    def __init__(self, mgr: "CephadmOrchestrator"):
+        self.mgr: "CephadmOrchestrator" = mgr
+        self.agent = mgr.http_server.agent
+
+    def _request_agent_acks(self, hosts: Set[str], increment: bool = False, daemon_spec: Optional[CephadmDaemonDeploySpec] = None) -> None:
+        for host in hosts:
+            if increment:
+                self.mgr.cache.metadata_up_to_date[host] = False
+            if host not in self.mgr.agent_cache.agent_counter:
+                self.mgr.agent_cache.agent_counter[host] = 1
+            elif increment:
+                self.mgr.agent_cache.agent_counter[host] = self.mgr.agent_cache.agent_counter[host] + 1
+            payload: Dict[str, Any] = {'counter': self.mgr.agent_cache.agent_counter[host]}
+            if daemon_spec:
+                payload['config'] = daemon_spec.final_config
+            message_thread = AgentMessageThread(
+                host, self.mgr.agent_cache.agent_ports[host], payload, self.mgr, daemon_spec)
+            message_thread.start()
+
+    def _request_ack_all_not_up_to_date(self) -> None:
+        self.mgr.agent_helpers._request_agent_acks(
+            set([h for h in self.mgr.cache.get_hosts() if
+                 (not self.mgr.cache.host_metadata_up_to_date(h)
+                  and h in self.mgr.agent_cache.agent_ports and not self.mgr.agent_cache.messaging_agent(h))]))
+
+    def _agent_down(self, host: str) -> bool:
+        # if host is draining or drained (has _no_schedule label) there should not
+        # be an agent deployed there and therefore we should return False
+        if self.mgr.cache.is_host_draining(host):
+            return False
+        # if we haven't deployed an agent on the host yet, don't say an agent is down
+        if not self.mgr.cache.get_daemons_by_type('agent', host=host):
+            return False
+        # if we don't have a timestamp, it's likely because of a mgr failover.
+        # just set the timestamp to now.
However, if host was offline before, we + # should not allow creating a new timestamp to cause it to be marked online + if host not in self.mgr.agent_cache.agent_timestamp: + if host in self.mgr.offline_hosts: + return False + self.mgr.agent_cache.agent_timestamp[host] = datetime_now() + # agent hasn't reported in down multiplier * it's refresh rate. Something is likely wrong with it. + down_mult: float = max(self.mgr.agent_down_multiplier, 1.5) + time_diff = datetime_now() - self.mgr.agent_cache.agent_timestamp[host] + if time_diff.total_seconds() > down_mult * float(self.mgr.agent_refresh_rate): + return True + return False + + def _update_agent_down_healthcheck(self, down_agent_hosts: List[str]) -> None: + self.mgr.remove_health_warning('CEPHADM_AGENT_DOWN') + if down_agent_hosts: + detail: List[str] = [] + down_mult: float = max(self.mgr.agent_down_multiplier, 1.5) + for agent in down_agent_hosts: + detail.append((f'Cephadm agent on host {agent} has not reported in ' + f'{down_mult * self.mgr.agent_refresh_rate} seconds. Agent is assumed ' + 'down and host may be offline.')) + for dd in [d for d in self.mgr.cache.get_daemons_by_type('agent') if d.hostname in down_agent_hosts]: + dd.status = DaemonDescriptionStatus.error + self.mgr.set_health_warning( + 'CEPHADM_AGENT_DOWN', + summary='%d Cephadm Agent(s) are not reporting. Hosts may be offline' % ( + len(down_agent_hosts)), + count=len(down_agent_hosts), + detail=detail, + ) + + # this function probably seems very unnecessary, but it makes it considerably easier + # to get the unit tests working. All unit tests that check which daemons were deployed + # or services setup would have to be individually changed to expect an agent service or + # daemons, OR we can put this in its own function then mock the function + def _apply_agent(self) -> None: + spec = ServiceSpec( + service_type='agent', + placement=PlacementSpec(host_pattern='*') + ) + self.mgr.spec_store.save(spec) + + def _handle_use_agent_setting(self) -> bool: + need_apply = False + if self.mgr.use_agent: + # on the off chance there are still agents hanging around from + # when we turned the config option off, we need to redeploy them + # we can tell they're in that state if we don't have a keyring for + # them in the host cache + for agent in self.mgr.cache.get_daemons_by_service('agent'): + if agent.hostname not in self.mgr.agent_cache.agent_keys: + self.mgr._schedule_daemon_action(agent.name(), 'redeploy') + if 'agent' not in self.mgr.spec_store: + self.mgr.agent_helpers._apply_agent() + need_apply = True + else: + if 'agent' in self.mgr.spec_store: + self.mgr.spec_store.rm('agent') + need_apply = True + self.mgr.agent_cache.agent_counter = {} + self.mgr.agent_cache.agent_timestamp = {} + self.mgr.agent_cache.agent_keys = {} + self.mgr.agent_cache.agent_ports = {} + return need_apply + + def _check_agent(self, host: str) -> bool: + down = False + try: + assert self.agent + assert self.agent.ssl_certs.get_root_cert() + except Exception: + self.mgr.log.debug( + f'Delaying checking agent on {host} until cephadm endpoint finished creating root cert') + return down + if self.mgr.agent_helpers._agent_down(host): + down = True + try: + agent = self.mgr.cache.get_daemons_by_type('agent', host=host)[0] + assert agent.daemon_id is not None + assert agent.hostname is not None + except Exception as e: + self.mgr.log.debug( + f'Could not retrieve agent on host {host} from daemon cache: {e}') + return down + try: + spec = self.mgr.spec_store.active_specs.get('agent', None) + deps = 
self.mgr._calc_daemon_deps(spec, 'agent', agent.daemon_id) + last_deps, last_config = self.mgr.agent_cache.get_agent_last_config_deps(host) + if not last_config or last_deps != deps: + # if root cert is the dep that changed, we must use ssh to reconfig + # so it's necessary to check this one specifically + root_cert_match = False + try: + root_cert = self.agent.ssl_certs.get_root_cert() + if last_deps and root_cert in last_deps: + root_cert_match = True + except Exception: + pass + daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(agent) + # we need to know the agent port to try to reconfig w/ http + # otherwise there is no choice but a full ssh reconfig + if host in self.mgr.agent_cache.agent_ports and root_cert_match and not down: + daemon_spec = self.mgr.cephadm_services[daemon_type_to_service( + daemon_spec.daemon_type)].prepare_create(daemon_spec) + self.mgr.agent_helpers._request_agent_acks( + hosts={daemon_spec.host}, + increment=True, + daemon_spec=daemon_spec, + ) + else: + self.mgr._daemon_action(daemon_spec, action='reconfig') + return down + except Exception as e: + self.mgr.log.debug( + f'Agent on host {host} not ready to have config and deps checked: {e}') + action = self.mgr.cache.get_scheduled_daemon_action(agent.hostname, agent.name()) + if action: + try: + daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(agent) + self.mgr._daemon_action(daemon_spec, action=action) + self.mgr.cache.rm_scheduled_daemon_action(agent.hostname, agent.name()) + except Exception as e: + self.mgr.log.debug( + f'Agent on host {host} not ready to {action}: {e}') + return down diff --git a/src/pybind/mgr/cephadm/autotune.py b/src/pybind/mgr/cephadm/autotune.py new file mode 100644 index 000000000..51c931cba --- /dev/null +++ b/src/pybind/mgr/cephadm/autotune.py @@ -0,0 +1,54 @@ +import logging +from typing import List, Optional, Callable, Any, Tuple + +from orchestrator._interface import DaemonDescription + +logger = logging.getLogger(__name__) + + +class MemoryAutotuner(object): + + min_size_by_type = { + 'mds': 4096 * 1048576, + 'mgr': 4096 * 1048576, + 'mon': 1024 * 1048576, + 'crash': 128 * 1048576, + 'keepalived': 128 * 1048576, + 'haproxy': 128 * 1048576, + } + default_size = 1024 * 1048576 + + def __init__( + self, + daemons: List[DaemonDescription], + config_get: Callable[[str, str], Any], + total_mem: int, + ): + self.daemons = daemons + self.config_get = config_get + self.total_mem = total_mem + + def tune(self) -> Tuple[Optional[int], List[str]]: + tuned_osds: List[str] = [] + total = self.total_mem + for d in self.daemons: + if d.daemon_type == 'mds': + total -= self.config_get(d.name(), 'mds_cache_memory_limit') + continue + if d.daemon_type != 'osd': + assert d.daemon_type + total -= max( + self.min_size_by_type.get(d.daemon_type, self.default_size), + d.memory_usage or 0 + ) + continue + if not self.config_get(d.name(), 'osd_memory_target_autotune'): + total -= self.config_get(d.name(), 'osd_memory_target') + continue + tuned_osds.append(d.name()) + if total < 0: + return None, [] + if not tuned_osds: + return None, [] + per = total // len(tuned_osds) + return int(per), tuned_osds diff --git a/src/pybind/mgr/cephadm/ceph.repo b/src/pybind/mgr/cephadm/ceph.repo new file mode 100644 index 000000000..6f710e7ce --- /dev/null +++ b/src/pybind/mgr/cephadm/ceph.repo @@ -0,0 +1,23 @@ +[ceph] +name=Ceph packages for $basearch +baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch +enabled=1 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc 
+ +[ceph-noarch] +name=Ceph noarch packages +baseurl=https://download.ceph.com/rpm-mimic/el7/noarch +enabled=1 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc + +[ceph-source] +name=Ceph source packages +baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS +enabled=0 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc diff --git a/src/pybind/mgr/cephadm/configchecks.py b/src/pybind/mgr/cephadm/configchecks.py new file mode 100644 index 000000000..b9dcb18f4 --- /dev/null +++ b/src/pybind/mgr/cephadm/configchecks.py @@ -0,0 +1,705 @@ +import json +import ipaddress +import logging + +from mgr_module import ServiceInfoT + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast, Tuple, Callable + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + + +class HostFacts: + + def __init__(self) -> None: + self.arch: Optional[str] = None + self.bios_date: Optional[str] = None + self.bios_version: Optional[str] = None + self.cpu_cores: Optional[int] = None + self.cpu_count: Optional[int] = None + self.cpu_load: Optional[Dict[str, float]] = None + self.cpu_model: Optional[str] = None + self.cpu_threads: Optional[int] = None + self.flash_capacity: Optional[str] = None + self.flash_capacity_bytes: Optional[int] = None + self.flash_count: Optional[int] = None + self.flash_list: Optional[List[Dict[str, Any]]] = None + self.hdd_capacity: Optional[str] = None + self.hdd_capacity_bytes: Optional[int] = None + self.hdd_count: Optional[int] = None + self.hdd_list: Optional[List[Dict[str, Any]]] = None + self.hostname: Optional[str] = None + self.interfaces: Optional[Dict[str, Dict[str, Any]]] = None + self.kernel: Optional[str] = None + self.kernel_parameters: Optional[Dict[str, Any]] = None + self.kernel_security: Optional[Dict[str, str]] = None + self.memory_available_kb: Optional[int] = None + self.memory_free_kb: Optional[int] = None + self.memory_total_kb: Optional[int] = None + self.model: Optional[str] = None + self.nic_count: Optional[int] = None + self.operating_system: Optional[str] = None + self.subscribed: Optional[str] = None + self.system_uptime: Optional[float] = None + self.timestamp: Optional[float] = None + self.vendor: Optional[str] = None + self._valid = False + + def load_facts(self, json_data: Dict[str, Any]) -> None: + + if isinstance(json_data, dict): + keys = json_data.keys() + if all([k in keys for k in self.__dict__ if not k.startswith('_')]): + self._valid = True + for k in json_data.keys(): + if hasattr(self, k): + setattr(self, k, json_data[k]) + else: + self._valid = False + else: + self._valid = False + + def subnet_to_nic(self, subnet: str) -> Optional[str]: + ip_version = ipaddress.ip_network(subnet).version + logger.debug(f"subnet {subnet} is IP version {ip_version}") + interfaces = cast(Dict[str, Dict[str, Any]], self.interfaces) + nic = None + for iface in interfaces.keys(): + addr = '' + if ip_version == 4: + addr = interfaces[iface].get('ipv4_address', '') + else: + addr = interfaces[iface].get('ipv6_address', '') + if addr: + a = addr.split('/')[0] + if ipaddress.ip_address(a) in ipaddress.ip_network(subnet): + nic = iface + break + return nic + + +class SubnetLookup: + def __init__(self, subnet: str, hostname: str, mtu: str, speed: str): + self.subnet = subnet + self.mtu_map = { + mtu: [hostname] + } + self.speed_map = { + speed: [hostname] + } + + @ property + def host_list(self) -> List[str]: + hosts = [] + for mtu in self.mtu_map: + 
hosts.extend(self.mtu_map.get(mtu, [])) + return hosts + + def update(self, hostname: str, mtu: str, speed: str) -> None: + if mtu in self.mtu_map and hostname not in self.mtu_map[mtu]: + self.mtu_map[mtu].append(hostname) + else: + self.mtu_map[mtu] = [hostname] + + if speed in self.speed_map and hostname not in self.speed_map[speed]: + self.speed_map[speed].append(hostname) + else: + self.speed_map[speed] = [hostname] + + def __repr__(self) -> str: + return json.dumps({ + "subnet": self.subnet, + "mtu_map": self.mtu_map, + "speed_map": self.speed_map + }) + + +class CephadmCheckDefinition: + def __init__(self, mgr: "CephadmOrchestrator", healthcheck_name: str, description: str, name: str, func: Callable) -> None: + self.mgr = mgr + self.log = logger + self.healthcheck_name = healthcheck_name + self.description = description + self.name = name + self.func = func + + @property + def status(self) -> str: + check_states: Dict[str, str] = {} + # Issuing a get each time, since the value could be set at the CLI + raw_states = self.mgr.get_store('config_checks') + if not raw_states: + self.log.error( + "config_checks setting is not defined - unable to determine healthcheck state") + return "Unknown" + + try: + check_states = json.loads(raw_states) + except json.JSONDecodeError: + self.log.error("Unable to serialize the config_checks settings to JSON") + return "Unavailable" + + return check_states.get(self.name, 'Missing') + + def to_json(self) -> Dict[str, Any]: + return { + "healthcheck_name": self.healthcheck_name, + "description": self.description, + "name": self.name, + "status": self.status, + "valid": True if self.func else False + } + + +class CephadmConfigChecks: + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + self.health_checks: List[CephadmCheckDefinition] = [ + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_LSM", + "checks SELINUX/Apparmor profiles are consistent across cluster hosts", + "kernel_security", + self._check_kernel_lsm), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_SUBSCRIPTION", + "checks subscription states are consistent for all cluster hosts", + "os_subscription", + self._check_subscription), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_PUBLIC_MEMBERSHIP", + "check that all hosts have a NIC on the Ceph public_network", + "public_network", + self._check_public_network), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_MTU", + "check that OSD hosts share a common MTU setting", + "osd_mtu_size", + self._check_osd_mtu), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_LINKSPEED", + "check that OSD hosts share a common linkspeed", + "osd_linkspeed", + self._check_osd_linkspeed), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_NETWORK_MISSING", + "checks that the cluster/public networks defined exist on the Ceph hosts", + "network_missing", + self._check_network_missing), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_CEPH_RELEASE", + "check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)", + "ceph_release", + self._check_release_parity), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_VERSION", + "checks that the MAJ.MIN of the kernel on Ceph hosts is consistent", + "kernel_version", + self._check_kernel_version), + ] + self.log = logger + self.host_facts: Dict[str, HostFacts] = {} + self.subnet_lookup: Dict[str, SubnetLookup] = {} # subnet CIDR -> SubnetLookup Object + self.lsm_to_host: Dict[str, List[str]] = {} + self.subscribed: Dict[str, List[str]] = { + "yes": [], + "no": [], + 
"unknown": [], + } + self.host_to_role: Dict[str, List[str]] = {} + self.kernel_to_hosts: Dict[str, List[str]] = {} + + self.public_network_list: List[str] = [] + self.cluster_network_list: List[str] = [] + self.health_check_raised = False + self.active_checks: List[str] = [] # checks enabled and executed + self.skipped_checks: List[str] = [] # checks enabled, but skipped due to a pre-req failure + + raw_checks = self.mgr.get_store('config_checks') + if not raw_checks: + # doesn't exist, so seed the checks + self.seed_config_checks() + else: + # setting is there, so ensure there is an entry for each of the checks that + # this module supports (account for upgrades/changes) + try: + config_checks = json.loads(raw_checks) + except json.JSONDecodeError: + self.log.error("Unable to serialize config_checks config. Reset to defaults") + self.seed_config_checks() + else: + # Ensure the config_checks setting is consistent with this module + from_config = set(config_checks.keys()) + from_module = set([c.name for c in self.health_checks]) + old_checks = from_config.difference(from_module) + new_checks = from_module.difference(from_config) + + if old_checks: + self.log.debug(f"old checks being removed from config_checks: {old_checks}") + for i in old_checks: + del config_checks[i] + if new_checks: + self.log.debug(f"new checks being added to config_checks: {new_checks}") + for i in new_checks: + config_checks[i] = 'enabled' + + if old_checks or new_checks: + self.log.info( + f"config_checks updated: {len(old_checks)} removed, {len(new_checks)} added") + self.mgr.set_store('config_checks', json.dumps(config_checks)) + else: + self.log.debug("config_checks match module definition") + + def lookup_check(self, key_value: str, key_name: str = 'name') -> Optional[CephadmCheckDefinition]: + + for c in self.health_checks: + if getattr(c, key_name) == key_value: + return c + return None + + @property + def defined_checks(self) -> int: + return len(self.health_checks) + + @property + def active_checks_count(self) -> int: + return len(self.active_checks) + + def seed_config_checks(self) -> None: + defaults = {check.name: 'enabled' for check in self.health_checks} + self.mgr.set_store('config_checks', json.dumps(defaults)) + + @property + def skipped_checks_count(self) -> int: + return len(self.skipped_checks) + + def to_json(self) -> List[Dict[str, str]]: + return [check.to_json() for check in self.health_checks] + + def load_network_config(self) -> None: + ret, out, _err = self.mgr.check_mon_command({ + 'prefix': 'config dump', + 'format': 'json' + }) + assert ret == 0 + js = json.loads(out) + for item in js: + if item['name'] == "cluster_network": + self.cluster_network_list = item['value'].strip().split(',') + if item['name'] == "public_network": + self.public_network_list = item['value'].strip().split(',') + + self.log.debug(f"public networks {self.public_network_list}") + self.log.debug(f"cluster networks {self.cluster_network_list}") + + def _update_subnet(self, subnet: str, hostname: str, nic: Dict[str, Any]) -> None: + mtu = nic.get('mtu', None) + speed = nic.get('speed', None) + if not mtu or not speed: + return + + this_subnet = self.subnet_lookup.get(subnet, None) + if this_subnet: + this_subnet.update(hostname, mtu, speed) + else: + self.subnet_lookup[subnet] = SubnetLookup(subnet, hostname, mtu, speed) + + def _update_subnet_lookups(self, hostname: str, devname: str, nic: Dict[str, Any]) -> None: + if nic['ipv4_address']: + try: + iface4 = ipaddress.IPv4Interface(nic['ipv4_address']) + subnet = 
str(iface4.network) + except ipaddress.AddressValueError as e: + self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}") + else: + self._update_subnet(subnet, hostname, nic) + + if nic['ipv6_address']: + try: + iface6 = ipaddress.IPv6Interface(nic['ipv6_address']) + subnet = str(iface6.network) + except ipaddress.AddressValueError as e: + self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}") + else: + self._update_subnet(subnet, hostname, nic) + + def hosts_with_role(self, role: str) -> List[str]: + host_list = [] + for hostname, roles in self.host_to_role.items(): + if role in roles: + host_list.append(hostname) + return host_list + + def reset(self) -> None: + self.subnet_lookup.clear() + self.lsm_to_host.clear() + self.subscribed['yes'] = [] + self.subscribed['no'] = [] + self.subscribed['unknown'] = [] + self.host_to_role.clear() + self.kernel_to_hosts.clear() + + def _get_majority(self, data: Dict[str, List[str]]) -> Tuple[str, int]: + assert isinstance(data, dict) + + majority_key = '' + majority_count = 0 + for key in data: + if len(data[key]) > majority_count: + majority_count = len(data[key]) + majority_key = key + return majority_key, majority_count + + def get_ceph_metadata(self) -> Dict[str, Optional[Dict[str, str]]]: + """Build a map of service -> service metadata""" + service_map: Dict[str, Optional[Dict[str, str]]] = {} + + for server in self.mgr.list_servers(): + for service in cast(List[ServiceInfoT], server.get('services', [])): + if service: + service_map.update( + { + f"{service['type']}.{service['id']}": + self.mgr.get_metadata(service['type'], service['id']) + } + ) + return service_map + + def _check_kernel_lsm(self) -> None: + if len(self.lsm_to_host.keys()) > 1: + + majority_hosts_ptr, majority_hosts_count = self._get_majority(self.lsm_to_host) + lsm_copy = self.lsm_to_host.copy() + del lsm_copy[majority_hosts_ptr] + details = [] + for lsm_key in lsm_copy.keys(): + for host in lsm_copy[lsm_key]: + details.append( + f"{host} has inconsistent KSM settings compared to the " + f"majority of hosts({majority_hosts_count}) in the cluster") + host_sfx = 's' if len(details) > 1 else '' + self.mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM'] = { + 'severity': 'warning', + 'summary': f"Kernel Security Module (SELinux/AppArmor) is inconsistent for " + f"{len(details)} host{host_sfx}", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_LSM', None) + + def _check_subscription(self) -> None: + if len(self.subscribed['yes']) > 0 and len(self.subscribed['no']) > 0: + # inconsistent subscription states - CEPHADM_CHECK_SUBSCRIPTION + details = [] + for host in self.subscribed['no']: + details.append(f"{host} does not have an active subscription") + self.mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION'] = { + 'severity': 'warning', + 'summary': f"Support subscriptions inactive on {len(details)} host(s)" + f"({len(self.subscribed['yes'])} subscriptions active)", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_SUBSCRIPTION', None) + + def _check_public_network(self) -> None: + hosts_remaining: List[str] = list(self.mgr.cache.facts.keys()) + hosts_removed: List[str] = [] + self.log.debug(f"checking public network membership for: {hosts_remaining}") + + for p_net in self.public_network_list: + self.log.debug(f"checking network {p_net}") + subnet_data = 
self.subnet_lookup.get(p_net, None) + self.log.debug(f"subnet data - {subnet_data}") + + if subnet_data: + hosts_in_subnet = subnet_data.host_list + for host in hosts_in_subnet: + if host in hosts_remaining: + hosts_remaining.remove(host) + hosts_removed.append(host) + else: + if host not in hosts_removed: + self.log.debug(f"host={host}, subnet={p_net}") + self.log.exception( + "Host listed for a subnet but not present in the host facts?") + + # Ideally all hosts will have been removed since they have an IP on at least + # one of the public networks + if hosts_remaining: + if len(hosts_remaining) != len(self.mgr.cache.facts): + # public network is visible on some hosts + details = [ + f"{host} does not have an interface on any public network" for host in hosts_remaining] + + self.mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP'] = { + 'severity': 'warning', + 'summary': f"Public network(s) is not directly accessible from {len(hosts_remaining)} " + "cluster hosts", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_PUBLIC_MEMBERSHIP', None) + + def _check_osd_mtu(self) -> None: + osd_hosts = set(self.hosts_with_role('osd')) + osd_network_list = self.cluster_network_list or self.public_network_list + mtu_errors: List[str] = [] + + for osd_net in osd_network_list: + subnet_data = self.subnet_lookup.get(osd_net, None) + + if subnet_data: + + self.log.debug(f"processing mtu map : {json.dumps(subnet_data.mtu_map)}") + mtu_count = {} + max_hosts = 0 + mtu_ptr = '' + diffs = {} + for mtu, host_list in subnet_data.mtu_map.items(): + mtu_hosts = set(host_list) + mtu_count[mtu] = len(mtu_hosts) + errors = osd_hosts.difference(mtu_hosts) + if errors: + diffs[mtu] = errors + if len(errors) > max_hosts: + mtu_ptr = mtu + + if diffs: + self.log.debug("MTU problems detected") + self.log.debug(f"most hosts using {mtu_ptr}") + mtu_copy = subnet_data.mtu_map.copy() + del mtu_copy[mtu_ptr] + for bad_mtu in mtu_copy: + for h in mtu_copy[bad_mtu]: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[h]) + mtu_errors.append( + f"host {h}({host.subnet_to_nic(osd_net)}) is using MTU " + f"{bad_mtu} on {osd_net}, NICs on other hosts use {mtu_ptr}") + + if mtu_errors: + self.mgr.health_checks['CEPHADM_CHECK_MTU'] = { + 'severity': 'warning', + 'summary': f"MTU setting inconsistent on osd network NICs on {len(mtu_errors)} host(s)", + 'count': len(mtu_errors), + 'detail': mtu_errors, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_MTU', None) + + def _check_osd_linkspeed(self) -> None: + osd_hosts = set(self.hosts_with_role('osd')) + osd_network_list = self.cluster_network_list or self.public_network_list + + linkspeed_errors = [] + + for osd_net in osd_network_list: + subnet_data = self.subnet_lookup.get(osd_net, None) + + if subnet_data: + + self.log.debug(f"processing subnet : {subnet_data}") + + speed_count = {} + max_hosts = 0 + speed_ptr = '' + diffs = {} + for speed, host_list in subnet_data.speed_map.items(): + speed_hosts = set(host_list) + speed_count[speed] = len(speed_hosts) + errors = osd_hosts.difference(speed_hosts) + if errors: + diffs[speed] = errors + if len(errors) > max_hosts: + speed_ptr = speed + + if diffs: + self.log.debug("linkspeed issue(s) detected") + self.log.debug(f"most hosts using {speed_ptr}") + speed_copy = subnet_data.speed_map.copy() + del speed_copy[speed_ptr] + for bad_speed in speed_copy: + if bad_speed > speed_ptr: + # skip speed is better than 
most...it can stay! + continue + for h in speed_copy[bad_speed]: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[h]) + linkspeed_errors.append( + f"host {h}({host.subnet_to_nic(osd_net)}) has linkspeed of " + f"{bad_speed} on {osd_net}, NICs on other hosts use {speed_ptr}") + + if linkspeed_errors: + self.mgr.health_checks['CEPHADM_CHECK_LINKSPEED'] = { + 'severity': 'warning', + 'summary': "Link speed is inconsistent on osd network NICs for " + f"{len(linkspeed_errors)} host(s)", + 'count': len(linkspeed_errors), + 'detail': linkspeed_errors, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_LINKSPEED', None) + + def _check_network_missing(self) -> None: + all_networks = self.public_network_list.copy() + all_networks.extend(self.cluster_network_list) + + missing_networks = [] + for subnet in all_networks: + subnet_data = self.subnet_lookup.get(subnet, None) + + if not subnet_data: + missing_networks.append(f"{subnet} not found on any host in the cluster") + self.log.warning( + f"Network {subnet} has been defined, but is not present on any host") + + if missing_networks: + net_sfx = 's' if len(missing_networks) > 1 else '' + self.mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING'] = { + 'severity': 'warning', + 'summary': f"Public/cluster network{net_sfx} defined, but can not be found on " + "any host", + 'count': len(missing_networks), + 'detail': missing_networks, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_NETWORK_MISSING', None) + + def _check_release_parity(self) -> None: + upgrade_status = self.mgr.upgrade.upgrade_status() + if upgrade_status.in_progress: + # skip version consistency checks during an upgrade cycle + self.skipped_checks.append('ceph_release') + return + + services = self.get_ceph_metadata() + self.log.debug(json.dumps(services)) + version_to_svcs: Dict[str, List[str]] = {} + + for svc in services: + if services[svc]: + metadata = cast(Dict[str, str], services[svc]) + v = metadata.get('ceph_release', '') + if v in version_to_svcs: + version_to_svcs[v].append(svc) + else: + version_to_svcs[v] = [svc] + + if len(version_to_svcs) > 1: + majority_ptr, _majority_count = self._get_majority(version_to_svcs) + ver_copy = version_to_svcs.copy() + del ver_copy[majority_ptr] + details = [] + for v in ver_copy: + for svc in ver_copy[v]: + details.append( + f"{svc} is running {v} (majority of cluster is using {majority_ptr})") + + self.mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE'] = { + 'severity': 'warning', + 'summary': 'Ceph cluster running mixed ceph releases', + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + self.log.warning( + f"running with {len(version_to_svcs)} different ceph releases within this cluster") + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_CEPH_RELEASE', None) + + def _check_kernel_version(self) -> None: + if len(self.kernel_to_hosts.keys()) > 1: + majority_hosts_ptr, majority_hosts_count = self._get_majority(self.kernel_to_hosts) + kver_copy = self.kernel_to_hosts.copy() + del kver_copy[majority_hosts_ptr] + details = [] + for k in kver_copy: + for h in kver_copy[k]: + details.append( + f"host {h} running kernel {k}, majority of hosts({majority_hosts_count}) " + f"running {majority_hosts_ptr}") + + self.log.warning("mixed kernel versions detected") + self.mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION'] = { + 'severity': 'warning', + 'summary': f"{len(details)} host(s) running different kernel versions", + 'count': 
len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_VERSION', None) + + def _process_hosts(self) -> None: + self.log.debug(f"processing data from {len(self.mgr.cache.facts)} hosts") + for hostname in self.mgr.cache.facts: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[hostname]) + if not host._valid: + self.log.warning(f"skipping {hostname} - incompatible host facts") + continue + + kernel_lsm = cast(Dict[str, str], host.kernel_security) + lsm_desc = kernel_lsm.get('description', '') + if lsm_desc: + if lsm_desc in self.lsm_to_host: + self.lsm_to_host[lsm_desc].append(hostname) + else: + self.lsm_to_host[lsm_desc] = [hostname] + + subscription_state = host.subscribed.lower() if host.subscribed else None + if subscription_state: + self.subscribed[subscription_state].append(hostname) + + interfaces = cast(Dict[str, Dict[str, Any]], host.interfaces) + for name in interfaces.keys(): + if name in ['lo']: + continue + self._update_subnet_lookups(hostname, name, interfaces[name]) + + if host.kernel: + kernel_maj_min = '.'.join(host.kernel.split('.')[0:2]) + if kernel_maj_min in self.kernel_to_hosts: + self.kernel_to_hosts[kernel_maj_min].append(hostname) + else: + self.kernel_to_hosts[kernel_maj_min] = [hostname] + else: + self.log.warning(f"Host gather facts for {hostname} is missing kernel information") + + # NOTE: if daemondescription had systemd enabled state, we could check for systemd 'tampering' + self.host_to_role[hostname] = list(self.mgr.cache.get_daemon_types(hostname)) + + def run_checks(self) -> None: + checks_enabled = self.mgr.get_module_option('config_checks_enabled') + if checks_enabled is not True: + return + + self.reset() + + check_config: Dict[str, str] = {} + checks_raw: Optional[str] = self.mgr.get_store('config_checks') + if checks_raw: + try: + check_config.update(json.loads(checks_raw)) + except json.JSONDecodeError: + self.log.exception( + "mgr/cephadm/config_checks is not JSON serializable - all checks will run") + + # build lookup "maps" by walking the host facts, once + self._process_hosts() + + self.health_check_raised = False + self.active_checks = [] + self.skipped_checks = [] + + # process all healthchecks that are not explicitly disabled + for health_check in self.health_checks: + if check_config.get(health_check.name, '') != 'disabled': + self.active_checks.append(health_check.name) + health_check.func() + + self.mgr.set_health_checks(self.mgr.health_checks) diff --git a/src/pybind/mgr/cephadm/exchange.py b/src/pybind/mgr/cephadm/exchange.py new file mode 100644 index 000000000..76a613407 --- /dev/null +++ b/src/pybind/mgr/cephadm/exchange.py @@ -0,0 +1,164 @@ +# Data exchange formats for communicating more +# complex data structures between the cephadm binary +# an the mgr module. 
+
+import json
+
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+    cast,
+)
+
+
+FuncT = TypeVar("FuncT", bound=Callable)
+
+
+class _DataField:
+    """A descriptor to map object fields into a data dictionary."""
+
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        field_type: Optional[FuncT] = None,
+    ):
+        self.name = name
+        self.field_type = field_type
+
+    def __set_name__(self, _: str, name: str) -> None:
+        if not self.name:
+            self.name = name
+
+    def __get__(self, obj: Any, objtype: Any = None) -> Any:
+        return obj.data[self.name]
+
+    def __set__(self, obj: Any, value: Any) -> None:
+        if self.field_type is not None:
+            obj.data[self.name] = self.field_type(value)
+        else:
+            obj.data[self.name] = value
+
+
+def _get_data(obj: Any) -> Any:
+    """Wrapper to get underlying data dicts from objects that
+    advertise having them.
+    """
+    _gd = getattr(obj, "get_data", None)
+    if _gd:
+        return _gd()
+    return obj
+
+
+def _or_none(field_type: FuncT) -> FuncT:
+    def _field_type_or_none(value: Any) -> Any:
+        if value is None:
+            return None
+        return field_type(value)
+
+    return cast(FuncT, _field_type_or_none)
+
+
+class DeployMeta:
+    """Deployment metadata. Child of Deploy. Used by cephadm to
+    determine when certain changes have been made.
+    """
+
+    service_name = _DataField(field_type=str)
+    ports = _DataField(field_type=list)
+    ip = _DataField(field_type=_or_none(str))
+    deployed_by = _DataField(field_type=_or_none(list))
+    rank = _DataField(field_type=_or_none(int))
+    rank_generation = _DataField(field_type=_or_none(int))
+    extra_container_args = _DataField(field_type=_or_none(list))
+    extra_entrypoint_args = _DataField(field_type=_or_none(list))
+
+    def __init__(
+        self,
+        init_data: Optional[Dict[str, Any]] = None,
+        *,
+        service_name: str = "",
+        ports: Optional[List[int]] = None,
+        ip: Optional[str] = None,
+        deployed_by: Optional[List[str]] = None,
+        rank: Optional[int] = None,
+        rank_generation: Optional[int] = None,
+        extra_container_args: Optional[List[Union[str, Dict[str, Any]]]] = None,
+        extra_entrypoint_args: Optional[List[Union[str, Dict[str, Any]]]] = None,
+    ):
+        self.data = dict(init_data or {})
+        # set fields
+        self.service_name = service_name
+        self.ports = ports or []
+        self.ip = ip
+        self.deployed_by = deployed_by
+        self.rank = rank
+        self.rank_generation = rank_generation
+        self.extra_container_args = extra_container_args
+        self.extra_entrypoint_args = extra_entrypoint_args
+
+    def get_data(self) -> Dict[str, Any]:
+        return self.data
+
+    to_simplified = get_data
+
+    @classmethod
+    def convert(
+        cls,
+        value: Union[Dict[str, Any], "DeployMeta", None],
+    ) -> "DeployMeta":
+        if not isinstance(value, DeployMeta):
+            return cls(value)
+        return value
+
+
+class Deploy:
+    """Set of fields that instructs cephadm to deploy a
+    service/daemon.
+    """
+
+    fsid = _DataField(field_type=str)
+    name = _DataField(field_type=str)
+    image = _DataField(field_type=str)
+    deploy_arguments = _DataField(field_type=list)
+    params = _DataField(field_type=dict)
+    meta = _DataField(field_type=DeployMeta.convert)
+    config_blobs = _DataField(field_type=dict)
+
+    def __init__(
+        self,
+        init_data: Optional[Dict[str, Any]] = None,
+        *,
+        fsid: str = "",
+        name: str = "",
+        image: str = "",
+        deploy_arguments: Optional[List[str]] = None,
+        params: Optional[Dict[str, Any]] = None,
+        meta: Optional[DeployMeta] = None,
+        config_blobs: Optional[Dict[str, Any]] = None,
+    ):
+        self.data = dict(init_data or {})
+        # set fields
+        self.fsid = fsid
+        self.name = name
+        self.image = image
+        self.deploy_arguments = deploy_arguments or []
+        self.params = params or {}
+        self.meta = DeployMeta.convert(meta)
+        self.config_blobs = config_blobs or {}
+
+    def get_data(self) -> Dict[str, Any]:
+        """Return the underlying data dict."""
+        return self.data
+
+    def to_simplified(self) -> Dict[str, Any]:
+        """Return a simplified serializable version of the object."""
+        return {k: _get_data(v) for k, v in self.get_data().items()}
+
+    def dump_json_str(self) -> str:
+        """Return the object's JSON string representation."""
+        return json.dumps(self.to_simplified())
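Because every field above is a ``_DataField`` descriptor writing into ``self.data``,
building a ``Deploy`` and serializing it is a plain dictionary round-trip. A small usage
sketch (all field values below are invented for illustration):

.. code:: python

    meta = DeployMeta(service_name='grafana', ports=[3000])
    d = Deploy(
        fsid='00000000-0000-0000-0000-000000000000',
        name='grafana.host1',
        image='quay.io/ceph/ceph-grafana:latest',
        params={'tcp_ports': [3000]},
        meta=meta,
    )
    # to_simplified() unwraps nested objects via their get_data(),
    # so the result is plain dicts/lists, ready for json.dumps()
    blob = d.dump_json_str()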
+ """ + + fsid = _DataField(field_type=str) + name = _DataField(field_type=str) + image = _DataField(field_type=str) + deploy_arguments = _DataField(field_type=list) + params = _DataField(field_type=dict) + meta = _DataField(field_type=DeployMeta.convert) + config_blobs = _DataField(field_type=dict) + + def __init__( + self, + init_data: Optional[Dict[str, Any]] = None, + *, + fsid: str = "", + name: str = "", + image: str = "", + deploy_arguments: Optional[List[str]] = None, + params: Optional[Dict[str, Any]] = None, + meta: Optional[DeployMeta] = None, + config_blobs: Optional[Dict[str, Any]] = None, + ): + self.data = dict(init_data or {}) + # set fields + self.fsid = fsid + self.name = name + self.image = image + self.deploy_arguments = deploy_arguments or [] + self.params = params or {} + self.meta = DeployMeta.convert(meta) + self.config_blobs = config_blobs or {} + + def get_data(self) -> Dict[str, Any]: + """Return the underlying data dict.""" + return self.data + + def to_simplified(self) -> Dict[str, Any]: + """Return a simplified serializable version of the object.""" + return {k: _get_data(v) for k, v in self.get_data().items()} + + def dump_json_str(self) -> str: + """Return the object's JSON string representation.""" + return json.dumps(self.to_simplified()) diff --git a/src/pybind/mgr/cephadm/http_server.py b/src/pybind/mgr/cephadm/http_server.py new file mode 100644 index 000000000..ef29d3b4e --- /dev/null +++ b/src/pybind/mgr/cephadm/http_server.py @@ -0,0 +1,101 @@ +import cherrypy +import threading +import logging +from typing import TYPE_CHECKING + +from cephadm.agent import AgentEndpoint +from cephadm.service_discovery import ServiceDiscovery +from mgr_util import test_port_allocation, PortAlreadyInUse +from orchestrator import OrchestratorError + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + + +def cherrypy_filter(record: logging.LogRecord) -> int: + blocked = [ + 'TLSV1_ALERT_DECRYPT_ERROR' + ] + msg = record.getMessage() + return not any([m for m in blocked if m in msg]) + + +logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) +cherrypy.log.access_log.propagate = False + + +class CephadmHttpServer(threading.Thread): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr = mgr + self.agent = AgentEndpoint(mgr) + self.service_discovery = ServiceDiscovery(mgr) + self.cherrypy_shutdown_event = threading.Event() + self._service_discovery_port = self.mgr.service_discovery_port + self.secure_monitoring_stack = self.mgr.secure_monitoring_stack + super().__init__(target=self.run) + + def configure_cherrypy(self) -> None: + cherrypy.config.update({ + 'environment': 'production', + 'engine.autoreload.on': False, + }) + + def configure(self) -> None: + self.configure_cherrypy() + self.agent.configure() + self.service_discovery.configure(self.mgr.service_discovery_port, + self.mgr.get_mgr_ip(), + self.secure_monitoring_stack) + + def config_update(self) -> None: + self.service_discovery_port = self.mgr.service_discovery_port + if self.secure_monitoring_stack != self.mgr.secure_monitoring_stack: + self.secure_monitoring_stack = self.mgr.secure_monitoring_stack + self.restart() + + @property + def service_discovery_port(self) -> int: + return self._service_discovery_port + + @service_discovery_port.setter + def service_discovery_port(self, value: int) -> None: + if self._service_discovery_port == value: + return + + try: + test_port_allocation(self.mgr.get_mgr_ip(), value) + except PortAlreadyInUse: + raise 
OrchestratorError(f'Service discovery port {value} is already in use. Listening on old port {self._service_discovery_port}.') + except Exception as e: + raise OrchestratorError(f'Cannot check service discovery port ip:{self.mgr.get_mgr_ip()} port:{value} error:{e}') + + self.mgr.log.info(f'Changing service discovery port from {self._service_discovery_port} to {value}...') + self._service_discovery_port = value + self.restart() + + def restart(self) -> None: + cherrypy.engine.stop() + cherrypy.server.httpserver = None + self.configure() + cherrypy.engine.start() + + def run(self) -> None: + try: + self.mgr.log.debug('Starting cherrypy engine...') + self.configure() + cherrypy.server.unsubscribe() # disable default server + cherrypy.engine.start() + self.mgr.log.debug('Cherrypy engine started.') + self.mgr._kick_serve_loop() + # wait for the shutdown event + self.cherrypy_shutdown_event.wait() + self.cherrypy_shutdown_event.clear() + cherrypy.engine.stop() + cherrypy.server.httpserver = None + self.mgr.log.debug('Cherrypy engine stopped.') + except Exception as e: + self.mgr.log.error(f'Failed to run cephadm http server: {e}') + + def shutdown(self) -> None: + self.mgr.log.debug('Stopping cherrypy engine...') + self.cherrypy_shutdown_event.set() diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py new file mode 100644 index 000000000..7153ca6dc --- /dev/null +++ b/src/pybind/mgr/cephadm/inventory.py @@ -0,0 +1,1565 @@ +import datetime +import enum +from copy import copy +import ipaddress +import itertools +import json +import logging +import math +import socket +from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \ + NamedTuple, Type + +import orchestrator +from ceph.deployment import inventory +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, TunedProfileSpec, IngressSpec +from ceph.utils import str_to_datetime, datetime_to_str, datetime_now +from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec + +from .utils import resolve_ip, SpecialHostLabels +from .migrations import queue_migrate_nfs_spec, queue_migrate_rgw_spec + +if TYPE_CHECKING: + from .module import CephadmOrchestrator + + +logger = logging.getLogger(__name__) + +HOST_CACHE_PREFIX = "host." +SPEC_STORE_PREFIX = "spec." +AGENT_CACHE_PREFIX = 'agent.' + + +class HostCacheStatus(enum.Enum): + stray = 'stray' + host = 'host' + devices = 'devices' + + +class Inventory: + """ + The inventory stores a HostSpec for all hosts persistently. + """ + + def __init__(self, mgr: 'CephadmOrchestrator'): + self.mgr = mgr + adjusted_addrs = False + + def is_valid_ip(ip: str) -> bool: + try: + ipaddress.ip_address(ip) + return True + except ValueError: + return False + + # load inventory + i = self.mgr.get_store('inventory') + if i: + self._inventory: Dict[str, dict] = json.loads(i) + # handle old clusters missing 'hostname' key from hostspec + for k, v in self._inventory.items(): + if 'hostname' not in v: + v['hostname'] = k + + # convert legacy non-IP addr? + if is_valid_ip(str(v.get('addr'))): + continue + if len(self._inventory) > 1: + if k == socket.gethostname(): + # Never try to resolve our own host! This is + # fraught and can lead to either a loopback + # address (due to podman's futzing with + # /etc/hosts) or a private IP based on the CNI + # configuration. 
Instead, wait until the mgr + # fails over to another host and let them resolve + # this host. + continue + ip = resolve_ip(cast(str, v.get('addr'))) + else: + # we only have 1 node in the cluster, so we can't + # rely on another host doing the lookup. use the + # IP the mgr binds to. + ip = self.mgr.get_mgr_ip() + if is_valid_ip(ip) and not ip.startswith('127.0.'): + self.mgr.log.info( + f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'" + ) + v['addr'] = ip + adjusted_addrs = True + if adjusted_addrs: + self.save() + else: + self._inventory = dict() + self._all_known_names: Dict[str, List[str]] = {} + logger.debug('Loaded inventory %s' % self._inventory) + + def keys(self) -> List[str]: + return list(self._inventory.keys()) + + def __contains__(self, host: str) -> bool: + return host in self._inventory or host in itertools.chain.from_iterable(self._all_known_names.values()) + + def _get_stored_name(self, host: str) -> str: + self.assert_host(host) + if host in self._inventory: + return host + for stored_name, all_names in self._all_known_names.items(): + if host in all_names: + return stored_name + return host + + def update_known_hostnames(self, hostname: str, shortname: str, fqdn: str) -> None: + for hname in [hostname, shortname, fqdn]: + # if we know the host by any of the names, store the full set of names + # in order to be able to check against those names for matching a host + if hname in self._inventory: + self._all_known_names[hname] = [hostname, shortname, fqdn] + return + logger.debug(f'got hostname set from gather-facts for unknown host: {[hostname, shortname, fqdn]}') + + def assert_host(self, host: str) -> None: + if host not in self: + raise OrchestratorError('host %s does not exist' % host) + + def add_host(self, spec: HostSpec) -> None: + if spec.hostname in self: + # addr + if self.get_addr(spec.hostname) != spec.addr: + self.set_addr(spec.hostname, spec.addr) + # labels + for label in spec.labels: + self.add_label(spec.hostname, label) + else: + self._inventory[spec.hostname] = spec.to_json() + self.save() + + def rm_host(self, host: str) -> None: + host = self._get_stored_name(host) + del self._inventory[host] + self._all_known_names.pop(host, []) + self.save() + + def set_addr(self, host: str, addr: str) -> None: + host = self._get_stored_name(host) + self._inventory[host]['addr'] = addr + self.save() + + def add_label(self, host: str, label: str) -> None: + host = self._get_stored_name(host) + + if 'labels' not in self._inventory[host]: + self._inventory[host]['labels'] = list() + if label not in self._inventory[host]['labels']: + self._inventory[host]['labels'].append(label) + self.save() + + def rm_label(self, host: str, label: str) -> None: + host = self._get_stored_name(host) + + if 'labels' not in self._inventory[host]: + self._inventory[host]['labels'] = list() + if label in self._inventory[host]['labels']: + self._inventory[host]['labels'].remove(label) + self.save() + + def has_label(self, host: str, label: str) -> bool: + host = self._get_stored_name(host) + return ( + host in self._inventory + and label in self._inventory[host].get('labels', []) + ) + + def get_addr(self, host: str) -> str: + host = self._get_stored_name(host) + return self._inventory[host].get('addr', host) + + def spec_from_dict(self, info: dict) -> HostSpec: + hostname = info['hostname'] + hostname = self._get_stored_name(hostname) + return HostSpec( + hostname, + addr=info.get('addr', hostname), + labels=info.get('labels', []), + status='Offline' if hostname in 
self.mgr.offline_hosts else info.get('status', ''),
+ )
+
+ def all_specs(self) -> List[HostSpec]:
+ return list(map(self.spec_from_dict, self._inventory.values()))
+
+ def get_host_with_state(self, state: str = "") -> List[str]:
+ """return a list of host names in a specific state"""
+ return [h for h in self._inventory if self._inventory[h].get("status", "").lower() == state]
+
+ def save(self) -> None:
+ self.mgr.set_store('inventory', json.dumps(self._inventory))
+
+
+class SpecDescription(NamedTuple):
+ spec: ServiceSpec
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ created: datetime.datetime
+ deleted: Optional[datetime.datetime]
+
+
+class SpecStore():
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr = mgr
+ self._specs = {} # type: Dict[str, ServiceSpec]
+ # service_name -> rank -> gen -> daemon_id
+ self._rank_maps = {} # type: Dict[str, Dict[int, Dict[int, Optional[str]]]]
+ self.spec_created = {} # type: Dict[str, datetime.datetime]
+ self.spec_deleted = {} # type: Dict[str, datetime.datetime]
+ self.spec_preview = {} # type: Dict[str, ServiceSpec]
+ self._needs_configuration: Dict[str, bool] = {}
+
+ @property
+ def all_specs(self) -> Mapping[str, ServiceSpec]:
+ """
+ returns active and deleted specs. Returns read-only dict.
+ """
+ return self._specs
+
+ def __contains__(self, name: str) -> bool:
+ return name in self._specs
+
+ def __getitem__(self, name: str) -> SpecDescription:
+ if name not in self._specs:
+ raise OrchestratorError(f'Service {name} not found.')
+ return SpecDescription(self._specs[name],
+ self._rank_maps.get(name),
+ self.spec_created[name],
+ self.spec_deleted.get(name, None))
+
+ @property
+ def active_specs(self) -> Mapping[str, ServiceSpec]:
+ return {k: v for k, v in self._specs.items() if k not in self.spec_deleted}
+
+ def load(self):
+ # type: () -> None
+ for k, v in self.mgr.get_store_prefix(SPEC_STORE_PREFIX).items():
+ service_name = k[len(SPEC_STORE_PREFIX):]
+ try:
+ j = cast(Dict[str, dict], json.loads(v))
+ if (
+ (self.mgr.migration_current or 0) < 3
+ and j['spec'].get('service_type') == 'nfs'
+ ):
+ self.mgr.log.debug(f'found legacy nfs spec {j}')
+ queue_migrate_nfs_spec(self.mgr, j)
+
+ if (
+ (self.mgr.migration_current or 0) < 6
+ and j['spec'].get('service_type') == 'rgw'
+ ):
+ queue_migrate_rgw_spec(self.mgr, j)
+
+ spec = ServiceSpec.from_json(j['spec'])
+ created = str_to_datetime(cast(str, j['created']))
+ self._specs[service_name] = spec
+ self.spec_created[service_name] = created
+
+ if 'deleted' in j:
+ deleted = str_to_datetime(cast(str, j['deleted']))
+ self.spec_deleted[service_name] = deleted
+
+ if 'needs_configuration' in j:
+ self._needs_configuration[service_name] = cast(bool, j['needs_configuration'])
+
+ if 'rank_map' in j and isinstance(j['rank_map'], dict):
+ self._rank_maps[service_name] = {}
+ for rank_str, m in j['rank_map'].items():
+ try:
+ rank = int(rank_str)
+ except ValueError:
+ logger.exception(f"failed to parse rank in {j['rank_map']}")
+ continue
+ if isinstance(m, dict):
+ self._rank_maps[service_name][rank] = {}
+ for gen_str, name in m.items():
+ try:
+ gen = int(gen_str)
+ except ValueError:
+ logger.exception(f"failed to parse gen in {j['rank_map']}")
+ continue
+ if isinstance(name, str) or name is None:
+ self._rank_maps[service_name][rank][gen] = name
+
+ self.mgr.log.debug('SpecStore: loaded spec for %s' % (
+ service_name))
+ except Exception as e:
+ self.mgr.log.warning('unable to load spec for %s: %s' % (
+ service_name, e))
+ pass
+
+ def save(
self, + spec: ServiceSpec, + update_create: bool = True, + ) -> None: + name = spec.service_name() + if spec.preview_only: + self.spec_preview[name] = spec + return None + self._specs[name] = spec + self._needs_configuration[name] = True + + if update_create: + self.spec_created[name] = datetime_now() + self._save(name) + + def save_rank_map(self, + name: str, + rank_map: Dict[int, Dict[int, Optional[str]]]) -> None: + self._rank_maps[name] = rank_map + self._save(name) + + def _save(self, name: str) -> None: + data: Dict[str, Any] = { + 'spec': self._specs[name].to_json(), + } + if name in self.spec_created: + data['created'] = datetime_to_str(self.spec_created[name]) + if name in self._rank_maps: + data['rank_map'] = self._rank_maps[name] + if name in self.spec_deleted: + data['deleted'] = datetime_to_str(self.spec_deleted[name]) + if name in self._needs_configuration: + data['needs_configuration'] = self._needs_configuration[name] + + self.mgr.set_store( + SPEC_STORE_PREFIX + name, + json.dumps(data, sort_keys=True), + ) + self.mgr.events.for_service(self._specs[name], + OrchestratorEvent.INFO, + 'service was created') + + def rm(self, service_name: str) -> bool: + if service_name not in self._specs: + return False + + if self._specs[service_name].preview_only: + self.finally_rm(service_name) + return True + + self.spec_deleted[service_name] = datetime_now() + self.save(self._specs[service_name], update_create=False) + return True + + def finally_rm(self, service_name): + # type: (str) -> bool + found = service_name in self._specs + if found: + del self._specs[service_name] + if service_name in self._rank_maps: + del self._rank_maps[service_name] + del self.spec_created[service_name] + if service_name in self.spec_deleted: + del self.spec_deleted[service_name] + if service_name in self._needs_configuration: + del self._needs_configuration[service_name] + self.mgr.set_store(SPEC_STORE_PREFIX + service_name, None) + return found + + def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]: + return self.spec_created.get(spec.service_name()) + + def set_unmanaged(self, service_name: str, value: bool) -> str: + if service_name not in self._specs: + return f'No service of name {service_name} found. Check "ceph orch ls" for all known services' + if self._specs[service_name].unmanaged == value: + return f'Service {service_name}{" already " if value else " not "}marked unmanaged. No action taken.' 
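+ # Re-saving the spec below persists the flipped flag, so the
+ # unmanaged state survives a mgr failover.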
+ self._specs[service_name].unmanaged = value
+ self.save(self._specs[service_name])
+ return f'Set unmanaged to {str(value)} for service {service_name}'
+
+ def needs_configuration(self, name: str) -> bool:
+ return self._needs_configuration.get(name, False)
+
+ def mark_needs_configuration(self, name: str) -> None:
+ if name in self._specs:
+ self._needs_configuration[name] = True
+ self._save(name)
+ else:
+ self.mgr.log.warning(f'Attempted to mark unknown service "{name}" as needing configuration')
+
+ def mark_configured(self, name: str) -> None:
+ if name in self._specs:
+ self._needs_configuration[name] = False
+ self._save(name)
+ else:
+ self.mgr.log.warning(f'Attempted to mark unknown service "{name}" as having been configured')
+
+
+class ClientKeyringSpec(object):
+ """
+ A client keyring file that we should maintain
+ """
+
+ def __init__(
+ self,
+ entity: str,
+ placement: PlacementSpec,
+ mode: Optional[int] = None,
+ uid: Optional[int] = None,
+ gid: Optional[int] = None,
+ ) -> None:
+ self.entity = entity
+ self.placement = placement
+ self.mode = mode or 0o600
+ self.uid = uid or 0
+ self.gid = gid or 0
+
+ def validate(self) -> None:
+ pass
+
+ def to_json(self) -> Dict[str, Any]:
+ return {
+ 'entity': self.entity,
+ 'placement': self.placement.to_json(),
+ 'mode': self.mode,
+ 'uid': self.uid,
+ 'gid': self.gid,
+ }
+
+ @property
+ def path(self) -> str:
+ return f'/etc/ceph/ceph.{self.entity}.keyring'
+
+ @classmethod
+ def from_json(cls: Type, data: dict) -> 'ClientKeyringSpec':
+ c = data.copy()
+ if 'placement' in c:
+ c['placement'] = PlacementSpec.from_json(c['placement'])
+ _cls = cls(**c)
+ _cls.validate()
+ return _cls
+
+
+class ClientKeyringStore():
+ """
+ Track client keyring files that we are supposed to maintain
+ """
+
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr: CephadmOrchestrator = mgr
+ self.keys: Dict[str, ClientKeyringSpec] = {}
+
+ def load(self) -> None:
+ c = self.mgr.get_store('client_keyrings') or b'{}'
+ j = json.loads(c)
+ for e, d in j.items():
+ self.keys[e] = ClientKeyringSpec.from_json(d)
+
+ def save(self) -> None:
+ data = {
+ k: v.to_json() for k, v in self.keys.items()
+ }
+ self.mgr.set_store('client_keyrings', json.dumps(data))
+
+ def update(self, ks: ClientKeyringSpec) -> None:
+ self.keys[ks.entity] = ks
+ self.save()
+
+ def rm(self, entity: str) -> None:
+ if entity in self.keys:
+ del self.keys[entity]
+ self.save()
+
+
+class TunedProfileStore():
+ """
+ Store for our tuned profile information
+ """
+
+ def __init__(self, mgr: "CephadmOrchestrator") -> None:
+ self.mgr: CephadmOrchestrator = mgr
+ self.profiles: Dict[str, TunedProfileSpec] = {}
+
+ def __contains__(self, profile: str) -> bool:
+ return profile in self.profiles
+
+ def load(self) -> None:
+ c = self.mgr.get_store('tuned_profiles') or b'{}'
+ j = json.loads(c)
+ for k, v in j.items():
+ self.profiles[k] = TunedProfileSpec.from_json(v)
+ self.profiles[k]._last_updated = datetime_to_str(datetime_now())
+
+ def exists(self, profile_name: str) -> bool:
+ return profile_name in self.profiles
+
+ def save(self) -> None:
+ profiles_json = {k: v.to_json() for k, v in self.profiles.items()}
+ self.mgr.set_store('tuned_profiles', json.dumps(profiles_json))
+
+ def add_setting(self, profile: str, setting: str, value: str) -> None:
+ if profile in self.profiles:
+ self.profiles[profile].settings[setting] = value
+ self.profiles[profile]._last_updated = datetime_to_str(datetime_now())
+ self.save()
else:
+ logger.error(
+ f'Attempted to set setting "{setting}" for nonexistent os tuning profile "{profile}"')
+
+ def rm_setting(self, profile: str, setting: str) -> None:
+ if profile in self.profiles:
+ if setting in self.profiles[profile].settings:
+ self.profiles[profile].settings.pop(setting, '')
+ self.profiles[profile]._last_updated = datetime_to_str(datetime_now())
+ self.save()
+ else:
+ logger.error(
+ f'Attempted to remove nonexistent setting "{setting}" from os tuning profile "{profile}"')
+ else:
+ logger.error(
+ f'Attempted to remove setting "{setting}" from nonexistent os tuning profile "{profile}"')
+
+ def add_profile(self, spec: TunedProfileSpec) -> None:
+ spec._last_updated = datetime_to_str(datetime_now())
+ self.profiles[spec.profile_name] = spec
+ self.save()
+
+ def rm_profile(self, profile: str) -> None:
+ if profile in self.profiles:
+ self.profiles.pop(profile, TunedProfileSpec(''))
+ else:
+ logger.error(f'Attempted to remove nonexistent os tuning profile "{profile}"')
+ self.save()
+
+ def last_updated(self, profile: str) -> Optional[datetime.datetime]:
+ if profile not in self.profiles or not self.profiles[profile]._last_updated:
+ return None
+ return str_to_datetime(self.profiles[profile]._last_updated)
+
+ def set_last_updated(self, profile: str, new_datetime: datetime.datetime) -> None:
+ if profile in self.profiles:
+ self.profiles[profile]._last_updated = datetime_to_str(new_datetime)
+
+ def list_profiles(self) -> List[TunedProfileSpec]:
+ return [p for p in self.profiles.values()]
+
+
+class HostCache():
+ """
+ HostCache stores different things:
+
+ 1. `daemons`: Deployed daemons O(daemons)
+
+ They're part of the configuration nowadays and need to be
+ persistent. The name "daemon cache" is unfortunately a bit misleading.
+ For example, we really need to know where daemons are deployed on
+ hosts that are offline.
+
+ 2. `devices`: ceph-volume inventory cache O(hosts)
+
+ As soon as this is populated, it becomes more or less read-only.
+
+ 3. `networks`: network interfaces for each host. O(hosts)
+
+ This is needed in order to deploy MONs. It is mostly read-only.
+
+ 4. `last_client_files` O(hosts)
+
+ Stores the last digest and owner/mode for files we've pushed to /etc/ceph
+ (ceph.conf or client keyrings).
+
+ 5. `scheduled_daemon_actions`: O(daemons)
+
+ Used to run daemon actions after deploying a daemon. We need to
+ store it persistently, in order to stay consistent across
+ MGR failovers.
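+
+ All of the above is persisted per host by ``save_host()`` under
+ ``host.<hostname>`` config-key entries (HOST_CACHE_PREFIX).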
+ """ + + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]] + self._tmp_daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]] + self.last_daemon_update = {} # type: Dict[str, datetime.datetime] + self.devices = {} # type: Dict[str, List[inventory.Device]] + self.facts = {} # type: Dict[str, Dict[str, Any]] + self.last_facts_update = {} # type: Dict[str, datetime.datetime] + self.last_autotune = {} # type: Dict[str, datetime.datetime] + self.osdspec_previews = {} # type: Dict[str, List[Dict[str, Any]]] + self.osdspec_last_applied = {} # type: Dict[str, Dict[str, datetime.datetime]] + self.networks = {} # type: Dict[str, Dict[str, Dict[str, List[str]]]] + self.last_network_update = {} # type: Dict[str, datetime.datetime] + self.last_device_update = {} # type: Dict[str, datetime.datetime] + self.last_device_change = {} # type: Dict[str, datetime.datetime] + self.last_tuned_profile_update = {} # type: Dict[str, datetime.datetime] + self.daemon_refresh_queue = [] # type: List[str] + self.device_refresh_queue = [] # type: List[str] + self.network_refresh_queue = [] # type: List[str] + self.osdspec_previews_refresh_queue = [] # type: List[str] + + # host -> daemon name -> dict + self.daemon_config_deps = {} # type: Dict[str, Dict[str, Dict[str,Any]]] + self.last_host_check = {} # type: Dict[str, datetime.datetime] + self.loading_osdspec_preview = set() # type: Set[str] + self.last_client_files: Dict[str, Dict[str, Tuple[str, int, int, int]]] = {} + self.registry_login_queue: Set[str] = set() + + self.scheduled_daemon_actions: Dict[str, Dict[str, str]] = {} + + self.metadata_up_to_date = {} # type: Dict[str, bool] + + def load(self): + # type: () -> None + for k, v in self.mgr.get_store_prefix(HOST_CACHE_PREFIX).items(): + host = k[len(HOST_CACHE_PREFIX):] + if self._get_host_cache_entry_status(host) != HostCacheStatus.host: + if self._get_host_cache_entry_status(host) == HostCacheStatus.devices: + continue + self.mgr.log.warning('removing stray HostCache host record %s' % ( + host)) + self.mgr.set_store(k, None) + try: + j = json.loads(v) + if 'last_device_update' in j: + self.last_device_update[host] = str_to_datetime(j['last_device_update']) + else: + self.device_refresh_queue.append(host) + if 'last_device_change' in j: + self.last_device_change[host] = str_to_datetime(j['last_device_change']) + # for services, we ignore the persisted last_*_update + # and always trigger a new scrape on mgr restart. 
+ self.daemon_refresh_queue.append(host)
+ self.network_refresh_queue.append(host)
+ self.daemons[host] = {}
+ self.osdspec_previews[host] = []
+ self.osdspec_last_applied[host] = {}
+ self.networks[host] = {}
+ self.daemon_config_deps[host] = {}
+ for name, d in j.get('daemons', {}).items():
+ self.daemons[host][name] = \
+ orchestrator.DaemonDescription.from_json(d)
+ self.devices[host] = []
+ # still want to check old device location for upgrade scenarios
+ for d in j.get('devices', []):
+ self.devices[host].append(inventory.Device.from_json(d))
+ self.devices[host] += self.load_host_devices(host)
+ self.networks[host] = j.get('networks_and_interfaces', {})
+ self.osdspec_previews[host] = j.get('osdspec_previews', {})
+ self.last_client_files[host] = j.get('last_client_files', {})
+ for name, ts in j.get('osdspec_last_applied', {}).items():
+ self.osdspec_last_applied[host][name] = str_to_datetime(ts)
+
+ for name, d in j.get('daemon_config_deps', {}).items():
+ self.daemon_config_deps[host][name] = {
+ 'deps': d.get('deps', []),
+ 'last_config': str_to_datetime(d['last_config']),
+ }
+ if 'last_host_check' in j:
+ self.last_host_check[host] = str_to_datetime(j['last_host_check'])
+ if 'last_tuned_profile_update' in j:
+ self.last_tuned_profile_update[host] = str_to_datetime(
+ j['last_tuned_profile_update'])
+ self.registry_login_queue.add(host)
+ self.scheduled_daemon_actions[host] = j.get('scheduled_daemon_actions', {})
+ self.metadata_up_to_date[host] = j.get('metadata_up_to_date', False)
+
+ self.mgr.log.debug(
+ 'HostCache.load: host %s has %d daemons, '
+ '%d devices, %d networks' % (
+ host, len(self.daemons[host]), len(self.devices[host]),
+ len(self.networks[host])))
+ except Exception as e:
+ self.mgr.log.warning('unable to load cached state for %s: %s' % (
+ host, e))
+ pass
+
+ def _get_host_cache_entry_status(self, host: str) -> HostCacheStatus:
+ # return whether a host cache entry in the config-key
+ # store is for a host, a set of devices or is stray.
+ # for a host, the entry name will match a hostname in our
+ # inventory. For devices, it will be formatted
+ # <hostname>.devices.<integer> where <hostname> is
+ # in our inventory.
If neither case applies, it is stray
+ if host in self.mgr.inventory:
+ return HostCacheStatus.host
+ try:
+ # try stripping off the ".devices.<integer>" and see if we get
+ # a host name that matches our inventory
+ actual_host = '.'.join(host.split('.')[:-2])
+ return HostCacheStatus.devices if actual_host in self.mgr.inventory else HostCacheStatus.stray
+ except Exception:
+ return HostCacheStatus.stray
+
+ def update_host_daemons(self, host, dm):
+ # type: (str, Dict[str, orchestrator.DaemonDescription]) -> None
+ self.daemons[host] = dm
+ self._tmp_daemons.pop(host, {})
+ self.last_daemon_update[host] = datetime_now()
+
+ def append_tmp_daemon(self, host: str, dd: orchestrator.DaemonDescription) -> None:
+ # for storing empty daemon descriptions representing daemons we have
+ # just deployed but not yet had the chance to pick up in a daemon refresh
+ # _tmp_daemons is cleared for a host upon receiving a real update of the
+ # host's daemons
+ if host not in self._tmp_daemons:
+ self._tmp_daemons[host] = {}
+ self._tmp_daemons[host][dd.name()] = dd
+
+ def update_host_facts(self, host, facts):
+ # type: (str, Dict[str, Dict[str, Any]]) -> None
+ self.facts[host] = facts
+ hostnames: List[str] = []
+ for k in ['hostname', 'shortname', 'fqdn']:
+ v = facts.get(k, '')
+ hostnames.append(v if isinstance(v, str) else '')
+ self.mgr.inventory.update_known_hostnames(hostnames[0], hostnames[1], hostnames[2])
+ self.last_facts_update[host] = datetime_now()
+
+ def update_autotune(self, host: str) -> None:
+ self.last_autotune[host] = datetime_now()
+
+ def invalidate_autotune(self, host: str) -> None:
+ if host in self.last_autotune:
+ del self.last_autotune[host]
+
+ def devices_changed(self, host: str, b: List[inventory.Device]) -> bool:
+ old_devs = inventory.Devices(self.devices[host])
+ new_devs = inventory.Devices(b)
+ # relying on Devices class __eq__ function here
+ if old_devs != new_devs:
+ self.mgr.log.info("Detected new or changed devices on %s" % host)
+ return True
+ return False
+
+ def update_host_devices(
+ self,
+ host: str,
+ dls: List[inventory.Device],
+ ) -> None:
+ if (
+ host not in self.devices
+ or host not in self.last_device_change
+ or self.devices_changed(host, dls)
+ ):
+ self.last_device_change[host] = datetime_now()
+ self.last_device_update[host] = datetime_now()
+ self.devices[host] = dls
+
+ def update_host_networks(
+ self,
+ host: str,
+ nets: Dict[str, Dict[str, List[str]]]
+ ) -> None:
+ self.networks[host] = nets
+ self.last_network_update[host] = datetime_now()
+
+ def update_daemon_config_deps(self, host: str, name: str, deps: List[str], stamp: datetime.datetime) -> None:
+ self.daemon_config_deps[host][name] = {
+ 'deps': deps,
+ 'last_config': stamp,
+ }
+
+ def update_last_host_check(self, host):
+ # type: (str) -> None
+ self.last_host_check[host] = datetime_now()
+
+ def update_osdspec_last_applied(self, host, service_name, ts):
+ # type: (str, str, datetime.datetime) -> None
+ self.osdspec_last_applied[host][service_name] = ts
+
+ def update_client_file(self,
+ host: str,
+ path: str,
+ digest: str,
+ mode: int,
+ uid: int,
+ gid: int) -> None:
+ if host not in self.last_client_files:
+ self.last_client_files[host] = {}
+ self.last_client_files[host][path] = (digest, mode, uid, gid)
+
+ def removed_client_file(self, host: str, path: str) -> None:
+ if (
+ host in self.last_client_files
+ and path in self.last_client_files[host]
+ ):
+ del self.last_client_files[host][path]
+
+ def prime_empty_host(self, host):
+ # type: (str) -> None
+ """
Install an empty entry for a host + """ + self.daemons[host] = {} + self.devices[host] = [] + self.networks[host] = {} + self.osdspec_previews[host] = [] + self.osdspec_last_applied[host] = {} + self.daemon_config_deps[host] = {} + self.daemon_refresh_queue.append(host) + self.device_refresh_queue.append(host) + self.network_refresh_queue.append(host) + self.osdspec_previews_refresh_queue.append(host) + self.registry_login_queue.add(host) + self.last_client_files[host] = {} + + def refresh_all_host_info(self, host): + # type: (str) -> None + + self.last_host_check.pop(host, None) + self.daemon_refresh_queue.append(host) + self.registry_login_queue.add(host) + self.device_refresh_queue.append(host) + self.last_facts_update.pop(host, None) + self.osdspec_previews_refresh_queue.append(host) + self.last_autotune.pop(host, None) + + def invalidate_host_daemons(self, host): + # type: (str) -> None + self.daemon_refresh_queue.append(host) + if host in self.last_daemon_update: + del self.last_daemon_update[host] + self.mgr.event.set() + + def invalidate_host_devices(self, host): + # type: (str) -> None + self.device_refresh_queue.append(host) + if host in self.last_device_update: + del self.last_device_update[host] + self.mgr.event.set() + + def invalidate_host_networks(self, host): + # type: (str) -> None + self.network_refresh_queue.append(host) + if host in self.last_network_update: + del self.last_network_update[host] + self.mgr.event.set() + + def distribute_new_registry_login_info(self) -> None: + self.registry_login_queue = set(self.mgr.inventory.keys()) + + def save_host(self, host: str) -> None: + j: Dict[str, Any] = { + 'daemons': {}, + 'devices': [], + 'osdspec_previews': [], + 'osdspec_last_applied': {}, + 'daemon_config_deps': {}, + } + if host in self.last_daemon_update: + j['last_daemon_update'] = datetime_to_str(self.last_daemon_update[host]) + if host in self.last_device_update: + j['last_device_update'] = datetime_to_str(self.last_device_update[host]) + if host in self.last_network_update: + j['last_network_update'] = datetime_to_str(self.last_network_update[host]) + if host in self.last_device_change: + j['last_device_change'] = datetime_to_str(self.last_device_change[host]) + if host in self.last_tuned_profile_update: + j['last_tuned_profile_update'] = datetime_to_str(self.last_tuned_profile_update[host]) + if host in self.daemons: + for name, dd in self.daemons[host].items(): + j['daemons'][name] = dd.to_json() + if host in self.networks: + j['networks_and_interfaces'] = self.networks[host] + if host in self.daemon_config_deps: + for name, depi in self.daemon_config_deps[host].items(): + j['daemon_config_deps'][name] = { + 'deps': depi.get('deps', []), + 'last_config': datetime_to_str(depi['last_config']), + } + if host in self.osdspec_previews and self.osdspec_previews[host]: + j['osdspec_previews'] = self.osdspec_previews[host] + if host in self.osdspec_last_applied: + for name, ts in self.osdspec_last_applied[host].items(): + j['osdspec_last_applied'][name] = datetime_to_str(ts) + + if host in self.last_host_check: + j['last_host_check'] = datetime_to_str(self.last_host_check[host]) + + if host in self.last_client_files: + j['last_client_files'] = self.last_client_files[host] + if host in self.scheduled_daemon_actions: + j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host] + if host in self.metadata_up_to_date: + j['metadata_up_to_date'] = self.metadata_up_to_date[host] + if host in self.devices: + self.save_host_devices(host) + + 
self.mgr.set_store(HOST_CACHE_PREFIX + host, json.dumps(j)) + + def save_host_devices(self, host: str) -> None: + if host not in self.devices or not self.devices[host]: + logger.debug(f'Host {host} has no devices to save') + return + + devs: List[Dict[str, Any]] = [] + for d in self.devices[host]: + devs.append(d.to_json()) + + def byte_len(s: str) -> int: + return len(s.encode('utf-8')) + + dev_cache_counter: int = 0 + cache_size: int = self.mgr.get_foreign_ceph_option('mon', 'mon_config_key_max_entry_size') + if cache_size is not None and cache_size != 0 and byte_len(json.dumps(devs)) > cache_size - 1024: + # no guarantee all device entries take up the same amount of space + # splitting it up so there's one more entry than we need should be fairly + # safe and save a lot of extra logic checking sizes + cache_entries_needed = math.ceil(byte_len(json.dumps(devs)) / cache_size) + 1 + dev_sublist_size = math.ceil(len(devs) / cache_entries_needed) + dev_lists: List[List[Dict[str, Any]]] = [devs[i:i + dev_sublist_size] + for i in range(0, len(devs), dev_sublist_size)] + for dev_list in dev_lists: + dev_dict: Dict[str, Any] = {'devices': dev_list} + if dev_cache_counter == 0: + dev_dict.update({'entries': len(dev_lists)}) + self.mgr.set_store(HOST_CACHE_PREFIX + host + '.devices.' + + str(dev_cache_counter), json.dumps(dev_dict)) + dev_cache_counter += 1 + else: + self.mgr.set_store(HOST_CACHE_PREFIX + host + '.devices.' + + str(dev_cache_counter), json.dumps({'devices': devs, 'entries': 1})) + + def load_host_devices(self, host: str) -> List[inventory.Device]: + dev_cache_counter: int = 0 + devs: List[Dict[str, Any]] = [] + dev_entries: int = 0 + try: + # number of entries for the host's devices should be in + # the "entries" field of the first entry + dev_entries = json.loads(self.mgr.get_store( + HOST_CACHE_PREFIX + host + '.devices.0')).get('entries') + except Exception: + logger.debug(f'No device entries found for host {host}') + for i in range(dev_entries): + try: + new_devs = json.loads(self.mgr.get_store( + HOST_CACHE_PREFIX + host + '.devices.' + str(i))).get('devices', []) + if len(new_devs) > 0: + # verify list contains actual device objects by trying to load one from json + inventory.Device.from_json(new_devs[0]) + # if we didn't throw an Exception on above line, we can add the devices + devs = devs + new_devs + dev_cache_counter += 1 + except Exception as e: + logger.error(('Hit exception trying to load devices from ' + + f'{HOST_CACHE_PREFIX + host + ".devices." 
+ str(dev_cache_counter)} in key store: {e}'))
+ return []
+ return [inventory.Device.from_json(d) for d in devs]
+
+ def rm_host(self, host):
+ # type: (str) -> None
+ if host in self.daemons:
+ del self.daemons[host]
+ if host in self.devices:
+ del self.devices[host]
+ if host in self.facts:
+ del self.facts[host]
+ if host in self.last_facts_update:
+ del self.last_facts_update[host]
+ if host in self.last_autotune:
+ del self.last_autotune[host]
+ if host in self.osdspec_previews:
+ del self.osdspec_previews[host]
+ if host in self.osdspec_last_applied:
+ del self.osdspec_last_applied[host]
+ if host in self.loading_osdspec_preview:
+ self.loading_osdspec_preview.remove(host)
+ if host in self.networks:
+ del self.networks[host]
+ if host in self.last_daemon_update:
+ del self.last_daemon_update[host]
+ if host in self.last_device_update:
+ del self.last_device_update[host]
+ if host in self.last_network_update:
+ del self.last_network_update[host]
+ if host in self.last_device_change:
+ del self.last_device_change[host]
+ if host in self.last_tuned_profile_update:
+ del self.last_tuned_profile_update[host]
+ if host in self.daemon_config_deps:
+ del self.daemon_config_deps[host]
+ if host in self.scheduled_daemon_actions:
+ del self.scheduled_daemon_actions[host]
+ if host in self.last_client_files:
+ del self.last_client_files[host]
+ self.mgr.set_store(HOST_CACHE_PREFIX + host, None)
+
+ def get_hosts(self):
+ # type: () -> List[str]
+ return list(self.daemons)
+
+ def get_schedulable_hosts(self) -> List[HostSpec]:
+ """
+ Returns all usable hosts that went through _refresh_host_daemons().
+
+ This mitigates a potential race, where a new host was added *after*
+ ``_refresh_host_daemons()`` was called, but *before*
+ ``_apply_all_specs()`` was called. Thus we could end up with hosts
+ where daemons might be running, but we have not yet detected them.
+ """
+ return [
+ h for h in self.mgr.inventory.all_specs()
+ if (
+ self.host_had_daemon_refresh(h.hostname)
+ and SpecialHostLabels.DRAIN_DAEMONS not in h.labels
+ )
+ ]
+
+ def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
+ """
+ Returns all hosts without the drain conf and keyrings
+ label (SpecialHostLabels.DRAIN_CONF_KEYRING) that have
+ had a refresh. That is equivalent to all hosts we
+ consider eligible for deployment of conf and keyring files.
+
+ Any host without that label is considered fair game for
+ a client keyring spec to match. However, we still want to
+ wait for refresh here so that we know which keyrings we've
+ already deployed.
+ """
+ return [
+ h for h in self.mgr.inventory.all_specs()
+ if (
+ self.host_had_daemon_refresh(h.hostname)
+ and SpecialHostLabels.DRAIN_CONF_KEYRING not in h.labels
+ )
+ ]
+
+ def get_non_draining_hosts(self) -> List[HostSpec]:
+ """
+ Returns all hosts that do not have the drain daemons label
+ (SpecialHostLabels.DRAIN_DAEMONS).
+ + Useful for the agent who needs this specific list rather than the + schedulable_hosts since the agent needs to be deployed on hosts with + no daemon refresh + """ + return [ + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS not in h.labels + ] + + def get_draining_hosts(self) -> List[HostSpec]: + """ + Returns all hosts that have the drain daemons label (SpecialHostLabels.DRAIN_DAEMONS) + and therefore should have no daemons placed on them, but are potentially still reachable + """ + return [ + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS in h.labels + ] + + def get_conf_keyring_draining_hosts(self) -> List[HostSpec]: + """ + Returns all hosts that have drain conf and keyrings label (SpecialHostLabels.DRAIN_CONF_KEYRING) + and therefore should have no config files or client keyring placed on them, but are + potentially still reachable + """ + return [ + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_CONF_KEYRING in h.labels + ] + + def get_unreachable_hosts(self) -> List[HostSpec]: + """ + Return all hosts that are offline or in maintenance mode. + + The idea is we should not touch the daemons on these hosts (since + in theory the hosts are inaccessible so we CAN'T touch them) but + we still want to count daemons that exist on these hosts toward the + placement so daemons on these hosts aren't just moved elsewhere + """ + return [ + h for h in self.mgr.inventory.all_specs() + if ( + h.status.lower() in ['maintenance', 'offline'] + or h.hostname in self.mgr.offline_hosts + ) + ] + + def is_host_unreachable(self, hostname: str) -> bool: + # take hostname and return if it matches the hostname of an unreachable host + return hostname in [h.hostname for h in self.get_unreachable_hosts()] + + def is_host_schedulable(self, hostname: str) -> bool: + # take hostname and return if it matches the hostname of a schedulable host + return hostname in [h.hostname for h in self.get_schedulable_hosts()] + + def is_host_draining(self, hostname: str) -> bool: + # take hostname and return if it matches the hostname of a draining host + return hostname in [h.hostname for h in self.get_draining_hosts()] + + def get_facts(self, host: str) -> Dict[str, Any]: + return self.facts.get(host, {}) + + def _get_daemons(self) -> Iterator[orchestrator.DaemonDescription]: + for dm in self.daemons.copy().values(): + yield from dm.values() + + def _get_tmp_daemons(self) -> Iterator[orchestrator.DaemonDescription]: + for dm in self._tmp_daemons.copy().values(): + yield from dm.values() + + def get_daemons(self): + # type: () -> List[orchestrator.DaemonDescription] + return list(self._get_daemons()) + + def get_error_daemons(self) -> List[orchestrator.DaemonDescription]: + r = [] + for dd in self._get_daemons(): + if dd.status is not None and dd.status == orchestrator.DaemonDescriptionStatus.error: + r.append(dd) + return r + + def get_daemons_by_host(self, host: str) -> List[orchestrator.DaemonDescription]: + return list(self.daemons.get(host, {}).values()) + + def get_daemon(self, daemon_name: str, host: Optional[str] = None) -> orchestrator.DaemonDescription: + assert not daemon_name.startswith('ha-rgw.') + dds = self.get_daemons_by_host(host) if host else self._get_daemons() + for dd in dds: + if dd.name() == daemon_name: + return dd + + raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)') + + def has_daemon(self, daemon_name: str, host: Optional[str] = None) -> bool: + try: + self.get_daemon(daemon_name, host) 
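+ # get_daemon() raising OrchestratorError is the "not found" signal here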
+ except orchestrator.OrchestratorError: + return False + return True + + def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, orchestrator.DaemonDescription]]]: + def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: + dd = copy(dd_orig) + if host in self.mgr.offline_hosts: + dd.status = orchestrator.DaemonDescriptionStatus.error + dd.status_desc = 'host is offline' + elif self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses + # could be wrong. We must assume maintenance is working and daemons are stopped + dd.status = orchestrator.DaemonDescriptionStatus.stopped + dd.events = self.mgr.events.get_for_daemon(dd.name()) + return dd + + for host, dm in self.daemons.copy().items(): + yield host, {name: alter(host, d) for name, d in dm.items()} + + def get_daemons_by_service(self, service_name): + # type: (str) -> List[orchestrator.DaemonDescription] + assert not service_name.startswith('keepalived.') + assert not service_name.startswith('haproxy.') + + return list(dd for dd in self._get_daemons() if dd.service_name() == service_name) + + def get_related_service_daemons(self, service_spec: ServiceSpec) -> Optional[List[orchestrator.DaemonDescription]]: + if service_spec.service_type == 'ingress': + dds = list(dd for dd in self._get_daemons() if dd.service_name() == cast(IngressSpec, service_spec).backend_service) + dds += list(dd for dd in self._get_tmp_daemons() if dd.service_name() == cast(IngressSpec, service_spec).backend_service) + logger.debug(f'Found related daemons {dds} for service {service_spec.service_name()}') + return dds + else: + for ingress_spec in [cast(IngressSpec, s) for s in self.mgr.spec_store.active_specs.values() if s.service_type == 'ingress']: + if ingress_spec.backend_service == service_spec.service_name(): + dds = list(dd for dd in self._get_daemons() if dd.service_name() == ingress_spec.service_name()) + dds += list(dd for dd in self._get_tmp_daemons() if dd.service_name() == ingress_spec.service_name()) + logger.debug(f'Found related daemons {dds} for service {service_spec.service_name()}') + return dds + return None + + def get_daemons_by_type(self, service_type: str, host: str = '') -> List[orchestrator.DaemonDescription]: + assert service_type not in ['keepalived', 'haproxy'] + + daemons = self.daemons[host].values() if host else self._get_daemons() + + return [d for d in daemons if d.daemon_type in service_to_daemon_types(service_type)] + + def get_daemon_types(self, hostname: str) -> Set[str]: + """Provide a list of the types of daemons on the host""" + return cast(Set[str], {d.daemon_type for d in self.daemons[hostname].values()}) + + def get_daemon_names(self): + # type: () -> List[str] + return [d.name() for d in self._get_daemons()] + + def get_daemon_last_config_deps(self, host: str, name: str) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]: + if host in self.daemon_config_deps: + if name in self.daemon_config_deps[host]: + return self.daemon_config_deps[host][name].get('deps', []), \ + self.daemon_config_deps[host][name].get('last_config', None) + return None, None + + def get_host_client_files(self, host: str) -> Dict[str, Tuple[str, int, int, int]]: + return self.last_client_files.get(host, {}) + + def host_needs_daemon_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. 
Skipping daemon refresh') + return False + if host in self.daemon_refresh_queue: + self.daemon_refresh_queue.remove(host) + return True + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.daemon_cache_timeout) + if host not in self.last_daemon_update or self.last_daemon_update[host] < cutoff: + return True + if not self.mgr.cache.host_metadata_up_to_date(host): + return True + return False + + def host_needs_facts_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping gather facts refresh') + return False + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.facts_cache_timeout) + if host not in self.last_facts_update or self.last_facts_update[host] < cutoff: + return True + if not self.mgr.cache.host_metadata_up_to_date(host): + return True + return False + + def host_needs_autotune_memory(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping autotune') + return False + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.autotune_interval) + if host not in self.last_autotune or self.last_autotune[host] < cutoff: + return True + return False + + def host_needs_tuned_profile_update(self, host: str, profile: str) -> bool: + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Cannot apply tuned profile') + return False + if profile not in self.mgr.tuned_profiles: + logger.debug( + f'Cannot apply tuned profile {profile} on host {host}. Profile does not exist') + return False + if host not in self.last_tuned_profile_update: + return True + last_profile_update = self.mgr.tuned_profiles.last_updated(profile) + if last_profile_update is None: + self.mgr.tuned_profiles.set_last_updated(profile, datetime_now()) + return True + if self.last_tuned_profile_update[host] < last_profile_update: + return True + return False + + def host_had_daemon_refresh(self, host: str) -> bool: + """ + ... at least once. + """ + if host in self.last_daemon_update: + return True + if host not in self.daemons: + return False + return bool(self.daemons[host]) + + def host_needs_device_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping device refresh') + return False + if host in self.device_refresh_queue: + self.device_refresh_queue.remove(host) + return True + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.device_cache_timeout) + if host not in self.last_device_update or self.last_device_update[host] < cutoff: + return True + if not self.mgr.cache.host_metadata_up_to_date(host): + return True + return False + + def host_needs_network_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping network refresh') + return False + if host in self.network_refresh_queue: + self.network_refresh_queue.remove(host) + return True + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.device_cache_timeout) + if host not in self.last_network_update or self.last_network_update[host] < cutoff: + return True + if not self.mgr.cache.host_metadata_up_to_date(host): + return True + return False + + def host_needs_osdspec_preview_refresh(self, host: str) -> bool: + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. 
Skipping osdspec preview refresh')
+ return False
+ if host in self.osdspec_previews_refresh_queue:
+ self.osdspec_previews_refresh_queue.remove(host)
+ return True
+ # Since this is dependent on other factors (device and spec) this does not need
+ # to be updated periodically.
+ return False
+
+ def host_needs_check(self, host):
+ # type: (str) -> bool
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.host_check_interval)
+ return host not in self.last_host_check or self.last_host_check[host] < cutoff
+
+ def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool:
+ if (
+ host not in self.devices
+ or host not in self.last_device_change
+ or host not in self.last_device_update
+ or host not in self.osdspec_last_applied
+ or spec.service_name() not in self.osdspec_last_applied[host]
+ ):
+ return True
+ created = self.mgr.spec_store.get_created(spec)
+ if not created or created > self.last_device_change[host]:
+ return True
+ return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host]
+
+ def host_needs_registry_login(self, host: str) -> bool:
+ if host in self.mgr.offline_hosts:
+ return False
+ if host in self.registry_login_queue:
+ self.registry_login_queue.remove(host)
+ return True
+ return False
+
+ def host_metadata_up_to_date(self, host: str) -> bool:
+ if host not in self.metadata_up_to_date or not self.metadata_up_to_date[host]:
+ return False
+ return True
+
+ def all_host_metadata_up_to_date(self) -> bool:
+ if [h for h in self.get_hosts() if (not self.host_metadata_up_to_date(h) and not self.is_host_unreachable(h))]:
+ # this function is primarily for telling if it's safe to try and apply a service
+ # spec. Since offline/maintenance hosts aren't considered in that process anyway
+ # we don't want to return False if the host without up-to-date metadata is in one
+ # of those two categories.
+ return False
+ return True
+
+ def add_daemon(self, host, dd):
+ # type: (str, orchestrator.DaemonDescription) -> None
+ assert host in self.daemons
+ self.daemons[host][dd.name()] = dd
+
+ def rm_daemon(self, host: str, name: str) -> None:
+ assert not name.startswith('ha-rgw.')
+
+ if host in self.daemons:
+ if name in self.daemons[host]:
+ del self.daemons[host][name]
+
+ def daemon_cache_filled(self) -> bool:
+ """
+ i.e. we have checked the daemons for each host at least once,
+ excluding offline hosts.
+
+ We're not checking for `host_needs_daemon_refresh`, as this might never be
+ False for all hosts.
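+
+ Used e.g. by the migrations code, which must not rewrite specs before
+ every reachable host has been scraped at least once.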
+ """ + return all((self.host_had_daemon_refresh(h) or h in self.mgr.offline_hosts) + for h in self.get_hosts()) + + def schedule_daemon_action(self, host: str, daemon_name: str, action: str) -> None: + assert not daemon_name.startswith('ha-rgw.') + + priorities = { + 'start': 1, + 'restart': 2, + 'reconfig': 3, + 'redeploy': 4, + 'stop': 5, + 'rotate-key': 6, + } + existing_action = self.scheduled_daemon_actions.get(host, {}).get(daemon_name, None) + if existing_action and priorities[existing_action] > priorities[action]: + logger.debug( + f'skipping {action}ing {daemon_name}, cause {existing_action} already scheduled.') + return + + if host not in self.scheduled_daemon_actions: + self.scheduled_daemon_actions[host] = {} + self.scheduled_daemon_actions[host][daemon_name] = action + + def rm_scheduled_daemon_action(self, host: str, daemon_name: str) -> bool: + found = False + if host in self.scheduled_daemon_actions: + if daemon_name in self.scheduled_daemon_actions[host]: + del self.scheduled_daemon_actions[host][daemon_name] + found = True + if not self.scheduled_daemon_actions[host]: + del self.scheduled_daemon_actions[host] + return found + + def get_scheduled_daemon_action(self, host: str, daemon: str) -> Optional[str]: + assert not daemon.startswith('ha-rgw.') + + return self.scheduled_daemon_actions.get(host, {}).get(daemon) + + +class AgentCache(): + """ + AgentCache is used for storing metadata about agent daemons that must be kept + through MGR failovers + """ + + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.agent_config_deps = {} # type: Dict[str, Dict[str,Any]] + self.agent_counter = {} # type: Dict[str, int] + self.agent_timestamp = {} # type: Dict[str, datetime.datetime] + self.agent_keys = {} # type: Dict[str, str] + self.agent_ports = {} # type: Dict[str, int] + self.sending_agent_message = {} # type: Dict[str, bool] + + def load(self): + # type: () -> None + for k, v in self.mgr.get_store_prefix(AGENT_CACHE_PREFIX).items(): + host = k[len(AGENT_CACHE_PREFIX):] + if host not in self.mgr.inventory: + self.mgr.log.warning('removing stray AgentCache record for agent on %s' % ( + host)) + self.mgr.set_store(k, None) + try: + j = json.loads(v) + self.agent_config_deps[host] = {} + conf_deps = j.get('agent_config_deps', {}) + if conf_deps: + conf_deps['last_config'] = str_to_datetime(conf_deps['last_config']) + self.agent_config_deps[host] = conf_deps + self.agent_counter[host] = int(j.get('agent_counter', 1)) + self.agent_timestamp[host] = str_to_datetime( + j.get('agent_timestamp', datetime_to_str(datetime_now()))) + self.agent_keys[host] = str(j.get('agent_keys', '')) + agent_port = int(j.get('agent_ports', 0)) + if agent_port: + self.agent_ports[host] = agent_port + + except Exception as e: + self.mgr.log.warning('unable to load cached state for agent on host %s: %s' % ( + host, e)) + pass + + def save_agent(self, host: str) -> None: + j: Dict[str, Any] = {} + if host in self.agent_config_deps: + j['agent_config_deps'] = { + 'deps': self.agent_config_deps[host].get('deps', []), + 'last_config': datetime_to_str(self.agent_config_deps[host]['last_config']), + } + if host in self.agent_counter: + j['agent_counter'] = self.agent_counter[host] + if host in self.agent_keys: + j['agent_keys'] = self.agent_keys[host] + if host in self.agent_ports: + j['agent_ports'] = self.agent_ports[host] + if host in self.agent_timestamp: + j['agent_timestamp'] = datetime_to_str(self.agent_timestamp[host]) + + 
self.mgr.set_store(AGENT_CACHE_PREFIX + host, json.dumps(j)) + + def update_agent_config_deps(self, host: str, deps: List[str], stamp: datetime.datetime) -> None: + self.agent_config_deps[host] = { + 'deps': deps, + 'last_config': stamp, + } + + def get_agent_last_config_deps(self, host: str) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]: + if host in self.agent_config_deps: + return self.agent_config_deps[host].get('deps', []), \ + self.agent_config_deps[host].get('last_config', None) + return None, None + + def messaging_agent(self, host: str) -> bool: + if host not in self.sending_agent_message or not self.sending_agent_message[host]: + return False + return True + + def agent_config_successfully_delivered(self, daemon_spec: CephadmDaemonDeploySpec) -> None: + # agent successfully received new config. Update config/deps + assert daemon_spec.service_name == 'agent' + self.update_agent_config_deps( + daemon_spec.host, daemon_spec.deps, datetime_now()) + self.agent_timestamp[daemon_spec.host] = datetime_now() + self.agent_counter[daemon_spec.host] = 1 + self.save_agent(daemon_spec.host) + + +class EventStore(): + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.events = {} # type: Dict[str, List[OrchestratorEvent]] + + def add(self, event: OrchestratorEvent) -> None: + + if event.kind_subject() not in self.events: + self.events[event.kind_subject()] = [event] + + for e in self.events[event.kind_subject()]: + if e.message == event.message: + return + + self.events[event.kind_subject()].append(event) + + # limit to five events for now. + self.events[event.kind_subject()] = self.events[event.kind_subject()][-5:] + + def for_service(self, spec: ServiceSpec, level: str, message: str) -> None: + e = OrchestratorEvent(datetime_now(), 'service', + spec.service_name(), level, message) + self.add(e) + + def from_orch_error(self, e: OrchestratorError) -> None: + if e.event_subject is not None: + self.add(OrchestratorEvent( + datetime_now(), + e.event_subject[0], + e.event_subject[1], + "ERROR", + str(e) + )) + + def for_daemon(self, daemon_name: str, level: str, message: str) -> None: + e = OrchestratorEvent(datetime_now(), 'daemon', daemon_name, level, message) + self.add(e) + + def for_daemon_from_exception(self, daemon_name: str, e: Exception) -> None: + self.for_daemon( + daemon_name, + "ERROR", + str(e) + ) + + def cleanup(self) -> None: + # Needs to be properly done, in case events are persistently stored. 
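+ # For now this just drops events whose subject (service or daemon)
+ # no longer exists in the spec store or daemon cache.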
+ + unknowns: List[str] = [] + daemons = self.mgr.cache.get_daemon_names() + specs = self.mgr.spec_store.all_specs.keys() + for k_s, v in self.events.items(): + kind, subject = k_s.split(':') + if kind == 'service': + if subject not in specs: + unknowns.append(k_s) + elif kind == 'daemon': + if subject not in daemons: + unknowns.append(k_s) + + for k_s in unknowns: + del self.events[k_s] + + def get_for_service(self, name: str) -> List[OrchestratorEvent]: + return self.events.get('service:' + name, []) + + def get_for_daemon(self, name: str) -> List[OrchestratorEvent]: + return self.events.get('daemon:' + name, []) diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py new file mode 100644 index 000000000..27f777af6 --- /dev/null +++ b/src/pybind/mgr/cephadm/migrations.py @@ -0,0 +1,441 @@ +import json +import re +import logging +from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any, List + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec, RGWSpec +from cephadm.schedule import HostAssignment +from cephadm.utils import SpecialHostLabels +import rados + +from mgr_module import NFS_POOL_NAME +from orchestrator import OrchestratorError, DaemonDescription + +if TYPE_CHECKING: + from .module import CephadmOrchestrator + +LAST_MIGRATION = 6 + +logger = logging.getLogger(__name__) + + +class Migrations: + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr = mgr + + # Why having a global counter, instead of spec versions? + # + # for the first migration: + # The specs don't change in (this) migration. but the scheduler here. + # Adding the version to the specs at this time just felt wrong to me. + # + # And the specs are only another part of cephadm which needs potential upgrades. + # We have the cache, the inventory, the config store, the upgrade (imagine changing the + # upgrade code, while an old upgrade is still in progress), naming of daemons, + # fs-layout of the daemons, etc. + self.set_sane_migration_current() + + v = mgr.get_store('nfs_migration_queue') + self.nfs_migration_queue = json.loads(v) if v else [] + + r = mgr.get_store('rgw_migration_queue') + self.rgw_migration_queue = json.loads(r) if r else [] + + # for some migrations, we don't need to do anything except for + # incrementing migration_current. + # let's try to shortcut things here. + self.migrate(True) + + def set(self, val: int) -> None: + self.mgr.set_module_option('migration_current', val) + self.mgr.migration_current = val + + def set_sane_migration_current(self) -> None: + # migration current should always be an integer + # between 0 and LAST_MIGRATION (inclusive) in order to + # actually carry out migration. If we find + # it is None or too high of a value here we should + # set it to some sane value + mc: Optional[int] = self.mgr.migration_current + if mc is None: + logger.info('Found migration_current of "None". Setting to last migration.') + self.set(LAST_MIGRATION) + return + + if mc > LAST_MIGRATION: + logger.error(f'Found migration_current of {mc} when max should be {LAST_MIGRATION}. Setting back to 0.') + # something has gone wrong and caused migration_current + # to be higher than it should be able to be. 
Best option + # we have here is to just set it back to 0 + self.set(0) + + def is_migration_ongoing(self) -> bool: + self.set_sane_migration_current() + mc: Optional[int] = self.mgr.migration_current + return mc is None or mc < LAST_MIGRATION + + def verify_no_migration(self) -> None: + if self.is_migration_ongoing(): + # this is raised in module.serve() + raise OrchestratorError( + "cephadm migration still ongoing. Please wait, until the migration is complete.") + + def migrate(self, startup: bool = False) -> None: + if self.mgr.migration_current == 0: + if self.migrate_0_1(): + self.set(1) + + if self.mgr.migration_current == 1: + if self.migrate_1_2(): + self.set(2) + + if self.mgr.migration_current == 2 and not startup: + if self.migrate_2_3(): + self.set(3) + + if self.mgr.migration_current == 3: + if self.migrate_3_4(): + self.set(4) + + if self.mgr.migration_current == 4: + if self.migrate_4_5(): + self.set(5) + + if self.mgr.migration_current == 5: + if self.migrate_5_6(): + self.set(6) + + def migrate_0_1(self) -> bool: + """ + Migration 0 -> 1 + New scheduler that takes PlacementSpec as the bound and not as recommendation. + I.e. the new scheduler won't suggest any new placements outside of the hosts + specified by label etc. + + Which means, we have to make sure, we're not removing any daemons directly after + upgrading to the new scheduler. + + There is a potential race here: + 1. user updates his spec to remove daemons + 2. mgr gets upgraded to new scheduler, before the old scheduler removed the daemon + 3. now, we're converting the spec to explicit placement, thus reverting (1.) + I think this is ok. + """ + + def interesting_specs() -> Iterator[ServiceSpec]: + for s in self.mgr.spec_store.all_specs.values(): + if s.unmanaged: + continue + p = s.placement + if p is None: + continue + if p.count is None: + continue + if not p.hosts and not p.host_pattern and not p.label: + continue + yield s + + def convert_to_explicit(spec: ServiceSpec) -> None: + existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) + placements, to_add, to_remove = HostAssignment( + spec=spec, + hosts=self.mgr.inventory.all_specs(), + unreachable_hosts=self.mgr.cache.get_unreachable_hosts(), + draining_hosts=self.mgr.cache.get_draining_hosts(), + daemons=existing_daemons, + ).place() + + # We have to migrate, only if the new scheduler would remove daemons + if len(placements) >= len(existing_daemons): + return + + def to_hostname(d: DaemonDescription) -> HostPlacementSpec: + if d.hostname in old_hosts: + return old_hosts[d.hostname] + else: + assert d.hostname + return HostPlacementSpec(d.hostname, '', '') + + old_hosts = {h.hostname: h for h in spec.placement.hosts} + new_hosts = [to_hostname(d) for d in existing_daemons] + + new_placement = PlacementSpec( + hosts=new_hosts, + count=spec.placement.count + ) + + new_spec = ServiceSpec.from_json(spec.to_json()) + new_spec.placement = new_placement + + logger.info(f"Migrating {spec.one_line_str()} to explicit placement") + + self.mgr.spec_store.save(new_spec) + + specs = list(interesting_specs()) + if not specs: + return True # nothing to do. shortcut + + if not self.mgr.cache.daemon_cache_filled(): + logger.info("Unable to migrate yet. Daemon Cache still incomplete.") + return False + + for spec in specs: + convert_to_explicit(spec) + + return True + + def migrate_1_2(self) -> bool: + """ + After 15.2.4, we unified some service IDs: MONs, MGRs etc no longer have a service id. 
+        Which means, the service names changed:
+
+        mon.foo -> mon
+        mgr.foo -> mgr
+
+        This fixes the data structure consistency.
+        """
+        bad_specs = {}
+        for name, spec in self.mgr.spec_store.all_specs.items():
+            if name != spec.service_name():
+                bad_specs[name] = (spec.service_name(), spec)
+
+        for old, (new, old_spec) in bad_specs.items():
+            if new not in self.mgr.spec_store.all_specs:
+                spec = old_spec
+            else:
+                spec = self.mgr.spec_store.all_specs[new]
+            spec.unmanaged = True
+            self.mgr.spec_store.save(spec)
+            self.mgr.spec_store.finally_rm(old)
+
+        return True
+
+    def migrate_2_3(self) -> bool:
+        if self.nfs_migration_queue:
+            from nfs.cluster import create_ganesha_pool
+
+            create_ganesha_pool(self.mgr)
+            for service_id, pool, ns in self.nfs_migration_queue:
+                if pool != '.nfs':
+                    self.migrate_nfs_spec(service_id, pool, ns)
+            self.nfs_migration_queue = []
+            self.mgr.log.info('Done migrating all NFS services')
+        return True
+
+    def migrate_nfs_spec(self, service_id: str, pool: str, ns: Optional[str]) -> None:
+        renamed = False
+        if service_id.startswith('ganesha-'):
+            service_id = service_id[8:]
+            renamed = True
+
+        self.mgr.log.info(
+            f'Migrating nfs.{service_id} from legacy pool {pool} namespace {ns}'
+        )
+
+        # read exports
+        ioctx = self.mgr.rados.open_ioctx(pool)
+        if ns is not None:
+            ioctx.set_namespace(ns)
+        object_iterator = ioctx.list_objects()
+        exports = []
+        while True:
+            try:
+                obj = object_iterator.__next__()
+                if obj.key.startswith('export-'):
+                    self.mgr.log.debug(f'reading {obj.key}')
+                    exports.append(obj.read().decode())
+            except StopIteration:
+                break
+        self.mgr.log.info(f'Found {len(exports)} exports for legacy nfs.{service_id}')
+
+        # copy grace file
+        if service_id != ns:
+            try:
+                grace = ioctx.read("grace")
+                new_ioctx = self.mgr.rados.open_ioctx(NFS_POOL_NAME)
+                new_ioctx.set_namespace(service_id)
+                new_ioctx.write_full("grace", grace)
+                self.mgr.log.info('Migrated nfs-ganesha grace file')
+            except rados.ObjectNotFound:
+                self.mgr.log.debug('failed to read old grace file; skipping')
+
+        if renamed and f'nfs.ganesha-{service_id}' in self.mgr.spec_store:
+            # rename from nfs.ganesha-* to nfs.*. This will destroy old daemons and
+            # deploy new ones.
+            self.mgr.log.info(f'Replacing nfs.ganesha-{service_id} with nfs.{service_id}')
+            spec = self.mgr.spec_store[f'nfs.ganesha-{service_id}'].spec
+            self.mgr.spec_store.rm(f'nfs.ganesha-{service_id}')
+            spec.service_id = service_id
+            self.mgr.spec_store.save(spec, True)
+
+            # We have to remove the old daemons here as well, otherwise we'll end up with a port conflict.
+            daemons = [d.name()
+                       for d in self.mgr.cache.get_daemons_by_service(f'nfs.ganesha-{service_id}')]
+            self.mgr.log.info(f'Removing old nfs.ganesha-{service_id} daemons {daemons}')
+            self.mgr.remove_daemons(daemons)
+        else:
+            # redeploy all ganesha daemons to ensure that the daemon
+            # cephx keys are correct AND container configs are set up properly
+            daemons = [d.name() for d in self.mgr.cache.get_daemons_by_service(f'nfs.{service_id}')]
+            self.mgr.log.info(f'Removing old nfs.{service_id} daemons {daemons}')
+            self.mgr.remove_daemons(daemons)
+
+        # re-save service spec (without pool and namespace properties!)
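+        # (NFSServiceSpec dropped its pool/namespace properties after 16.2.5,
+        # so re-saving the stored spec rewrites it without those fields.)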
+        spec = self.mgr.spec_store[f'nfs.{service_id}'].spec
+        self.mgr.spec_store.save(spec)
+
+        # import exports
+        for export in exports:
+            ex = ''
+            for line in export.splitlines():
+                if (
+                    line.startswith(' secret_access_key =')
+                    or line.startswith(' user_id =')
+                ):
+                    continue
+                ex += line + '\n'
+            self.mgr.log.debug(f'importing export: {ex}')
+            ret, out, err = self.mgr.mon_command({
+                'prefix': 'nfs export apply',
+                'cluster_id': service_id
+            }, inbuf=ex)
+            if ret:
+                self.mgr.log.warning(f'Failed to migrate export ({ret}): {err}\nExport was:\n{ex}')
+        self.mgr.log.info(f'Done migrating nfs.{service_id}')
+
+    def migrate_3_4(self) -> bool:
+        # We can't set any host with the _admin label, but we're
+        # going to warn when calling `ceph orch host rm...`
+        if 'client.admin' not in self.mgr.keys.keys:
+            self.mgr._client_keyring_set(
+                entity='client.admin',
+                placement=f'label:{SpecialHostLabels.ADMIN}',
+            )
+        return True
+
+    def migrate_4_5(self) -> bool:
+        registry_url = self.mgr.get_module_option('registry_url')
+        registry_username = self.mgr.get_module_option('registry_username')
+        registry_password = self.mgr.get_module_option('registry_password')
+        if registry_url and registry_username and registry_password:
+
+            registry_credentials = {'url': registry_url,
+                                    'username': registry_username, 'password': registry_password}
+            self.mgr.set_store('registry_credentials', json.dumps(registry_credentials))
+
+            self.mgr.set_module_option('registry_url', None)
+            self.mgr.check_mon_command({
+                'prefix': 'config rm',
+                'who': 'mgr',
+                'key': 'mgr/cephadm/registry_url',
+            })
+            self.mgr.set_module_option('registry_username', None)
+            self.mgr.check_mon_command({
+                'prefix': 'config rm',
+                'who': 'mgr',
+                'key': 'mgr/cephadm/registry_username',
+            })
+            self.mgr.set_module_option('registry_password', None)
+            self.mgr.check_mon_command({
+                'prefix': 'config rm',
+                'who': 'mgr',
+                'key': 'mgr/cephadm/registry_password',
+            })
+
+        self.mgr.log.info('Done migrating registry login info')
+        return True
+
+    def migrate_rgw_spec(self, spec: Dict[Any, Any]) -> Optional[RGWSpec]:
+        """ Migrate an old rgw spec to the new format."""
+        new_spec = spec.copy()
+        field_content: List[str] = re.split(' +', new_spec['spec']['rgw_frontend_type'])
+        valid_spec = False
+        if 'beast' in field_content:
+            new_spec['spec']['rgw_frontend_type'] = 'beast'
+            field_content.remove('beast')
+            valid_spec = True
+        elif 'civetweb' in field_content:
+            new_spec['spec']['rgw_frontend_type'] = 'civetweb'
+            field_content.remove('civetweb')
+            valid_spec = True
+        else:
+            # Error: Should not happen as that would be an invalid RGW spec. In that case
+            # we keep the spec as it is, mark it as unmanaged to avoid the daemons being deleted
+            # and raise a health warning so the user can fix the issue manually later.
+            self.mgr.log.error(f"Cannot migrate RGW spec, bad rgw_frontend_type value: {spec['spec']['rgw_frontend_type']}.")
+
+        if valid_spec:
+            new_spec['spec']['rgw_frontend_extra_args'] = []
+            new_spec['spec']['rgw_frontend_extra_args'].extend(field_content)
+
+        return RGWSpec.from_json(new_spec)
+
+    def rgw_spec_needs_migration(self, spec: Dict[Any, Any]) -> bool:
+        if 'spec' not in spec:
+            # if users allowed cephadm to set up most of the
+            # attributes, it's possible there is no "spec" section
+            # inside the spec.
+            # In that case, no migration is needed.
+            return False
+        return 'rgw_frontend_type' in spec['spec'] \
+            and spec['spec']['rgw_frontend_type'] is not None \
+            and spec['spec']['rgw_frontend_type'].strip() not in ['beast', 'civetweb']
+
+    def migrate_5_6(self) -> bool:
+        """
+        Migration 5 -> 6
+
+        The old RGW spec used to allow 'bad' values in the rgw_frontend_type field. For example,
+        the following value used to be valid:
+
+          rgw_frontend_type: "beast endpoint=10.16.96.54:8043 tcp_nodelay=1"
+
+        As of the 17.2.6 release, these kinds of entries are no longer valid, and a stricter check
+        has been added to validate this field.
+
+        This migration logic detects these 'bad' values and tries to transform them to the new
+        valid format, where the rgw_frontend_type field can only be either 'beast' or 'civetweb'.
+        Any extra arguments detected in the rgw_frontend_type field are parsed and passed in the
+        new spec field rgw_frontend_extra_args.
+        """
+        self.mgr.log.debug(f'Starting rgw migration (queue length is {len(self.rgw_migration_queue)})')
+        for s in self.rgw_migration_queue:
+            spec = s['spec']
+            if self.rgw_spec_needs_migration(spec):
+                rgw_spec = self.migrate_rgw_spec(spec)
+                if rgw_spec is not None:
+                    logger.info(f"Migrating {spec} to new RGW with extra args format {rgw_spec}")
+                    self.mgr.spec_store.save(rgw_spec)
+            else:
+                logger.info(f"No migration is needed for rgw spec: {spec}")
+        self.rgw_migration_queue = []
+        return True
+
+
+def queue_migrate_rgw_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
+    """
+    As part of 17.2.6 a stricter RGW spec validation has been added, so the field
+    rgw_frontend_type can no longer be used to pass rgw-frontends parameters.
+    """
+    service_id = spec_dict['spec']['service_id']
+    queued = mgr.get_store('rgw_migration_queue') or '[]'
+    ls = json.loads(queued)
+    ls.append(spec_dict)
+    mgr.set_store('rgw_migration_queue', json.dumps(ls))
+    mgr.log.info(f'Queued rgw.{service_id} for migration')
+
+
+def queue_migrate_nfs_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
+    """
+    After 16.2.5 we dropped the NFSServiceSpec pool and namespace properties.
+    Queue up a migration to process later, once we are sure that RADOS is available
+    and so on.
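+
+    Queued entries are stored as [service_id, pool, namespace] triples under
+    the 'nfs_migration_queue' store key and are consumed by migrate_2_3().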
+ """ + service_id = spec_dict['spec']['service_id'] + args = spec_dict['spec'].get('spec', {}) + pool = args.pop('pool', 'nfs-ganesha') + ns = args.pop('namespace', service_id) + queued = mgr.get_store('nfs_migration_queue') or '[]' + ls = json.loads(queued) + ls.append([service_id, pool, ns]) + mgr.set_store('nfs_migration_queue', json.dumps(ls)) + mgr.log.info(f'Queued nfs.{service_id} for migration') diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py new file mode 100644 index 000000000..7b97ce74a --- /dev/null +++ b/src/pybind/mgr/cephadm/module.py @@ -0,0 +1,3405 @@ +import asyncio +import json +import errno +import ipaddress +import logging +import re +import shlex +from collections import defaultdict +from configparser import ConfigParser +from contextlib import contextmanager +from functools import wraps +from tempfile import TemporaryDirectory, NamedTemporaryFile +from threading import Event + +from cephadm.service_discovery import ServiceDiscovery + +import string +from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \ + Any, Set, TYPE_CHECKING, cast, NamedTuple, Sequence, Type, \ + Awaitable, Iterator + +import datetime +import os +import random +import multiprocessing.pool +import subprocess +from prettytable import PrettyTable + +from ceph.deployment import inventory +from ceph.deployment.drive_group import DriveGroupSpec +from ceph.deployment.service_spec import \ + ServiceSpec, PlacementSpec, \ + HostPlacementSpec, IngressSpec, \ + TunedProfileSpec, IscsiServiceSpec +from ceph.utils import str_to_datetime, datetime_to_str, datetime_now +from cephadm.serve import CephadmServe +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec +from cephadm.http_server import CephadmHttpServer +from cephadm.agent import CephadmAgentHelpers + + +from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +import orchestrator +from orchestrator.module import to_format, Format + +from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \ + CLICommandMeta, DaemonDescription, DaemonDescriptionStatus, handle_orch_error, \ + service_to_daemon_types +from orchestrator._interface import GenericSpec +from orchestrator._interface import daemon_type_to_service + +from . import utils +from . 
import ssh +from .migrations import Migrations +from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ + RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent, \ + CephExporterService +from .services.ingress import IngressService +from .services.container import CustomContainerService +from .services.iscsi import IscsiService +from .services.nvmeof import NvmeofService +from .services.nfs import NFSService +from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError +from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ + NodeExporterService, SNMPGatewayService, LokiService, PromtailService +from .services.jaeger import ElasticSearchService, JaegerAgentService, JaegerCollectorService, JaegerQueryService +from .schedule import HostAssignment +from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore, \ + ClientKeyringStore, ClientKeyringSpec, TunedProfileStore +from .upgrade import CephadmUpgrade +from .template import TemplateMgr +from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \ + cephadmNoImage, CEPH_UPGRADE_ORDER, SpecialHostLabels +from .configchecks import CephadmConfigChecks +from .offline_watcher import OfflineHostWatcher +from .tuned_profiles import TunedProfileUtils + +try: + import asyncssh +except ImportError as e: + asyncssh = None # type: ignore + asyncssh_import_error = str(e) + +logger = logging.getLogger(__name__) + +T = TypeVar('T') + +DEFAULT_SSH_CONFIG = """ +Host * + User root + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + ConnectTimeout=30 +""" + +# cherrypy likes to sys.exit on error. don't let it take us down too! + + +def os_exit_noop(status: int) -> None: + pass + + +os._exit = os_exit_noop # type: ignore + + +# Default container images ----------------------------------------------------- +DEFAULT_IMAGE = 'quay.io/ceph/ceph' # DO NOT ADD TAG TO THIS +DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0' +DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0' +DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.2' +DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0' +DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0' +DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0' +DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7' +DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' +DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4' +DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' +DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' +DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' +DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' +DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' +# ------------------------------------------------------------------------------ + + +def host_exists(hostname_position: int = 1) -> Callable: + """Check that a hostname exists in the inventory""" + def inner(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + this = args[0] # self object + hostname = args[hostname_position] + if hostname not in this.cache.get_hosts(): + candidates = ','.join([h for h in this.cache.get_hosts() if h.startswith(hostname)]) + help_msg = f"Did you mean {candidates}?" if candidates else "" + raise OrchestratorError( + f"Cannot find host '{hostname}' in the inventory. 
{help_msg}") + + return func(*args, **kwargs) + return wrapper + return inner + + +class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, + metaclass=CLICommandMeta): + + _STORE_HOST_PREFIX = "host" + + instance = None + NOTIFY_TYPES = [NotifyType.mon_map, NotifyType.pg_summary] + NATIVE_OPTIONS = [] # type: List[Any] + MODULE_OPTIONS = [ + Option( + 'ssh_config_file', + type='str', + default=None, + desc='customized SSH config file to connect to managed hosts', + ), + Option( + 'device_cache_timeout', + type='secs', + default=30 * 60, + desc='seconds to cache device inventory', + ), + Option( + 'device_enhanced_scan', + type='bool', + default=False, + desc='Use libstoragemgmt during device scans', + ), + Option( + 'inventory_list_all', + type='bool', + default=False, + desc='Whether ceph-volume inventory should report ' + 'more devices (mostly mappers (LVs / mpaths), partitions...)', + ), + Option( + 'daemon_cache_timeout', + type='secs', + default=10 * 60, + desc='seconds to cache service (daemon) inventory', + ), + Option( + 'facts_cache_timeout', + type='secs', + default=1 * 60, + desc='seconds to cache host facts data', + ), + Option( + 'host_check_interval', + type='secs', + default=10 * 60, + desc='how frequently to perform a host check', + ), + Option( + 'mode', + type='str', + enum_allowed=['root', 'cephadm-package'], + default='root', + desc='mode for remote execution of cephadm', + ), + Option( + 'container_image_base', + default=DEFAULT_IMAGE, + desc='Container image name, without the tag', + runtime=True, + ), + Option( + 'container_image_prometheus', + default=DEFAULT_PROMETHEUS_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_nvmeof', + default=DEFAULT_NVMEOF_IMAGE, + desc='Nvme-of container image', + ), + Option( + 'container_image_grafana', + default=DEFAULT_GRAFANA_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_alertmanager', + default=DEFAULT_ALERT_MANAGER_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_node_exporter', + default=DEFAULT_NODE_EXPORTER_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_loki', + default=DEFAULT_LOKI_IMAGE, + desc='Loki container image', + ), + Option( + 'container_image_promtail', + default=DEFAULT_PROMTAIL_IMAGE, + desc='Promtail container image', + ), + Option( + 'container_image_haproxy', + default=DEFAULT_HAPROXY_IMAGE, + desc='HAproxy container image', + ), + Option( + 'container_image_keepalived', + default=DEFAULT_KEEPALIVED_IMAGE, + desc='Keepalived container image', + ), + Option( + 'container_image_snmp_gateway', + default=DEFAULT_SNMP_GATEWAY_IMAGE, + desc='SNMP Gateway container image', + ), + Option( + 'container_image_elasticsearch', + default=DEFAULT_ELASTICSEARCH_IMAGE, + desc='elasticsearch container image', + ), + Option( + 'container_image_jaeger_agent', + default=DEFAULT_JAEGER_AGENT_IMAGE, + desc='Jaeger agent container image', + ), + Option( + 'container_image_jaeger_collector', + default=DEFAULT_JAEGER_COLLECTOR_IMAGE, + desc='Jaeger collector container image', + ), + Option( + 'container_image_jaeger_query', + default=DEFAULT_JAEGER_QUERY_IMAGE, + desc='Jaeger query container image', + ), + Option( + 'warn_on_stray_hosts', + type='bool', + default=True, + desc='raise a health warning if daemons are detected on a host ' + 'that is not managed by cephadm', + ), + Option( + 'warn_on_stray_daemons', + type='bool', + default=True, + desc='raise a health warning if daemons are detected ' + 'that are 
not managed by cephadm', + ), + Option( + 'warn_on_failed_host_check', + type='bool', + default=True, + desc='raise a health warning if the host check fails', + ), + Option( + 'log_to_cluster', + type='bool', + default=True, + desc='log to the "cephadm" cluster log channel"', + ), + Option( + 'allow_ptrace', + type='bool', + default=False, + desc='allow SYS_PTRACE capability on ceph containers', + long_desc='The SYS_PTRACE capability is needed to attach to a ' + 'process with gdb or strace. Enabling this options ' + 'can allow debugging daemons that encounter problems ' + 'at runtime.', + ), + Option( + 'container_init', + type='bool', + default=True, + desc='Run podman/docker with `--init`' + ), + Option( + 'prometheus_alerts_path', + type='str', + default='/etc/prometheus/ceph/ceph_default_alerts.yml', + desc='location of alerts to include in prometheus deployments', + ), + Option( + 'migration_current', + type='int', + default=None, + desc='internal - do not modify', + # used to track spec and other data migrations. + ), + Option( + 'config_dashboard', + type='bool', + default=True, + desc='manage configs like API endpoints in Dashboard.' + ), + Option( + 'manage_etc_ceph_ceph_conf', + type='bool', + default=False, + desc='Manage and own /etc/ceph/ceph.conf on the hosts.', + ), + Option( + 'manage_etc_ceph_ceph_conf_hosts', + type='str', + default='*', + desc='PlacementSpec describing on which hosts to manage /etc/ceph/ceph.conf', + ), + # not used anymore + Option( + 'registry_url', + type='str', + default=None, + desc='Registry url for login purposes. This is not the default registry' + ), + Option( + 'registry_username', + type='str', + default=None, + desc='Custom repository username. Only used for logging into a registry.' + ), + Option( + 'registry_password', + type='str', + default=None, + desc='Custom repository password. Only used for logging into a registry.' + ), + #### + Option( + 'registry_insecure', + type='bool', + default=False, + desc='Registry is to be considered insecure (no TLS available). Only for development purposes.' + ), + Option( + 'use_repo_digest', + type='bool', + default=True, + desc='Automatically convert image tags to image digest. Make sure all daemons use the same image', + ), + Option( + 'config_checks_enabled', + type='bool', + default=False, + desc='Enable or disable the cephadm configuration analysis', + ), + Option( + 'default_registry', + type='str', + default='docker.io', + desc='Search-registry to which we should normalize unqualified image names. 
' + 'This is not the default registry', + ), + Option( + 'max_count_per_host', + type='int', + default=10, + desc='max number of daemons per service per host', + ), + Option( + 'autotune_memory_target_ratio', + type='float', + default=.7, + desc='ratio of total system memory to divide amongst autotuned daemons' + ), + Option( + 'autotune_interval', + type='secs', + default=10 * 60, + desc='how frequently to autotune daemon memory' + ), + Option( + 'use_agent', + type='bool', + default=False, + desc='Use cephadm agent on each host to gather and send metadata' + ), + Option( + 'agent_refresh_rate', + type='secs', + default=20, + desc='How often agent on each host will try to gather and send metadata' + ), + Option( + 'agent_starting_port', + type='int', + default=4721, + desc='First port agent will try to bind to (will also try up to next 1000 subsequent ports if blocked)' + ), + Option( + 'agent_down_multiplier', + type='float', + default=3.0, + desc='Multiplied by agent refresh rate to calculate how long agent must not report before being marked down' + ), + Option( + 'max_osd_draining_count', + type='int', + default=10, + desc='max number of osds that will be drained simultaneously when osds are removed' + ), + Option( + 'service_discovery_port', + type='int', + default=8765, + desc='cephadm service discovery port' + ), + Option( + 'cgroups_split', + type='bool', + default=True, + desc='Pass --cgroups=split when cephadm creates containers (currently podman only)' + ), + Option( + 'log_refresh_metadata', + type='bool', + default=False, + desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level' + ), + Option( + 'secure_monitoring_stack', + type='bool', + default=False, + desc='Enable TLS security for all the monitoring stack daemons' + ), + Option( + 'default_cephadm_command_timeout', + type='secs', + default=15 * 60, + desc='Default timeout applied to cephadm commands run directly on ' + 'the host (in seconds)' + ), + ] + + def __init__(self, *args: Any, **kwargs: Any): + super(CephadmOrchestrator, self).__init__(*args, **kwargs) + self._cluster_fsid: str = self.get('mon_map')['fsid'] + self.last_monmap: Optional[datetime.datetime] = None + + # for serve() + self.run = True + self.event = Event() + + self.ssh = ssh.SSHManager(self) + + if self.get_store('pause'): + self.paused = True + else: + self.paused = False + + # for mypy which does not run the code + if TYPE_CHECKING: + self.ssh_config_file = None # type: Optional[str] + self.device_cache_timeout = 0 + self.daemon_cache_timeout = 0 + self.facts_cache_timeout = 0 + self.host_check_interval = 0 + self.max_count_per_host = 0 + self.mode = '' + self.container_image_base = '' + self.container_image_prometheus = '' + self.container_image_nvmeof = '' + self.container_image_grafana = '' + self.container_image_alertmanager = '' + self.container_image_node_exporter = '' + self.container_image_loki = '' + self.container_image_promtail = '' + self.container_image_haproxy = '' + self.container_image_keepalived = '' + self.container_image_snmp_gateway = '' + self.container_image_elasticsearch = '' + self.container_image_jaeger_agent = '' + self.container_image_jaeger_collector = '' + self.container_image_jaeger_query = '' + self.warn_on_stray_hosts = True + self.warn_on_stray_daemons = True + self.warn_on_failed_host_check = True + self.allow_ptrace = False + self.container_init = True + self.prometheus_alerts_path = '' + self.migration_current: Optional[int] = None + 
self.config_dashboard = True + self.manage_etc_ceph_ceph_conf = True + self.manage_etc_ceph_ceph_conf_hosts = '*' + self.registry_url: Optional[str] = None + self.registry_username: Optional[str] = None + self.registry_password: Optional[str] = None + self.registry_insecure: bool = False + self.use_repo_digest = True + self.default_registry = '' + self.autotune_memory_target_ratio = 0.0 + self.autotune_interval = 0 + self.ssh_user: Optional[str] = None + self._ssh_options: Optional[str] = None + self.tkey = NamedTemporaryFile() + self.ssh_config_fname: Optional[str] = None + self.ssh_config: Optional[str] = None + self._temp_files: List = [] + self.ssh_key: Optional[str] = None + self.ssh_pub: Optional[str] = None + self.ssh_cert: Optional[str] = None + self.use_agent = False + self.agent_refresh_rate = 0 + self.agent_down_multiplier = 0.0 + self.agent_starting_port = 0 + self.service_discovery_port = 0 + self.secure_monitoring_stack = False + self.apply_spec_fails: List[Tuple[str, str]] = [] + self.max_osd_draining_count = 10 + self.device_enhanced_scan = False + self.inventory_list_all = False + self.cgroups_split = True + self.log_refresh_metadata = False + self.default_cephadm_command_timeout = 0 + + self.notify(NotifyType.mon_map, None) + self.config_notify() + + path = self.get_ceph_option('cephadm_path') + try: + assert isinstance(path, str) + with open(path, 'rb') as f: + self._cephadm = f.read() + except (IOError, TypeError) as e: + raise RuntimeError("unable to read cephadm at '%s': %s" % ( + path, str(e))) + + self.cephadm_binary_path = self._get_cephadm_binary_path() + + self._worker_pool = multiprocessing.pool.ThreadPool(10) + + self.ssh._reconfig_ssh() + + CephadmOrchestrator.instance = self + + self.upgrade = CephadmUpgrade(self) + + self.health_checks: Dict[str, dict] = {} + + self.inventory = Inventory(self) + + self.cache = HostCache(self) + self.cache.load() + + self.agent_cache = AgentCache(self) + self.agent_cache.load() + + self.to_remove_osds = OSDRemovalQueue(self) + self.to_remove_osds.load_from_store() + + self.spec_store = SpecStore(self) + self.spec_store.load() + + self.keys = ClientKeyringStore(self) + self.keys.load() + + self.tuned_profiles = TunedProfileStore(self) + self.tuned_profiles.load() + + self.tuned_profile_utils = TunedProfileUtils(self) + + # ensure the host lists are in sync + for h in self.inventory.keys(): + if h not in self.cache.daemons: + self.cache.prime_empty_host(h) + for h in self.cache.get_hosts(): + if h not in self.inventory: + self.cache.rm_host(h) + + # in-memory only. 
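+        # (the EventStore keeps at most five events per subject and is rebuilt
+        # from scratch on every MGR restart; see EventStore in inventory.py)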
+ self.events = EventStore(self) + self.offline_hosts: Set[str] = set() + + self.migration = Migrations(self) + + _service_classes: Sequence[Type[CephadmService]] = [ + OSDService, NFSService, MonService, MgrService, MdsService, + RgwService, RbdMirrorService, GrafanaService, AlertmanagerService, + PrometheusService, NodeExporterService, LokiService, PromtailService, CrashService, IscsiService, + IngressService, CustomContainerService, CephfsMirrorService, NvmeofService, + CephadmAgent, CephExporterService, SNMPGatewayService, ElasticSearchService, + JaegerQueryService, JaegerAgentService, JaegerCollectorService + ] + + # https://github.com/python/mypy/issues/8993 + self.cephadm_services: Dict[str, CephadmService] = { + cls.TYPE: cls(self) for cls in _service_classes} # type: ignore + + self.mgr_service: MgrService = cast(MgrService, self.cephadm_services['mgr']) + self.osd_service: OSDService = cast(OSDService, self.cephadm_services['osd']) + self.iscsi_service: IscsiService = cast(IscsiService, self.cephadm_services['iscsi']) + self.nvmeof_service: NvmeofService = cast(NvmeofService, self.cephadm_services['nvmeof']) + + self.scheduled_async_actions: List[Callable] = [] + + self.template = TemplateMgr(self) + + self.requires_post_actions: Set[str] = set() + self.need_connect_dashboard_rgw = False + + self.config_checker = CephadmConfigChecks(self) + + self.http_server = CephadmHttpServer(self) + self.http_server.start() + self.agent_helpers = CephadmAgentHelpers(self) + if self.use_agent: + self.agent_helpers._apply_agent() + + self.offline_watcher = OfflineHostWatcher(self) + self.offline_watcher.start() + + def shutdown(self) -> None: + self.log.debug('shutdown') + self._worker_pool.close() + self._worker_pool.join() + self.http_server.shutdown() + self.offline_watcher.shutdown() + self.run = False + self.event.set() + + def _get_cephadm_service(self, service_type: str) -> CephadmService: + assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES + return self.cephadm_services[service_type] + + def _get_cephadm_binary_path(self) -> str: + import hashlib + m = hashlib.sha256() + m.update(self._cephadm) + return f'/var/lib/ceph/{self._cluster_fsid}/cephadm.{m.hexdigest()}' + + def _kick_serve_loop(self) -> None: + self.log.debug('_kick_serve_loop') + self.event.set() + + def serve(self) -> None: + """ + The main loop of cephadm. + + A command handler will typically change the declarative state + of cephadm. This loop will then attempt to apply this new state. + """ + # for ssh in serve + self.event_loop = ssh.EventLoopThread() + + serve = CephadmServe(self) + serve.serve() + + def wait_async(self, coro: Awaitable[T], timeout: Optional[int] = None) -> T: + if not timeout: + timeout = self.default_cephadm_command_timeout + # put a lower bound of 60 seconds in case users + # accidentally set it to something unreasonable. + # For example if they though it was in minutes + # rather than seconds + if timeout < 60: + self.log.info(f'Found default timeout set to {timeout}. Instead trying minimum of 60.') + timeout = 60 + return self.event_loop.get_result(coro, timeout) + + @contextmanager + def async_timeout_handler(self, host: Optional[str] = '', + cmd: Optional[str] = '', + timeout: Optional[int] = None) -> Iterator[None]: + # this is meant to catch asyncio.TimeoutError and convert it into an + # OrchestratorError which much of the cephadm codebase is better equipped to handle. 
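+        # Typical usage wraps a wait_async() call, e.g.:
+        #     with self.async_timeout_handler(host, 'cephadm check-host'):
+        #         out, err, code = self.wait_async(...)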
+ # If the command being run, the host it is run on, or the timeout being used + # are provided, that will be included in the OrchestratorError's message + try: + yield + except asyncio.TimeoutError: + err_str: str = '' + if cmd: + err_str = f'Command "{cmd}" timed out ' + else: + err_str = 'Command timed out ' + if host: + err_str += f'on host {host} ' + if timeout: + err_str += f'(non-default {timeout} second timeout)' + else: + err_str += (f'(default {self.default_cephadm_command_timeout} second timeout)') + raise OrchestratorError(err_str) + + def set_container_image(self, entity: str, image: str) -> None: + self.check_mon_command({ + 'prefix': 'config set', + 'name': 'container_image', + 'value': image, + 'who': entity, + }) + + def config_notify(self) -> None: + """ + This method is called whenever one of our config options is changed. + + TODO: this method should be moved into mgr_module.py + """ + for opt in self.MODULE_OPTIONS: + setattr(self, + opt['name'], # type: ignore + self.get_module_option(opt['name'])) # type: ignore + self.log.debug(' mgr option %s = %s', + opt['name'], getattr(self, opt['name'])) # type: ignore + for opt in self.NATIVE_OPTIONS: + setattr(self, + opt, # type: ignore + self.get_ceph_option(opt)) + self.log.debug(' native option %s = %s', opt, getattr(self, opt)) # type: ignore + + self.event.set() + + def notify(self, notify_type: NotifyType, notify_id: Optional[str]) -> None: + if notify_type == NotifyType.mon_map: + # get monmap mtime so we can refresh configs when mons change + monmap = self.get('mon_map') + self.last_monmap = str_to_datetime(monmap['modified']) + if self.last_monmap and self.last_monmap > datetime_now(): + self.last_monmap = None # just in case clocks are skewed + if getattr(self, 'manage_etc_ceph_ceph_conf', False): + # getattr, due to notify() being called before config_notify() + self._kick_serve_loop() + if notify_type == NotifyType.pg_summary: + self._trigger_osd_removal() + + def _trigger_osd_removal(self) -> None: + remove_queue = self.to_remove_osds.as_osd_ids() + if not remove_queue: + return + data = self.get("osd_stats") + for osd in data.get('osd_stats', []): + if osd.get('num_pgs') == 0: + # if _ANY_ osd that is currently in the queue appears to be empty, + # start the removal process + if int(osd.get('osd')) in remove_queue: + self.log.debug('Found empty osd. 
Starting removal process') + # if the osd that is now empty is also part of the removal queue + # start the process + self._kick_serve_loop() + + def pause(self) -> None: + if not self.paused: + self.log.info('Paused') + self.set_store('pause', 'true') + self.paused = True + # wake loop so we update the health status + self._kick_serve_loop() + + def resume(self) -> None: + if self.paused: + self.log.info('Resumed') + self.paused = False + self.set_store('pause', None) + # unconditionally wake loop so that 'orch resume' can be used to kick + # cephadm + self._kick_serve_loop() + + def get_unique_name( + self, + daemon_type: str, + host: str, + existing: List[orchestrator.DaemonDescription], + prefix: Optional[str] = None, + forcename: Optional[str] = None, + rank: Optional[int] = None, + rank_generation: Optional[int] = None, + ) -> str: + """ + Generate a unique random service name + """ + suffix = daemon_type not in [ + 'mon', 'crash', 'ceph-exporter', + 'prometheus', 'node-exporter', 'grafana', 'alertmanager', + 'container', 'agent', 'snmp-gateway', 'loki', 'promtail', + 'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query' + ] + if forcename: + if len([d for d in existing if d.daemon_id == forcename]): + raise orchestrator.OrchestratorValidationError( + f'name {daemon_type}.{forcename} already in use') + return forcename + + if '.' in host: + host = host.split('.')[0] + while True: + if prefix: + name = prefix + '.' + else: + name = '' + if rank is not None and rank_generation is not None: + name += f'{rank}.{rank_generation}.' + name += host + if suffix: + name += '.' + ''.join(random.choice(string.ascii_lowercase) + for _ in range(6)) + if len([d for d in existing if d.daemon_id == name]): + if not suffix: + raise orchestrator.OrchestratorValidationError( + f'name {daemon_type}.{name} already in use') + self.log.debug('name %s exists, trying again', name) + continue + return name + + def validate_ssh_config_content(self, ssh_config: Optional[str]) -> None: + if ssh_config is None or len(ssh_config.strip()) == 0: + raise OrchestratorValidationError('ssh_config cannot be empty') + # StrictHostKeyChecking is [yes|no] ? + res = re.findall(r'StrictHostKeyChecking\s+.*', ssh_config) + if not res: + raise OrchestratorValidationError('ssh_config requires StrictHostKeyChecking') + for s in res: + if 'ask' in s.lower(): + raise OrchestratorValidationError(f'ssh_config cannot contain: \'{s}\'') + + def validate_ssh_config_fname(self, ssh_config_fname: str) -> None: + if not os.path.isfile(ssh_config_fname): + raise OrchestratorValidationError("ssh_config \"{}\" does not exist".format( + ssh_config_fname)) + + def _process_ls_output(self, host: str, ls: List[Dict[str, Any]]) -> None: + def _as_datetime(value: Optional[str]) -> Optional[datetime.datetime]: + return str_to_datetime(value) if value is not None else None + + dm = {} + for d in ls: + if not d['style'].startswith('cephadm'): + continue + if d['fsid'] != self._cluster_fsid: + continue + if '.' 
not in d['name']: + continue + daemon_type = d['name'].split('.')[0] + if daemon_type not in orchestrator.KNOWN_DAEMON_TYPES: + logger.warning(f"Found unknown daemon type {daemon_type} on host {host}") + continue + + container_id = d.get('container_id') + if container_id: + # shorten the hash + container_id = container_id[0:12] + rank = int(d['rank']) if d.get('rank') is not None else None + rank_generation = int(d['rank_generation']) if d.get( + 'rank_generation') is not None else None + status, status_desc = None, 'unknown' + if 'state' in d: + status_desc = d['state'] + status = { + 'running': DaemonDescriptionStatus.running, + 'stopped': DaemonDescriptionStatus.stopped, + 'error': DaemonDescriptionStatus.error, + 'unknown': DaemonDescriptionStatus.error, + }[d['state']] + sd = orchestrator.DaemonDescription( + daemon_type=daemon_type, + daemon_id='.'.join(d['name'].split('.')[1:]), + hostname=host, + container_id=container_id, + container_image_id=d.get('container_image_id'), + container_image_name=d.get('container_image_name'), + container_image_digests=d.get('container_image_digests'), + version=d.get('version'), + status=status, + status_desc=status_desc, + created=_as_datetime(d.get('created')), + started=_as_datetime(d.get('started')), + last_refresh=datetime_now(), + last_configured=_as_datetime(d.get('last_configured')), + last_deployed=_as_datetime(d.get('last_deployed')), + memory_usage=d.get('memory_usage'), + memory_request=d.get('memory_request'), + memory_limit=d.get('memory_limit'), + cpu_percentage=d.get('cpu_percentage'), + service_name=d.get('service_name'), + ports=d.get('ports'), + ip=d.get('ip'), + deployed_by=d.get('deployed_by'), + rank=rank, + rank_generation=rank_generation, + extra_container_args=d.get('extra_container_args'), + extra_entrypoint_args=d.get('extra_entrypoint_args'), + ) + dm[sd.name()] = sd + self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm))) + self.cache.update_host_daemons(host, dm) + self.cache.save_host(host) + return None + + def update_watched_hosts(self) -> None: + # currently, we are watching hosts with nfs daemons + hosts_to_watch = [d.hostname for d in self.cache.get_daemons( + ) if d.daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES] + self.offline_watcher.set_hosts(list(set([h for h in hosts_to_watch if h is not None]))) + + def offline_hosts_remove(self, host: str) -> None: + if host in self.offline_hosts: + self.offline_hosts.remove(host) + + def update_failed_daemon_health_check(self) -> None: + failed_daemons = [] + for dd in self.cache.get_error_daemons(): + if dd.daemon_type != 'agent': # agents tracked by CEPHADM_AGENT_DOWN + failed_daemons.append('daemon %s on %s is in %s state' % ( + dd.name(), dd.hostname, dd.status_desc + )) + self.remove_health_warning('CEPHADM_FAILED_DAEMON') + if failed_daemons: + self.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len( + failed_daemons), failed_daemons) + + @staticmethod + def can_run() -> Tuple[bool, str]: + if asyncssh is not None: + return True, "" + else: + return False, "loading asyncssh library:{}".format( + asyncssh_import_error) + + def available(self) -> Tuple[bool, str, Dict[str, Any]]: + """ + The cephadm orchestrator is always available. + """ + ok, err = self.can_run() + if not ok: + return ok, err, {} + if not self.ssh_key or not self.ssh_pub: + return False, 'SSH keys not set. 
Use `ceph cephadm set-priv-key` and `ceph cephadm set-pub-key` or `ceph cephadm generate-key`', {} + + # mypy is unable to determine type for _processes since it's private + worker_count: int = self._worker_pool._processes # type: ignore + ret = { + "workers": worker_count, + "paused": self.paused, + } + + return True, err, ret + + def _validate_and_set_ssh_val(self, what: str, new: Optional[str], old: Optional[str]) -> None: + self.set_store(what, new) + self.ssh._reconfig_ssh() + if self.cache.get_hosts(): + # Can't check anything without hosts + host = self.cache.get_hosts()[0] + r = CephadmServe(self)._check_host(host) + if r is not None: + # connection failed reset user + self.set_store(what, old) + self.ssh._reconfig_ssh() + raise OrchestratorError('ssh connection %s@%s failed' % (self.ssh_user, host)) + self.log.info(f'Set ssh {what}') + + @orchestrator._cli_write_command( + prefix='cephadm set-ssh-config') + def _set_ssh_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """ + Set the ssh_config file (use -i <ssh_config>) + """ + # Set an ssh_config file provided from stdin + + old = self.ssh_config + if inbuf == old: + return 0, "value unchanged", "" + self.validate_ssh_config_content(inbuf) + self._validate_and_set_ssh_val('ssh_config', inbuf, old) + return 0, "", "" + + @orchestrator._cli_write_command('cephadm clear-ssh-config') + def _clear_ssh_config(self) -> Tuple[int, str, str]: + """ + Clear the ssh_config file + """ + # Clear the ssh_config file provided from stdin + self.set_store("ssh_config", None) + self.ssh_config_tmp = None + self.log.info('Cleared ssh_config') + self.ssh._reconfig_ssh() + return 0, "", "" + + @orchestrator._cli_read_command('cephadm get-ssh-config') + def _get_ssh_config(self) -> HandleCommandResult: + """ + Returns the ssh config as used by cephadm + """ + if self.ssh_config_file: + self.validate_ssh_config_fname(self.ssh_config_file) + with open(self.ssh_config_file) as f: + return HandleCommandResult(stdout=f.read()) + ssh_config = self.get_store("ssh_config") + if ssh_config: + return HandleCommandResult(stdout=ssh_config) + return HandleCommandResult(stdout=DEFAULT_SSH_CONFIG) + + @orchestrator._cli_write_command('cephadm generate-key') + def _generate_key(self) -> Tuple[int, str, str]: + """ + Generate a cluster SSH key (if not present) + """ + if not self.ssh_pub or not self.ssh_key: + self.log.info('Generating ssh key...') + tmp_dir = TemporaryDirectory() + path = tmp_dir.name + '/key' + try: + subprocess.check_call([ + '/usr/bin/ssh-keygen', + '-C', 'ceph-%s' % self._cluster_fsid, + '-N', '', + '-f', path + ]) + with open(path, 'r') as f: + secret = f.read() + with open(path + '.pub', 'r') as f: + pub = f.read() + finally: + os.unlink(path) + os.unlink(path + '.pub') + tmp_dir.cleanup() + self.set_store('ssh_identity_key', secret) + self.set_store('ssh_identity_pub', pub) + self.ssh._reconfig_ssh() + return 0, '', '' + + @orchestrator._cli_write_command( + 'cephadm set-priv-key') + def _set_priv_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """Set cluster SSH private key (use -i <private_key>)""" + if inbuf is None or len(inbuf) == 0: + return -errno.EINVAL, "", "empty private ssh key provided" + old = self.ssh_key + if inbuf == old: + return 0, "value unchanged", "" + self._validate_and_set_ssh_val('ssh_identity_key', inbuf, old) + self.log.info('Set ssh private key') + return 0, "", "" + + @orchestrator._cli_write_command( + 'cephadm set-pub-key') + def _set_pub_key(self, inbuf: Optional[str] = None) 
-> Tuple[int, str, str]: + """Set cluster SSH public key (use -i <public_key>)""" + if inbuf is None or len(inbuf) == 0: + return -errno.EINVAL, "", "empty public ssh key provided" + old = self.ssh_pub + if inbuf == old: + return 0, "value unchanged", "" + self._validate_and_set_ssh_val('ssh_identity_pub', inbuf, old) + return 0, "", "" + + @orchestrator._cli_write_command( + 'cephadm set-signed-cert') + def _set_signed_cert(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """Set a signed cert if CA signed keys are being used (use -i <cert_filename>)""" + if inbuf is None or len(inbuf) == 0: + return -errno.EINVAL, "", "empty cert file provided" + old = self.ssh_cert + if inbuf == old: + return 0, "value unchanged", "" + self._validate_and_set_ssh_val('ssh_identity_cert', inbuf, old) + return 0, "", "" + + @orchestrator._cli_write_command( + 'cephadm clear-key') + def _clear_key(self) -> Tuple[int, str, str]: + """Clear cluster SSH key""" + self.set_store('ssh_identity_key', None) + self.set_store('ssh_identity_pub', None) + self.set_store('ssh_identity_cert', None) + self.ssh._reconfig_ssh() + self.log.info('Cleared cluster SSH key') + return 0, '', '' + + @orchestrator._cli_read_command( + 'cephadm get-pub-key') + def _get_pub_key(self) -> Tuple[int, str, str]: + """Show SSH public key for connecting to cluster hosts""" + if self.ssh_pub: + return 0, self.ssh_pub, '' + else: + return -errno.ENOENT, '', 'No cluster SSH key defined' + + @orchestrator._cli_read_command( + 'cephadm get-signed-cert') + def _get_signed_cert(self) -> Tuple[int, str, str]: + """Show SSH signed cert for connecting to cluster hosts using CA signed keys""" + if self.ssh_cert: + return 0, self.ssh_cert, '' + else: + return -errno.ENOENT, '', 'No signed cert defined' + + @orchestrator._cli_read_command( + 'cephadm get-user') + def _get_user(self) -> Tuple[int, str, str]: + """ + Show user for SSHing to cluster hosts + """ + if self.ssh_user is None: + return -errno.ENOENT, '', 'No cluster SSH user configured' + else: + return 0, self.ssh_user, '' + + @orchestrator._cli_read_command( + 'cephadm set-user') + def set_ssh_user(self, user: str) -> Tuple[int, str, str]: + """ + Set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users + """ + current_user = self.ssh_user + if user == current_user: + return 0, "value unchanged", "" + + self._validate_and_set_ssh_val('ssh_user', user, current_user) + current_ssh_config = self._get_ssh_config() + new_ssh_config = re.sub(r"(\s{2}User\s)(.*)", r"\1" + user, current_ssh_config.stdout) + self._set_ssh_config(new_ssh_config) + + msg = 'ssh user set to %s' % user + if user != 'root': + msg += '. sudo will be used' + self.log.info(msg) + return 0, msg, '' + + @orchestrator._cli_read_command( + 'cephadm registry-login') + def registry_login(self, url: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """ + Set custom registry login info by providing url, username and password or json file with login info (-i <file>) + """ + # if password not given in command line, get it through file input + if not (url and username and password) and (inbuf is None or len(inbuf) == 0): + return -errno.EINVAL, "", ("Invalid arguments. 
Please provide arguments <url> <username> <password> " + "or -i <login credentials json file>") + elif (url and username and password): + registry_json = {'url': url, 'username': username, 'password': password} + else: + assert isinstance(inbuf, str) + registry_json = json.loads(inbuf) + if "url" not in registry_json or "username" not in registry_json or "password" not in registry_json: + return -errno.EINVAL, "", ("json provided for custom registry login did not include all necessary fields. " + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + + # verify login info works by attempting login on random host + host = None + for host_name in self.inventory.keys(): + host = host_name + break + if not host: + raise OrchestratorError('no hosts defined') + with self.async_timeout_handler(host, 'cephadm registry-login'): + r = self.wait_async(CephadmServe(self)._registry_login(host, registry_json)) + if r is not None: + return 1, '', r + # if logins succeeded, store info + self.log.debug("Host logins successful. Storing login info.") + self.set_store('registry_credentials', json.dumps(registry_json)) + # distribute new login info to all hosts + self.cache.distribute_new_registry_login_info() + return 0, "registry login scheduled", '' + + @orchestrator._cli_read_command('cephadm check-host') + def check_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: + """Check whether we can access and manage a remote host""" + try: + with self.async_timeout_handler(host, f'cephadm check-host --expect-hostname {host}'): + out, err, code = self.wait_async( + CephadmServe(self)._run_cephadm( + host, cephadmNoImage, 'check-host', ['--expect-hostname', host], + addr=addr, error_ok=True, no_fsid=True)) + if code: + return 1, '', ('check-host failed:\n' + '\n'.join(err)) + except ssh.HostConnectionError as e: + self.log.exception( + f"check-host failed for '{host}' at addr ({e.addr}) due to connection failure: {str(e)}") + return 1, '', ('check-host failed:\n' + + f"Failed to connect to {host} at address ({e.addr}): {str(e)}") + except OrchestratorError: + self.log.exception(f"check-host failed for '{host}'") + return 1, '', ('check-host failed:\n' + + f"Host '{host}' not found. 
Use 'ceph orch host ls' to see all managed hosts.") + # if we have an outstanding health alert for this host, give the + # serve thread a kick + if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks: + for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: + if item.startswith('host %s ' % host): + self.event.set() + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) + + @orchestrator._cli_read_command( + 'cephadm prepare-host') + def _prepare_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: + """Prepare a remote host for use with cephadm""" + with self.async_timeout_handler(host, 'cephadm prepare-host'): + out, err, code = self.wait_async( + CephadmServe(self)._run_cephadm( + host, cephadmNoImage, 'prepare-host', ['--expect-hostname', host], + addr=addr, error_ok=True, no_fsid=True)) + if code: + return 1, '', ('prepare-host failed:\n' + '\n'.join(err)) + # if we have an outstanding health alert for this host, give the + # serve thread a kick + if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks: + for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: + if item.startswith('host %s ' % host): + self.event.set() + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) + + @orchestrator._cli_write_command( + prefix='cephadm set-extra-ceph-conf') + def _set_extra_ceph_conf(self, inbuf: Optional[str] = None) -> HandleCommandResult: + """ + Text that is appended to all daemon's ceph.conf. + Mainly a workaround, till `config generate-minimal-conf` generates + a complete ceph.conf. + + Warning: this is a dangerous operation. + """ + if inbuf: + # sanity check. + cp = ConfigParser() + cp.read_string(inbuf, source='<infile>') + + self.set_store("extra_ceph_conf", json.dumps({ + 'conf': inbuf, + 'last_modified': datetime_to_str(datetime_now()) + })) + self.log.info('Set extra_ceph_conf') + self._kick_serve_loop() + return HandleCommandResult() + + @orchestrator._cli_read_command( + 'cephadm get-extra-ceph-conf') + def _get_extra_ceph_conf(self) -> HandleCommandResult: + """ + Get extra ceph conf that is appended + """ + return HandleCommandResult(stdout=self.extra_ceph_conf().conf) + + @orchestrator._cli_read_command('cephadm config-check ls') + def _config_checks_list(self, format: Format = Format.plain) -> HandleCommandResult: + """List the available configuration checks and their current state""" + + if format not in [Format.plain, Format.json, Format.json_pretty]: + return HandleCommandResult( + retval=1, + stderr="Requested format is not supported when listing configuration checks" + ) + + if format in [Format.json, Format.json_pretty]: + return HandleCommandResult( + stdout=to_format(self.config_checker.health_checks, + format, + many=True, + cls=None)) + + # plain formatting + table = PrettyTable( + ['NAME', + 'HEALTHCHECK', + 'STATUS', + 'DESCRIPTION' + ], border=False) + table.align['NAME'] = 'l' + table.align['HEALTHCHECK'] = 'l' + table.align['STATUS'] = 'l' + table.align['DESCRIPTION'] = 'l' + table.left_padding_width = 0 + table.right_padding_width = 2 + for c in self.config_checker.health_checks: + table.add_row(( + c.name, + c.healthcheck_name, + c.status, + c.description, + )) + + return HandleCommandResult(stdout=table.get_string()) + + @orchestrator._cli_read_command('cephadm config-check status') + def _config_check_status(self) -> HandleCommandResult: + """Show whether the configuration checker feature is enabled/disabled""" + status = self.get_module_option('config_checks_enabled') + return 
HandleCommandResult(stdout="Enabled" if status else "Disabled") + + @orchestrator._cli_write_command('cephadm config-check enable') + def _config_check_enable(self, check_name: str) -> HandleCommandResult: + """Enable a specific configuration check""" + if not self._config_check_valid(check_name): + return HandleCommandResult(retval=1, stderr="Invalid check name") + + err, msg = self._update_config_check(check_name, 'enabled') + if err: + return HandleCommandResult( + retval=err, + stderr=f"Failed to enable check '{check_name}' : {msg}") + + return HandleCommandResult(stdout="ok") + + @orchestrator._cli_write_command('cephadm config-check disable') + def _config_check_disable(self, check_name: str) -> HandleCommandResult: + """Disable a specific configuration check""" + if not self._config_check_valid(check_name): + return HandleCommandResult(retval=1, stderr="Invalid check name") + + err, msg = self._update_config_check(check_name, 'disabled') + if err: + return HandleCommandResult(retval=err, stderr=f"Failed to disable check '{check_name}': {msg}") + else: + # drop any outstanding raised healthcheck for this check + config_check = self.config_checker.lookup_check(check_name) + if config_check: + if config_check.healthcheck_name in self.health_checks: + self.health_checks.pop(config_check.healthcheck_name, None) + self.set_health_checks(self.health_checks) + else: + self.log.error( + f"Unable to resolve a check name ({check_name}) to a healthcheck definition?") + + return HandleCommandResult(stdout="ok") + + def _config_check_valid(self, check_name: str) -> bool: + return check_name in [chk.name for chk in self.config_checker.health_checks] + + def _update_config_check(self, check_name: str, status: str) -> Tuple[int, str]: + checks_raw = self.get_store('config_checks') + if not checks_raw: + return 1, "config_checks setting is not available" + + checks = json.loads(checks_raw) + checks.update({ + check_name: status + }) + self.log.info(f"updated config check '{check_name}' : {status}") + self.set_store('config_checks', json.dumps(checks)) + return 0, "" + + class ExtraCephConf(NamedTuple): + conf: str + last_modified: Optional[datetime.datetime] + + def extra_ceph_conf(self) -> 'CephadmOrchestrator.ExtraCephConf': + data = self.get_store('extra_ceph_conf') + if not data: + return CephadmOrchestrator.ExtraCephConf('', None) + try: + j = json.loads(data) + except ValueError: + msg = 'Unable to load extra_ceph_conf: Cannot decode JSON' + self.log.exception('%s: \'%s\'', msg, data) + return CephadmOrchestrator.ExtraCephConf('', None) + return CephadmOrchestrator.ExtraCephConf(j['conf'], str_to_datetime(j['last_modified'])) + + def extra_ceph_conf_is_newer(self, dt: datetime.datetime) -> bool: + conf = self.extra_ceph_conf() + if not conf.last_modified: + return False + return conf.last_modified > dt + + @orchestrator._cli_write_command( + 'cephadm osd activate' + ) + def _osd_activate(self, host: List[str]) -> HandleCommandResult: + """ + Start OSD containers for existing OSDs + """ + + @forall_hosts + def run(h: str) -> str: + with self.async_timeout_handler(h, 'cephadm deploy (osd daemon)'): + return self.wait_async(self.osd_service.deploy_osd_daemons_for_existing_osds(h, 'osd')) + + return HandleCommandResult(stdout='\n'.join(run(host))) + + @orchestrator._cli_read_command('orch client-keyring ls') + def _client_keyring_ls(self, format: Format = Format.plain) -> HandleCommandResult: + """ + List client keyrings under cephadm management + """ + if format != Format.plain: + output = 
to_format(self.keys.keys.values(), format, many=True, cls=ClientKeyringSpec) + else: + table = PrettyTable( + ['ENTITY', 'PLACEMENT', 'MODE', 'OWNER', 'PATH'], + border=False) + table.align = 'l' + table.left_padding_width = 0 + table.right_padding_width = 2 + for ks in sorted(self.keys.keys.values(), key=lambda ks: ks.entity): + table.add_row(( + ks.entity, ks.placement.pretty_str(), + utils.file_mode_to_str(ks.mode), + f'{ks.uid}:{ks.gid}', + ks.path, + )) + output = table.get_string() + return HandleCommandResult(stdout=output) + + @orchestrator._cli_write_command('orch client-keyring set') + def _client_keyring_set( + self, + entity: str, + placement: str, + owner: Optional[str] = None, + mode: Optional[str] = None, + ) -> HandleCommandResult: + """ + Add or update client keyring under cephadm management + """ + if not entity.startswith('client.'): + raise OrchestratorError('entity must start with client.') + if owner: + try: + uid, gid = map(int, owner.split(':')) + except Exception: + raise OrchestratorError('owner must look like "<uid>:<gid>", e.g., "0:0"') + else: + uid = 0 + gid = 0 + if mode: + try: + imode = int(mode, 8) + except Exception: + raise OrchestratorError('mode must be an octal mode, e.g. "600"') + else: + imode = 0o600 + pspec = PlacementSpec.from_string(placement) + ks = ClientKeyringSpec(entity, pspec, mode=imode, uid=uid, gid=gid) + self.keys.update(ks) + self._kick_serve_loop() + return HandleCommandResult() + + @orchestrator._cli_write_command('orch client-keyring rm') + def _client_keyring_rm( + self, + entity: str, + ) -> HandleCommandResult: + """ + Remove client keyring from cephadm management + """ + self.keys.rm(entity) + self._kick_serve_loop() + return HandleCommandResult() + + def _get_container_image(self, daemon_name: str) -> Optional[str]: + daemon_type = daemon_name.split('.', 1)[0] # type: ignore + image: Optional[str] = None + if daemon_type in CEPH_IMAGE_TYPES: + # get container image + image = str(self.get_foreign_ceph_option( + utils.name_to_config_section(daemon_name), + 'container_image' + )).strip() + elif daemon_type == 'prometheus': + image = self.container_image_prometheus + elif daemon_type == 'nvmeof': + image = self.container_image_nvmeof + elif daemon_type == 'grafana': + image = self.container_image_grafana + elif daemon_type == 'alertmanager': + image = self.container_image_alertmanager + elif daemon_type == 'node-exporter': + image = self.container_image_node_exporter + elif daemon_type == 'loki': + image = self.container_image_loki + elif daemon_type == 'promtail': + image = self.container_image_promtail + elif daemon_type == 'haproxy': + image = self.container_image_haproxy + elif daemon_type == 'keepalived': + image = self.container_image_keepalived + elif daemon_type == 'elasticsearch': + image = self.container_image_elasticsearch + elif daemon_type == 'jaeger-agent': + image = self.container_image_jaeger_agent + elif daemon_type == 'jaeger-collector': + image = self.container_image_jaeger_collector + elif daemon_type == 'jaeger-query': + image = self.container_image_jaeger_query + elif daemon_type == CustomContainerService.TYPE: + # The image can't be resolved, the necessary information + # is only available when a container is deployed (given + # via spec). 
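+            # For illustration (a sketch; the service id and image below are
+            # placeholder values), a custom container spec carries its own
+            # image, so there is nothing to resolve here:
+            #
+            #   service_type: container
+            #   service_id: foo
+            #   image: docker.io/library/foo:latest
+            #
+            # cephadm reads that image from the spec when the daemon is
+            # actually deployed.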
+ image = None + elif daemon_type == 'snmp-gateway': + image = self.container_image_snmp_gateway + else: + assert False, daemon_type + + self.log.debug('%s container image %s' % (daemon_name, image)) + + return image + + def _check_valid_addr(self, host: str, addr: str) -> str: + # make sure hostname is resolvable before trying to make a connection + try: + ip_addr = utils.resolve_ip(addr) + except OrchestratorError as e: + msg = str(e) + f''' +You may need to supply an address for {addr} + +Please make sure that the host is reachable and accepts connections using the cephadm SSH key +To add the cephadm SSH key to the host: +> ceph cephadm get-pub-key > ~/ceph.pub +> ssh-copy-id -f -i ~/ceph.pub {self.ssh_user}@{addr} + +To check that the host is reachable open a new shell with the --no-hosts flag: +> cephadm shell --no-hosts + +Then run the following: +> ceph cephadm get-ssh-config > ssh_config +> ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key +> chmod 0600 ~/cephadm_private_key +> ssh -F ssh_config -i ~/cephadm_private_key {self.ssh_user}@{addr}''' + raise OrchestratorError(msg) + + if ipaddress.ip_address(ip_addr).is_loopback and host == addr: + # if this is a re-add, use old address. otherwise error + if host not in self.inventory or self.inventory.get_addr(host) == host: + raise OrchestratorError( + (f'Cannot automatically resolve ip address of host {host}. Ip resolved to loopback address: {ip_addr}\n' + + f'Please explicitly provide the address (ceph orch host add {host} --addr <ip-addr>)')) + self.log.debug( + f'Received loopback address resolving ip for {host}: {ip_addr}. Falling back to previous address.') + ip_addr = self.inventory.get_addr(host) + try: + with self.async_timeout_handler(host, f'cephadm check-host --expect-hostname {host}'): + out, err, code = self.wait_async(CephadmServe(self)._run_cephadm( + host, cephadmNoImage, 'check-host', + ['--expect-hostname', host], + addr=addr, + error_ok=True, no_fsid=True)) + if code: + msg = 'check-host failed:\n' + '\n'.join(err) + # err will contain stdout and stderr, so we filter on the message text to + # only show the errors + errors = [_i.replace("ERROR: ", "") for _i in err if _i.startswith('ERROR')] + if errors: + msg = f'Host {host} ({addr}) failed check(s): {errors}' + raise OrchestratorError(msg) + except ssh.HostConnectionError as e: + raise OrchestratorError(str(e)) + return ip_addr + + def _add_host(self, spec): + # type: (HostSpec) -> str + """ + Add a host to be managed by the orchestrator. + + :param host: host name + """ + HostSpec.validate(spec) + ip_addr = self._check_valid_addr(spec.hostname, spec.addr) + if spec.addr == spec.hostname and ip_addr: + spec.addr = ip_addr + + if spec.hostname in self.inventory and self.inventory.get_addr(spec.hostname) != spec.addr: + self.cache.refresh_all_host_info(spec.hostname) + + # prime crush map? 
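+        # For illustration (hostname and rack below are placeholder values),
+        # a host spec such as:
+        #
+        #   service_type: host
+        #   hostname: host2
+        #   location:
+        #     rack: myrack
+        #
+        # results in the equivalent of
+        # 'ceph osd crush add-bucket host2 host rack=myrack' below.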
+        if spec.location:
+            self.check_mon_command({
+                'prefix': 'osd crush add-bucket',
+                'name': spec.hostname,
+                'type': 'host',
+                'args': [f'{k}={v}' for k, v in spec.location.items()],
+            })
+
+        if spec.hostname not in self.inventory:
+            self.cache.prime_empty_host(spec.hostname)
+        self.inventory.add_host(spec)
+        self.offline_hosts_remove(spec.hostname)
+        if spec.status == 'maintenance':
+            self._set_maintenance_healthcheck()
+        self.event.set()  # refresh stray health check
+        self.log.info('Added host %s' % spec.hostname)
+        return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr)
+
+    @handle_orch_error
+    def add_host(self, spec: HostSpec) -> str:
+        return self._add_host(spec)
+
+    @handle_orch_error
+    def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+        """
+        Remove a host from orchestrator management.
+
+        :param host: host name
+        :param force: bypass running daemons check
+        :param offline: remove offline host
+        """
+
+        # check if host is offline
+        host_offline = host in self.offline_hosts
+
+        if host_offline and not offline:
+            raise OrchestratorValidationError(
+                "{} is offline, please use --offline and --force to remove this host. This can potentially cause data loss".format(host))
+
+        if not host_offline and offline:
+            raise OrchestratorValidationError(
+                "{} is online, please remove host without --offline.".format(host))
+
+        if offline and not force:
+            raise OrchestratorValidationError("Removing an offline host requires --force")
+
+        # check if there are daemons on the host
+        if not force:
+            daemons = self.cache.get_daemons_by_host(host)
+            if daemons:
+                self.log.warning(f"Blocked {host} removal. Daemons running: {daemons}")
+
+                daemons_table = ""
+                daemons_table += "{:<20} {:<15}\n".format("type", "id")
+                daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15)
+                for d in daemons:
+                    daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id)
+
+                raise OrchestratorValidationError("Not allowed to remove %s from cluster. "
+                                                  "The following daemons are running on the host:"
+                                                  "\n%s\nPlease run 'ceph orch host drain %s' to remove daemons from host" % (
+                                                      host, daemons_table, host))
+
+        # check if we're removing the last _admin host
+        if not force:
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
+            admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
+            if len(admin_hosts) == 1 and admin_hosts[0] == host:
+                raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'"
+                                                  f" label. Please add the '{SpecialHostLabels.ADMIN}' label to a host"
+                                                  " or add --force to this command")
+
+        def run_cmd(cmd_args: dict) -> None:
+            ret, out, err = self.mon_command(cmd_args)
+            if ret != 0:
+                self.log.debug(f"ran {cmd_args} with mon_command")
+                self.log.error(
+                    f"cmd: {cmd_args.get('prefix')} failed with: {err}. 
(errno:{ret})") + self.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}") + + if offline: + daemons = self.cache.get_daemons_by_host(host) + for d in daemons: + self.log.info(f"removing: {d.name()}") + + if d.daemon_type != 'osd': + self.cephadm_services[daemon_type_to_service(str(d.daemon_type))].pre_remove(d) + self.cephadm_services[daemon_type_to_service( + str(d.daemon_type))].post_remove(d, is_failed_deploy=False) + else: + cmd_args = { + 'prefix': 'osd purge-actual', + 'id': int(str(d.daemon_id)), + 'yes_i_really_mean_it': True + } + run_cmd(cmd_args) + + cmd_args = { + 'prefix': 'osd crush rm', + 'name': host + } + run_cmd(cmd_args) + + self.inventory.rm_host(host) + self.cache.rm_host(host) + self.ssh.reset_con(host) + # if host was in offline host list, we should remove it now. + self.offline_hosts_remove(host) + self.event.set() # refresh stray health check + self.log.info('Removed host %s' % host) + return "Removed {} host '{}'".format('offline' if offline else '', host) + + @handle_orch_error + def update_host_addr(self, host: str, addr: str) -> str: + self._check_valid_addr(host, addr) + self.inventory.set_addr(host, addr) + self.ssh.reset_con(host) + self.event.set() # refresh stray health check + self.log.info('Set host %s addr to %s' % (host, addr)) + return "Updated host '{}' addr to '{}'".format(host, addr) + + @handle_orch_error + def get_hosts(self): + # type: () -> List[orchestrator.HostSpec] + """ + Return a list of hosts managed by the orchestrator. + + Notes: + - skip async: manager reads from cache. + """ + return list(self.inventory.all_specs()) + + @handle_orch_error + def get_facts(self, hostname: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Return a list of hosts metadata(gather_facts) managed by the orchestrator. + + Notes: + - skip async: manager reads from cache. + """ + if hostname: + return [self.cache.get_facts(hostname)] + + return [self.cache.get_facts(hostname) for hostname in self.cache.get_hosts()] + + @handle_orch_error + def add_host_label(self, host: str, label: str) -> str: + self.inventory.add_label(host, label) + self.log.info('Added label %s to host %s' % (label, host)) + self._kick_serve_loop() + return 'Added label %s to host %s' % (label, host) + + @handle_orch_error + def remove_host_label(self, host: str, label: str, force: bool = False) -> str: + # if we remove the _admin label from the only host that has it we could end up + # removing the only instance of the config and keyring and cause issues + if not force and label == SpecialHostLabels.ADMIN: + p = PlacementSpec(label=SpecialHostLabels.ADMIN) + admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) + if len(admin_hosts) == 1 and admin_hosts[0] == host: + raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'" + f" label.\nRemoving the {SpecialHostLabels.ADMIN} label from this host could cause the removal" + " of the last cluster config/keyring managed by cephadm.\n" + f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host" + " before completing this operation.\nIf you're certain this is" + " what you want rerun this command with --force.") + if self.inventory.has_label(host, label): + self.inventory.rm_label(host, label) + msg = f'Removed label {label} from host {host}' + else: + msg = f"Host {host} does not have label '{label}'. Please use 'ceph orch host ls' to list all the labels." 
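+        # Illustrative CLI session for the label commands above (host and
+        # label names are placeholders):
+        #
+        #   $ ceph orch host label add host2 mylabel
+        #   Added label mylabel to host host2
+        #   $ ceph orch host label rm host2 mylabel
+        #   Removed label mylabel from host host2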
+ self.log.info(msg) + self._kick_serve_loop() + return msg + + def _host_ok_to_stop(self, hostname: str, force: bool = False) -> Tuple[int, str]: + self.log.debug("running host-ok-to-stop checks") + daemons = self.cache.get_daemons() + daemon_map: Dict[str, List[str]] = defaultdict(lambda: []) + for dd in daemons: + assert dd.hostname is not None + assert dd.daemon_type is not None + assert dd.daemon_id is not None + if dd.hostname == hostname: + daemon_map[dd.daemon_type].append(dd.daemon_id) + + notifications: List[str] = [] + error_notifications: List[str] = [] + okay: bool = True + for daemon_type, daemon_ids in daemon_map.items(): + r = self.cephadm_services[daemon_type_to_service( + daemon_type)].ok_to_stop(daemon_ids, force=force) + if r.retval: + okay = False + # collect error notifications so user can see every daemon causing host + # to not be okay to stop + error_notifications.append(r.stderr) + if r.stdout: + # if extra notifications to print for user, add them to notifications list + notifications.append(r.stdout) + + if not okay: + # at least one daemon is not okay to stop + return 1, '\n'.join(error_notifications) + + if notifications: + return 0, (f'It is presumed safe to stop host {hostname}. ' + + 'Note the following:\n\n' + '\n'.join(notifications)) + return 0, f'It is presumed safe to stop host {hostname}' + + @handle_orch_error + def host_ok_to_stop(self, hostname: str) -> str: + if hostname not in self.cache.get_hosts(): + raise OrchestratorError(f'Cannot find host "{hostname}"') + + rc, msg = self._host_ok_to_stop(hostname) + if rc: + raise OrchestratorError(msg, errno=rc) + + self.log.info(msg) + return msg + + def _set_maintenance_healthcheck(self) -> None: + """Raise/update or clear the maintenance health check as needed""" + + in_maintenance = self.inventory.get_host_with_state("maintenance") + if not in_maintenance: + self.remove_health_warning('HOST_IN_MAINTENANCE') + else: + s = "host is" if len(in_maintenance) == 1 else "hosts are" + self.set_health_warning("HOST_IN_MAINTENANCE", f"{len(in_maintenance)} {s} in maintenance mode", 1, [ + f"{h} is in maintenance" for h in in_maintenance]) + + @handle_orch_error + @host_exists() + def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> str: + """ Attempt to place a cluster host in maintenance + + Placing a host into maintenance disables the cluster's ceph target in systemd + and stops all ceph daemons. If the host is an osd host we apply the noout flag + for the host subtree in crush to prevent data movement during a host maintenance + window. 
+ + :param hostname: (str) name of the host (must match an inventory hostname) + + :raises OrchestratorError: Hostname is invalid, host is already in maintenance + """ + if yes_i_really_mean_it and not force: + raise OrchestratorError("--force must be passed with --yes-i-really-mean-it") + + if len(self.cache.get_hosts()) == 1 and not yes_i_really_mean_it: + raise OrchestratorError("Maintenance feature is not supported on single node clusters") + + # if upgrade is active, deny + if self.upgrade.upgrade_state and not yes_i_really_mean_it: + raise OrchestratorError( + f"Unable to place {hostname} in maintenance with upgrade active/paused") + + tgt_host = self.inventory._inventory[hostname] + if tgt_host.get("status", "").lower() == "maintenance": + raise OrchestratorError(f"Host {hostname} is already in maintenance") + + host_daemons = self.cache.get_daemon_types(hostname) + self.log.debug("daemons on host {}".format(','.join(host_daemons))) + if host_daemons: + # daemons on this host, so check the daemons can be stopped + # and if so, place the host into maintenance by disabling the target + rc, msg = self._host_ok_to_stop(hostname, force) + if rc and not yes_i_really_mean_it: + raise OrchestratorError( + msg + '\nNote: Warnings can be bypassed with the --force flag', errno=rc) + + # call the host-maintenance function + with self.async_timeout_handler(hostname, 'cephadm host-maintenance enter'): + _out, _err, _code = self.wait_async( + CephadmServe(self)._run_cephadm( + hostname, cephadmNoImage, "host-maintenance", + ["enter"], + error_ok=True)) + returned_msg = _err[0].split('\n')[-1] + if (returned_msg.startswith('failed') or returned_msg.startswith('ERROR')) and not yes_i_really_mean_it: + raise OrchestratorError( + f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}") + + if "osd" in host_daemons: + crush_node = hostname if '.' not in hostname else hostname.split('.')[0] + rc, out, err = self.mon_command({ + 'prefix': 'osd set-group', + 'flags': 'noout', + 'who': [crush_node], + 'format': 'json' + }) + if rc and not yes_i_really_mean_it: + self.log.warning( + f"maintenance mode request for {hostname} failed to SET the noout group (rc={rc})") + raise OrchestratorError( + f"Unable to set the osds on {hostname} to noout (rc={rc})") + elif not rc: + self.log.info( + f"maintenance mode request for {hostname} has SET the noout group") + + # update the host status in the inventory + tgt_host["status"] = "maintenance" + self.inventory._inventory[hostname] = tgt_host + self.inventory.save() + + self._set_maintenance_healthcheck() + return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. 
Host {hostname} moved to maintenance mode' + + @handle_orch_error + @host_exists() + def exit_host_maintenance(self, hostname: str) -> str: + """Exit maintenance mode and return a host to an operational state + + Returning from maintenance will enable the clusters systemd target and + start it, and remove any noout that has been added for the host if the + host has osd daemons + + :param hostname: (str) host name + + :raises OrchestratorError: Unable to return from maintenance, or unset the + noout flag + """ + tgt_host = self.inventory._inventory[hostname] + if tgt_host['status'] != "maintenance": + raise OrchestratorError(f"Host {hostname} is not in maintenance mode") + + with self.async_timeout_handler(hostname, 'cephadm host-maintenance exit'): + outs, errs, _code = self.wait_async( + CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, + 'host-maintenance', ['exit'], error_ok=True)) + returned_msg = errs[0].split('\n')[-1] + if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'): + raise OrchestratorError( + f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}") + + if "osd" in self.cache.get_daemon_types(hostname): + crush_node = hostname if '.' not in hostname else hostname.split('.')[0] + rc, _out, _err = self.mon_command({ + 'prefix': 'osd unset-group', + 'flags': 'noout', + 'who': [crush_node], + 'format': 'json' + }) + if rc: + self.log.warning( + f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})") + raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})") + else: + self.log.info( + f"exit maintenance request has UNSET for the noout group on host {hostname}") + + # update the host record status + tgt_host['status'] = "" + self.inventory._inventory[hostname] = tgt_host + self.inventory.save() + + self._set_maintenance_healthcheck() + + return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode" + + @handle_orch_error + @host_exists() + def rescan_host(self, hostname: str) -> str: + """Use cephadm to issue a disk rescan on each HBA + + Some HBAs and external enclosures don't automatically register + device insertion with the kernel, so for these scenarios we need + to manually rescan + + :param hostname: (str) host name + """ + self.log.info(f'disk rescan request sent to host "{hostname}"') + with self.async_timeout_handler(hostname, 'cephadm disk-rescan'): + _out, _err, _code = self.wait_async( + CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, "disk-rescan", + [], no_fsid=True, error_ok=True)) + if not _err: + raise OrchestratorError('Unexpected response from cephadm disk-rescan call') + + msg = _err[0].split('\n')[-1] + log_msg = f'disk rescan: {msg}' + if msg.upper().startswith('OK'): + self.log.info(log_msg) + else: + self.log.warning(log_msg) + + return f'{msg}' + + def get_minimal_ceph_conf(self) -> str: + _, config, _ = self.check_mon_command({ + "prefix": "config generate-minimal-conf", + }) + extra = self.extra_ceph_conf().conf + if extra: + try: + config = self._combine_confs(config, extra) + except Exception as e: + self.log.error(f'Failed to add extra ceph conf settings to minimal ceph conf: {e}') + return config + + def _combine_confs(self, conf1: str, conf2: str) -> str: + section_to_option: Dict[str, List[str]] = {} + final_conf: str = '' + for conf in [conf1, conf2]: + if not conf: + continue + section = '' + for line in conf.split('\n'): + if line.strip().startswith('#') or not line.strip(): + continue + if 
line.strip().startswith('[') and line.strip().endswith(']'): + section = line.strip().replace('[', '').replace(']', '') + if section not in section_to_option: + section_to_option[section] = [] + else: + section_to_option[section].append(line.strip()) + + first_section = True + for section, options in section_to_option.items(): + if not first_section: + final_conf += '\n' + final_conf += f'[{section}]\n' + for option in options: + final_conf += f'{option}\n' + first_section = False + + return final_conf + + def _invalidate_daemons_and_kick_serve(self, filter_host: Optional[str] = None) -> None: + if filter_host: + self.cache.invalidate_host_daemons(filter_host) + else: + for h in self.cache.get_hosts(): + # Also discover daemons deployed manually + self.cache.invalidate_host_daemons(h) + + self._kick_serve_loop() + + @handle_orch_error + def describe_service(self, service_type: Optional[str] = None, service_name: Optional[str] = None, + refresh: bool = False) -> List[orchestrator.ServiceDescription]: + if refresh: + self._invalidate_daemons_and_kick_serve() + self.log.debug('Kicked serve() loop to refresh all services') + + sm: Dict[str, orchestrator.ServiceDescription] = {} + + # known services + for nm, spec in self.spec_store.all_specs.items(): + if service_type is not None and service_type != spec.service_type: + continue + if service_name is not None and service_name != nm: + continue + + if spec.service_type != 'osd': + size = spec.placement.get_target_count(self.cache.get_schedulable_hosts()) + else: + # osd counting is special + size = 0 + + sm[nm] = orchestrator.ServiceDescription( + spec=spec, + size=size, + running=0, + events=self.events.get_for_service(spec.service_name()), + created=self.spec_store.spec_created[nm], + deleted=self.spec_store.spec_deleted.get(nm, None), + virtual_ip=spec.get_virtual_ip(), + ports=spec.get_port_start(), + ) + if spec.service_type == 'ingress': + # ingress has 2 daemons running per host + # but only if it's the full ingress service, not for keepalive-only + if not cast(IngressSpec, spec).keepalive_only: + sm[nm].size *= 2 + + # factor daemons into status + for h, dm in self.cache.get_daemons_with_volatile_status(): + for name, dd in dm.items(): + assert dd.hostname is not None, f'no hostname for {dd!r}' + assert dd.daemon_type is not None, f'no daemon_type for {dd!r}' + + n: str = dd.service_name() + + if ( + service_type + and service_type != daemon_type_to_service(dd.daemon_type) + ): + continue + if service_name and service_name != n: + continue + + if n not in sm: + # new unmanaged service + spec = ServiceSpec( + unmanaged=True, + service_type=daemon_type_to_service(dd.daemon_type), + service_id=dd.service_id(), + ) + sm[n] = orchestrator.ServiceDescription( + last_refresh=dd.last_refresh, + container_image_id=dd.container_image_id, + container_image_name=dd.container_image_name, + spec=spec, + size=0, + ) + + if dd.status == DaemonDescriptionStatus.running: + sm[n].running += 1 + if dd.daemon_type == 'osd': + # The osd count can't be determined by the Placement spec. + # Showing an actual/expected representation cannot be determined + # here. So we're setting running = size for now. 
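+                        # Illustration: with 12 running OSD daemons under one
+                        # spec, 'ceph orch ls' would then report 12/12 for the
+                        # service, since both counters advance per OSD daemon.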
+ sm[n].size += 1 + if ( + not sm[n].last_refresh + or not dd.last_refresh + or dd.last_refresh < sm[n].last_refresh # type: ignore + ): + sm[n].last_refresh = dd.last_refresh + + return list(sm.values()) + + @handle_orch_error + def list_daemons(self, + service_name: Optional[str] = None, + daemon_type: Optional[str] = None, + daemon_id: Optional[str] = None, + host: Optional[str] = None, + refresh: bool = False) -> List[orchestrator.DaemonDescription]: + if refresh: + self._invalidate_daemons_and_kick_serve(host) + self.log.debug('Kicked serve() loop to refresh all daemons') + + result = [] + for h, dm in self.cache.get_daemons_with_volatile_status(): + if host and h != host: + continue + for name, dd in dm.items(): + if daemon_type is not None and daemon_type != dd.daemon_type: + continue + if daemon_id is not None and daemon_id != dd.daemon_id: + continue + if service_name is not None and service_name != dd.service_name(): + continue + if not dd.memory_request and dd.daemon_type in ['osd', 'mon']: + dd.memory_request = cast(Optional[int], self.get_foreign_ceph_option( + dd.name(), + f"{dd.daemon_type}_memory_target" + )) + result.append(dd) + return result + + @handle_orch_error + def service_action(self, action: str, service_name: str) -> List[str]: + if service_name not in self.spec_store.all_specs.keys(): + raise OrchestratorError(f'Invalid service name "{service_name}".' + + ' View currently running services using "ceph orch ls"') + dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name) + if not dds: + raise OrchestratorError(f'No daemons exist under service name "{service_name}".' + + ' View currently running services using "ceph orch ls"') + if action == 'stop' and service_name.split('.')[0].lower() in ['mgr', 'mon', 'osd']: + return [f'Stopping entire {service_name} service is prohibited.'] + self.log.info('%s service %s' % (action.capitalize(), service_name)) + return [ + self._schedule_daemon_action(dd.name(), action) + for dd in dds + ] + + def _rotate_daemon_key(self, daemon_spec: CephadmDaemonDeploySpec) -> str: + self.log.info(f'Rotating authentication key for {daemon_spec.name()}') + rc, out, err = self.mon_command({ + 'prefix': 'auth get-or-create-pending', + 'entity': daemon_spec.entity_name(), + 'format': 'json', + }) + j = json.loads(out) + pending_key = j[0]['pending_key'] + + # deploy a new keyring file + if daemon_spec.daemon_type != 'osd': + daemon_spec = self.cephadm_services[daemon_type_to_service( + daemon_spec.daemon_type)].prepare_create(daemon_spec) + with self.async_timeout_handler(daemon_spec.host, f'cephadm deploy ({daemon_spec.daemon_type} daemon)'): + self.wait_async(CephadmServe(self)._create_daemon(daemon_spec, reconfig=True)) + + # try to be clever, or fall back to restarting the daemon + rc = -1 + if daemon_spec.daemon_type == 'osd': + rc, out, err = self.tool_exec( + args=['ceph', 'tell', daemon_spec.name(), 'rotate-stored-key', '-i', '-'], + stdin=pending_key.encode() + ) + if not rc: + rc, out, err = self.tool_exec( + args=['ceph', 'tell', daemon_spec.name(), 'rotate-key', '-i', '-'], + stdin=pending_key.encode() + ) + elif daemon_spec.daemon_type == 'mds': + rc, out, err = self.tool_exec( + args=['ceph', 'tell', daemon_spec.name(), 'rotate-key', '-i', '-'], + stdin=pending_key.encode() + ) + elif ( + daemon_spec.daemon_type == 'mgr' + and daemon_spec.daemon_id == self.get_mgr_id() + ): + rc, out, err = self.tool_exec( + args=['ceph', 'tell', daemon_spec.name(), 'rotate-key', '-i', '-'], + stdin=pending_key.encode() + ) + 
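+        # Illustration (daemon name is a placeholder): for e.g.
+        # 'ceph orch daemon rotate-key rgw.foo.host1.abcdef', none of the
+        # 'tell' branches above apply, so rc stays -1 and the restart below
+        # makes the daemon pick up the redeployed keyring.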
if rc: + self._daemon_action(daemon_spec, 'restart') + + return f'Rotated key for {daemon_spec.name()}' + + def _daemon_action(self, + daemon_spec: CephadmDaemonDeploySpec, + action: str, + image: Optional[str] = None) -> str: + self._daemon_action_set_image(action, image, daemon_spec.daemon_type, + daemon_spec.daemon_id) + + if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(daemon_spec.daemon_type, + daemon_spec.daemon_id): + self.mgr_service.fail_over() + return '' # unreachable + + if action == 'rotate-key': + return self._rotate_daemon_key(daemon_spec) + + if action == 'redeploy' or action == 'reconfig': + if daemon_spec.daemon_type != 'osd': + daemon_spec = self.cephadm_services[daemon_type_to_service( + daemon_spec.daemon_type)].prepare_create(daemon_spec) + else: + # for OSDs, we still need to update config, just not carry out the full + # prepare_create function + daemon_spec.final_config, daemon_spec.deps = self.osd_service.generate_config( + daemon_spec) + with self.async_timeout_handler(daemon_spec.host, f'cephadm deploy ({daemon_spec.daemon_type} daemon)'): + return self.wait_async( + CephadmServe(self)._create_daemon(daemon_spec, reconfig=(action == 'reconfig'))) + + actions = { + 'start': ['reset-failed', 'start'], + 'stop': ['stop'], + 'restart': ['reset-failed', 'restart'], + } + name = daemon_spec.name() + for a in actions[action]: + try: + with self.async_timeout_handler(daemon_spec.host, f'cephadm unit --name {name}'): + out, err, code = self.wait_async(CephadmServe(self)._run_cephadm( + daemon_spec.host, name, 'unit', + ['--name', name, a])) + except Exception: + self.log.exception(f'`{daemon_spec.host}: cephadm unit {name} {a}` failed') + self.cache.invalidate_host_daemons(daemon_spec.host) + msg = "{} {} from host '{}'".format(action, name, daemon_spec.host) + self.events.for_daemon(name, 'INFO', msg) + return msg + + def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str) -> None: + if image is not None: + if action != 'redeploy': + raise OrchestratorError( + f'Cannot execute {action} with new image. `action` needs to be `redeploy`') + if daemon_type not in CEPH_IMAGE_TYPES: + raise OrchestratorError( + f'Cannot redeploy {daemon_type}.{daemon_id} with a new image: Supported ' + f'types are: {", ".join(CEPH_IMAGE_TYPES)}') + + self.check_mon_command({ + 'prefix': 'config set', + 'name': 'container_image', + 'value': image, + 'who': utils.name_to_config_section(daemon_type + '.' 
+ daemon_id), + }) + + @handle_orch_error + def daemon_action(self, action: str, daemon_name: str, image: Optional[str] = None) -> str: + d = self.cache.get_daemon(daemon_name) + assert d.daemon_type is not None + assert d.daemon_id is not None + + if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(d.daemon_type, d.daemon_id) \ + and not self.mgr_service.mgr_map_has_standby(): + raise OrchestratorError( + f'Unable to schedule redeploy for {daemon_name}: No standby MGRs') + + if action == 'rotate-key': + if d.daemon_type not in ['mgr', 'osd', 'mds', + 'rgw', 'crash', 'nfs', 'rbd-mirror', 'iscsi']: + raise OrchestratorError( + f'key rotation not supported for {d.daemon_type}' + ) + + self._daemon_action_set_image(action, image, d.daemon_type, d.daemon_id) + + self.log.info(f'Schedule {action} daemon {daemon_name}') + return self._schedule_daemon_action(daemon_name, action) + + def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool: + return daemon_type == 'mgr' and daemon_id == self.get_mgr_id() + + def get_active_mgr(self) -> DaemonDescription: + return self.mgr_service.get_active_daemon(self.cache.get_daemons_by_type('mgr')) + + def get_active_mgr_digests(self) -> List[str]: + digests = self.mgr_service.get_active_daemon( + self.cache.get_daemons_by_type('mgr')).container_image_digests + return digests if digests else [] + + def _schedule_daemon_action(self, daemon_name: str, action: str) -> str: + dd = self.cache.get_daemon(daemon_name) + assert dd.daemon_type is not None + assert dd.daemon_id is not None + assert dd.hostname is not None + if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(dd.daemon_type, dd.daemon_id) \ + and not self.mgr_service.mgr_map_has_standby(): + raise OrchestratorError( + f'Unable to schedule redeploy for {daemon_name}: No standby MGRs') + self.cache.schedule_daemon_action(dd.hostname, dd.name(), action) + self.cache.save_host(dd.hostname) + msg = "Scheduled to {} {} on host '{}'".format(action, daemon_name, dd.hostname) + self._kick_serve_loop() + return msg + + @handle_orch_error + def remove_daemons(self, names): + # type: (List[str]) -> List[str] + args = [] + for host, dm in self.cache.daemons.items(): + for name in names: + if name in dm: + args.append((name, host)) + if not args: + raise OrchestratorError('Unable to find daemon(s) %s' % (names)) + self.log.info('Remove daemons %s' % ' '.join([a[0] for a in args])) + return self._remove_daemons(args) + + @handle_orch_error + def remove_service(self, service_name: str, force: bool = False) -> str: + self.log.info('Remove service %s' % service_name) + self._trigger_preview_refresh(service_name=service_name) + if service_name in self.spec_store: + if self.spec_store[service_name].spec.service_type in ('mon', 'mgr'): + return f'Unable to remove {service_name} service.\n' \ + f'Note, you might want to mark the {service_name} service as "unmanaged"' + else: + return f"Invalid service '{service_name}'. Use 'ceph orch ls' to list available services.\n" + + # Report list of affected OSDs? 
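+        # Illustration of the guard below (service and OSD ids are
+        # placeholders):
+        #
+        #   $ ceph orch rm osd.all-available-devices
+        #   Error EINVAL: If osd.all-available-devices is removed then the
+        #   following OSDs will remain, --force to proceed anyway
+        #       host host1: osd.0 osd.1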
+        if not force and service_name.startswith('osd.'):
+            osds_msg = {}
+            for h, dm in self.cache.get_daemons_with_volatile_status():
+                osds_to_remove = []
+                for name, dd in dm.items():
+                    if dd.daemon_type == 'osd' and dd.service_name() == service_name:
+                        osds_to_remove.append(str(dd.daemon_id))
+                if osds_to_remove:
+                    osds_msg[h] = osds_to_remove
+            if osds_msg:
+                msg = ''
+                for h, ls in osds_msg.items():
+                    msg += f'\thost {h}: {" ".join([f"osd.{id}" for id in ls])}'
+                raise OrchestratorError(
+                    f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}')
+
+        found = self.spec_store.rm(service_name)
+        if found and service_name.startswith('osd.'):
+            self.spec_store.finally_rm(service_name)
+        self._kick_serve_loop()
+        return f'Removed service {service_name}'
+
+    @handle_orch_error
+    def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh: bool = False) -> List[orchestrator.InventoryHost]:
+        """
+        Return the storage inventory of hosts matching the given filter.
+
+        :param host_filter: host filter
+
+        TODO:
+        - add filtering by label
+        """
+        if refresh:
+            if host_filter and host_filter.hosts:
+                for h in host_filter.hosts:
+                    self.log.debug(f'will refresh {h} devs')
+                    self.cache.invalidate_host_devices(h)
+                    self.cache.invalidate_host_networks(h)
+            else:
+                for h in self.cache.get_hosts():
+                    self.log.debug(f'will refresh {h} devs')
+                    self.cache.invalidate_host_devices(h)
+                    self.cache.invalidate_host_networks(h)
+
+            self.event.set()
+            self.log.debug('Kicked serve() loop to refresh devices')
+
+        result = []
+        for host, dls in self.cache.devices.items():
+            if host_filter and host_filter.hosts and host not in host_filter.hosts:
+                continue
+            result.append(orchestrator.InventoryHost(host,
+                                                     inventory.Devices(dls)))
+        return result
+
+    @handle_orch_error
+    def zap_device(self, host: str, path: str) -> str:
+        """Zap a device on a managed host.
+
+        Use ceph-volume zap to return a device to an unused/free state
+
+        Args:
+            host (str): hostname of the cluster host
+            path (str): device path
+
+        Raises:
+            OrchestratorError: host is not a cluster host
+            OrchestratorError: host is in maintenance and therefore unavailable
+            OrchestratorError: device path not found on the host
+            OrchestratorError: device is known to a different ceph cluster
+            OrchestratorError: device holds active osd
+            OrchestratorError: device cache hasn't been populated yet.
+
+        Returns:
+            str: output from the zap command
+        """
+
+        self.log.info('Zap device %s:%s' % (host, path))
+
+        if host not in self.inventory.keys():
+            raise OrchestratorError(
+                f"Host '{host}' is not a member of the cluster")
+
+        host_info = self.inventory._inventory.get(host, {})
+        if host_info.get('status', '').lower() == 'maintenance':
+            raise OrchestratorError(
+                f"Host '{host}' is in maintenance mode, which prevents any actions against it.")
+
+        if host not in self.cache.devices:
+            raise OrchestratorError(
+                f"Host '{host}' hasn't been scanned yet to determine its inventory. 
Please try again later.") + + host_devices = self.cache.devices[host] + path_found = False + osd_id_list: List[str] = [] + + for dev in host_devices: + if dev.path == path: + path_found = True + break + if not path_found: + raise OrchestratorError( + f"Device path '{path}' not found on host '{host}'") + + if osd_id_list: + dev_name = os.path.basename(path) + active_osds: List[str] = [] + for osd_id in osd_id_list: + metadata = self.get_metadata('osd', str(osd_id)) + if metadata: + if metadata.get('hostname', '') == host and dev_name in metadata.get('devices', '').split(','): + active_osds.append("osd." + osd_id) + if active_osds: + raise OrchestratorError( + f"Unable to zap: device '{path}' on {host} has {len(active_osds)} active " + f"OSD{'s' if len(active_osds) > 1 else ''}" + f" ({', '.join(active_osds)}). Use 'ceph orch osd rm' first.") + + cv_args = ['--', 'lvm', 'zap', '--destroy', path] + with self.async_timeout_handler(host, f'cephadm ceph-volume {" ".join(cv_args)}'): + out, err, code = self.wait_async(CephadmServe(self)._run_cephadm( + host, 'osd', 'ceph-volume', cv_args, error_ok=True)) + + self.cache.invalidate_host_devices(host) + self.cache.invalidate_host_networks(host) + if code: + raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err)) + msg = f'zap successful for {path} on {host}' + self.log.info(msg) + + return msg + '\n' + + @handle_orch_error + def blink_device_light(self, ident_fault: str, on: bool, locs: List[orchestrator.DeviceLightLoc]) -> List[str]: + """ + Blink a device light. Calling something like:: + + lsmcli local-disk-ident-led-on --path $path + + If you must, you can customize this via:: + + ceph config-key set mgr/cephadm/blink_device_light_cmd '<my jinja2 template>' + ceph config-key set mgr/cephadm/<host>/blink_device_light_cmd '<my jinja2 template>' + + See templates/blink_device_light_cmd.j2 + """ + @forall_hosts + def blink(host: str, dev: str, path: str) -> str: + cmd_line = self.template.render('blink_device_light_cmd.j2', + { + 'on': on, + 'ident_fault': ident_fault, + 'dev': dev, + 'path': path + }, + host=host) + cmd_args = shlex.split(cmd_line) + + with self.async_timeout_handler(host, f'cephadm shell -- {" ".join(cmd_args)}'): + out, err, code = self.wait_async(CephadmServe(self)._run_cephadm( + host, 'osd', 'shell', ['--'] + cmd_args, + error_ok=True)) + if code: + raise OrchestratorError( + 'Unable to affect %s light for %s:%s. Command: %s' % ( + ident_fault, host, dev, ' '.join(cmd_args))) + self.log.info('Set %s light for %s:%s %s' % ( + ident_fault, host, dev, 'on' if on else 'off')) + return "Set %s light for %s:%s %s" % ( + ident_fault, host, dev, 'on' if on else 'off') + + return blink(locs) + + def get_osd_uuid_map(self, only_up=False): + # type: (bool) -> Dict[str, str] + osd_map = self.get('osd_map') + r = {} + for o in osd_map['osds']: + # only include OSDs that have ever started in this map. this way + # an interrupted osd create can be repeated and succeed the second + # time around. 
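+            # The resulting map is keyed by the stringified OSD id, e.g.
+            # (illustrative uuids): {'0': '6fbd...', '1': 'a3c5...'}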
+ osd_id = o.get('osd') + if osd_id is None: + raise OrchestratorError("Could not retrieve osd_id from osd_map") + if not only_up: + r[str(osd_id)] = o.get('uuid', '') + return r + + def get_osd_by_id(self, osd_id: int) -> Optional[Dict[str, Any]]: + osd = [x for x in self.get('osd_map')['osds'] + if x['osd'] == osd_id] + + if len(osd) != 1: + return None + + return osd[0] + + def _trigger_preview_refresh(self, + specs: Optional[List[DriveGroupSpec]] = None, + service_name: Optional[str] = None, + ) -> None: + # Only trigger a refresh when a spec has changed + trigger_specs = [] + if specs: + for spec in specs: + preview_spec = self.spec_store.spec_preview.get(spec.service_name()) + # the to-be-preview spec != the actual spec, this means we need to + # trigger a refresh, if the spec has been removed (==None) we need to + # refresh as well. + if not preview_spec or spec != preview_spec: + trigger_specs.append(spec) + if service_name: + trigger_specs = [cast(DriveGroupSpec, self.spec_store.spec_preview.get(service_name))] + if not any(trigger_specs): + return None + + refresh_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=trigger_specs) + for host in refresh_hosts: + self.log.info(f"Marking host: {host} for OSDSpec preview refresh.") + self.cache.osdspec_previews_refresh_queue.append(host) + + @handle_orch_error + def apply_drivegroups(self, specs: List[DriveGroupSpec]) -> List[str]: + """ + Deprecated. Please use `apply()` instead. + + Keeping this around to be compatible to mgr/dashboard + """ + return [self._apply(spec) for spec in specs] + + @handle_orch_error + def create_osds(self, drive_group: DriveGroupSpec) -> str: + hosts: List[HostSpec] = self.inventory.all_specs() + filtered_hosts: List[str] = drive_group.placement.filter_matching_hostspecs(hosts) + if not filtered_hosts: + return "Invalid 'host:device' spec: host not found in cluster. Please check 'ceph orch host ls' for available hosts" + return self.osd_service.create_from_spec(drive_group) + + def _preview_osdspecs(self, + osdspecs: Optional[List[DriveGroupSpec]] = None + ) -> dict: + if not osdspecs: + return {'n/a': [{'error': True, + 'message': 'No OSDSpec or matching hosts found.'}]} + matching_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=osdspecs) + if not matching_hosts: + return {'n/a': [{'error': True, + 'message': 'No OSDSpec or matching hosts found.'}]} + # Is any host still loading previews or still in the queue to be previewed + pending_hosts = {h for h in self.cache.loading_osdspec_preview if h in matching_hosts} + if pending_hosts or any(item in self.cache.osdspec_previews_refresh_queue for item in matching_hosts): + # Report 'pending' when any of the matching hosts is still loading previews (flag is True) + return {'n/a': [{'error': True, + 'message': 'Preview data is being generated.. 
' + 'Please re-run this command in a bit.'}]} + # drop all keys that are not in search_hosts and only select reports that match the requested osdspecs + previews_for_specs = {} + for host, raw_reports in self.cache.osdspec_previews.items(): + if host not in matching_hosts: + continue + osd_reports = [] + for osd_report in raw_reports: + if osd_report.get('osdspec') in [x.service_id for x in osdspecs]: + osd_reports.append(osd_report) + previews_for_specs.update({host: osd_reports}) + return previews_for_specs + + def _calc_daemon_deps(self, + spec: Optional[ServiceSpec], + daemon_type: str, + daemon_id: str) -> List[str]: + + def get_daemon_names(daemons: List[str]) -> List[str]: + daemon_names = [] + for daemon_type in daemons: + for dd in self.cache.get_daemons_by_type(daemon_type): + daemon_names.append(dd.name()) + return daemon_names + + alertmanager_user, alertmanager_password = self._get_alertmanager_credentials() + prometheus_user, prometheus_password = self._get_prometheus_credentials() + + deps = [] + if daemon_type == 'haproxy': + # because cephadm creates new daemon instances whenever + # port or ip changes, identifying daemons by name is + # sufficient to detect changes. + if not spec: + return [] + ingress_spec = cast(IngressSpec, spec) + assert ingress_spec.backend_service + daemons = self.cache.get_daemons_by_service(ingress_spec.backend_service) + deps = [d.name() for d in daemons] + elif daemon_type == 'keepalived': + # because cephadm creates new daemon instances whenever + # port or ip changes, identifying daemons by name is + # sufficient to detect changes. + if not spec: + return [] + daemons = self.cache.get_daemons_by_service(spec.service_name()) + deps = [d.name() for d in daemons if d.daemon_type == 'haproxy'] + elif daemon_type == 'agent': + root_cert = '' + server_port = '' + try: + server_port = str(self.http_server.agent.server_port) + root_cert = self.http_server.agent.ssl_certs.get_root_cert() + except Exception: + pass + deps = sorted([self.get_mgr_ip(), server_port, root_cert, + str(self.device_enhanced_scan)]) + elif daemon_type == 'iscsi': + if spec: + iscsi_spec = cast(IscsiServiceSpec, spec) + deps = [self.iscsi_service.get_trusted_ips(iscsi_spec)] + else: + deps = [self.get_mgr_ip()] + elif daemon_type == 'prometheus': + # for prometheus we add the active mgr as an explicit dependency, + # this way we force a redeploy after a mgr failover + deps.append(self.get_active_mgr().name()) + deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283))) + deps.append(str(self.service_discovery_port)) + # prometheus yaml configuration file (generated by prometheus.yml.j2) contains + # a scrape_configs section for each service type. This should be included only + # when at least one daemon of the corresponding service is running. Therefore, + # an explicit dependency is added for each service-type to force a reconfig + # whenever the number of daemons for those service-type changes from 0 to greater + # than zero and vice versa. 
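+            # Illustration: with node-exporter daemons deployed but no
+            # alertmanager, only 'node-exporter' is appended below; deploying
+            # the first alertmanager changes the dep list and so triggers a
+            # prometheus reconfig with an updated scrape configuration.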
+ deps += [s for s in ['node-exporter', 'alertmanager'] + if self.cache.get_daemons_by_service(s)] + if len(self.cache.get_daemons_by_type('ingress')) > 0: + deps.append('ingress') + # add dependency on ceph-exporter daemons + deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] + if self.secure_monitoring_stack: + if prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + elif daemon_type == 'grafana': + deps += get_daemon_names(['prometheus', 'loki']) + if self.secure_monitoring_stack and prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + elif daemon_type == 'alertmanager': + deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway']) + if self.secure_monitoring_stack and alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + elif daemon_type == 'promtail': + deps += get_daemon_names(['loki']) + else: + # TODO(redo): some error message! + pass + + if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']: + deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}') + + return sorted(deps) + + @forall_hosts + def _remove_daemons(self, name: str, host: str) -> str: + return CephadmServe(self)._remove_daemon(name, host) + + def _check_pool_exists(self, pool: str, service_name: str) -> None: + logger.info(f'Checking pool "{pool}" exists for service {service_name}') + if not self.rados.pool_exists(pool): + raise OrchestratorError(f'Cannot find pool "{pool}" for ' + f'service {service_name}') + + def _add_daemon(self, + daemon_type: str, + spec: ServiceSpec) -> List[str]: + """ + Add (and place) a daemon. Require explicit host placement. Do not + schedule, and do not apply the related scheduling limitations. + """ + if spec.service_name() not in self.spec_store: + raise OrchestratorError('Unable to add a Daemon without Service.\n' + 'Please use `ceph orch apply ...` to create a Service.\n' + 'Note, you might want to create the service with "unmanaged=true"') + + self.log.debug('_add_daemon %s spec %s' % (daemon_type, spec.placement)) + if not spec.placement.hosts: + raise OrchestratorError('must specify host(s) to deploy on') + count = spec.placement.count or len(spec.placement.hosts) + daemons = self.cache.get_daemons_by_service(spec.service_name()) + return self._create_daemons(daemon_type, spec, daemons, + spec.placement.hosts, count) + + def _create_daemons(self, + daemon_type: str, + spec: ServiceSpec, + daemons: List[DaemonDescription], + hosts: List[HostPlacementSpec], + count: int) -> List[str]: + if count > len(hosts): + raise OrchestratorError('too few hosts: want %d, have %s' % ( + count, hosts)) + + did_config = False + service_type = daemon_type_to_service(daemon_type) + + args = [] # type: List[CephadmDaemonDeploySpec] + for host, network, name in hosts: + daemon_id = self.get_unique_name(daemon_type, host, daemons, + prefix=spec.service_id, + forcename=name) + + if not did_config: + self.cephadm_services[service_type].config(spec) + did_config = True + + daemon_spec = self.cephadm_services[service_type].make_daemon_spec( + host, daemon_id, network, spec, + # NOTE: this does not consider port conflicts! 
+ ports=spec.get_port_start()) + self.log.debug('Placing %s.%s on host %s' % ( + daemon_type, daemon_id, host)) + args.append(daemon_spec) + + # add to daemon list so next name(s) will also be unique + sd = orchestrator.DaemonDescription( + hostname=host, + daemon_type=daemon_type, + daemon_id=daemon_id, + ) + daemons.append(sd) + + @ forall_hosts + def create_func_map(*args: Any) -> str: + daemon_spec = self.cephadm_services[daemon_type].prepare_create(*args) + with self.async_timeout_handler(daemon_spec.host, f'cephadm deploy ({daemon_spec.daemon_type} daemon)'): + return self.wait_async(CephadmServe(self)._create_daemon(daemon_spec)) + + return create_func_map(args) + + @handle_orch_error + def add_daemon(self, spec: ServiceSpec) -> List[str]: + ret: List[str] = [] + try: + with orchestrator.set_exception_subject('service', spec.service_name(), overwrite=True): + for d_type in service_to_daemon_types(spec.service_type): + ret.extend(self._add_daemon(d_type, spec)) + return ret + except OrchestratorError as e: + self.events.from_orch_error(e) + raise + + def _get_alertmanager_credentials(self) -> Tuple[str, str]: + user = self.get_store(AlertmanagerService.USER_CFG_KEY) + password = self.get_store(AlertmanagerService.PASS_CFG_KEY) + if user is None or password is None: + user = 'admin' + password = 'admin' + self.set_store(AlertmanagerService.USER_CFG_KEY, user) + self.set_store(AlertmanagerService.PASS_CFG_KEY, password) + return (user, password) + + def _get_prometheus_credentials(self) -> Tuple[str, str]: + user = self.get_store(PrometheusService.USER_CFG_KEY) + password = self.get_store(PrometheusService.PASS_CFG_KEY) + if user is None or password is None: + user = 'admin' + password = 'admin' + self.set_store(PrometheusService.USER_CFG_KEY, user) + self.set_store(PrometheusService.PASS_CFG_KEY, password) + return (user, password) + + @handle_orch_error + def set_prometheus_access_info(self, user: str, password: str) -> str: + self.set_store(PrometheusService.USER_CFG_KEY, user) + self.set_store(PrometheusService.PASS_CFG_KEY, password) + return 'prometheus credentials updated correctly' + + @handle_orch_error + def set_alertmanager_access_info(self, user: str, password: str) -> str: + self.set_store(AlertmanagerService.USER_CFG_KEY, user) + self.set_store(AlertmanagerService.PASS_CFG_KEY, password) + return 'alertmanager credentials updated correctly' + + @handle_orch_error + def get_prometheus_access_info(self) -> Dict[str, str]: + user, password = self._get_prometheus_credentials() + return {'user': user, + 'password': password, + 'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()} + + @handle_orch_error + def get_alertmanager_access_info(self) -> Dict[str, str]: + user, password = self._get_alertmanager_credentials() + return {'user': user, + 'password': password, + 'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()} + + @handle_orch_error + def apply_mon(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + def _apply(self, spec: GenericSpec) -> str: + if spec.service_type == 'host': + return self._add_host(cast(HostSpec, spec)) + + if spec.service_type == 'osd': + # _trigger preview refresh needs to be smart and + # should only refresh if a change has been detected + self._trigger_preview_refresh(specs=[cast(DriveGroupSpec, spec)]) + + return self._apply_service_spec(cast(ServiceSpec, spec)) + + def _get_candidate_hosts(self, placement: PlacementSpec) -> List[str]: + """Return a list of candidate hosts according to the 
placement specification."""
+        all_hosts = self.cache.get_schedulable_hosts()
+        candidates = []
+        if placement.hosts:
+            candidates = [h.hostname for h in placement.hosts if h.hostname in placement.hosts]
+        elif placement.label:
+            candidates = [x.hostname for x in [h for h in all_hosts if placement.label in h.labels]]
+        elif placement.host_pattern:
+            candidates = [x for x in placement.filter_matching_hostspecs(all_hosts)]
+        elif (placement.count is not None or placement.count_per_host is not None):
+            candidates = [x.hostname for x in all_hosts]
+        return [h for h in candidates if not self.cache.is_host_draining(h)]
+
+    def _validate_one_shot_placement_spec(self, spec: PlacementSpec) -> None:
+        """Validate placement specification for TunedProfileSpec and ClientKeyringSpec."""
+        if spec.count is not None:
+            raise OrchestratorError(
+                "Placement 'count' field is not supported for this specification.")
+        if spec.count_per_host is not None:
+            raise OrchestratorError(
+                "Placement 'count_per_host' field is not supported for this specification.")
+        if spec.hosts:
+            all_hosts = [h.hostname for h in self.inventory.all_specs()]
+            invalid_hosts = [h.hostname for h in spec.hosts if h.hostname not in all_hosts]
+            if invalid_hosts:
+                raise OrchestratorError(f"Found invalid host(s) in placement section: {invalid_hosts}. "
+                                        f"Please check 'ceph orch host ls' for available hosts.")
+        elif not self._get_candidate_hosts(spec):
+            raise OrchestratorError("Invalid placement specification. No host(s) matched placement spec.\n"
+                                    "Please check 'ceph orch host ls' for available hosts.\n"
+                                    "Note: draining hosts are excluded from the candidate list.")
+
+    def _validate_tunedprofile_settings(self, spec: TunedProfileSpec) -> Dict[str, List[str]]:
+        candidate_hosts = spec.placement.filter_matching_hostspecs(self.inventory.all_specs())
+        invalid_options: Dict[str, List[str]] = {}
+        for host in candidate_hosts:
+            host_sysctl_options = self.cache.get_facts(host).get('sysctl_options', {})
+            invalid_options[host] = []
+            for option in spec.settings:
+                if option not in host_sysctl_options:
+                    invalid_options[host].append(option)
+        return invalid_options
+
+    def _validate_tuned_profile_spec(self, spec: TunedProfileSpec) -> None:
+        if not spec.settings:
+            raise OrchestratorError("Invalid spec: settings section cannot be empty.")
+        self._validate_one_shot_placement_spec(spec.placement)
+        invalid_options = self._validate_tunedprofile_settings(spec)
+        if any(e for e in invalid_options.values()):
+            raise OrchestratorError(
+                f'Failed to apply tuned profile. Invalid sysctl option(s) for host(s) detected: {invalid_options}')
+
+    @handle_orch_error
+    def apply_tuned_profiles(self, specs: List[TunedProfileSpec], no_overwrite: bool = False) -> str:
+        outs = []
+        for spec in specs:
+            self._validate_tuned_profile_spec(spec)
+            if no_overwrite and self.tuned_profiles.exists(spec.profile_name):
+                outs.append(
+                    f"Tuned profile '{spec.profile_name}' already exists (--no-overwrite was passed)")
+            else:
+                # done, let's save the specs
+                self.tuned_profiles.add_profile(spec)
+                outs.append(f'Saved tuned profile {spec.profile_name}')
+        self._kick_serve_loop()
+        return '\n'.join(outs)
+
+    @handle_orch_error
+    def rm_tuned_profile(self, profile_name: str) -> str:
+        if profile_name not in self.tuned_profiles:
+            raise OrchestratorError(
+                f'Tuned profile {profile_name} does not exist. 
Nothing to remove.') + self.tuned_profiles.rm_profile(profile_name) + self._kick_serve_loop() + return f'Removed tuned profile {profile_name}' + + @handle_orch_error + def tuned_profile_ls(self) -> List[TunedProfileSpec]: + return self.tuned_profiles.list_profiles() + + @handle_orch_error + def tuned_profile_add_setting(self, profile_name: str, setting: str, value: str) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f'Tuned profile {profile_name} does not exist. Cannot add setting.') + self.tuned_profiles.add_setting(profile_name, setting, value) + self._kick_serve_loop() + return f'Added setting {setting} with value {value} to tuned profile {profile_name}' + + @handle_orch_error + def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f'Tuned profile {profile_name} does not exist. Cannot remove setting.') + self.tuned_profiles.rm_setting(profile_name, setting) + self._kick_serve_loop() + return f'Removed setting {setting} from tuned profile {profile_name}' + + @handle_orch_error + def service_discovery_dump_cert(self) -> str: + root_cert = self.get_store(ServiceDiscovery.KV_STORE_SD_ROOT_CERT) + if not root_cert: + raise OrchestratorError('No certificate found for service discovery') + return root_cert + + def set_health_warning(self, name: str, summary: str, count: int, detail: List[str]) -> None: + self.health_checks[name] = { + 'severity': 'warning', + 'summary': summary, + 'count': count, + 'detail': detail, + } + self.set_health_checks(self.health_checks) + + def remove_health_warning(self, name: str) -> None: + if name in self.health_checks: + del self.health_checks[name] + self.set_health_checks(self.health_checks) + + def _plan(self, spec: ServiceSpec) -> dict: + if spec.service_type == 'osd': + return {'service_name': spec.service_name(), + 'service_type': spec.service_type, + 'data': self._preview_osdspecs(osdspecs=[cast(DriveGroupSpec, spec)])} + + svc = self.cephadm_services[spec.service_type] + ha = HostAssignment( + spec=spec, + hosts=self.cache.get_schedulable_hosts(), + unreachable_hosts=self.cache.get_unreachable_hosts(), + draining_hosts=self.cache.get_draining_hosts(), + networks=self.cache.networks, + daemons=self.cache.get_daemons_by_service(spec.service_name()), + allow_colo=svc.allow_colo(), + rank_map=self.spec_store[spec.service_name()].rank_map if svc.ranked() else None + ) + ha.validate() + hosts, to_add, to_remove = ha.place() + + return { + 'service_name': spec.service_name(), + 'service_type': spec.service_type, + 'add': [hs.hostname for hs in to_add], + 'remove': [d.name() for d in to_remove] + } + + @handle_orch_error + def plan(self, specs: Sequence[GenericSpec]) -> List: + results = [{'warning': 'WARNING! Dry-Runs are snapshots of a certain point in time and are bound \n' + 'to the current inventory setup. If any of these conditions change, the \n' + 'preview will be invalid. Please make sure to have a minimal \n' + 'timeframe between planning and applying the specs.'}] + if any([spec.service_type == 'host' for spec in specs]): + return [{'error': 'Found <HostSpec>. 
Previews that include Host Specifications are not supported, yet.'}] + for spec in specs: + results.append(self._plan(cast(ServiceSpec, spec))) + return results + + def _apply_service_spec(self, spec: ServiceSpec) -> str: + if spec.placement.is_empty(): + # fill in default placement + defaults = { + 'mon': PlacementSpec(count=5), + 'mgr': PlacementSpec(count=2), + 'mds': PlacementSpec(count=2), + 'rgw': PlacementSpec(count=2), + 'ingress': PlacementSpec(count=2), + 'iscsi': PlacementSpec(count=1), + 'nvmeof': PlacementSpec(count=1), + 'rbd-mirror': PlacementSpec(count=2), + 'cephfs-mirror': PlacementSpec(count=1), + 'nfs': PlacementSpec(count=1), + 'grafana': PlacementSpec(count=1), + 'alertmanager': PlacementSpec(count=1), + 'prometheus': PlacementSpec(count=1), + 'node-exporter': PlacementSpec(host_pattern='*'), + 'ceph-exporter': PlacementSpec(host_pattern='*'), + 'loki': PlacementSpec(count=1), + 'promtail': PlacementSpec(host_pattern='*'), + 'crash': PlacementSpec(host_pattern='*'), + 'container': PlacementSpec(count=1), + 'snmp-gateway': PlacementSpec(count=1), + 'elasticsearch': PlacementSpec(count=1), + 'jaeger-agent': PlacementSpec(host_pattern='*'), + 'jaeger-collector': PlacementSpec(count=1), + 'jaeger-query': PlacementSpec(count=1) + } + spec.placement = defaults[spec.service_type] + elif spec.service_type in ['mon', 'mgr'] and \ + spec.placement.count is not None and \ + spec.placement.count < 1: + raise OrchestratorError('cannot scale %s service below 1' % ( + spec.service_type)) + + host_count = len(self.inventory.keys()) + max_count = self.max_count_per_host + + if spec.placement.count is not None: + if spec.service_type in ['mon', 'mgr']: + if spec.placement.count > max(5, host_count): + raise OrchestratorError( + (f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {max(5, host_count)}.')) + elif spec.service_type != 'osd': + if spec.placement.count > (max_count * host_count): + raise OrchestratorError((f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {host_count*max_count} ({host_count}x{max_count}).' + + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option')) + + if spec.placement.count_per_host is not None and spec.placement.count_per_host > max_count and spec.service_type != 'osd': + raise OrchestratorError((f'The maximum count_per_host allowed is {max_count}.' + + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option')) + + HostAssignment( + spec=spec, + hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh + unreachable_hosts=self.cache.get_unreachable_hosts(), + draining_hosts=self.cache.get_draining_hosts(), + networks=self.cache.networks, + daemons=self.cache.get_daemons_by_service(spec.service_name()), + allow_colo=self.cephadm_services[spec.service_type].allow_colo(), + ).validate() + + self.log.info('Saving service %s spec with placement %s' % ( + spec.service_name(), spec.placement.pretty_str())) + self.spec_store.save(spec) + self._kick_serve_loop() + return "Scheduled %s update..." % spec.service_name() + + @handle_orch_error + def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]: + results = [] + for spec in specs: + if no_overwrite: + if spec.service_type == 'host' and cast(HostSpec, spec).hostname in self.inventory: + results.append('Skipped %s host spec. 
To change %s spec omit --no-overwrite flag' + % (cast(HostSpec, spec).hostname, spec.service_type)) + continue + elif cast(ServiceSpec, spec).service_name() in self.spec_store: + results.append('Skipped %s service spec. To change %s spec omit --no-overwrite flag' + % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name())) + continue + results.append(self._apply(spec)) + return results + + @handle_orch_error + def apply_mgr(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_mds(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_rgw(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_ingress(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_iscsi(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_rbd_mirror(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_nfs(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + def _get_dashboard_url(self): + # type: () -> str + return self.get('mgr_map').get('services', {}).get('dashboard', '') + + @handle_orch_error + def apply_prometheus(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_loki(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_promtail(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_node_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_ceph_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_crash(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_grafana(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_alertmanager(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_container(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_snmp_gateway(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def set_unmanaged(self, service_name: str, value: bool) -> str: + return self.spec_store.set_unmanaged(service_name, value) + + @handle_orch_error + def upgrade_check(self, image: str, version: str) -> str: + if self.inventory.get_host_with_state("maintenance"): + raise OrchestratorError("check aborted - you have hosts in maintenance state") + + if version: + target_name = self.container_image_base + ':v' + version + elif image: + target_name = image + else: + raise OrchestratorError('must specify either image or version') + + with self.async_timeout_handler(cmd=f'cephadm inspect-image (image {target_name})'): + image_info = self.wait_async(CephadmServe(self)._get_container_image_info(target_name)) + + ceph_image_version = image_info.ceph_version + if not ceph_image_version: + return f'Unable to extract ceph version from {target_name}.' 
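+        # Illustrative example (the version banner below is made up): the image
+        # reports the full version banner, and the prefix is stripped off to
+        # leave only the numeric version, e.g.
+        #
+        #   >>> 'ceph version 18.2.1 (0000000) reef (stable)'.split(' ')[2]
+        #   '18.2.1'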
+ if ceph_image_version.startswith('ceph version '): + ceph_image_version = ceph_image_version.split(' ')[2] + version_error = self.upgrade._check_target_version(ceph_image_version) + if version_error: + return f'Incompatible upgrade: {version_error}' + + self.log.debug(f'image info {image} -> {image_info}') + r: dict = { + 'target_name': target_name, + 'target_id': image_info.image_id, + 'target_version': image_info.ceph_version, + 'needs_update': dict(), + 'up_to_date': list(), + 'non_ceph_image_daemons': list() + } + for host, dm in self.cache.daemons.items(): + for name, dd in dm.items(): + # check if the container digest for the digest we're checking upgrades for matches + # the container digests for the daemon if "use_repo_digest" setting is true + # or that the image name matches the daemon's image name if "use_repo_digest" + # is false. The idea is to generally check if the daemon is already using + # the image we're checking upgrade to. + if ( + (self.use_repo_digest and dd.matches_digests(image_info.repo_digests)) + or (not self.use_repo_digest and dd.matches_image_name(image)) + ): + r['up_to_date'].append(dd.name()) + elif dd.daemon_type in CEPH_IMAGE_TYPES: + r['needs_update'][dd.name()] = { + 'current_name': dd.container_image_name, + 'current_id': dd.container_image_id, + 'current_version': dd.version, + } + else: + r['non_ceph_image_daemons'].append(dd.name()) + if self.use_repo_digest and image_info.repo_digests: + # FIXME: we assume the first digest is the best one to use + r['target_digest'] = image_info.repo_digests[0] + + return json.dumps(r, indent=4, sort_keys=True) + + @handle_orch_error + def upgrade_status(self) -> orchestrator.UpgradeStatusSpec: + return self.upgrade.upgrade_status() + + @handle_orch_error + def upgrade_ls(self, image: Optional[str], tags: bool, show_all_versions: Optional[bool]) -> Dict[Any, Any]: + return self.upgrade.upgrade_ls(image, tags, show_all_versions) + + @handle_orch_error + def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, host_placement: Optional[str] = None, + services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + if self.inventory.get_host_with_state("maintenance"): + raise OrchestratorError("Upgrade aborted - you have host(s) in maintenance state") + if self.offline_hosts: + raise OrchestratorError( + f"Upgrade aborted - Some host(s) are currently offline: {self.offline_hosts}") + if daemon_types is not None and services is not None: + raise OrchestratorError('--daemon-types and --services are mutually exclusive') + if daemon_types is not None: + for dtype in daemon_types: + if dtype not in CEPH_UPGRADE_ORDER: + raise OrchestratorError(f'Upgrade aborted - Got unexpected daemon type "{dtype}".\n' + f'Viable daemon types for this command are: {utils.CEPH_TYPES + utils.GATEWAY_TYPES}') + if services is not None: + for service in services: + if service not in self.spec_store: + raise OrchestratorError(f'Upgrade aborted - Got unknown service name "{service}".\n' + f'Known services are: {self.spec_store.all_specs.keys()}') + hosts: Optional[List[str]] = None + if host_placement is not None: + all_hosts = list(self.inventory.all_specs()) + placement = PlacementSpec.from_string(host_placement) + hosts = placement.filter_matching_hostspecs(all_hosts) + if not hosts: + raise OrchestratorError( + f'Upgrade aborted - hosts parameter "{host_placement}" provided did not match any hosts') + + if limit is not None: + if limit < 1: + raise OrchestratorError( + f'Upgrade aborted - 
--limit arg must be a positive integer, not {limit}') + + return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit) + + @handle_orch_error + def upgrade_pause(self) -> str: + return self.upgrade.upgrade_pause() + + @handle_orch_error + def upgrade_resume(self) -> str: + return self.upgrade.upgrade_resume() + + @handle_orch_error + def upgrade_stop(self) -> str: + return self.upgrade.upgrade_stop() + + @handle_orch_error + def remove_osds(self, osd_ids: List[str], + replace: bool = False, + force: bool = False, + zap: bool = False, + no_destroy: bool = False) -> str: + """ + Takes a list of OSDs and schedules them for removal. + The function that takes care of the actual removal is + process_removal_queue(). + """ + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd') + to_remove_daemons = list() + for daemon in daemons: + if daemon.daemon_id in osd_ids: + to_remove_daemons.append(daemon) + if not to_remove_daemons: + return f"Unable to find OSDs: {osd_ids}" + + for daemon in to_remove_daemons: + assert daemon.daemon_id is not None + try: + self.to_remove_osds.enqueue(OSD(osd_id=int(daemon.daemon_id), + replace=replace, + force=force, + zap=zap, + no_destroy=no_destroy, + hostname=daemon.hostname, + process_started_at=datetime_now(), + remove_util=self.to_remove_osds.rm_util)) + except NotFoundError: + return f"Unable to find OSDs: {osd_ids}" + + # trigger the serve loop to initiate the removal + self._kick_serve_loop() + warning_zap = "" if zap else ("\nVG/LV for the OSDs won't be zapped (--zap wasn't passed).\n" + "Run the `ceph-volume lvm zap` command with `--destroy`" + " against the VG/LV if you want them to be destroyed.") + return f"Scheduled OSD(s) for removal.{warning_zap}" + + @handle_orch_error + def stop_remove_osds(self, osd_ids: List[str]) -> str: + """ + Stops a `removal` process for a List of OSDs. + This will revert their weight and remove it from the osds_to_remove queue + """ + for osd_id in osd_ids: + try: + self.to_remove_osds.rm(OSD(osd_id=int(osd_id), + remove_util=self.to_remove_osds.rm_util)) + except (NotFoundError, KeyError, ValueError): + return f'Unable to find OSD in the queue: {osd_id}' + + # trigger the serve loop to halt the removal + self._kick_serve_loop() + return "Stopped OSD(s) removal" + + @handle_orch_error + def remove_osds_status(self) -> List[OSD]: + """ + The CLI call to retrieve an osd removal report + """ + return self.to_remove_osds.all_osds() + + @handle_orch_error + def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str: + """ + Drain all daemons from a host. 
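+
+        For example (illustrative; 'host2' is a placeholder hostname):
+
+            ceph orch host drain host2
+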
+ :param host: host name + """ + + # if we drain the last admin host we could end up removing the only instance + # of the config and keyring and cause issues + if not force: + p = PlacementSpec(label=SpecialHostLabels.ADMIN) + admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) + if len(admin_hosts) == 1 and admin_hosts[0] == hostname: + raise OrchestratorValidationError(f"Host {hostname} is the last host with the '{SpecialHostLabels.ADMIN}'" + " label.\nDraining this host could cause the removal" + " of the last cluster config/keyring managed by cephadm.\n" + f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host" + " before completing this operation.\nIf you're certain this is" + " what you want rerun this command with --force.") + + self.add_host_label(hostname, '_no_schedule') + if not keep_conf_keyring: + self.add_host_label(hostname, SpecialHostLabels.DRAIN_CONF_KEYRING) + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname) + + osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd'] + self.remove_osds(osds_to_remove, zap=zap_osd_devices) + + daemons_table = "" + daemons_table += "{:<20} {:<15}\n".format("type", "id") + daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15) + for d in daemons: + daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id) + + return "Scheduled to remove the following daemons from host '{}'\n{}".format(hostname, daemons_table) + + def trigger_connect_dashboard_rgw(self) -> None: + self.need_connect_dashboard_rgw = True + self.event.set() diff --git a/src/pybind/mgr/cephadm/offline_watcher.py b/src/pybind/mgr/cephadm/offline_watcher.py new file mode 100644 index 000000000..2b7751dfc --- /dev/null +++ b/src/pybind/mgr/cephadm/offline_watcher.py @@ -0,0 +1,60 @@ +import logging +from typing import List, Optional, TYPE_CHECKING + +import multiprocessing as mp +import threading + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + + +class OfflineHostWatcher(threading.Thread): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr = mgr + self.hosts: Optional[List[str]] = None + self.new_hosts: Optional[List[str]] = None + self.stop = False + self.event = threading.Event() + super(OfflineHostWatcher, self).__init__(target=self.run) + + def run(self) -> None: + self.thread_pool = mp.pool.ThreadPool(10) + while not self.stop: + # only need to take action if we have hosts to check + if self.hosts or self.new_hosts: + if self.new_hosts: + self.hosts = self.new_hosts + self.new_hosts = None + logger.debug(f'OfflineHostDetector: Checking if hosts: {self.hosts} are offline.') + assert self.hosts is not None + self.thread_pool.map(self.check_host, self.hosts) + self.event.wait(20) + self.event.clear() + self.thread_pool.close() + self.thread_pool.join() + + def check_host(self, host: str) -> None: + if host not in self.mgr.offline_hosts: + try: + self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata) + except Exception: + logger.debug(f'OfflineHostDetector: detected {host} to be offline') + # kick serve loop in case corrective action must be taken for offline host + self.mgr._kick_serve_loop() + + def set_hosts(self, hosts: List[str]) -> None: + hosts.sort() + if (not self.hosts or self.hosts != hosts) and hosts: + self.new_hosts = hosts + logger.debug( + f'OfflineHostDetector: Hosts to check if offline swapped to: {self.new_hosts}.') + 
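+            # Illustrative note: because the list was sorted above, the equality
+            # check is order-insensitive; e.g. set_hosts(['b', 'a']) right after
+            # set_hosts(['a', 'b']) changes nothing and schedules no extra round
+            # of checks.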
self.wakeup() + + def wakeup(self) -> None: + self.event.set() + + def shutdown(self) -> None: + self.stop = True + self.wakeup() diff --git a/src/pybind/mgr/cephadm/registry.py b/src/pybind/mgr/cephadm/registry.py new file mode 100644 index 000000000..31e5fb23e --- /dev/null +++ b/src/pybind/mgr/cephadm/registry.py @@ -0,0 +1,65 @@ +import requests +from typing import List, Dict, Tuple +from requests import Response + + +class Registry: + + def __init__(self, url: str): + self._url: str = url + + @property + def api_domain(self) -> str: + if self._url == 'docker.io': + return 'registry-1.docker.io' + return self._url + + def get_token(self, response: Response) -> str: + realm, params = self.parse_www_authenticate(response.headers['Www-Authenticate']) + r = requests.get(realm, params=params) + r.raise_for_status() + ret = r.json() + if 'access_token' in ret: + return ret['access_token'] + if 'token' in ret: + return ret['token'] + raise ValueError(f'Unknown token reply {ret}') + + def parse_www_authenticate(self, text: str) -> Tuple[str, Dict[str, str]]: + # 'Www-Authenticate': 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:ceph/ceph:pull"' + r: Dict[str, str] = {} + for token in text.split(','): + key, value = token.split('=', 1) + r[key] = value.strip('"') + realm = r.pop('Bearer realm') + return realm, r + + def get_tags(self, image: str) -> List[str]: + tags = [] + headers = {'Accept': 'application/json'} + url = f'https://{self.api_domain}/v2/{image}/tags/list' + while True: + try: + r = requests.get(url, headers=headers) + except requests.exceptions.ConnectionError as e: + msg = f"Cannot get tags from url '{url}': {e}" + raise ValueError(msg) from e + if r.status_code == 401: + if 'Authorization' in headers: + raise ValueError('failed authentication') + token = self.get_token(r) + headers['Authorization'] = f'Bearer {token}' + continue + r.raise_for_status() + + new_tags = r.json()['tags'] + tags.extend(new_tags) + + if 'Link' not in r.headers: + break + + # strip < > brackets off and prepend the domain + url = f'https://{self.api_domain}' + r.headers['Link'].split(';')[0][1:-1] + continue + + return tags diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py new file mode 100644 index 000000000..6666d761e --- /dev/null +++ b/src/pybind/mgr/cephadm/schedule.py @@ -0,0 +1,481 @@ +import ipaddress +import hashlib +import logging +import random +from typing import List, Optional, Callable, TypeVar, Tuple, NamedTuple, Dict + +import orchestrator +from ceph.deployment.service_spec import ServiceSpec +from orchestrator._interface import DaemonDescription +from orchestrator import OrchestratorValidationError +from .utils import RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES + +logger = logging.getLogger(__name__) +T = TypeVar('T') + + +class DaemonPlacement(NamedTuple): + daemon_type: str + hostname: str + network: str = '' # for mons only + name: str = '' + ip: Optional[str] = None + ports: List[int] = [] + rank: Optional[int] = None + rank_generation: Optional[int] = None + + def __str__(self) -> str: + res = self.daemon_type + ':' + self.hostname + other = [] + if self.rank is not None: + other.append(f'rank={self.rank}.{self.rank_generation}') + if self.network: + other.append(f'network={self.network}') + if self.name: + other.append(f'name={self.name}') + if self.ports: + other.append(f'{self.ip or "*"}:{",".join(map(str, self.ports))}') + if other: + res += '(' + ' '.join(other) + ')' + return res + + def 
renumber_ports(self, n: int) -> 'DaemonPlacement': + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + self.name, + self.ip, + [p + n for p in self.ports], + self.rank, + self.rank_generation, + ) + + def assign_rank(self, rank: int, gen: int) -> 'DaemonPlacement': + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + self.name, + self.ip, + self.ports, + rank, + gen, + ) + + def assign_name(self, name: str) -> 'DaemonPlacement': + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + name, + self.ip, + self.ports, + self.rank, + self.rank_generation, + ) + + def assign_rank_generation( + self, + rank: int, + rank_map: Dict[int, Dict[int, Optional[str]]] + ) -> 'DaemonPlacement': + if rank not in rank_map: + rank_map[rank] = {} + gen = 0 + else: + gen = max(rank_map[rank].keys()) + 1 + rank_map[rank][gen] = None + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + self.name, + self.ip, + self.ports, + rank, + gen, + ) + + def matches_daemon(self, dd: DaemonDescription) -> bool: + if self.daemon_type != dd.daemon_type: + return False + if self.hostname != dd.hostname: + return False + # fixme: how to match against network? + if self.name and self.name != dd.daemon_id: + return False + if self.ports: + if self.ports != dd.ports and dd.ports: + return False + if self.ip != dd.ip and dd.ip: + return False + return True + + def matches_rank_map( + self, + dd: DaemonDescription, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]], + ranks: List[int] + ) -> bool: + if rank_map is None: + # daemon should have no rank + return dd.rank is None + + if dd.rank is None: + return False + + if dd.rank not in rank_map: + return False + if dd.rank not in ranks: + return False + + # must be the highest/newest rank_generation + if dd.rank_generation != max(rank_map[dd.rank].keys()): + return False + + # must be *this* daemon + return rank_map[dd.rank][dd.rank_generation] == dd.daemon_id + + +class HostAssignment(object): + + def __init__(self, + spec: ServiceSpec, + hosts: List[orchestrator.HostSpec], + unreachable_hosts: List[orchestrator.HostSpec], + draining_hosts: List[orchestrator.HostSpec], + daemons: List[orchestrator.DaemonDescription], + related_service_daemons: Optional[List[DaemonDescription]] = None, + networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {}, + filter_new_host: Optional[Callable[[str, ServiceSpec], bool]] = None, + allow_colo: bool = False, + primary_daemon_type: Optional[str] = None, + per_host_daemon_type: Optional[str] = None, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None, + ): + assert spec + self.spec = spec # type: ServiceSpec + self.primary_daemon_type = primary_daemon_type or spec.service_type + self.hosts: List[orchestrator.HostSpec] = hosts + self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts + self.draining_hosts: List[orchestrator.HostSpec] = draining_hosts + self.filter_new_host = filter_new_host + self.service_name = spec.service_name() + self.daemons = daemons + self.related_service_daemons = related_service_daemons + self.networks = networks + self.allow_colo = allow_colo + self.per_host_daemon_type = per_host_daemon_type + self.ports_start = spec.get_port_start() + self.rank_map = rank_map + + def hosts_by_label(self, label: str) -> List[orchestrator.HostSpec]: + return [h for h in self.hosts if label in h.labels] + + def get_hostnames(self) -> List[str]: + return [h.hostname for h in self.hosts] + + def 
validate(self) -> None: + self.spec.validate() + + if self.spec.placement.count == 0: + raise OrchestratorValidationError( + f'<count> can not be 0 for {self.spec.one_line_str()}') + + if ( + self.spec.placement.count_per_host is not None + and self.spec.placement.count_per_host > 1 + and not self.allow_colo + ): + raise OrchestratorValidationError( + f'Cannot place more than one {self.spec.service_type} per host' + ) + + if self.spec.placement.hosts: + explicit_hostnames = {h.hostname for h in self.spec.placement.hosts} + known_hosts = self.get_hostnames() + [h.hostname for h in self.draining_hosts] + unknown_hosts = explicit_hostnames.difference(set(known_hosts)) + if unknown_hosts: + raise OrchestratorValidationError( + f'Cannot place {self.spec.one_line_str()} on {", ".join(sorted(unknown_hosts))}: Unknown hosts') + + if self.spec.placement.host_pattern: + pattern_hostnames = self.spec.placement.filter_matching_hostspecs(self.hosts) + if not pattern_hostnames: + raise OrchestratorValidationError( + f'Cannot place {self.spec.one_line_str()}: No matching hosts') + + if self.spec.placement.label: + label_hosts = self.hosts_by_label(self.spec.placement.label) + if not label_hosts: + raise OrchestratorValidationError( + f'Cannot place {self.spec.one_line_str()}: No matching ' + f'hosts for label {self.spec.placement.label}') + + def place_per_host_daemons( + self, + slots: List[DaemonPlacement], + to_add: List[DaemonPlacement], + to_remove: List[orchestrator.DaemonDescription], + ) -> Tuple[List[DaemonPlacement], List[DaemonPlacement], List[orchestrator.DaemonDescription]]: + if self.per_host_daemon_type: + host_slots = [ + DaemonPlacement(daemon_type=self.per_host_daemon_type, + hostname=hostname) + for hostname in set([s.hostname for s in slots]) + ] + existing = [ + d for d in self.daemons if d.daemon_type == self.per_host_daemon_type + ] + slots += host_slots + for dd in existing: + found = False + for p in host_slots: + if p.matches_daemon(dd): + host_slots.remove(p) + found = True + break + if not found: + to_remove.append(dd) + to_add += host_slots + + to_remove = [d for d in to_remove if d.hostname not in [ + h.hostname for h in self.unreachable_hosts]] + + return slots, to_add, to_remove + + def place(self): + # type: () -> Tuple[List[DaemonPlacement], List[DaemonPlacement], List[orchestrator.DaemonDescription]] + """ + Generate a list of HostPlacementSpec taking into account: + + * all known hosts + * hosts with existing daemons + * placement spec + * self.filter_new_host + """ + + self.validate() + + count = self.spec.placement.count + + # get candidate hosts based on [hosts, label, host_pattern] + candidates = self.get_candidates() # type: List[DaemonPlacement] + if self.primary_daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES: + # remove unreachable hosts that are not in maintenance so daemons + # on these hosts will be rescheduled + candidates = self.remove_non_maintenance_unreachable_candidates(candidates) + + def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlacement]: + r = [] + for offset in range(num): + r.extend([dp.renumber_ports(offset) for dp in ls]) + return r + + # consider enough slots to fulfill target count-per-host or count + if count is None: + if self.spec.placement.count_per_host: + per_host = self.spec.placement.count_per_host + else: + per_host = 1 + candidates = expand_candidates(candidates, per_host) + elif self.allow_colo and candidates: + per_host = 1 + ((count - 1) // len(candidates)) + candidates = 
expand_candidates(candidates, per_host) + + # consider (preserve) existing daemons in a particular order... + daemons = sorted( + [ + d for d in self.daemons if d.daemon_type == self.primary_daemon_type + ], + key=lambda d: ( + not d.is_active, # active before standby + d.rank is not None, # ranked first, then non-ranked + d.rank, # low ranks + 0 - (d.rank_generation or 0), # newer generations first + ) + ) + + # sort candidates into existing/used slots that already have a + # daemon, and others (the rest) + existing_active: List[orchestrator.DaemonDescription] = [] + existing_standby: List[orchestrator.DaemonDescription] = [] + existing_slots: List[DaemonPlacement] = [] + to_add: List[DaemonPlacement] = [] + to_remove: List[orchestrator.DaemonDescription] = [] + ranks: List[int] = list(range(len(candidates))) + others: List[DaemonPlacement] = candidates.copy() + for dd in daemons: + found = False + for p in others: + if p.matches_daemon(dd) and p.matches_rank_map(dd, self.rank_map, ranks): + others.remove(p) + if dd.is_active: + existing_active.append(dd) + else: + existing_standby.append(dd) + if dd.rank is not None: + assert dd.rank_generation is not None + p = p.assign_rank(dd.rank, dd.rank_generation) + ranks.remove(dd.rank) + existing_slots.append(p) + found = True + break + if not found: + to_remove.append(dd) + + # TODO: At some point we want to deploy daemons that are on offline hosts + # at what point we do this differs per daemon type. Stateless daemons we could + # do quickly to improve availability. Stateful daemons we might want to wait longer + # to see if the host comes back online + + existing = existing_active + existing_standby + + # build to_add + if not count: + to_add = [dd for dd in others if dd.hostname not in [ + h.hostname for h in self.unreachable_hosts]] + else: + # The number of new slots that need to be selected in order to fulfill count + need = count - len(existing) + + # we don't need any additional placements + if need <= 0: + to_remove.extend(existing[count:]) + del existing_slots[count:] + return self.place_per_host_daemons(existing_slots, [], to_remove) + + if self.related_service_daemons: + # prefer to put daemons on the same host(s) as daemons of the related service + # Note that we are only doing this over picking arbitrary hosts to satisfy + # the count. We are not breaking any deterministic placements in order to + # match the placement with a related service. + related_service_hosts = list(set(dd.hostname for dd in self.related_service_daemons)) + matching_dps = [dp for dp in others if dp.hostname in related_service_hosts] + for dp in matching_dps: + if need <= 0: + break + if dp.hostname in related_service_hosts and dp.hostname not in [h.hostname for h in self.unreachable_hosts]: + logger.debug(f'Preferring {dp.hostname} for service {self.service_name} as related daemons have been placed there') + to_add.append(dp) + need -= 1 # this is last use of need so it can work as a counter + # at this point, we've either met our placement quota entirely using hosts with related + # service daemons, or we still need to place more. 
If we do need to place more, + # we should make sure not to re-use hosts with related service daemons by filtering + # them out from the "others" list + if need > 0: + others = [dp for dp in others if dp.hostname not in related_service_hosts] + + for dp in others: + if need <= 0: + break + if dp.hostname not in [h.hostname for h in self.unreachable_hosts]: + to_add.append(dp) + need -= 1 # this is last use of need in this function so it can work as a counter + + if self.rank_map is not None: + # assign unused ranks (and rank_generations) to to_add + assert len(ranks) >= len(to_add) + for i in range(len(to_add)): + to_add[i] = to_add[i].assign_rank_generation(ranks[i], self.rank_map) + + logger.debug('Combine hosts with existing daemons %s + new hosts %s' % (existing, to_add)) + return self.place_per_host_daemons(existing_slots + to_add, to_add, to_remove) + + def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: + for subnet in subnets: + ips: List[str] = [] + # following is to allow loopback interfaces for both ipv4 and ipv6. Since we + # only have the subnet (and no IP) we assume default loopback IP address. + if ipaddress.ip_network(subnet).is_loopback: + if ipaddress.ip_network(subnet).version == 4: + ips.append('127.0.0.1') + else: + ips.append('::1') + for iface, iface_ips in self.networks.get(hostname, {}).get(subnet, {}).items(): + ips.extend(iface_ips) + if ips: + return sorted(ips)[0] + return None + + def get_candidates(self) -> List[DaemonPlacement]: + if self.spec.placement.hosts: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=h.hostname, network=h.network, name=h.name, + ports=self.ports_start) + for h in self.spec.placement.hosts if h.hostname not in [dh.hostname for dh in self.draining_hosts] + ] + elif self.spec.placement.label: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x.hostname, ports=self.ports_start) + for x in self.hosts_by_label(self.spec.placement.label) + ] + elif self.spec.placement.host_pattern: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x, ports=self.ports_start) + for x in self.spec.placement.filter_matching_hostspecs(self.hosts) + ] + elif ( + self.spec.placement.count is not None + or self.spec.placement.count_per_host is not None + ): + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x.hostname, ports=self.ports_start) + for x in self.hosts + ] + else: + raise OrchestratorValidationError( + "placement spec is empty: no hosts, no label, no pattern, no count") + + # allocate an IP? + if self.spec.networks: + orig = ls.copy() + ls = [] + for p in orig: + ip = self.find_ip_on_host(p.hostname, self.spec.networks) + if ip: + ls.append(DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=p.hostname, network=p.network, + name=p.name, ports=p.ports, ip=ip)) + else: + logger.debug( + f'Skipping {p.hostname} with no IP in network(s) {self.spec.networks}' + ) + + if self.filter_new_host: + old = ls.copy() + ls = [] + for h in old: + if self.filter_new_host(h.hostname, self.spec): + ls.append(h) + if len(old) > len(ls): + logger.debug('Filtered %s down to %s' % (old, ls)) + + # now that we have the list of nodes candidates based on the configured + # placement, let's shuffle the list for node pseudo-random selection. For this, + # we generate a seed from the service name and we use to shuffle the candidates. + # This makes shuffling deterministic for the same service name. 
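+        # A minimal sketch of the effect ('mgr' stands in for a real service name):
+        #
+        #   >>> import hashlib, random
+        #   >>> seed = int(hashlib.sha1(b'mgr').hexdigest(), 16) % (2 ** 32)
+        #   >>> hosts = ['host1', 'host2', 'host3']
+        #   >>> random.Random(seed).shuffle(hosts)
+        #
+        # repeating this yields the same order every time, so candidate selection
+        # does not flap between mgr restarts or serve-loop iterations.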
+        seed = int(
+            hashlib.sha1(self.spec.service_name().encode('utf-8')).hexdigest(),
+            16
+        ) % (2 ** 32)  # truncate result to 32 bits
+        final = sorted(ls)
+        random.Random(seed).shuffle(final)
+        return final
+
+    def remove_non_maintenance_unreachable_candidates(self, candidates: List[DaemonPlacement]) -> List[DaemonPlacement]:
+        in_maintenance: Dict[str, bool] = {}
+        for h in self.hosts:
+            if h.status.lower() == 'maintenance':
+                in_maintenance[h.hostname] = True
+                continue
+            in_maintenance[h.hostname] = False
+        unreachable_hosts = [h.hostname for h in self.unreachable_hosts]
+        candidates = [
+            c for c in candidates if c.hostname not in unreachable_hosts or in_maintenance[c.hostname]]
+        return candidates
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
new file mode 100644
index 000000000..5dfdc27a3
--- /dev/null
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -0,0 +1,1680 @@
+import ipaddress
+import hashlib
+import json
+import logging
+import uuid
+import os
+from collections import defaultdict
+from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Set, \
+    DefaultDict, Callable
+
+from ceph.deployment import inventory
+from ceph.deployment.drive_group import DriveGroupSpec
+from ceph.deployment.service_spec import (
+    ArgumentList,
+    ArgumentSpec,
+    CustomContainerSpec,
+    PlacementSpec,
+    RGWSpec,
+    ServiceSpec,
+    IngressSpec,
+)
+from ceph.utils import datetime_now
+
+import orchestrator
+from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \
+    DaemonDescriptionStatus, daemon_type_to_service
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
+from cephadm.schedule import HostAssignment
+from cephadm.autotune import MemoryAutotuner
+from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
+    CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels
+from mgr_module import MonCommandFailed
+from mgr_util import format_bytes, verify_tls, get_cert_issuer_info, ServerConfigException
+
+from . import utils
+from . import exchange
+
+if TYPE_CHECKING:
+    from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw']
+
+
+class CephadmServe:
+    """
+    This class contains functions that are executed in the
+    serve() thread. Thus they don't block the CLI.
+
+    Please see the `Note regarding network calls from CLI handlers`
+    chapter in the cephadm developer guide.
+
+    On the other hand, these functions should *not* be called from
+    CLI handlers, to avoid blocking the CLI.
+    """
+
+    def __init__(self, mgr: "CephadmOrchestrator"):
+        self.mgr: "CephadmOrchestrator" = mgr
+        self.log = logger
+
+    def serve(self) -> None:
+        """
+        The main loop of cephadm.
+
+        A command handler will typically change the declarative state
+        of cephadm. This loop will then attempt to apply this new state.
+ """ + self.log.debug("serve starting") + self.mgr.config_checker.load_network_config() + + while self.mgr.run: + self.log.debug("serve loop start") + + try: + + self.convert_tags_to_repo_digest() + + # refresh daemons + self.log.debug('refreshing hosts and daemons') + self._refresh_hosts_and_daemons() + + self._check_for_strays() + + self._update_paused_health() + + if self.mgr.need_connect_dashboard_rgw and self.mgr.config_dashboard: + self.mgr.need_connect_dashboard_rgw = False + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.log.info('Checking dashboard <-> RGW credentials') + self.mgr.remote('dashboard', 'set_rgw_credentials') + + if not self.mgr.paused: + self._run_async_actions() + + self.mgr.to_remove_osds.process_removal_queue() + + self.mgr.migration.migrate() + if self.mgr.migration.is_migration_ongoing(): + continue + + if self._apply_all_services(): + continue # did something, refresh + + self._check_daemons() + + self._check_certificates() + + self._purge_deleted_services() + + self._check_for_moved_osds() + + if self.mgr.agent_helpers._handle_use_agent_setting(): + continue + + if self.mgr.upgrade.continue_upgrade(): + continue + + except OrchestratorError as e: + if e.event_subject: + self.mgr.events.from_orch_error(e) + + self.log.debug("serve loop sleep") + self._serve_sleep() + self.log.debug("serve loop wake") + self.log.debug("serve exit") + + def _check_certificates(self) -> None: + for d in self.mgr.cache.get_daemons_by_type('grafana'): + cert = self.mgr.get_store(f'{d.hostname}/grafana_crt') + key = self.mgr.get_store(f'{d.hostname}/grafana_key') + if (not cert or not cert.strip()) and (not key or not key.strip()): + # certificate/key are empty... nothing to check + return + + try: + get_cert_issuer_info(cert) + verify_tls(cert, key) + self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') + except ServerConfigException as e: + err_msg = f""" + Detected invalid grafana certificates. Please, use the following commands: + + > ceph config-key set mgr/cephadm/{d.hostname}/grafana_crt -i <path-to-ctr-file> + > ceph config-key set mgr/cephadm/{d.hostname}/grafana_key -i <path-to-key-file> + + to set valid key and certificate or reset their value to an empty string + in case you want cephadm to generate self-signed Grafana certificates. 
+ + Once done, run the following command to reconfig the daemon: + + > ceph orch daemon reconfig grafana.{d.hostname} + + """ + self.log.error(f'Detected invalid grafana certificate on host {d.hostname}: {e}') + self.mgr.set_health_warning('CEPHADM_CERT_ERROR', + f'Invalid grafana certificate on host {d.hostname}: {e}', + 1, [err_msg]) + break + + def _serve_sleep(self) -> None: + sleep_interval = max( + 30, + min( + self.mgr.host_check_interval, + self.mgr.facts_cache_timeout, + self.mgr.daemon_cache_timeout, + self.mgr.device_cache_timeout, + ) + ) + self.log.debug('Sleeping for %d seconds', sleep_interval) + self.mgr.event.wait(sleep_interval) + self.mgr.event.clear() + + def _update_paused_health(self) -> None: + self.log.debug('_update_paused_health') + if self.mgr.paused: + self.mgr.set_health_warning('CEPHADM_PAUSED', 'cephadm background work is paused', 1, [ + "'ceph orch resume' to resume"]) + else: + self.mgr.remove_health_warning('CEPHADM_PAUSED') + + def _autotune_host_memory(self, host: str) -> None: + total_mem = self.mgr.cache.get_facts(host).get('memory_total_kb', 0) + if not total_mem: + val = None + else: + total_mem *= 1024 # kb -> bytes + total_mem *= self.mgr.autotune_memory_target_ratio + a = MemoryAutotuner( + daemons=self.mgr.cache.get_daemons_by_host(host), + config_get=self.mgr.get_foreign_ceph_option, + total_mem=total_mem, + ) + val, osds = a.tune() + any_changed = False + for o in osds: + if self.mgr.get_foreign_ceph_option(o, 'osd_memory_target') != val: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': o, + 'name': 'osd_memory_target', + }) + any_changed = True + if val is not None: + if any_changed: + self.mgr.log.info( + f'Adjusting osd_memory_target on {host} to {format_bytes(val, 6)}' + ) + ret, out, err = self.mgr.mon_command({ + 'prefix': 'config set', + 'who': f'osd/host:{host.split(".")[0]}', + 'name': 'osd_memory_target', + 'value': str(val), + }) + if ret: + self.log.warning( + f'Unable to set osd_memory_target on {host} to {val}: {err}' + ) + else: + # if osd memory autotuning is off, we don't want to remove these config + # options as users may be using them. Since there is no way to set autotuning + # on/off at a host level, best we can do is check if it is globally on. 
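+            # The 'who' mask below is built from the short hostname, e.g.
+            #
+            #   >>> 'node1.example.com'.split('.')[0]
+            #   'node1'
+            #
+            # giving a config section of osd/host:node1 ('node1.example.com' is
+            # a made-up FQDN).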
+ if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'): + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': f'osd/host:{host.split(".")[0]}', + 'name': 'osd_memory_target', + }) + self.mgr.cache.update_autotune(host) + + def _refresh_hosts_and_daemons(self) -> None: + self.log.debug('_refresh_hosts_and_daemons') + bad_hosts = [] + failures = [] + agents_down: List[str] = [] + + @forall_hosts + def refresh(host: str) -> None: + + # skip hosts that are in maintenance - they could be powered off + if self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + return + + if self.mgr.use_agent: + if self.mgr.agent_helpers._check_agent(host): + agents_down.append(host) + + if self.mgr.cache.host_needs_check(host): + r = self._check_host(host) + if r is not None: + bad_hosts.append(r) + + if ( + not self.mgr.use_agent + or self.mgr.cache.is_host_draining(host) + or host in agents_down + ): + if self.mgr.cache.host_needs_daemon_refresh(host): + self.log.debug('refreshing %s daemons' % host) + r = self._refresh_host_daemons(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_facts_refresh(host): + self.log.debug(('Refreshing %s facts' % host)) + r = self._refresh_facts(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_network_refresh(host): + self.log.debug(('Refreshing %s networks' % host)) + r = self._refresh_host_networks(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_device_refresh(host): + self.log.debug('refreshing %s devices' % host) + r = self._refresh_host_devices(host) + if r: + failures.append(r) + self.mgr.cache.metadata_up_to_date[host] = True + elif not self.mgr.cache.get_daemons_by_type('agent', host=host): + if self.mgr.cache.host_needs_daemon_refresh(host): + self.log.debug('refreshing %s daemons' % host) + r = self._refresh_host_daemons(host) + if r: + failures.append(r) + self.mgr.cache.metadata_up_to_date[host] = True + + if self.mgr.cache.host_needs_registry_login(host) and self.mgr.get_store('registry_credentials'): + self.log.debug(f"Logging `{host}` into custom registry") + with self.mgr.async_timeout_handler(host, 'cephadm registry-login'): + r = self.mgr.wait_async(self._registry_login( + host, json.loads(str(self.mgr.get_store('registry_credentials'))))) + if r: + bad_hosts.append(r) + + if self.mgr.cache.host_needs_osdspec_preview_refresh(host): + self.log.debug(f"refreshing OSDSpec previews for {host}") + r = self._refresh_host_osdspec_previews(host) + if r: + failures.append(r) + + if ( + self.mgr.cache.host_needs_autotune_memory(host) + and not self.mgr.inventory.has_label(host, SpecialHostLabels.NO_MEMORY_AUTOTUNE) + ): + self.log.debug(f"autotuning memory for {host}") + self._autotune_host_memory(host) + + refresh(self.mgr.cache.get_hosts()) + + self._write_all_client_files() + + self.mgr.agent_helpers._update_agent_down_healthcheck(agents_down) + self.mgr.http_server.config_update() + + self.mgr.config_checker.run_checks() + + for k in [ + 'CEPHADM_HOST_CHECK_FAILED', + 'CEPHADM_REFRESH_FAILED', + ]: + self.mgr.remove_health_warning(k) + if bad_hosts: + self.mgr.set_health_warning( + 'CEPHADM_HOST_CHECK_FAILED', f'{len(bad_hosts)} hosts fail cephadm check', len(bad_hosts), bad_hosts) + if failures: + self.mgr.set_health_warning( + 'CEPHADM_REFRESH_FAILED', 'failed to probe daemons or devices', len(failures), failures) + self.mgr.update_failed_daemon_health_check() + + def _check_host(self, host: str) -> Optional[str]: + if host not in self.mgr.inventory: + 
return None + self.log.debug(' checking %s' % host) + try: + addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host + with self.mgr.async_timeout_handler(host, 'cephadm check-host'): + out, err, code = self.mgr.wait_async(self._run_cephadm( + host, cephadmNoImage, 'check-host', [], + error_ok=True, no_fsid=True, log_output=self.mgr.log_refresh_metadata)) + self.mgr.cache.update_last_host_check(host) + self.mgr.cache.save_host(host) + if code: + self.log.debug(' host %s (%s) failed check' % (host, addr)) + if self.mgr.warn_on_failed_host_check: + return 'host %s (%s) failed check: %s' % (host, addr, err) + else: + self.log.debug(' host %s (%s) ok' % (host, addr)) + except Exception as e: + self.log.debug(' host %s (%s) failed check' % (host, addr)) + return 'host %s (%s) failed check: %s' % (host, addr, e) + return None + + def _refresh_host_daemons(self, host: str) -> Optional[str]: + try: + with self.mgr.async_timeout_handler(host, 'cephadm ls'): + ls = self.mgr.wait_async(self._run_cephadm_json( + host, 'mon', 'ls', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata)) + except OrchestratorError as e: + return str(e) + self.mgr._process_ls_output(host, ls) + return None + + def _refresh_facts(self, host: str) -> Optional[str]: + try: + with self.mgr.async_timeout_handler(host, 'cephadm gather-facts'): + val = self.mgr.wait_async(self._run_cephadm_json( + host, cephadmNoImage, 'gather-facts', [], + no_fsid=True, log_output=self.mgr.log_refresh_metadata)) + except OrchestratorError as e: + return str(e) + + self.mgr.cache.update_host_facts(host, val) + + return None + + def _refresh_host_devices(self, host: str) -> Optional[str]: + with_lsm = self.mgr.device_enhanced_scan + list_all = self.mgr.inventory_list_all + inventory_args = ['--', 'inventory', + '--format=json-pretty', + '--filter-for-batch'] + if with_lsm: + inventory_args.insert(-1, "--with-lsm") + if list_all: + inventory_args.insert(-1, "--list-all") + + try: + try: + with self.mgr.async_timeout_handler(host, 'cephadm ceph-volume -- inventory'): + devices = self.mgr.wait_async(self._run_cephadm_json( + host, 'osd', 'ceph-volume', inventory_args, log_output=self.mgr.log_refresh_metadata)) + except OrchestratorError as e: + if 'unrecognized arguments: --filter-for-batch' in str(e): + rerun_args = inventory_args.copy() + rerun_args.remove('--filter-for-batch') + with self.mgr.async_timeout_handler(host, 'cephadm ceph-volume -- inventory'): + devices = self.mgr.wait_async(self._run_cephadm_json( + host, 'osd', 'ceph-volume', rerun_args, log_output=self.mgr.log_refresh_metadata)) + else: + raise + + except OrchestratorError as e: + return str(e) + + self.log.debug('Refreshed host %s devices (%d)' % ( + host, len(devices))) + ret = inventory.Devices.from_json(devices) + self.mgr.cache.update_host_devices(host, ret.devices) + self.update_osdspec_previews(host) + self.mgr.cache.save_host(host) + return None + + def _refresh_host_networks(self, host: str) -> Optional[str]: + try: + with self.mgr.async_timeout_handler(host, 'cephadm list-networks'): + networks = self.mgr.wait_async(self._run_cephadm_json( + host, 'mon', 'list-networks', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata)) + except OrchestratorError as e: + return str(e) + + self.log.debug('Refreshed host %s networks (%s)' % ( + host, len(networks))) + self.mgr.cache.update_host_networks(host, networks) + self.mgr.cache.save_host(host) + return None + + def _refresh_host_osdspec_previews(self, host: str) -> Optional[str]: + 
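+        """Recompute the cached OSDSpec previews for a single host."""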
self.update_osdspec_previews(host) + self.mgr.cache.save_host(host) + self.log.debug(f'Refreshed OSDSpec previews for host <{host}>') + return None + + def update_osdspec_previews(self, search_host: str = '') -> None: + # Set global 'pending' flag for host + self.mgr.cache.loading_osdspec_preview.add(search_host) + previews = [] + # query OSDSpecs for host <search host> and generate/get the preview + # There can be multiple previews for one host due to multiple OSDSpecs. + previews.extend(self.mgr.osd_service.get_previews(search_host)) + self.log.debug(f'Loading OSDSpec previews to HostCache for host <{search_host}>') + self.mgr.cache.osdspec_previews[search_host] = previews + # Unset global 'pending' flag for host + self.mgr.cache.loading_osdspec_preview.remove(search_host) + + def _run_async_actions(self) -> None: + while self.mgr.scheduled_async_actions: + (self.mgr.scheduled_async_actions.pop(0))() + + def _check_for_strays(self) -> None: + self.log.debug('_check_for_strays') + for k in ['CEPHADM_STRAY_HOST', + 'CEPHADM_STRAY_DAEMON']: + self.mgr.remove_health_warning(k) + if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons: + ls = self.mgr.list_servers() + self.log.debug(ls) + managed = self.mgr.cache.get_daemon_names() + host_detail = [] # type: List[str] + host_num_daemons = 0 + daemon_detail = [] # type: List[str] + for item in ls: + host = item.get('hostname') + assert isinstance(host, str) + daemons = item.get('services') # misnomer! + assert isinstance(daemons, list) + missing_names = [] + for s in daemons: + daemon_id = s.get('id') + assert daemon_id + name = '%s.%s' % (s.get('type'), daemon_id) + if s.get('type') in ['rbd-mirror', 'cephfs-mirror', 'rgw', 'rgw-nfs']: + metadata = self.mgr.get_metadata( + cast(str, s.get('type')), daemon_id, {}) + assert metadata is not None + try: + if s.get('type') == 'rgw-nfs': + # https://tracker.ceph.com/issues/49573 + name = metadata['id'][:-4] + else: + name = '%s.%s' % (s.get('type'), metadata['id']) + except (KeyError, TypeError): + self.log.debug( + "Failed to find daemon id for %s service %s" % ( + s.get('type'), s.get('id') + ) + ) + if s.get('type') == 'tcmu-runner': + # because we don't track tcmu-runner daemons in the host cache + # and don't have a way to check if the daemon is part of iscsi service + # we assume that all tcmu-runner daemons are managed by cephadm + managed.append(name) + if host not in self.mgr.inventory: + missing_names.append(name) + host_num_daemons += 1 + if name not in managed: + daemon_detail.append( + 'stray daemon %s on host %s not managed by cephadm' % (name, host)) + if missing_names: + host_detail.append( + 'stray host %s has %d stray daemons: %s' % ( + host, len(missing_names), missing_names)) + if self.mgr.warn_on_stray_hosts and host_detail: + self.mgr.set_health_warning( + 'CEPHADM_STRAY_HOST', f'{len(host_detail)} stray host(s) with {host_num_daemons} daemon(s) not managed by cephadm', len(host_detail), host_detail) + if self.mgr.warn_on_stray_daemons and daemon_detail: + self.mgr.set_health_warning( + 'CEPHADM_STRAY_DAEMON', f'{len(daemon_detail)} stray daemon(s) not managed by cephadm', len(daemon_detail), daemon_detail) + + def _check_for_moved_osds(self) -> None: + self.log.debug('_check_for_moved_osds') + all_osds: DefaultDict[int, List[orchestrator.DaemonDescription]] = defaultdict(list) + for dd in self.mgr.cache.get_daemons_by_type('osd'): + assert dd.daemon_id + all_osds[int(dd.daemon_id)].append(dd) + for osd_id, dds in all_osds.items(): + if len(dds) <= 1: + continue + 
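+            # Illustrative scenario: after osd.3 is physically moved between
+            # hosts, the cache can briefly hold two descriptions for it, e.g. a
+            # stale 'error' entry on the old host plus a 'running' entry on the
+            # new one; only the stale error-status duplicates are removed below.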
running = [dd for dd in dds if dd.status == DaemonDescriptionStatus.running]
+            error = [dd for dd in dds if dd.status == DaemonDescriptionStatus.error]
+            msg = f'Found duplicate OSDs: {", ".join(str(dd) for dd in dds)}'
+            logger.info(msg)
+            if len(running) != 1:
+                continue
+            osd = self.mgr.get_osd_by_id(osd_id)
+            if not osd or not osd['up']:
+                continue
+            for e in error:
+                assert e.hostname
+                try:
+                    self._remove_daemon(e.name(), e.hostname, no_post_remove=True)
+                    self.mgr.events.for_daemon(
+                        e.name(), 'INFO', f"Removed duplicated daemon on host '{e.hostname}'")
+                except OrchestratorError as ex:
+                    self.mgr.events.from_orch_error(ex)
+                    logger.exception(f'failed to remove duplicated daemon {e}')
+
+    def _apply_all_services(self) -> bool:
+        self.log.debug('_apply_all_services')
+        r = False
+        specs = []  # type: List[ServiceSpec]
+        # if metadata is not up to date, we still need to apply the agent spec,
+        # since the agent is the one that gathers the metadata. If we don't, we
+        # end up stuck between wanting metadata to be up to date to apply specs
+        # and needing to apply the agent spec to get up-to-date metadata
+        if self.mgr.use_agent and not self.mgr.cache.all_host_metadata_up_to_date():
+            self.log.info('Metadata not up to date on all hosts. Skipping non agent specs')
+            try:
+                specs.append(self.mgr.spec_store['agent'].spec)
+            except Exception as e:
+                self.log.debug(f'Failed to find agent spec: {e}')
+                self.mgr.agent_helpers._apply_agent()
+                return r
+        else:
+            _specs: List[ServiceSpec] = []
+            for sn, spec in self.mgr.spec_store.active_specs.items():
+                _specs.append(spec)
+            # apply specs that don't use count first, since their placement is
+            # deterministic and not dependent on other daemons' placements in any way
+            specs = [s for s in _specs if not s.placement.count] + [s for s in _specs if s.placement.count]
+
+        for name in ['CEPHADM_APPLY_SPEC_FAIL', 'CEPHADM_DAEMON_PLACE_FAIL']:
+            self.mgr.remove_health_warning(name)
+        self.mgr.apply_spec_fails = []
+        for spec in specs:
+            try:
+                if self._apply_service(spec):
+                    r = True
+            except Exception as e:
+                msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}'
+                self.log.exception(msg)
+                self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
+                self.mgr.apply_spec_fails.append((spec.service_name(), str(e)))
+                warnings = []
+                for x in self.mgr.apply_spec_fails:
+                    warnings.append(f'{x[0]}: {x[1]}')
+                self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL',
+                                            f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}",
+                                            len(self.mgr.apply_spec_fails),
+                                            warnings)
+        self.mgr.update_watched_hosts()
+        self.mgr.tuned_profile_utils._write_all_tuned_profiles()
+        return r
+
+    def _apply_service_config(self, spec: ServiceSpec) -> None:
+        if spec.config:
+            section = utils.name_to_config_section(spec.service_name())
+            for name in ['CEPHADM_INVALID_CONFIG_OPTION', 'CEPHADM_FAILED_SET_OPTION']:
+                self.mgr.remove_health_warning(name)
+            invalid_config_options = []
+            options_failed_to_set = []
+            for k, v in spec.config.items():
+                try:
+                    current = self.mgr.get_foreign_ceph_option(section, k)
+                except KeyError:
+                    msg = f'Ignoring invalid {spec.service_name()} config option {k}'
+                    self.log.warning(msg)
+                    self.mgr.events.for_service(
+                        spec, OrchestratorEvent.ERROR, f'Invalid config option {k}'
+                    )
+                    invalid_config_options.append(msg)
+                    continue
+                if current != v:
+                    self.log.debug(f'setting [{section}] {k} = {v}')
+                    try:
+                        self.mgr.check_mon_command({
+                            'prefix': 'config set',
+                            'name': k,
+                            'value': str(v),
+                            'who': section,
+                        })
+                    except MonCommandFailed as e:
+                        msg = f'Failed to set {spec.service_name()} option {k}: {e}'
+                        self.log.warning(msg)
+                        options_failed_to_set.append(msg)
+
+            if invalid_config_options:
+                self.mgr.set_health_warning('CEPHADM_INVALID_CONFIG_OPTION', f'Ignoring {len(invalid_config_options)} invalid config option(s)', len(
+                    invalid_config_options), invalid_config_options)
+            if options_failed_to_set:
+                self.mgr.set_health_warning('CEPHADM_FAILED_SET_OPTION', f'Failed to set {len(options_failed_to_set)} option(s)', len(
+                    options_failed_to_set), options_failed_to_set)
+
+    def _update_rgw_endpoints(self, rgw_spec: RGWSpec) -> None:
+
+        if not rgw_spec.update_endpoints or rgw_spec.rgw_realm_token is None:
+            return
+
+        ep = []
+        protocol = 'https' if rgw_spec.ssl else 'http'
+        for s in self.mgr.cache.get_daemons_by_service(rgw_spec.service_name()):
+            if s.ports:
+                for p in s.ports:
+                    ep.append(f'{protocol}://{s.hostname}:{p}')
+        zone_update_cmd = {
+            'prefix': 'rgw zone modify',
+            'realm_name': rgw_spec.rgw_realm,
+            'zonegroup_name': rgw_spec.rgw_zonegroup,
+            'zone_name': rgw_spec.rgw_zone,
+            'realm_token': rgw_spec.rgw_realm_token,
+            'zone_endpoints': ep,
+        }
+        self.log.debug(f'rgw cmd: {zone_update_cmd}')
+        rc, out, err = self.mgr.mon_command(zone_update_cmd)
+        rgw_spec.update_endpoints = (rc != 0)  # keep trying on failure
+        if rc != 0:
+            self.log.error(f'Error when trying to update rgw zone: {err}')
+            self.mgr.set_health_warning('CEPHADM_RGW', f'Cannot update rgw endpoints, error: {err}', 1,
+                                        [f'Cannot update rgw endpoints for daemon {rgw_spec.service_name()}, error: {err}'])
+        else:
+            self.mgr.remove_health_warning('CEPHADM_RGW')
+
+    def _apply_service(self, spec: ServiceSpec) -> bool:
+        """
+        Schedule a service. Deploy new daemons or remove old ones, depending
+        on the target label and count specified in the placement.
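+
+        A hypothetical illustration (values invented for this docstring, not
+        taken from a live cluster): applying a spec such as
+
+            service_type: rgw
+            service_id: myrgw
+            placement:
+              count: 2
+
+        computes placement slots via HostAssignment, deploys any missing rgw
+        daemons, and removes daemons that no longer match the placement.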
+ """ + self.mgr.migration.verify_no_migration() + + service_type = spec.service_type + service_name = spec.service_name() + if spec.unmanaged: + self.log.debug('Skipping unmanaged service %s' % service_name) + return False + if spec.preview_only: + self.log.debug('Skipping preview_only service %s' % service_name) + return False + self.log.debug('Applying service %s spec' % service_name) + + if service_type == 'agent': + try: + assert self.mgr.http_server.agent + assert self.mgr.http_server.agent.ssl_certs.get_root_cert() + except Exception: + self.log.info( + 'Delaying applying agent spec until cephadm endpoint root cert created') + return False + + self._apply_service_config(spec) + + if service_type == 'osd': + self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec)) + # TODO: return True would result in a busy loop + # can't know if daemon count changed; create_from_spec doesn't + # return a solid indication + return False + + svc = self.mgr.cephadm_services[service_type] + daemons = self.mgr.cache.get_daemons_by_service(service_name) + related_service_daemons = self.mgr.cache.get_related_service_daemons(spec) + + public_networks: List[str] = [] + if service_type == 'mon': + out = str(self.mgr.get_foreign_ceph_option('mon', 'public_network')) + if '/' in out: + public_networks = [x.strip() for x in out.split(',')] + self.log.debug('mon public_network(s) is %s' % public_networks) + + def matches_public_network(host: str, sspec: ServiceSpec) -> bool: + # make sure the host has at least one network that belongs to some configured public network(s) + for pn in public_networks: + public_network = ipaddress.ip_network(pn) + for hn in self.mgr.cache.networks[host]: + host_network = ipaddress.ip_network(hn) + if host_network.overlaps(public_network): + return True + + host_networks = ','.join(self.mgr.cache.networks[host]) + pub_networks = ','.join(public_networks) + self.log.info( + f"Filtered out host {host}: does not belong to mon public_network(s): " + f" {pub_networks}, host network(s): {host_networks}" + ) + return False + + def has_interface_for_vip(host: str, sspec: ServiceSpec) -> bool: + # make sure the host has an interface that can + # actually accomodate the VIP + if not sspec or sspec.service_type != 'ingress': + return True + ingress_spec = cast(IngressSpec, sspec) + virtual_ips = [] + if ingress_spec.virtual_ip: + virtual_ips.append(ingress_spec.virtual_ip) + elif ingress_spec.virtual_ips_list: + virtual_ips = ingress_spec.virtual_ips_list + for vip in virtual_ips: + found = False + bare_ip = str(vip).split('/')[0] + for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): + if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet): + # found matching interface for this IP, move on + self.log.debug( + f'{bare_ip} is in {subnet} on {host} interface {list(ifaces.keys())[0]}' + ) + found = True + break + if not found: + self.log.info( + f"Filtered out host {host}: Host has no interface available for VIP: {vip}" + ) + return False + return True + + host_filters: Dict[str, Callable[[str, ServiceSpec], bool]] = { + 'mon': matches_public_network, + 'ingress': has_interface_for_vip + } + + rank_map = None + if svc.ranked(): + rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {} + ha = HostAssignment( + spec=spec, + hosts=self.mgr.cache.get_non_draining_hosts() if spec.service_name( + ) == 'agent' else self.mgr.cache.get_schedulable_hosts(), + unreachable_hosts=self.mgr.cache.get_unreachable_hosts(), + 
+            draining_hosts=self.mgr.cache.get_draining_hosts(),
+            daemons=daemons,
+            related_service_daemons=related_service_daemons,
+            networks=self.mgr.cache.networks,
+            filter_new_host=host_filters.get(service_type, None),
+            allow_colo=svc.allow_colo(),
+            primary_daemon_type=svc.primary_daemon_type(spec),
+            per_host_daemon_type=svc.per_host_daemon_type(spec),
+            rank_map=rank_map,
+        )
+
+        try:
+            all_slots, slots_to_add, daemons_to_remove = ha.place()
+            daemons_to_remove = [d for d in daemons_to_remove if (d.hostname and self.mgr.inventory._inventory[d.hostname].get(
+                'status', '').lower() not in ['maintenance', 'offline'] and d.hostname not in self.mgr.offline_hosts)]
+            self.log.debug('Add %s, remove %s' % (slots_to_add, daemons_to_remove))
+        except OrchestratorError as e:
+            msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}'
+            self.log.error(msg)
+            self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
+            self.mgr.apply_spec_fails.append((spec.service_name(), str(e)))
+            warnings = []
+            for x in self.mgr.apply_spec_fails:
+                warnings.append(f'{x[0]}: {x[1]}')
+            self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL',
+                                        f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}",
+                                        len(self.mgr.apply_spec_fails),
+                                        warnings)
+            return False
+
+        r = None
+
+        # sanity check
+        final_count = len(daemons) + len(slots_to_add) - len(daemons_to_remove)
+        if service_type in ['mon', 'mgr'] and final_count < 1:
+            self.log.debug('cannot scale mon|mgr below 1')
+            return False
+
+        # progress
+        progress_id = str(uuid.uuid4())
+        delta: List[str] = []
+        if slots_to_add:
+            delta += [f'+{len(slots_to_add)}']
+        if daemons_to_remove:
+            delta += [f'-{len(daemons_to_remove)}']
+        progress_title = f'Updating {spec.service_name()} deployment ({" ".join(delta)} -> {len(all_slots)})'
+        progress_total = len(slots_to_add) + len(daemons_to_remove)
+        progress_done = 0
+
+        def update_progress() -> None:
+            self.mgr.remote(
+                'progress', 'update', progress_id,
+                ev_msg=progress_title,
+                ev_progress=(progress_done / progress_total),
+                add_to_ceph_s=True,
+            )
+
+        if progress_total:
+            update_progress()
+
+        self.log.debug('Hosts that will receive new daemons: %s' % slots_to_add)
+        self.log.debug('Daemons that will be removed: %s' % daemons_to_remove)
+
+        hosts_altered: Set[str] = set()
+
+        try:
+            # assign names
+            for i in range(len(slots_to_add)):
+                slot = slots_to_add[i]
+                slot = slot.assign_name(self.mgr.get_unique_name(
+                    slot.daemon_type,
+                    slot.hostname,
+                    [d for d in daemons if d not in daemons_to_remove],
+                    prefix=spec.service_id,
+                    forcename=slot.name,
+                    rank=slot.rank,
+                    rank_generation=slot.rank_generation,
+                ))
+                slots_to_add[i] = slot
+                if rank_map is not None:
+                    assert slot.rank is not None
+                    assert slot.rank_generation is not None
+                    assert rank_map[slot.rank][slot.rank_generation] is None
+                    rank_map[slot.rank][slot.rank_generation] = slot.name
+
+            if rank_map:
+                # record the rank_map before we make changes so that if we fail, the
+                # next mgr will clean up.
+                self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+
+                # remove daemons now, since we are going to fence them anyway
+                for d in daemons_to_remove:
+                    assert d.hostname is not None
+                    self._remove_daemon(d.name(), d.hostname)
+                daemons_to_remove = []
+
+                # fence them
+                svc.fence_old_ranks(spec, rank_map, len(all_slots))
+
+            # create daemons
+            daemon_place_fails = []
+            for slot in slots_to_add:
+                # first remove daemon with conflicting port or name?
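+                # Illustrative example (comment added for clarity, not original
+                # code): if the new slot wants port 8080 on host1 and a daemon
+                # already slated for removal still binds 8080 there, that daemon
+                # is removed first so the new one can bind the port.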
+                if slot.ports or slot.name in [d.name() for d in daemons_to_remove]:
+                    for d in daemons_to_remove:
+                        if (
+                            d.hostname != slot.hostname
+                            or not (set(d.ports or []) & set(slot.ports))
+                            or (d.ip and slot.ip and d.ip != slot.ip)
+                            and d.name() != slot.name
+                        ):
+                            continue
+                        if d.name() != slot.name:
+                            self.log.info(
+                                f'Removing {d.name()} before deploying to {slot} to avoid a port or name conflict'
+                            )
+                        # NOTE: we don't check ok-to-stop here to avoid starvation if
+                        # there is only 1 gateway.
+                        self._remove_daemon(d.name(), d.hostname)
+                        daemons_to_remove.remove(d)
+                        progress_done += 1
+                        hosts_altered.add(d.hostname)
+                        break
+
+                # deploy new daemon
+                daemon_id = slot.name
+
+                daemon_spec = svc.make_daemon_spec(
+                    slot.hostname, daemon_id, slot.network, spec,
+                    daemon_type=slot.daemon_type,
+                    ports=slot.ports,
+                    ip=slot.ip,
+                    rank=slot.rank,
+                    rank_generation=slot.rank_generation,
+                )
+                self.log.debug('Placing %s.%s on host %s' % (
+                    slot.daemon_type, daemon_id, slot.hostname))
+
+                try:
+                    daemon_spec = svc.prepare_create(daemon_spec)
+                    with self.mgr.async_timeout_handler(slot.hostname, f'cephadm deploy ({daemon_spec.daemon_type} type daemon)'):
+                        self.mgr.wait_async(self._create_daemon(daemon_spec))
+                    r = True
+                    progress_done += 1
+                    update_progress()
+                    hosts_altered.add(daemon_spec.host)
+                    self.mgr.spec_store.mark_needs_configuration(spec.service_name())
+                except (RuntimeError, OrchestratorError) as e:
+                    msg = (f"Failed while placing {slot.daemon_type}.{daemon_id} "
+                           f"on {slot.hostname}: {e}")
+                    self.mgr.events.for_service(spec, 'ERROR', msg)
+                    self.mgr.log.error(msg)
+                    daemon_place_fails.append(msg)
+                    # only return "no change" if no one else has already succeeded.
+                    # later successes will also change to True
+                    if r is None:
+                        r = False
+                    progress_done += 1
+                    update_progress()
+                    continue
+
+                # add to daemon list so next name(s) will also be unique
+                sd = orchestrator.DaemonDescription(
+                    hostname=slot.hostname,
+                    daemon_type=slot.daemon_type,
+                    daemon_id=daemon_id,
+                    service_name=spec.service_name()
+                )
+                daemons.append(sd)
+                self.mgr.cache.append_tmp_daemon(slot.hostname, sd)
+
+            if daemon_place_fails:
+                self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len(
+                    daemon_place_fails), daemon_place_fails)
+
+            if service_type == 'mgr':
+                active_mgr = svc.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr'))
+                if active_mgr.daemon_id in [d.daemon_id for d in daemons_to_remove]:
+                    # We can't just remove the active mgr like any other daemon.
+                    # Need to fail over later so it can be removed on next pass.
+                    # This can be accomplished by scheduling a restart of the active mgr.
+                    self.mgr._schedule_daemon_action(active_mgr.name(), 'restart')
+
+            if service_type == 'rgw':
+                self._update_rgw_endpoints(cast(RGWSpec, spec))
+
+            # remove any?
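+            # Sketch of the removal loop below (descriptive comment, not
+            # original code): while the set is not ok-to-stop, drop one
+            # non-error daemon from the list and retry; e.g. given
+            # [mon.a (error), mon.b (running)], mon.b is dropped first and
+            # only mon.a is removed this pass.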
+ def _ok_to_stop(remove_daemons: List[orchestrator.DaemonDescription]) -> bool: + daemon_ids = [d.daemon_id for d in remove_daemons] + assert None not in daemon_ids + # setting force flag retains previous behavior + r = svc.ok_to_stop(cast(List[str], daemon_ids), force=True) + return not r.retval + + while daemons_to_remove and not _ok_to_stop(daemons_to_remove): + # let's find a subset that is ok-to-stop + non_error_daemon_index = -1 + # prioritize removing daemons in error state + for i, dmon in enumerate(daemons_to_remove): + if dmon.status != DaemonDescriptionStatus.error: + non_error_daemon_index = i + break + if non_error_daemon_index != -1: + daemons_to_remove.pop(non_error_daemon_index) + else: + # all daemons in list are in error state + # we should be able to remove all of them + break + for d in daemons_to_remove: + r = True + assert d.hostname is not None + self._remove_daemon(d.name(), d.hostname) + + progress_done += 1 + update_progress() + hosts_altered.add(d.hostname) + self.mgr.spec_store.mark_needs_configuration(spec.service_name()) + + self.mgr.remote('progress', 'complete', progress_id) + except Exception as e: + self.mgr.remote('progress', 'fail', progress_id, str(e)) + raise + finally: + if self.mgr.spec_store.needs_configuration(spec.service_name()): + svc.config(spec) + self.mgr.spec_store.mark_configured(spec.service_name()) + if self.mgr.use_agent: + # can only send ack to agents if we know for sure port they bound to + hosts_altered = set([h for h in hosts_altered if (h in self.mgr.agent_cache.agent_ports and not self.mgr.cache.is_host_draining(h))]) + self.mgr.agent_helpers._request_agent_acks(hosts_altered, increment=True) + + if r is None: + r = False + return r + + def _check_daemons(self) -> None: + self.log.debug('_check_daemons') + daemons = self.mgr.cache.get_daemons() + daemons_post: Dict[str, List[orchestrator.DaemonDescription]] = defaultdict(list) + for dd in daemons: + # orphan? + spec = self.mgr.spec_store.active_specs.get(dd.service_name(), None) + assert dd.hostname is not None + assert dd.daemon_type is not None + assert dd.daemon_id is not None + + # any action we can try will fail for a daemon on an offline host, + # including removing the daemon + if dd.hostname in self.mgr.offline_hosts: + continue + + if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']: + # (mon and mgr specs should always exist; osds aren't matched + # to a service spec) + self.log.info('Removing orphan daemon %s...' 
% dd.name()) + self._remove_daemon(dd.name(), dd.hostname) + + # ignore unmanaged services + if spec and spec.unmanaged: + continue + + # ignore daemons for deleted services + if dd.service_name() in self.mgr.spec_store.spec_deleted: + continue + + if dd.daemon_type == 'agent': + try: + self.mgr.agent_helpers._check_agent(dd.hostname) + except Exception as e: + self.log.debug( + f'Agent {dd.name()} could not be checked in _check_daemons: {e}') + continue + + # These daemon types require additional configs after creation + if dd.daemon_type in REQUIRES_POST_ACTIONS: + daemons_post[dd.daemon_type].append(dd) + + if self.mgr.cephadm_services[daemon_type_to_service(dd.daemon_type)].get_active_daemon( + self.mgr.cache.get_daemons_by_service(dd.service_name())).daemon_id == dd.daemon_id: + dd.is_active = True + else: + dd.is_active = False + + deps = self.mgr._calc_daemon_deps(spec, dd.daemon_type, dd.daemon_id) + last_deps, last_config = self.mgr.cache.get_daemon_last_config_deps( + dd.hostname, dd.name()) + if last_deps is None: + last_deps = [] + action = self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) + if not last_config: + self.log.info('Reconfiguring %s (unknown last config time)...' % ( + dd.name())) + action = 'reconfig' + elif last_deps != deps: + self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}') + self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...') + action = 'reconfig' + # we need only redeploy if secure_monitoring_stack value has changed: + if dd.daemon_type in ['prometheus', 'node-exporter', 'alertmanager']: + diff = list(set(last_deps) - set(deps)) + if any('secure_monitoring_stack' in e for e in diff): + action = 'redeploy' + + elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args: + self.log.debug( + f'{dd.name()} container cli args {dd.extra_container_args} -> {spec.extra_container_args}') + self.log.info(f'Redeploying {dd.name()}, (container cli args changed) . . .') + dd.extra_container_args = spec.extra_container_args + action = 'redeploy' + elif spec is not None and hasattr(spec, 'extra_entrypoint_args') and dd.extra_entrypoint_args != spec.extra_entrypoint_args: + self.log.info(f'Redeploying {dd.name()}, (entrypoint args changed) . . .') + self.log.debug( + f'{dd.name()} daemon entrypoint args {dd.extra_entrypoint_args} -> {spec.extra_entrypoint_args}') + dd.extra_entrypoint_args = spec.extra_entrypoint_args + action = 'redeploy' + elif self.mgr.last_monmap and \ + self.mgr.last_monmap > last_config and \ + dd.daemon_type in CEPH_TYPES: + self.log.info('Reconfiguring %s (monmap changed)...' % dd.name()) + action = 'reconfig' + elif self.mgr.extra_ceph_conf_is_newer(last_config) and \ + dd.daemon_type in CEPH_TYPES: + self.log.info('Reconfiguring %s (extra config changed)...' % dd.name()) + action = 'reconfig' + if action: + if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \ + and action == 'reconfig': + action = 'redeploy' + try: + daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd) + self.mgr._daemon_action(daemon_spec, action=action) + if self.mgr.cache.rm_scheduled_daemon_action(dd.hostname, dd.name()): + self.mgr.cache.save_host(dd.hostname) + except OrchestratorError as e: + self.log.exception(e) + self.mgr.events.from_orch_error(e) + if dd.daemon_type in daemons_post: + del daemons_post[dd.daemon_type] + # continue... 
+ except Exception as e: + self.log.exception(e) + self.mgr.events.for_daemon_from_exception(dd.name(), e) + if dd.daemon_type in daemons_post: + del daemons_post[dd.daemon_type] + # continue... + + # do daemon post actions + for daemon_type, daemon_descs in daemons_post.items(): + run_post = False + for d in daemon_descs: + if d.name() in self.mgr.requires_post_actions: + self.mgr.requires_post_actions.remove(d.name()) + run_post = True + if run_post: + self.mgr._get_cephadm_service(daemon_type_to_service( + daemon_type)).daemon_check_post(daemon_descs) + + def _purge_deleted_services(self) -> None: + self.log.debug('_purge_deleted_services') + existing_services = self.mgr.spec_store.all_specs.items() + for service_name, spec in list(existing_services): + if service_name not in self.mgr.spec_store.spec_deleted: + continue + if self.mgr.cache.get_daemons_by_service(service_name): + continue + if spec.service_type in ['mon', 'mgr']: + continue + + logger.info(f'Purge service {service_name}') + + self.mgr.cephadm_services[spec.service_type].purge(service_name) + self.mgr.spec_store.finally_rm(service_name) + + def convert_tags_to_repo_digest(self) -> None: + if not self.mgr.use_repo_digest: + return + settings = self.mgr.upgrade.get_distinct_container_image_settings() + digests: Dict[str, ContainerInspectInfo] = {} + for container_image_ref in set(settings.values()): + if not is_repo_digest(container_image_ref): + with self.mgr.async_timeout_handler(cmd=f'cephadm inspect-image (image {container_image_ref})'): + image_info = self.mgr.wait_async( + self._get_container_image_info(container_image_ref)) + if image_info.repo_digests: + # FIXME: we assume the first digest here is the best + assert is_repo_digest(image_info.repo_digests[0]), image_info + digests[container_image_ref] = image_info + + for entity, container_image_ref in settings.items(): + if not is_repo_digest(container_image_ref): + image_info = digests[container_image_ref] + if image_info.repo_digests: + # FIXME: we assume the first digest here is the best + self.mgr.set_container_image(entity, image_info.repo_digests[0]) + + def _calc_client_files(self) -> Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]]: + # host -> path -> (mode, uid, gid, content, digest) + client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]] = {} + + # ceph.conf + config = self.mgr.get_minimal_ceph_conf().encode('utf-8') + config_digest = ''.join('%02x' % c for c in hashlib.sha256(config).digest()) + cluster_cfg_dir = f'/var/lib/ceph/{self.mgr._cluster_fsid}/config' + + if self.mgr.manage_etc_ceph_ceph_conf: + try: + pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts) + ha = HostAssignment( + spec=ServiceSpec('mon', placement=pspec), + hosts=self.mgr.cache.get_conf_keyring_available_hosts(), + unreachable_hosts=self.mgr.cache.get_unreachable_hosts(), + draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(), + daemons=[], + networks=self.mgr.cache.networks, + ) + all_slots, _, _ = ha.place() + for host in {s.hostname for s in all_slots}: + if host not in client_files: + client_files[host] = {} + ceph_conf = (0o644, 0, 0, bytes(config), str(config_digest)) + client_files[host]['/etc/ceph/ceph.conf'] = ceph_conf + client_files[host][f'{cluster_cfg_dir}/ceph.conf'] = ceph_conf + except Exception as e: + self.mgr.log.warning( + f'unable to calc conf hosts: {self.mgr.manage_etc_ceph_ceph_conf_hosts}: {e}') + + # client keyrings + for ks in self.mgr.keys.keys.values(): + try: + ret, keyring, err = 
self.mgr.mon_command({ + 'prefix': 'auth get', + 'entity': ks.entity, + }) + if ret: + self.log.warning(f'unable to fetch keyring for {ks.entity}') + continue + digest = ''.join('%02x' % c for c in hashlib.sha256( + keyring.encode('utf-8')).digest()) + ha = HostAssignment( + spec=ServiceSpec('mon', placement=ks.placement), + hosts=self.mgr.cache.get_conf_keyring_available_hosts(), + unreachable_hosts=self.mgr.cache.get_unreachable_hosts(), + draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(), + daemons=[], + networks=self.mgr.cache.networks, + ) + all_slots, _, _ = ha.place() + for host in {s.hostname for s in all_slots}: + if host not in client_files: + client_files[host] = {} + ceph_conf = (0o644, 0, 0, bytes(config), str(config_digest)) + client_files[host]['/etc/ceph/ceph.conf'] = ceph_conf + client_files[host][f'{cluster_cfg_dir}/ceph.conf'] = ceph_conf + ceph_admin_key = (ks.mode, ks.uid, ks.gid, keyring.encode('utf-8'), digest) + client_files[host][ks.path] = ceph_admin_key + client_files[host][f'{cluster_cfg_dir}/{os.path.basename(ks.path)}'] = ceph_admin_key + except Exception as e: + self.log.warning( + f'unable to calc client keyring {ks.entity} placement {ks.placement}: {e}') + return client_files + + def _write_all_client_files(self) -> None: + if self.mgr.manage_etc_ceph_ceph_conf or self.mgr.keys.keys: + client_files = self._calc_client_files() + else: + client_files = {} + + @forall_hosts + def _write_files(host: str) -> None: + self._write_client_files(client_files, host) + + _write_files(self.mgr.cache.get_hosts()) + + def _write_client_files(self, + client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]], + host: str) -> None: + updated_files = False + if self.mgr.cache.is_host_unreachable(host): + return + old_files = self.mgr.cache.get_host_client_files(host).copy() + for path, m in client_files.get(host, {}).items(): + mode, uid, gid, content, digest = m + if path in old_files: + match = old_files[path] == (digest, mode, uid, gid) + del old_files[path] + if match: + continue + self.log.info(f'Updating {host}:{path}') + self.mgr.ssh.write_remote_file(host, path, content, mode, uid, gid) + self.mgr.cache.update_client_file(host, path, digest, mode, uid, gid) + updated_files = True + for path in old_files.keys(): + if path == '/etc/ceph/ceph.conf': + continue + self.log.info(f'Removing {host}:{path}') + cmd = ['rm', '-f', path] + self.mgr.ssh.check_execute_command(host, cmd) + updated_files = True + self.mgr.cache.removed_client_file(host, path) + if updated_files: + self.mgr.cache.save_host(host) + + async def _create_daemon(self, + daemon_spec: CephadmDaemonDeploySpec, + reconfig: bool = False, + osd_uuid_map: Optional[Dict[str, Any]] = None, + ) -> str: + + daemon_params: Dict[str, Any] = {} + with set_exception_subject('service', orchestrator.DaemonDescription( + daemon_type=daemon_spec.daemon_type, + daemon_id=daemon_spec.daemon_id, + hostname=daemon_spec.host, + ).service_id(), overwrite=True): + + try: + image = '' + start_time = datetime_now() + ports: List[int] = daemon_spec.ports if daemon_spec.ports else [] + port_ips: Dict[str, str] = daemon_spec.port_ips if daemon_spec.port_ips else {} + + if daemon_spec.daemon_type == 'container': + spec = cast(CustomContainerSpec, + self.mgr.spec_store[daemon_spec.service_name].spec) + image = spec.image + if spec.ports: + ports.extend(spec.ports) + + # TCP port to open in the host firewall + if len(ports) > 0: + daemon_params['tcp_ports'] = list(ports) + + if port_ips: + daemon_params['port_ips'] 
= port_ips + + # osd deployments needs an --osd-uuid arg + if daemon_spec.daemon_type == 'osd': + if not osd_uuid_map: + osd_uuid_map = self.mgr.get_osd_uuid_map() + osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id) + if not osd_uuid: + raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id) + daemon_params['osd_fsid'] = osd_uuid + + if reconfig: + daemon_params['reconfig'] = True + if self.mgr.allow_ptrace: + daemon_params['allow_ptrace'] = True + + daemon_spec, extra_container_args, extra_entrypoint_args = self._setup_extra_deployment_args(daemon_spec, daemon_params) + + if daemon_spec.service_name in self.mgr.spec_store: + configs = self.mgr.spec_store[daemon_spec.service_name].spec.custom_configs + if configs is not None: + daemon_spec.final_config.update( + {'custom_config_files': [c.to_json() for c in configs]}) + + if self.mgr.cache.host_needs_registry_login(daemon_spec.host) and self.mgr.registry_url: + await self._registry_login(daemon_spec.host, json.loads(str(self.mgr.get_store('registry_credentials')))) + + self.log.info('%s daemon %s on %s' % ( + 'Reconfiguring' if reconfig else 'Deploying', + daemon_spec.name(), daemon_spec.host)) + + out, err, code = await self._run_cephadm( + daemon_spec.host, + daemon_spec.name(), + ['_orch', 'deploy'], + [], + stdin=exchange.Deploy( + fsid=self.mgr._cluster_fsid, + name=daemon_spec.name(), + image=image, + params=daemon_params, + meta=exchange.DeployMeta( + service_name=daemon_spec.service_name, + ports=daemon_spec.ports, + ip=daemon_spec.ip, + deployed_by=self.mgr.get_active_mgr_digests(), + rank=daemon_spec.rank, + rank_generation=daemon_spec.rank_generation, + extra_container_args=ArgumentSpec.map_json( + extra_container_args, + ), + extra_entrypoint_args=ArgumentSpec.map_json( + extra_entrypoint_args, + ), + ), + config_blobs=daemon_spec.final_config, + ).dump_json_str(), + ) + + if daemon_spec.daemon_type == 'agent': + self.mgr.agent_cache.agent_timestamp[daemon_spec.host] = datetime_now() + self.mgr.agent_cache.agent_counter[daemon_spec.host] = 1 + + # refresh daemon state? (ceph daemon reconfig does not need it) + if not reconfig or daemon_spec.daemon_type not in CEPH_TYPES: + if not code and daemon_spec.host in self.mgr.cache.daemons: + # prime cached service state with what we (should have) + # just created + sd = daemon_spec.to_daemon_description( + DaemonDescriptionStatus.starting, 'starting') + self.mgr.cache.add_daemon(daemon_spec.host, sd) + if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS: + self.mgr.requires_post_actions.add(daemon_spec.name()) + self.mgr.cache.invalidate_host_daemons(daemon_spec.host) + + if daemon_spec.daemon_type != 'agent': + self.mgr.cache.update_daemon_config_deps( + daemon_spec.host, daemon_spec.name(), daemon_spec.deps, start_time) + self.mgr.cache.save_host(daemon_spec.host) + else: + self.mgr.agent_cache.update_agent_config_deps( + daemon_spec.host, daemon_spec.deps, start_time) + self.mgr.agent_cache.save_agent(daemon_spec.host) + msg = "{} {} on host '{}'".format( + 'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(), daemon_spec.host) + if not code: + self.mgr.events.for_daemon(daemon_spec.name(), OrchestratorEvent.INFO, msg) + else: + what = 'reconfigure' if reconfig else 'deploy' + self.mgr.events.for_daemon( + daemon_spec.name(), OrchestratorEvent.ERROR, f'Failed to {what}: {err}') + return msg + except OrchestratorError: + redeploy = daemon_spec.name() in self.mgr.cache.get_daemon_names() + if not reconfig and not redeploy: + # we have to clean up the daemon. 
E.g. keyrings.
+                    service_type = daemon_type_to_service(daemon_spec.daemon_type)
+                    dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed')
+                    self.mgr.cephadm_services[service_type].post_remove(dd, is_failed_deploy=True)
+                raise
+
+    def _setup_extra_deployment_args(
+        self,
+        daemon_spec: CephadmDaemonDeploySpec,
+        params: Dict[str, Any],
+    ) -> Tuple[CephadmDaemonDeploySpec, Optional[ArgumentList], Optional[ArgumentList]]:
+        # this function is for handling any potential user-specified
+        # (in the service spec) extra runtime or entrypoint args for a daemon
+        # we are going to deploy. Effectively it just adds a set of extra args to
+        # pass to the cephadm binary to indicate the daemon being deployed
+        # needs extra runtime/entrypoint args. Returns the modified daemon spec
+        # as well as what args were added (as those are included in the unit.meta file)
+        def _to_args(lst: ArgumentList) -> List[str]:
+            out: List[str] = []
+            for argspec in lst:
+                out.extend(argspec.to_args())
+            return out
+
+        try:
+            eca = daemon_spec.extra_container_args
+            if eca:
+                params['extra_container_args'] = _to_args(eca)
+        except AttributeError:
+            eca = None
+        try:
+            eea = daemon_spec.extra_entrypoint_args
+            if eea:
+                params['extra_entrypoint_args'] = _to_args(eea)
+        except AttributeError:
+            eea = None
+        return daemon_spec, eca, eea
+
+    def _remove_daemon(self, name: str, host: str, no_post_remove: bool = False) -> str:
+        """
+        Remove a daemon
+        """
+        (daemon_type, daemon_id) = name.split('.', 1)
+        daemon = orchestrator.DaemonDescription(
+            daemon_type=daemon_type,
+            daemon_id=daemon_id,
+            hostname=host)
+
+        with set_exception_subject('service', daemon.service_id(), overwrite=True):
+
+            self.mgr.cephadm_services[daemon_type_to_service(daemon_type)].pre_remove(daemon)
+            # NOTE: we are passing the 'force' flag here, which means
+            # we can delete a mon instance's data.
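+            # For illustration only (hypothetical daemon name and ports), the
+            # call issued below ends up running something like:
+            #   cephadm rm-daemon --fsid <fsid> --name osd.3 --force --tcp-ports '6800 6801'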
+ dd = self.mgr.cache.get_daemon(daemon.daemon_name) + if dd.ports: + args = ['--name', name, '--force', '--tcp-ports', ' '.join(map(str, dd.ports))] + else: + args = ['--name', name, '--force'] + + self.log.info('Removing daemon %s from %s -- ports %s' % (name, host, dd.ports)) + with self.mgr.async_timeout_handler(host, f'cephadm rm-daemon (daemon {name})'): + out, err, code = self.mgr.wait_async(self._run_cephadm( + host, name, 'rm-daemon', args)) + if not code: + # remove item from cache + self.mgr.cache.rm_daemon(host, name) + self.mgr.cache.invalidate_host_daemons(host) + + if not no_post_remove: + if daemon_type not in ['iscsi']: + self.mgr.cephadm_services[daemon_type_to_service( + daemon_type)].post_remove(daemon, is_failed_deploy=False) + else: + self.mgr.scheduled_async_actions.append(lambda: self.mgr.cephadm_services[daemon_type_to_service( + daemon_type)].post_remove(daemon, is_failed_deploy=False)) + self.mgr._kick_serve_loop() + + return "Removed {} from host '{}'".format(name, host) + + async def _run_cephadm_json(self, + host: str, + entity: Union[CephadmNoImage, str], + command: str, + args: List[str], + no_fsid: Optional[bool] = False, + error_ok: Optional[bool] = False, + image: Optional[str] = "", + log_output: Optional[bool] = True, + ) -> Any: + try: + out, err, code = await self._run_cephadm( + host, entity, command, args, no_fsid=no_fsid, error_ok=error_ok, + image=image, log_output=log_output) + if code: + raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}') + except Exception as e: + raise OrchestratorError(f'host {host} `cephadm {command}` failed: {e}') + try: + return json.loads(''.join(out)) + except (ValueError, KeyError): + msg = f'host {host} `cephadm {command}` failed: Cannot decode JSON' + self.log.exception(f'{msg}: {"".join(out)}') + raise OrchestratorError(msg) + + async def _run_cephadm(self, + host: str, + entity: Union[CephadmNoImage, str], + command: Union[str, List[str]], + args: List[str], + addr: Optional[str] = "", + stdin: Optional[str] = "", + no_fsid: Optional[bool] = False, + error_ok: Optional[bool] = False, + image: Optional[str] = "", + env_vars: Optional[List[str]] = None, + log_output: Optional[bool] = True, + timeout: Optional[int] = None, # timeout in seconds + ) -> Tuple[List[str], List[str], int]: + """ + Run cephadm on the remote host with the given command + args + + Important: You probably don't want to run _run_cephadm from CLI handlers + + :env_vars: in format -> [KEY=VALUE, ..] + """ + + await self.mgr.ssh._remote_connection(host, addr) + + self.log.debug(f"_run_cephadm : command = {command}") + self.log.debug(f"_run_cephadm : args = {args}") + + bypass_image = ('agent') + + assert image or entity + # Skip the image check for daemons deployed that are not ceph containers + if not str(entity).startswith(bypass_image): + if not image and entity is not cephadmNoImage: + image = self.mgr._get_container_image(entity) + + final_args = [] + + # global args + if env_vars: + for env_var_pair in env_vars: + final_args.extend(['--env', env_var_pair]) + + if image: + final_args.extend(['--image', image]) + + if not self.mgr.container_init: + final_args += ['--no-container-init'] + + if not self.mgr.cgroups_split: + final_args += ['--no-cgroups-split'] + + if not timeout: + # default global timeout if no timeout was passed + timeout = self.mgr.default_cephadm_command_timeout + # put a lower bound of 60 seconds in case users + # accidentally set it to something unreasonable. 
+            # For example if they thought it was in minutes
+            # rather than seconds
+            if timeout < 60:
+                self.log.info(f'Found default timeout set to {timeout}. Using the minimum of 60 seconds instead.')
+                timeout = 60
+            # subtract a small amount to give this timeout
+            # in the binary a chance to actually happen over
+            # the asyncio based timeout in the mgr module
+            timeout -= 5
+        final_args += ['--timeout', str(timeout)]
+
+        # subcommand
+        if isinstance(command, list):
+            final_args.extend([str(v) for v in command])
+        else:
+            final_args.append(command)
+
+        # subcommand args
+        if not no_fsid:
+            final_args += ['--fsid', self.mgr._cluster_fsid]
+
+        final_args += args
+
+        # exec
+        self.log.debug('args: %s' % (' '.join(final_args)))
+        if self.mgr.mode == 'root':
+            # the agent has the cephadm binary as an extra file which is
+            # therefore passed over stdin. Even for debug logs it's too much
+            if stdin and 'agent' not in str(entity):
+                self.log.debug('stdin: %s' % stdin)
+
+            cmd = ['which', 'python3']
+            python = await self.mgr.ssh._check_execute_command(host, cmd, addr=addr)
+            cmd = [python, self.mgr.cephadm_binary_path] + final_args
+
+            try:
+                out, err, code = await self.mgr.ssh._execute_command(
+                    host, cmd, stdin=stdin, addr=addr)
+                if code == 2:
+                    ls_cmd = ['ls', self.mgr.cephadm_binary_path]
+                    out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr,
+                                                                                  log_command=log_output)
+                    if code_ls == 2:
+                        await self._deploy_cephadm_binary(host, addr)
+                        out, err, code = await self.mgr.ssh._execute_command(
+                            host, cmd, stdin=stdin, addr=addr)
+                        # if there is an agent on this host, make sure it is using the most recent
+                        # version of the cephadm binary
+                        if host in self.mgr.inventory:
+                            for agent in self.mgr.cache.get_daemons_by_type('agent', host):
+                                self.mgr._schedule_daemon_action(agent.name(), 'redeploy')
+
+            except Exception as e:
+                await self.mgr.ssh._reset_con(host)
+                if error_ok:
+                    return [], [str(e)], 1
+                raise
+
+        elif self.mgr.mode == 'cephadm-package':
+            try:
+                cmd = ['/usr/bin/cephadm'] + final_args
+                out, err, code = await self.mgr.ssh._execute_command(
+                    host, cmd, stdin=stdin, addr=addr)
+            except Exception as e:
+                await self.mgr.ssh._reset_con(host)
+                if error_ok:
+                    return [], [str(e)], 1
+                raise
+        else:
+            assert False, 'unsupported mode'
+
+        if log_output:
+            self.log.debug(f'code: {code}')
+            if out:
+                self.log.debug(f'out: {out}')
+            if err:
+                self.log.debug(f'err: {err}')
+        if code and not error_ok:
+            raise OrchestratorError(
+                f'cephadm exited with an error code: {code}, stderr: {err}')
+        return [out], [err], code
+
+    async def _get_container_image_info(self, image_name: str) -> ContainerInspectInfo:
+        # pick a random host...
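+        # (descriptive note added for clarity: despite the comment above, this
+        # simply takes the first host the inventory yields, which is arbitrary
+        # rather than truly random)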
+ host = None + for host_name in self.mgr.inventory.keys(): + host = host_name + break + if not host: + raise OrchestratorError('no hosts defined') + if self.mgr.cache.host_needs_registry_login(host) and self.mgr.registry_url: + await self._registry_login(host, json.loads(str(self.mgr.get_store('registry_credentials')))) + + j = None + try: + j = await self._run_cephadm_json(host, '', 'inspect-image', [], + image=image_name, no_fsid=True, + error_ok=True) + except OrchestratorError: + pass + + if not j: + pullargs: List[str] = [] + if self.mgr.registry_insecure: + pullargs.append("--insecure") + + j = await self._run_cephadm_json(host, '', 'pull', pullargs, + image=image_name, no_fsid=True) + r = ContainerInspectInfo( + j['image_id'], + j.get('ceph_version'), + j.get('repo_digests') + ) + self.log.debug(f'image {image_name} -> {r}') + return r + + # function responsible for logging single host into custom registry + async def _registry_login(self, host: str, registry_json: Dict[str, str]) -> Optional[str]: + self.log.debug( + f"Attempting to log host {host} into custom registry @ {registry_json['url']}") + # want to pass info over stdin rather than through normal list of args + out, err, code = await self._run_cephadm( + host, 'mon', 'registry-login', + ['--registry-json', '-'], stdin=json.dumps(registry_json), error_ok=True) + if code: + return f"Host {host} failed to login to {registry_json['url']} as {registry_json['username']} with given password" + return None + + async def _deploy_cephadm_binary(self, host: str, addr: Optional[str] = None) -> None: + # Use tee (from coreutils) to create a copy of cephadm on the target machine + self.log.info(f"Deploying cephadm binary to {host}") + await self.mgr.ssh._write_remote_file(host, self.mgr.cephadm_binary_path, + self.mgr._cephadm, addr=addr) diff --git a/src/pybind/mgr/cephadm/service_discovery.py b/src/pybind/mgr/cephadm/service_discovery.py new file mode 100644 index 000000000..ddc0574e2 --- /dev/null +++ b/src/pybind/mgr/cephadm/service_discovery.py @@ -0,0 +1,239 @@ +try: + import cherrypy + from cherrypy._cpserver import Server +except ImportError: + # to avoid sphinx build crash + class Server: # type: ignore + pass + +import logging +import socket + +import orchestrator # noqa +from mgr_module import ServiceInfoT +from mgr_util import build_url +from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional +from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService +import secrets + +from cephadm.services.ingress import IngressSpec +from cephadm.ssl_cert_utils import SSLCerts +from cephadm.services.cephadmservice import CephExporterService + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + + +def cherrypy_filter(record: logging.LogRecord) -> int: + blocked = [ + 'TLSV1_ALERT_DECRYPT_ERROR' + ] + msg = record.getMessage() + return not any([m for m in blocked if m in msg]) + + +logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) +cherrypy.log.access_log.propagate = False + + +class Route(NamedTuple): + name: str + route: str + controller: Callable + + +class ServiceDiscovery: + + KV_STORE_SD_ROOT_CERT = 'service_discovery/root/cert' + KV_STORE_SD_ROOT_KEY = 'service_discovery/root/key' + + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr = mgr + self.ssl_certs = SSLCerts() + self.username: Optional[str] = None + self.password: Optional[str] = None + + def validate_password(self, realm: str, username: str, 
password: str) -> bool: + return (password == self.password and username == self.username) + + def configure_routes(self, server: Server, enable_auth: bool) -> None: + ROUTES = [ + Route('index', '/', server.index), + Route('sd-config', '/prometheus/sd-config', server.get_sd_config), + Route('rules', '/prometheus/rules', server.get_prometheus_rules), + ] + d = cherrypy.dispatch.RoutesDispatcher() + for route in ROUTES: + d.connect(**route._asdict()) + if enable_auth: + conf = { + '/': { + 'request.dispatch': d, + 'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'localhost', + 'tools.auth_basic.checkpassword': self.validate_password + } + } + else: + conf = {'/': {'request.dispatch': d}} + cherrypy.tree.mount(None, '/sd', config=conf) + + def enable_auth(self) -> None: + self.username = self.mgr.get_store('service_discovery/root/username') + self.password = self.mgr.get_store('service_discovery/root/password') + if not self.password or not self.username: + self.username = 'admin' # TODO(redo): what should be the default username + self.password = secrets.token_urlsafe(20) + self.mgr.set_store('service_discovery/root/password', self.password) + self.mgr.set_store('service_discovery/root/username', self.username) + + def configure_tls(self, server: Server) -> None: + old_cert = self.mgr.get_store(self.KV_STORE_SD_ROOT_CERT) + old_key = self.mgr.get_store(self.KV_STORE_SD_ROOT_KEY) + if old_key and old_cert: + self.ssl_certs.load_root_credentials(old_cert, old_key) + else: + self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip()) + self.mgr.set_store(self.KV_STORE_SD_ROOT_CERT, self.ssl_certs.get_root_cert()) + self.mgr.set_store(self.KV_STORE_SD_ROOT_KEY, self.ssl_certs.get_root_key()) + addr = self.mgr.get_mgr_ip() + host_fqdn = socket.getfqdn(addr) + server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files( + host_fqdn, addr) + + def configure(self, port: int, addr: str, enable_security: bool) -> None: + # we create a new server to enforce TLS/SSL config refresh + self.root_server = Root(self.mgr, port, addr) + self.root_server.ssl_certificate = None + self.root_server.ssl_private_key = None + if enable_security: + self.enable_auth() + self.configure_tls(self.root_server) + self.configure_routes(self.root_server, enable_security) + + +class Root(Server): + + # collapse everything to '/' + def _cp_dispatch(self, vpath: str) -> 'Root': + cherrypy.request.path = '' + return self + + def stop(self) -> None: + # we must call unsubscribe before stopping the server, + # otherwise the port is not released and we will get + # an exception when trying to restart it + self.unsubscribe() + super().stop() + + def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''): + self.mgr = mgr + super().__init__() + self.socket_port = port + self.socket_host = host + self.subscribe() + + @cherrypy.expose + def index(self) -> str: + return '''<!DOCTYPE html> +<html> +<head><title>Cephadm HTTP Endpoint</title></head> +<body> +<h2>Cephadm Service Discovery Endpoints</h2> +<p><a href='prometheus/sd-config?service=mgr-prometheus'>mgr/Prometheus http sd-config</a></p> +<p><a href='prometheus/sd-config?service=alertmanager'>Alertmanager http sd-config</a></p> +<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p> +<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p> +<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p> +<p><a href='prometheus/rules'>Prometheus 
rules</a></p> +</body> +</html>''' + + @cherrypy.expose + @cherrypy.tools.json_out() + def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for the specified service.""" + if service == 'mgr-prometheus': + return self.prometheus_sd_config() + elif service == 'alertmanager': + return self.alertmgr_sd_config() + elif service == 'node-exporter': + return self.node_exporter_sd_config() + elif service == 'haproxy': + return self.haproxy_sd_config() + elif service == 'ceph-exporter': + return self.ceph_exporter_sd_config() + else: + return [] + + def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for prometheus service.""" + servers = self.mgr.list_servers() + targets = [] + for server in servers: + hostname = server.get('hostname', '') + for service in cast(List[ServiceInfoT], server.get('services', [])): + if service['type'] != 'mgr' or service['id'] != self.mgr.get_mgr_id(): + continue + port = self.mgr.get_module_option_ex( + 'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT) + targets.append(f'{hostname}:{port}') + return [{"targets": targets, "labels": {}}] + + def alertmgr_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for mgr alertmanager service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else AlertmanagerService.DEFAULT_SERVICE_PORT + srv_entries.append('{}'.format(build_url(host=addr, port=port).lstrip('/'))) + return [{"targets": srv_entries, "labels": {}}] + + def node_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for node-exporter service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_service('node-exporter'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else NodeExporterService.DEFAULT_SERVICE_PORT + srv_entries.append({ + 'targets': [build_url(host=addr, port=port).lstrip('/')], + 'labels': {'instance': dd.hostname} + }) + return srv_entries + + def haproxy_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for haproxy service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_type('ingress'): + if dd.service_name() in self.mgr.spec_store: + spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec) + assert dd.hostname is not None + if dd.daemon_type == 'haproxy': + addr = self.mgr.inventory.get_addr(dd.hostname) + srv_entries.append({ + 'targets': [f"{build_url(host=addr, port=spec.monitor_port).lstrip('/')}"], + 'labels': {'instance': dd.service_name()} + }) + return srv_entries + + def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return <http_sd_config> compatible prometheus config for ceph-exporter service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_service('ceph-exporter'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else CephExporterService.DEFAULT_SERVICE_PORT + srv_entries.append({ + 'targets': [build_url(host=addr, port=port).lstrip('/')], + 'labels': 
{'instance': dd.hostname}
+            })
+        return srv_entries
+
+    @cherrypy.expose(alias='prometheus/rules')
+    def get_prometheus_rules(self) -> str:
+        """Return currently configured prometheus rules as Yaml."""
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
+            return f.read()
diff --git a/src/pybind/mgr/cephadm/services/__init__.py b/src/pybind/mgr/cephadm/services/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/__init__.py
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
new file mode 100644
index 000000000..7d7a04dad
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -0,0 +1,1254 @@
+import errno
+import json
+import logging
+import re
+import socket
+import time
+from abc import ABCMeta, abstractmethod
+from typing import TYPE_CHECKING, List, Callable, TypeVar, \
+    Optional, Dict, Any, Tuple, NewType, cast
+
+from mgr_module import HandleCommandResult, MonCommandFailed
+
+from ceph.deployment.service_spec import (
+    ArgumentList,
+    CephExporterSpec,
+    GeneralArgList,
+    MONSpec,
+    RGWSpec,
+    ServiceSpec,
+)
+from ceph.deployment.utils import is_ipv6, unwrap_ipv6
+from mgr_util import build_url, merge_dicts
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
+from orchestrator._interface import daemon_type_to_service
+from cephadm import utils
+
+if TYPE_CHECKING:
+    from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+ServiceSpecs = TypeVar('ServiceSpecs', bound=ServiceSpec)
+AuthEntity = NewType('AuthEntity', str)
+
+
+def get_auth_entity(daemon_type: str, daemon_id: str, host: str = "") -> AuthEntity:
+    """
+    Map the daemon id to a cephx keyring entity name
+    """
+    # despite this mapping entity names to daemons, self.TYPE within
+    # the CephService class refers to service types, not daemon types
+    if daemon_type in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'nvmeof', 'ingress', 'ceph-exporter']:
+        return AuthEntity(f'client.{daemon_type}.{daemon_id}')
+    elif daemon_type in ['crash', 'agent']:
+        if host == "":
+            raise OrchestratorError(
+                f'Host not provided to generate <{daemon_type}> auth entity name')
+        return AuthEntity(f'client.{daemon_type}.{host}')
+    elif daemon_type == 'mon':
+        return AuthEntity('mon.')
+    elif daemon_type in ['mgr', 'osd', 'mds']:
+        return AuthEntity(f'{daemon_type}.{daemon_id}')
+    else:
+        raise OrchestratorError(f"unknown daemon type {daemon_type}")
+
+
+class CephadmDaemonDeploySpec:
+    # typing.NamedTuple + Generic is broken in py36
+    def __init__(self, host: str, daemon_id: str,
+                 service_name: str,
+                 network: Optional[str] = None,
+                 keyring: Optional[str] = None,
+                 extra_args: Optional[List[str]] = None,
+                 ceph_conf: str = '',
+                 extra_files: Optional[Dict[str, Any]] = None,
+                 daemon_type: Optional[str] = None,
+                 ip: Optional[str] = None,
+                 ports: Optional[List[int]] = None,
+                 port_ips: Optional[Dict[str, str]] = None,
+                 rank: Optional[int] = None,
+                 rank_generation: Optional[int] = None,
+                 extra_container_args: Optional[ArgumentList] = None,
+                 extra_entrypoint_args: Optional[ArgumentList] = None,
+                 ):
+        """
+        A data structure to encapsulate `cephadm deploy ...
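+
+        A minimal, hypothetical construction (invented values, shown for
+        illustration):
+
+            CephadmDaemonDeploySpec(host='host1', daemon_id='x',
+                                    service_name='mgr', ports=[8443])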
+ """ + self.host: str = host + self.daemon_id = daemon_id + self.service_name = service_name + daemon_type = daemon_type or (service_name.split('.')[0]) + assert daemon_type is not None + self.daemon_type: str = daemon_type + + # mons + self.network = network + + # for run_cephadm. + self.keyring: Optional[str] = keyring + + # FIXME: finish removing this + # For run_cephadm. Would be great to have more expressive names. + # self.extra_args: List[str] = extra_args or [] + assert not extra_args + + self.ceph_conf = ceph_conf + self.extra_files = extra_files or {} + + # TCP ports used by the daemon + self.ports: List[int] = ports or [] + # mapping of ports to IP addresses for ports + # we know we will only bind to on a specific IP. + # Useful for allowing multiple daemons to bind + # to the same port on different IPs on the same node + self.port_ips: Dict[str, str] = port_ips or {} + self.ip: Optional[str] = ip + + # values to be populated during generate_config calls + # and then used in _run_cephadm + self.final_config: Dict[str, Any] = {} + self.deps: List[str] = [] + + self.rank: Optional[int] = rank + self.rank_generation: Optional[int] = rank_generation + + self.extra_container_args = extra_container_args + self.extra_entrypoint_args = extra_entrypoint_args + + def name(self) -> str: + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def entity_name(self) -> str: + return get_auth_entity(self.daemon_type, self.daemon_id, host=self.host) + + def config_get_files(self) -> Dict[str, Any]: + files = self.extra_files + if self.ceph_conf: + files['config'] = self.ceph_conf + + return files + + @staticmethod + def from_daemon_description(dd: DaemonDescription) -> 'CephadmDaemonDeploySpec': + assert dd.hostname + assert dd.daemon_id + assert dd.daemon_type + return CephadmDaemonDeploySpec( + host=dd.hostname, + daemon_id=dd.daemon_id, + daemon_type=dd.daemon_type, + service_name=dd.service_name(), + ip=dd.ip, + ports=dd.ports, + rank=dd.rank, + rank_generation=dd.rank_generation, + extra_container_args=dd.extra_container_args, + extra_entrypoint_args=dd.extra_entrypoint_args, + ) + + def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription: + return DaemonDescription( + daemon_type=self.daemon_type, + daemon_id=self.daemon_id, + service_name=self.service_name, + hostname=self.host, + status=status, + status_desc=status_desc, + ip=self.ip, + ports=self.ports, + rank=self.rank, + rank_generation=self.rank_generation, + extra_container_args=cast(GeneralArgList, self.extra_container_args), + extra_entrypoint_args=cast(GeneralArgList, self.extra_entrypoint_args), + ) + + @property + def extra_args(self) -> List[str]: + return [] + + +class CephadmService(metaclass=ABCMeta): + """ + Base class for service types. Often providing a create() and config() fn. + """ + + @property + @abstractmethod + def TYPE(self) -> str: + pass + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + + def allow_colo(self) -> bool: + """ + Return True if multiple daemons of the same type can colocate on + the same host. + """ + return False + + def primary_daemon_type(self, spec: Optional[ServiceSpec] = None) -> str: + """ + This is the type of the primary (usually only) daemon to be deployed. 
+ """ + return self.TYPE + + def per_host_daemon_type(self, spec: Optional[ServiceSpec] = None) -> Optional[str]: + """ + If defined, this type of daemon will be deployed once for each host + containing one or more daemons of the primary type. + """ + return None + + def ranked(self) -> bool: + """ + If True, we will assign a stable rank (0, 1, ...) and monotonically increasing + generation (0, 1, ...) to each daemon we create/deploy. + """ + return False + + def fence_old_ranks(self, + spec: ServiceSpec, + rank_map: Dict[int, Dict[int, Optional[str]]], + num_ranks: int) -> None: + assert False + + def make_daemon_spec( + self, + host: str, + daemon_id: str, + network: str, + spec: ServiceSpecs, + daemon_type: Optional[str] = None, + ports: Optional[List[int]] = None, + ip: Optional[str] = None, + rank: Optional[int] = None, + rank_generation: Optional[int] = None, + ) -> CephadmDaemonDeploySpec: + return CephadmDaemonDeploySpec( + host=host, + daemon_id=daemon_id, + service_name=spec.service_name(), + network=network, + daemon_type=daemon_type, + ports=ports, + ip=ip, + rank=rank, + rank_generation=rank_generation, + extra_container_args=spec.extra_container_args if hasattr( + spec, 'extra_container_args') else None, + extra_entrypoint_args=spec.extra_entrypoint_args if hasattr( + spec, 'extra_entrypoint_args') else None, + ) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + raise NotImplementedError() + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + raise NotImplementedError() + + def config(self, spec: ServiceSpec) -> None: + """ + Configure the cluster for this service. Only called *once* per + service apply. Not for every daemon. + """ + pass + + def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None: + """The post actions needed to be done after daemons are checked""" + if self.mgr.config_dashboard: + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.config_dashboard(daemon_descrs) + else: + logger.debug('Dashboard is not enabled. 
Skip configuration.') + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + """Config dashboard settings.""" + raise NotImplementedError() + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # if this is called for a service type where it hasn't explicitly been + # defined, return empty Daemon Desc + return DaemonDescription() + + def get_keyring_with_caps(self, entity: AuthEntity, caps: List[str]) -> str: + ret, keyring, err = self.mgr.mon_command({ + 'prefix': 'auth get-or-create', + 'entity': entity, + 'caps': caps, + }) + if err: + ret, out, err = self.mgr.mon_command({ + 'prefix': 'auth caps', + 'entity': entity, + 'caps': caps, + }) + if err: + self.mgr.log.warning(f"Unable to update caps for {entity}") + + # get keyring anyway + ret, keyring, err = self.mgr.mon_command({ + 'prefix': 'auth get', + 'entity': entity, + }) + if err: + raise OrchestratorError(f"Unable to fetch keyring for {entity}: {err}") + + # strip down keyring + # - don't include caps (auth get includes them; get-or-create does not) + # - use pending key if present + key = None + for line in keyring.splitlines(): + if ' = ' not in line: + continue + line = line.strip() + (ls, rs) = line.split(' = ', 1) + if ls == 'key' and not key: + key = rs + if ls == 'pending key': + key = rs + keyring = f'[{entity}]\nkey = {key}\n' + return keyring + + def _inventory_get_fqdn(self, hostname: str) -> str: + """Get a host's FQDN with its hostname. + + If the FQDN can't be resolved, the address from the inventory will + be returned instead. + """ + addr = self.mgr.inventory.get_addr(hostname) + return socket.getfqdn(addr) + + def _set_service_url_on_dashboard(self, + service_name: str, + get_mon_cmd: str, + set_mon_cmd: str, + service_url: str) -> None: + """A helper to get and set service_url via Dashboard's MON command. + + If result of get_mon_cmd differs from service_url, set_mon_cmd will + be sent to set the service_url. + """ + def get_set_cmd_dicts(out: str) -> List[dict]: + cmd_dict = { + 'prefix': set_mon_cmd, + 'value': service_url + } + return [cmd_dict] if service_url != out else [] + + self._check_and_set_dashboard( + service_name=service_name, + get_cmd=get_mon_cmd, + get_set_cmd_dicts=get_set_cmd_dicts + ) + + def _check_and_set_dashboard(self, + service_name: str, + get_cmd: str, + get_set_cmd_dicts: Callable[[str], List[dict]]) -> None: + """A helper to set configs in the Dashboard. + + The method is useful for the pattern: + - Getting a config from Dashboard by using a Dashboard command. e.g. current iSCSI + gateways. + - Parse or deserialize previous output. e.g. Dashboard command returns a JSON string. + - Determine if the config need to be update. NOTE: This step is important because if a + Dashboard command modified Ceph config, cephadm's config_notify() is called. Which + kicks the serve() loop and the logic using this method is likely to be called again. + A config should be updated only when needed. + - Update a config in Dashboard by using a Dashboard command. + + :param service_name: the service name to be used for logging + :type service_name: str + :param get_cmd: Dashboard command prefix to get config. e.g. dashboard get-grafana-api-url + :type get_cmd: str + :param get_set_cmd_dicts: function to create a list, and each item is a command dictionary. + e.g. 
+ [
+ {
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'service_url': 'http://admin:admin@aaa:5000',
+ 'name': 'aaa'
+ },
+ {
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'service_url': 'http://admin:admin@bbb:5000',
+ 'name': 'bbb'
+ }
+ ]
+ The function should return an empty list if no command needs to be sent.
+ :type get_set_cmd_dicts: Callable[[str], List[dict]]
+ """
+
+ try:
+ _, out, _ = self.mgr.check_mon_command({
+ 'prefix': get_cmd
+ })
+ except MonCommandFailed as e:
+ logger.warning('Failed to get Dashboard config for %s: %s', service_name, e)
+ return
+ cmd_dicts = get_set_cmd_dicts(out.strip())
+ for cmd_dict in list(cmd_dicts):
+ try:
+ inbuf = cmd_dict.pop('inbuf', None)
+ _, out, _ = self.mgr.check_mon_command(cmd_dict, inbuf)
+ except MonCommandFailed as e:
+ logger.warning('Failed to set Dashboard config for %s: %s', service_name, e)
+
+ def ok_to_stop_osd(
+ self,
+ osds: List[str],
+ known: Optional[List[str]] = None, # output argument
+ force: bool = False) -> HandleCommandResult:
+ r = HandleCommandResult(*self.mgr.mon_command({
+ 'prefix': "osd ok-to-stop",
+ 'ids': osds,
+ 'max': 16,
+ }))
+ j = None
+ try:
+ j = json.loads(r.stdout)
+ except json.decoder.JSONDecodeError:
+ self.mgr.log.warning("osd ok-to-stop didn't return structured result")
+ raise
+ if r.retval:
+ return r
+ if known is not None and j and j.get('ok_to_stop'):
+ self.mgr.log.debug(f"got {j}")
+ known.extend([f'osd.{x}' for x in j.get('osds', [])])
+ return HandleCommandResult(
+ 0,
+ f'{",".join(["osd.%s" % o for o in osds])} {"is" if len(osds) == 1 else "are"} safe to restart',
+ ''
+ )
+
+ def ok_to_stop(
+ self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None # output argument
+ ) -> HandleCommandResult:
+ names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+ out = f'It appears safe to stop {",".join(names)}'
+ err = f'It is NOT safe to stop {",".join(names)} at this time'
+
+ if self.TYPE not in ['mon', 'osd', 'mds']:
+ logger.debug(out)
+ return HandleCommandResult(0, out)
+
+ if self.TYPE == 'osd':
+ return self.ok_to_stop_osd(daemon_ids, known, force)
+
+ r = HandleCommandResult(*self.mgr.mon_command({
+ 'prefix': f'{self.TYPE} ok-to-stop',
+ 'ids': daemon_ids,
+ }))
+
+ if r.retval:
+ err = f'{err}: {r.stderr}' if r.stderr else err
+ logger.debug(err)
+ return HandleCommandResult(r.retval, r.stdout, err)
+
+ out = f'{out}: {r.stdout}' if r.stdout else out
+ logger.debug(out)
+ return HandleCommandResult(r.retval, out, r.stderr)
+
+ def _enough_daemons_to_stop(self, daemon_type: str, daemon_ids: List[str], service: str, low_limit: int, alert: bool = False) -> Tuple[bool, str]:
+ # Provides a warning about whether it is possible to stop <n> daemons in a service
+ names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids]
+ number_of_running_daemons = len(
+ [daemon
+ for daemon in self.mgr.cache.get_daemons_by_type(daemon_type)
+ if daemon.status == DaemonDescriptionStatus.running])
+ if (number_of_running_daemons - len(daemon_ids)) >= low_limit:
+ return False, f'It is presumed safe to stop {names}'
+
+ num_daemons_left = number_of_running_daemons - len(daemon_ids)
+
+ def plural(count: int) -> str:
+ return 'daemon' if count == 1 else 'daemons'
+
+ left_count = "no" if num_daemons_left == 0 else num_daemons_left
+
+ if alert:
+ out = (f'ALERT: Cannot stop {names} in {service} service. '
+ f'Not enough remaining {service} daemons. '
+ f'Please deploy at least {low_limit + 1} {service} daemons before stopping {names}. 
') + else: + out = (f'WARNING: Stopping {len(daemon_ids)} out of {number_of_running_daemons} daemons in {service} service. ' + f'Service will not be operational with {left_count} {plural(num_daemons_left)} left. ' + f'At least {low_limit} {plural(low_limit)} must be running to guarantee service. ') + return True, out + + def pre_remove(self, daemon: DaemonDescription) -> None: + """ + Called before the daemon is removed. + """ + assert daemon.daemon_type is not None + assert self.TYPE == daemon_type_to_service(daemon.daemon_type) + logger.debug(f'Pre remove daemon {self.TYPE}.{daemon.daemon_id}') + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + """ + Called after the daemon is removed. + """ + assert daemon.daemon_type is not None + assert self.TYPE == daemon_type_to_service(daemon.daemon_type) + logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + + def purge(self, service_name: str) -> None: + """Called to carry out any purge tasks following service removal""" + logger.debug(f'Purge called for {self.TYPE} - no action taken') + + +class CephService(CephadmService): + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + # Ceph.daemons (mon, mgr, mds, osd, etc) + cephadm_config = self.get_config_and_keyring( + daemon_spec.daemon_type, + daemon_spec.daemon_id, + host=daemon_spec.host, + keyring=daemon_spec.keyring, + extra_ceph_config=daemon_spec.ceph_conf) + + if daemon_spec.config_get_files(): + cephadm_config.update({'files': daemon_spec.config_get_files()}) + + return cephadm_config, [] + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + self.remove_keyring(daemon) + + def get_auth_entity(self, daemon_id: str, host: str = "") -> AuthEntity: + return get_auth_entity(self.TYPE, daemon_id, host=host) + + def get_config_and_keyring(self, + daemon_type: str, + daemon_id: str, + host: str, + keyring: Optional[str] = None, + extra_ceph_config: Optional[str] = None + ) -> Dict[str, Any]: + # keyring + if not keyring: + entity: AuthEntity = self.get_auth_entity(daemon_id, host=host) + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get', + 'entity': entity, + }) + config = self.mgr.get_minimal_ceph_conf() + + if extra_ceph_config: + config += extra_ceph_config + + return { + 'config': config, + 'keyring': keyring, + } + + def remove_keyring(self, daemon: DaemonDescription) -> None: + assert daemon.daemon_id is not None + assert daemon.hostname is not None + daemon_id: str = daemon.daemon_id + host: str = daemon.hostname + + assert daemon.daemon_type != 'mon' + + entity = self.get_auth_entity(daemon_id, host=host) + + logger.info(f'Removing key for {entity}') + ret, out, err = self.mgr.mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + + +class MonService(CephService): + TYPE = 'mon' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + """ + Create a new monitor on the given host. + """ + assert self.TYPE == daemon_spec.daemon_type + name, _, network = daemon_spec.daemon_id, daemon_spec.host, daemon_spec.network + + # get mon. 
key + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get', + 'entity': daemon_spec.entity_name(), + }) + + extra_config = '[mon.%s]\n' % name + if network: + # infer whether this is a CIDR network, addrvec, or plain IP + if '/' in network: + extra_config += 'public network = %s\n' % network + elif network.startswith('[v') and network.endswith(']'): + extra_config += 'public addrv = %s\n' % network + elif is_ipv6(network): + extra_config += 'public addr = %s\n' % unwrap_ipv6(network) + elif ':' not in network: + extra_config += 'public addr = %s\n' % network + else: + raise OrchestratorError( + 'Must specify a CIDR network, ceph addrvec, or plain IP: \'%s\'' % network) + else: + # try to get the public_network from the config + ret, network, err = self.mgr.check_mon_command({ + 'prefix': 'config get', + 'who': 'mon', + 'key': 'public_network', + }) + network = network.strip() if network else network + if not network: + raise OrchestratorError( + 'Must set public_network config option or specify a CIDR network, ceph addrvec, or plain IP') + if '/' not in network: + raise OrchestratorError( + 'public_network is set but does not look like a CIDR network: \'%s\'' % network) + extra_config += 'public network = %s\n' % network + + daemon_spec.ceph_conf = extra_config + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def config(self, spec: ServiceSpec) -> None: + assert self.TYPE == spec.service_type + self.set_crush_locations(self.mgr.cache.get_daemons_by_type('mon'), spec) + + def _get_quorum_status(self) -> Dict[Any, Any]: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'quorum_status', + }) + try: + j = json.loads(out) + except Exception as e: + raise OrchestratorError(f'failed to parse mon quorum status: {e}') + return j + + def _check_safe_to_destroy(self, mon_id: str) -> None: + quorum_status = self._get_quorum_status() + mons = [m['name'] for m in quorum_status['monmap']['mons']] + if mon_id not in mons: + logger.info('Safe to remove mon.%s: not in monmap (%s)' % ( + mon_id, mons)) + return + new_mons = [m for m in mons if m != mon_id] + new_quorum = [m for m in quorum_status['quorum_names'] if m != mon_id] + if len(new_quorum) > len(new_mons) / 2: + logger.info('Safe to remove mon.%s: new quorum should be %s (from %s)' % + (mon_id, new_quorum, new_mons)) + return + raise OrchestratorError( + 'Removing %s would break mon quorum (new quorum %s, new mons %s)' % (mon_id, new_quorum, new_mons)) + + def pre_remove(self, daemon: DaemonDescription) -> None: + super().pre_remove(daemon) + + assert daemon.daemon_id is not None + daemon_id: str = daemon.daemon_id + self._check_safe_to_destroy(daemon_id) + + # remove mon from quorum before we destroy the daemon + logger.info('Removing monitor %s from monmap...' % daemon_id) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'mon rm', + 'name': daemon_id, + }) + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + # Do not remove the mon keyring. + # super().post_remove(daemon) + pass + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + daemon_spec.final_config, daemon_spec.deps = super().generate_config(daemon_spec) + + # realistically, we expect there to always be a mon spec + # in a real deployment, but the way teuthology deploys some daemons + # it's possible there might not be. 
For that reason we need to
+ # verify the service is present in the spec store.
+ if daemon_spec.service_name in self.mgr.spec_store:
+ mon_spec = cast(MONSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ if mon_spec.crush_locations:
+ if daemon_spec.host in mon_spec.crush_locations:
+ # the --crush-location flag only supports a single bucket=loc pair so
+ # others will have to be handled later. The idea is to set the flag
+ # for the first bucket=loc pair in the list in order to facilitate
+ # replacing a tiebreaker mon (https://docs.ceph.com/en/quincy/rados/operations/stretch-mode/#other-commands)
+ c_loc = mon_spec.crush_locations[daemon_spec.host][0]
+ daemon_spec.final_config['crush_location'] = c_loc
+
+ return daemon_spec.final_config, daemon_spec.deps
+
+ def set_crush_locations(self, daemon_descrs: List[DaemonDescription], spec: ServiceSpec) -> None:
+ logger.debug('Setting mon crush locations from spec')
+ if not daemon_descrs:
+ return
+ assert self.TYPE == spec.service_type
+ mon_spec = cast(MONSpec, spec)
+
+ if not mon_spec.crush_locations:
+ return
+
+ quorum_status = self._get_quorum_status()
+ mons_in_monmap = [m['name'] for m in quorum_status['monmap']['mons']]
+ for dd in daemon_descrs:
+ assert dd.daemon_id is not None
+ assert dd.hostname is not None
+ if dd.hostname not in mon_spec.crush_locations:
+ continue
+ if dd.daemon_id not in mons_in_monmap:
+ continue
+ # expected format for crush_locations from the quorum status is
+ # {bucket1=loc1,bucket2=loc2} etc. for the number of bucket=loc pairs
+ try:
+ current_crush_locs = [m['crush_location'] for m in quorum_status['monmap']['mons'] if m['name'] == dd.daemon_id][0]
+ except (KeyError, IndexError) as e:
+ logger.warning(f'Failed setting crush location for mon {dd.daemon_id}: {e}\n'
+ 'Mon may not have a monmap entry yet. Try re-applying mon spec once mon is confirmed up.')
+ continue
+ desired_crush_locs = '{' + ','.join(mon_spec.crush_locations[dd.hostname]) + '}'
+ logger.debug(f'Found spec defined crush locations for mon on {dd.hostname}: {desired_crush_locs}')
+ logger.debug(f'Current crush locations for mon on {dd.hostname}: {current_crush_locs}')
+ if current_crush_locs != desired_crush_locs:
+ logger.info(f'Setting crush location for mon {dd.daemon_id} to {desired_crush_locs}')
+ try:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'mon set_location',
+ 'name': dd.daemon_id,
+ 'args': mon_spec.crush_locations[dd.hostname]
+ })
+ except Exception as e:
+ logger.error(f'Failed setting crush location for mon {dd.daemon_id}: {e}')
+
+
+ class MgrService(CephService):
+ TYPE = 'mgr'
+
+ def allow_colo(self) -> bool:
+ if self.mgr.get_ceph_option('mgr_standby_modules'):
+ # traditional mgr mode: standby daemons' modules listen on
+ # ports and redirect to the primary. we must not schedule
+ # multiple mgrs on the same host or else ports will
+ # conflict.
+ return False
+ else:
+ # standby daemons do nothing, and therefore port conflicts
+ # are not a concern.
+ return True
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ """
+ Create a new manager instance on a host.
+ """
+ assert self.TYPE == daemon_spec.daemon_type
+ mgr_id, _ = daemon_spec.daemon_id, daemon_spec.host
+
+ # get mgr. 
key
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(mgr_id),
+ ['mon', 'profile mgr',
+ 'osd', 'allow *',
+ 'mds', 'allow *'])
+
+ # Retrieve ports used by manager modules.
+ # With several manager daemons running on different hosts, the user may
+ # have chosen a different dashboard port on each server. In that case,
+ # only the default dashboard port will be opened here.
+ ports = []
+ ret, mgr_services, err = self.mgr.check_mon_command({
+ 'prefix': 'mgr services',
+ })
+ if mgr_services:
+ mgr_endpoints = json.loads(mgr_services)
+ for end_point in mgr_endpoints.values():
+ port = re.search(r'\:\d+\/', end_point)
+ if port:
+ ports.append(int(port[0][1:-1]))
+
+ if ports:
+ daemon_spec.ports = ports
+
+ daemon_spec.ports.append(self.mgr.service_discovery_port)
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ for daemon in daemon_descrs:
+ assert daemon.daemon_type is not None
+ assert daemon.daemon_id is not None
+ if self.mgr.daemon_is_self(daemon.daemon_type, daemon.daemon_id):
+ return daemon
+ # if no active mgr found, return empty Daemon Desc
+ return DaemonDescription()
+
+ def fail_over(self) -> None:
+ # this has been seen to sometimes transiently fail even when there are multiple
+ # mgr daemons. As long as there are multiple known mgr daemons, we should retry.
+ class NoStandbyError(OrchestratorError):
+ pass
+ no_standby_exc = NoStandbyError('Need standby mgr daemon', event_kind_subject=(
+ 'daemon', 'mgr' + self.mgr.get_mgr_id()))
+ for sleep_secs in [2, 8, 15]:
+ try:
+ if not self.mgr_map_has_standby():
+ raise no_standby_exc
+ self.mgr.events.for_daemon('mgr' + self.mgr.get_mgr_id(),
+ 'INFO', 'Failing over to other MGR')
+ logger.info('Failing over to other MGR')
+
+ # fail over
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'mgr fail',
+ 'who': self.mgr.get_mgr_id(),
+ })
+ return
+ except NoStandbyError:
+ logger.info(
+ f'Failed to find standby mgr for failover. Retrying in {sleep_secs} seconds')
+ time.sleep(sleep_secs)
+ raise no_standby_exc
+
+ def mgr_map_has_standby(self) -> bool:
+ """
+ This is a bit safer than asking our inventory. 
If the mgr joined the mgr map, + we know it joined the cluster + """ + mgr_map = self.mgr.get('mgr_map') + num = len(mgr_map.get('standbys')) + return bool(num) + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # ok to stop if there is more than 1 mgr and not trying to stop the active mgr + + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Mgr', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + mgr_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE) + active = self.get_active_daemon(mgr_daemons).daemon_id + if active in daemon_ids: + warn_message = 'ALERT: Cannot stop active Mgr daemon, Please switch active Mgrs with \'ceph mgr fail %s\'' % active + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + return HandleCommandResult(0, warn_message, '') + + +class MdsService(CephService): + TYPE = 'mds' + + def allow_colo(self) -> bool: + return True + + def config(self, spec: ServiceSpec) -> None: + assert self.TYPE == spec.service_type + assert spec.service_id + + # ensure mds_join_fs is set for these daemons + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': 'mds.' + spec.service_id, + 'name': 'mds_join_fs', + 'value': spec.service_id, + }) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + mds_id, _ = daemon_spec.daemon_id, daemon_spec.host + + # get mds. key + keyring = self.get_keyring_with_caps(self.get_auth_entity(mds_id), + ['mon', 'profile mds', + 'osd', 'allow rw tag cephfs *=*', + 'mds', 'allow']) + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + active_mds_strs = list() + for fs in self.mgr.get('fs_map')['filesystems']: + mds_map = fs['mdsmap'] + if mds_map is not None: + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] == 'up:active': + active_mds_strs.append(mds_status['name']) + if len(active_mds_strs) != 0: + for daemon in daemon_descrs: + if daemon.daemon_id in active_mds_strs: + return daemon + # if no mds found, return empty Daemon Desc + return DaemonDescription() + + def purge(self, service_name: str) -> None: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': service_name, + 'name': 'mds_join_fs', + }) + + +class RgwService(CephService): + TYPE = 'rgw' + + def allow_colo(self) -> bool: + return True + + def config(self, spec: RGWSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + + # set rgw_realm rgw_zonegroup and rgw_zone, if present + if spec.rgw_realm: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}", + 'name': 'rgw_realm', + 'value': spec.rgw_realm, + }) + if spec.rgw_zonegroup: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}", + 'name': 'rgw_zonegroup', + 'value': spec.rgw_zonegroup, + }) + if spec.rgw_zone: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}", + 'name': 'rgw_zone', + 'value': spec.rgw_zone, + }) + + if spec.rgw_frontend_ssl_certificate: + 
if isinstance(spec.rgw_frontend_ssl_certificate, list): + cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate) + elif isinstance(spec.rgw_frontend_ssl_certificate, str): + cert_data = spec.rgw_frontend_ssl_certificate + else: + raise OrchestratorError( + 'Invalid rgw_frontend_ssl_certificate: %s' + % spec.rgw_frontend_ssl_certificate) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'rgw/cert/{spec.service_name()}', + 'val': cert_data, + }) + + # TODO: fail, if we don't have a spec + logger.info('Saving service %s spec with placement %s' % ( + spec.service_name(), spec.placement.pretty_str())) + self.mgr.spec_store.save(spec) + self.mgr.trigger_connect_dashboard_rgw() + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + rgw_id, _ = daemon_spec.daemon_id, daemon_spec.host + spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + keyring = self.get_keyring(rgw_id) + + if daemon_spec.ports: + port = daemon_spec.ports[0] + else: + # this is a redeploy of older instance that doesn't have an explicitly + # assigned port, in which case we can assume there is only 1 per host + # and it matches the spec. + port = spec.get_port() + + # configure frontend + args = [] + ftype = spec.rgw_frontend_type or "beast" + if ftype == 'beast': + if spec.ssl: + if daemon_spec.ip: + args.append( + f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"ssl_port={port}") + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + else: + if daemon_spec.ip: + args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"port={port}") + elif ftype == 'civetweb': + if spec.ssl: + if daemon_spec.ip: + # note the 's' suffix on port + args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s") + else: + args.append(f"port={port}s") # note the 's' suffix on port + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + else: + if daemon_spec.ip: + args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"port={port}") + else: + raise OrchestratorError(f'Invalid rgw_frontend_type parameter: {ftype}. 
Valid values are: beast, civetweb.') + + if spec.rgw_frontend_extra_args is not None: + args.extend(spec.rgw_frontend_extra_args) + + frontend = f'{ftype} {" ".join(args)}' + + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': utils.name_to_config_section(daemon_spec.name()), + 'name': 'rgw_frontends', + 'value': frontend + }) + + daemon_spec.keyring = keyring + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def get_keyring(self, rgw_id: str) -> str: + keyring = self.get_keyring_with_caps(self.get_auth_entity(rgw_id), + ['mon', 'allow *', + 'mgr', 'allow rw', + 'osd', 'allow rwx tag rgw *=*']) + return keyring + + def purge(self, service_name: str) -> None: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(service_name), + 'name': 'rgw_realm', + }) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(service_name), + 'name': 'rgw_zone', + }) + self.mgr.check_mon_command({ + 'prefix': 'config-key rm', + 'key': f'rgw/cert/{service_name}', + }) + self.mgr.trigger_connect_dashboard_rgw() + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(daemon.name()), + 'name': 'rgw_frontends', + }) + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # if load balancer (ingress) is present block if only 1 daemon up otherwise ok + # if no load balancer, warn if > 1 daemon, block if only 1 daemon + def ingress_present() -> bool: + running_ingress_daemons = [ + daemon for daemon in self.mgr.cache.get_daemons_by_type('ingress') if daemon.status == 1] + running_haproxy_daemons = [ + daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'haproxy'] + running_keepalived_daemons = [ + daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'keepalived'] + # check that there is at least one haproxy and keepalived daemon running + if running_haproxy_daemons and running_keepalived_daemons: + return True + return False + + # if only 1 rgw, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + # if reached here, there is > 1 rgw daemon. + # Say okay if load balancer present or force flag set + if ingress_present() or force: + return HandleCommandResult(0, warn_message, '') + + # if reached here, > 1 RGW daemon, no load balancer and no force flag. + # Provide warning + warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. 
" + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + self.mgr.trigger_connect_dashboard_rgw() + + +class RbdMirrorService(CephService): + TYPE = 'rbd-mirror' + + def allow_colo(self) -> bool: + return True + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_id, _ = daemon_spec.daemon_id, daemon_spec.host + + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id), + ['mon', 'profile rbd-mirror', + 'osd', 'profile rbd']) + + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # if only 1 rbd-mirror, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop( + self.TYPE, daemon_ids, 'Rbdmirror', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class CrashService(CephService): + TYPE = 'crash' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_id, host = daemon_spec.daemon_id, daemon_spec.host + + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id, host=host), + ['mon', 'profile crash', + 'mgr', 'profile crash']) + + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + +class CephExporterService(CephService): + TYPE = 'ceph-exporter' + DEFAULT_SERVICE_PORT = 9926 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + spec = cast(CephExporterSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_spec.daemon_id), + ['mon', 'profile ceph-exporter', + 'mon', 'allow r', + 'mgr', 'allow r', + 'osd', 'allow r']) + exporter_config = {} + if spec.sock_dir: + exporter_config.update({'sock-dir': spec.sock_dir}) + if spec.port: + exporter_config.update({'port': f'{spec.port}'}) + if spec.prio_limit is not None: + exporter_config.update({'prio-limit': f'{spec.prio_limit}'}) + if spec.stats_period: + exporter_config.update({'stats-period': f'{spec.stats_period}'}) + + daemon_spec.keyring = keyring + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config) + return daemon_spec + + +class CephfsMirrorService(CephService): + TYPE = 'cephfs-mirror' + + def config(self, spec: ServiceSpec) -> None: + # make sure mirroring module is enabled + mgr_map = self.mgr.get('mgr_map') + mod_name = 'mirroring' + if mod_name not in mgr_map.get('services', {}): + self.mgr.check_mon_command({ + 'prefix': 'mgr module enable', + 'module': mod_name + }) + # we shouldn't get here (mon will tell the mgr to respawn), but no + # harm done if we do. 
+ + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get-or-create', + 'entity': daemon_spec.entity_name(), + 'caps': ['mon', 'profile cephfs-mirror', + 'mds', 'allow r', + 'osd', 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*', + 'mgr', 'allow r'], + }) + + daemon_spec.keyring = keyring + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + +class CephadmAgent(CephService): + TYPE = 'agent' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_id, host = daemon_spec.daemon_id, daemon_spec.host + + if not self.mgr.http_server.agent: + raise OrchestratorError('Cannot deploy agent before creating cephadm endpoint') + + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id, host=host), []) + daemon_spec.keyring = keyring + self.mgr.agent_cache.agent_keys[host] = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + agent = self.mgr.http_server.agent + try: + assert agent + assert agent.ssl_certs.get_root_cert() + assert agent.server_port + except Exception: + raise OrchestratorError( + 'Cannot deploy agent daemons until cephadm endpoint has finished generating certs') + + cfg = {'target_ip': self.mgr.get_mgr_ip(), + 'target_port': agent.server_port, + 'refresh_period': self.mgr.agent_refresh_rate, + 'listener_port': self.mgr.agent_starting_port, + 'host': daemon_spec.host, + 'device_enhanced_scan': str(self.mgr.device_enhanced_scan)} + + listener_cert, listener_key = agent.ssl_certs.generate_cert(daemon_spec.host, self.mgr.inventory.get_addr(daemon_spec.host)) + config = { + 'agent.json': json.dumps(cfg), + 'keyring': daemon_spec.keyring, + 'root_cert.pem': agent.ssl_certs.get_root_cert(), + 'listener.crt': listener_cert, + 'listener.key': listener_key, + } + + return config, sorted([str(self.mgr.get_mgr_ip()), str(agent.server_port), + agent.ssl_certs.get_root_cert(), + str(self.mgr.get_module_option('device_enhanced_scan'))]) diff --git a/src/pybind/mgr/cephadm/services/container.py b/src/pybind/mgr/cephadm/services/container.py new file mode 100644 index 000000000..b9cdfad5e --- /dev/null +++ b/src/pybind/mgr/cephadm/services/container.py @@ -0,0 +1,29 @@ +import logging +from typing import List, Any, Tuple, Dict, cast + +from ceph.deployment.service_spec import CustomContainerSpec + +from .cephadmservice import CephadmService, CephadmDaemonDeploySpec + +logger = logging.getLogger(__name__) + + +class CustomContainerService(CephadmService): + TYPE = 'container' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) \ + -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) \ + -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + spec = cast(CustomContainerSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + config: Dict[str, Any] = spec.config_json() + logger.debug( + 'Generated configuration for \'%s\' service: config-json=%s, 
dependencies=%s' % + (self.TYPE, config, deps)) + return config, deps diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py new file mode 100644 index 000000000..55be30454 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/ingress.py @@ -0,0 +1,381 @@ +import ipaddress +import logging +import random +import string +from typing import List, Dict, Any, Tuple, cast, Optional + +from ceph.deployment.service_spec import ServiceSpec, IngressSpec +from mgr_util import build_url +from cephadm import utils +from orchestrator import OrchestratorError, DaemonDescription +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService + +logger = logging.getLogger(__name__) + + +class IngressService(CephService): + TYPE = 'ingress' + MAX_KEEPALIVED_PASS_LEN = 8 + + def primary_daemon_type(self, spec: Optional[ServiceSpec] = None) -> str: + if spec: + ispec = cast(IngressSpec, spec) + # in keepalive only setups, we are only deploying keepalived, + # so that should be marked as the primary daemon type. Otherwise, + # we consider haproxy to be the primary. + if hasattr(spec, 'keepalive_only') and ispec.keepalive_only: + return 'keepalived' + return 'haproxy' + + def per_host_daemon_type(self, spec: Optional[ServiceSpec] = None) -> Optional[str]: + if spec: + ispec = cast(IngressSpec, spec) + # if we are using "keepalive_only" mode on this ingress service + # we are only deploying keepalived daemons, so there should + # only be a primary daemon type and the per host daemon type + # should be empty + if hasattr(spec, 'keepalive_only') and ispec.keepalive_only: + return None + return 'keepalived' + + def prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + if daemon_spec.daemon_type == 'haproxy': + return self.haproxy_prepare_create(daemon_spec) + if daemon_spec.daemon_type == 'keepalived': + return self.keepalived_prepare_create(daemon_spec) + assert False, "unexpected daemon type" + + def generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec + ) -> Tuple[Dict[str, Any], List[str]]: + if daemon_spec.daemon_type == 'haproxy': + return self.haproxy_generate_config(daemon_spec) + else: + return self.keepalived_generate_config(daemon_spec) + assert False, "unexpected daemon type" + + def haproxy_prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert daemon_spec.daemon_type == 'haproxy' + + daemon_id = daemon_spec.daemon_id + host = daemon_spec.host + spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + logger.debug('prepare_create haproxy.%s on host %s with spec %s' % ( + daemon_id, host, spec)) + + daemon_spec.final_config, daemon_spec.deps = self.haproxy_generate_config(daemon_spec) + + return daemon_spec + + def haproxy_generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> Tuple[Dict[str, Any], List[str]]: + spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + assert spec.backend_service + if spec.backend_service not in self.mgr.spec_store: + raise RuntimeError( + f'{spec.service_name()} backend service {spec.backend_service} does not exist') + backend_spec = self.mgr.spec_store[spec.backend_service].spec + daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service) + deps = [d.name() for d in daemons] + + # generate password? 
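+ # (an explicit monitor_password in the spec takes precedence and clears any
+ # previously stored random password; otherwise a random password is created
+ # once and persisted in the mgr store so later reconfigs reuse it)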
+ pw_key = f'{spec.service_name()}/monitor_password'
+ password = self.mgr.get_store(pw_key)
+ if password is None:
+ if not spec.monitor_password:
+ password = ''.join(random.choice(string.ascii_lowercase)
+ for _ in range(self.MAX_KEEPALIVED_PASS_LEN))
+ self.mgr.set_store(pw_key, password)
+ else:
+ if spec.monitor_password:
+ self.mgr.set_store(pw_key, None)
+ if spec.monitor_password:
+ password = spec.monitor_password
+
+ if backend_spec.service_type == 'nfs':
+ mode = 'tcp'
+ # we need to get the nfs daemon with the highest rank_generation for
+ # each rank we are currently deploying for the haproxy config
+ # for example if we had three (rank, rank_generation) pairs of
+ # (0, 0), (0, 1), (1, 0) we would want the nfs daemon corresponding
+ # to (0, 1) and (1, 0) because those are the two with the highest
+ # rank_generation for the existing ranks (0 and 1, with the highest
+ # rank_generation for rank 0 being 1 and highest rank_generation for
+ # rank 1 being 0)
+ ranked_daemons = [d for d in daemons if (d.rank is not None and d.rank_generation is not None)]
+ by_rank: Dict[int, DaemonDescription] = {}
+ for d in ranked_daemons:
+ # It doesn't seem like mypy can figure out that rank
+ # and rank_generation for both the daemon we're looping on
+ # and all those in by_rank cannot be None due to the filtering
+ # when creating the ranked_daemons list, which is why these
+ # seemingly unnecessary assertions are here.
+ assert d.rank is not None
+ if d.rank not in by_rank:
+ by_rank[d.rank] = d
+ else:
+ same_rank_nfs = by_rank[d.rank]
+ assert d.rank_generation is not None
+ assert same_rank_nfs.rank_generation is not None
+ # if we have multiple daemons with the same rank, take the one
+ # with the highest rank_generation
+ if d.rank_generation > same_rank_nfs.rank_generation:
+ by_rank[d.rank] = d
+ servers = []
+
+ # try to establish how many ranks we *should* have
+ num_ranks = backend_spec.placement.count
+ if not num_ranks:
+ num_ranks = 1 + max(by_rank.keys())
+
+ for rank in range(num_ranks):
+ if rank in by_rank:
+ d = by_rank[rank]
+ assert d.ports
+ servers.append({
+ 'name': f"{spec.backend_service}.{rank}",
+ 'ip': d.ip or utils.resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+ 'port': d.ports[0],
+ })
+ else:
+ # offline/missing server; leave rank in place
+ servers.append({
+ 'name': f"{spec.backend_service}.{rank}",
+ 'ip': '0.0.0.0',
+ 'port': 0,
+ })
+ else:
+ mode = 'http'
+ servers = [
+ {
+ 'name': d.name(),
+ 'ip': d.ip or utils.resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+ 'port': d.ports[0],
+ } for d in daemons if d.ports
+ ]
+
+ host_ip = daemon_spec.ip or self.mgr.inventory.get_addr(daemon_spec.host)
+ server_opts = []
+ if spec.enable_haproxy_protocol:
+ server_opts.append("send-proxy-v2")
+ logger.debug("enabled default server opts: %r", server_opts)
+ ip = '*' if spec.virtual_ips_list else str(spec.virtual_ip).split('/')[0] or daemon_spec.ip or '*'
+ frontend_port = daemon_spec.ports[0] if daemon_spec.ports else spec.frontend_port
+ if ip != '*' and frontend_port:
+ daemon_spec.port_ips = {str(frontend_port): ip}
+ haproxy_conf = self.mgr.template.render(
+ 'services/ingress/haproxy.cfg.j2',
+ {
+ 'spec': spec,
+ 'backend_spec': backend_spec,
+ 'mode': mode,
+ 'servers': servers,
+ 'user': spec.monitor_user or 'admin',
+ 'password': password,
+ 'ip': ip,
+ 'frontend_port': frontend_port,
+ 'monitor_port': daemon_spec.ports[1] if daemon_spec.ports else spec.monitor_port,
+ 'local_host_ip': host_ip,
+ 'default_server_opts': server_opts,
+ }
+ ) 
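+ # note: the deps returned below are the backend daemon names, so adding or
+ # removing a backend daemon changes the dep list and prompts cephadm to
+ # regenerate and redeploy this haproxy configuration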
+ config_files = { + 'files': { + "haproxy.cfg": haproxy_conf, + } + } + if spec.ssl_cert: + ssl_cert = spec.ssl_cert + if isinstance(ssl_cert, list): + ssl_cert = '\n'.join(ssl_cert) + config_files['files']['haproxy.pem'] = ssl_cert + + return config_files, sorted(deps) + + def keepalived_prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert daemon_spec.daemon_type == 'keepalived' + + daemon_id = daemon_spec.daemon_id + host = daemon_spec.host + spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + logger.debug('prepare_create keepalived.%s on host %s with spec %s' % ( + daemon_id, host, spec)) + + daemon_spec.final_config, daemon_spec.deps = self.keepalived_generate_config(daemon_spec) + + return daemon_spec + + def keepalived_generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> Tuple[Dict[str, Any], List[str]]: + spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + assert spec.backend_service + + # generate password? + pw_key = f'{spec.service_name()}/keepalived_password' + password = self.mgr.get_store(pw_key) + if password is None: + if not spec.keepalived_password: + password = ''.join(random.choice(string.ascii_lowercase) + for _ in range(self.MAX_KEEPALIVED_PASS_LEN)) + self.mgr.set_store(pw_key, password) + else: + if spec.keepalived_password: + self.mgr.set_store(pw_key, None) + if spec.keepalived_password: + password = spec.keepalived_password + + daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) + + if not daemons and not spec.keepalive_only: + raise OrchestratorError( + f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}') + + deps = sorted([d.name() for d in daemons if d.daemon_type == 'haproxy']) + + host = daemon_spec.host + hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons]))) + + def _get_valid_interface_and_ip(vip: str, host: str) -> Tuple[str, str]: + # interface + bare_ip = ipaddress.ip_interface(vip).ip + host_ip = '' + interface = None + for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): + if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet): + interface = list(ifaces.keys())[0] + host_ip = ifaces[interface][0] + logger.info( + f'{bare_ip} is in {subnet} on {host} interface {interface}' + ) + break + # try to find interface by matching spec.virtual_interface_networks + if not interface and spec.virtual_interface_networks: + for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): + if subnet in spec.virtual_interface_networks: + interface = list(ifaces.keys())[0] + host_ip = ifaces[interface][0] + logger.info( + f'{spec.virtual_ip} will be configured on {host} interface ' + f'{interface} (which is in subnet {subnet})' + ) + break + if not interface: + raise OrchestratorError( + f"Unable to identify interface for {spec.virtual_ip} on {host}" + ) + return interface, host_ip + + # script to monitor health + script = '/usr/bin/false' + for d in daemons: + if d.hostname == host: + if d.daemon_type == 'haproxy': + assert d.ports + port = d.ports[1] # monitoring port + host_ip = d.ip or self.mgr.inventory.get_addr(d.hostname) + script = f'/usr/bin/curl {build_url(scheme="http", host=host_ip, port=port)}/health' + assert script + + states = [] + priorities = [] + virtual_ips = [] + + # Set state and priority. Have one master for each VIP. Or at least the first one as master if only one VIP. 
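+ # e.g. with sorted hosts [a, b] and virtual_ips_list [vip0, vip1], host a is
+ # MASTER for vip0 and BACKUP for vip1, while host b is BACKUP for vip0 and
+ # MASTER for vip1 (MASTER gets priority 100, BACKUP gets 90)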
+ if spec.virtual_ip:
+ virtual_ips.append(spec.virtual_ip)
+ if hosts[0] == host:
+ states.append('MASTER')
+ priorities.append(100)
+ else:
+ states.append('BACKUP')
+ priorities.append(90)
+
+ elif spec.virtual_ips_list:
+ virtual_ips = spec.virtual_ips_list
+ if len(virtual_ips) > len(hosts):
+ raise OrchestratorError(
+ "Number of virtual IPs for ingress is greater than number of available hosts"
+ )
+ for x in range(len(virtual_ips)):
+ if hosts[x] == host:
+ states.append('MASTER')
+ priorities.append(100)
+ else:
+ states.append('BACKUP')
+ priorities.append(90)
+
+ # remove the host the daemon is being deployed on from the hosts list
+ # (its address goes in host_ips); the remaining hosts are converted to
+ # IPs for other_ips in the conf file
+ if host in hosts:
+ hosts.remove(host)
+ host_ips: List[str] = []
+ other_ips: List[List[str]] = []
+ interfaces: List[str] = []
+ for vip in virtual_ips:
+ interface, ip = _get_valid_interface_and_ip(vip, host)
+ host_ips.append(ip)
+ interfaces.append(interface)
+ ips: List[str] = []
+ for h in hosts:
+ _, ip = _get_valid_interface_and_ip(vip, h)
+ ips.append(ip)
+ other_ips.append(ips)
+
+ # Use interface as vrrp_interface for vrrp traffic if vrrp_interface_network not set on the spec
+ vrrp_interfaces: List[str] = []
+ if not spec.vrrp_interface_network:
+ vrrp_interfaces = interfaces
+ else:
+ for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items():
+ if subnet == spec.vrrp_interface_network:
+ vrrp_interfaces = [list(ifaces.keys())[0]] * len(interfaces)
+ logger.info(
+ f'vrrp will be configured on {host} interface '
+ f'{vrrp_interfaces} (which is in subnet {subnet})'
+ )
+ break
+ else:
+ raise OrchestratorError(
+ f"Unable to identify vrrp interface for {spec.vrrp_interface_network} on {host}"
+ )
+
+ keepalived_conf = self.mgr.template.render(
+ 'services/ingress/keepalived.conf.j2',
+ {
+ 'spec': spec,
+ 'script': script,
+ 'password': password,
+ 'interfaces': interfaces,
+ 'vrrp_interfaces': vrrp_interfaces,
+ 'virtual_ips': virtual_ips,
+ 'first_virtual_router_id': spec.first_virtual_router_id,
+ 'states': states,
+ 'priorities': priorities,
+ 'other_ips': other_ips,
+ 'host_ips': host_ips,
+ }
+ )
+
+ config_file = {
+ 'files': {
+ "keepalived.conf": keepalived_conf,
+ }
+ }
+
+ return config_file, deps
diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py
new file mode 100644
index 000000000..61b157b44
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/iscsi.py
@@ -0,0 +1,212 @@
+import errno
+import json
+import logging
+import subprocess
+from typing import List, cast, Optional
+from ipaddress import ip_address, IPv6Address
+
+from mgr_module import HandleCommandResult
+from ceph.deployment.service_spec import IscsiServiceSpec
+
+from orchestrator import DaemonDescription, DaemonDescriptionStatus
+from .cephadmservice import CephadmDaemonDeploySpec, CephService
+from .. 
import utils + +logger = logging.getLogger(__name__) + + +class IscsiService(CephService): + TYPE = 'iscsi' + + def config(self, spec: IscsiServiceSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + assert spec.pool + self.mgr._check_pool_exists(spec.pool, spec.service_name()) + + def get_trusted_ips(self, spec: IscsiServiceSpec) -> str: + # add active mgr ip address to trusted list so dashboard can access + trusted_ip_list = spec.trusted_ip_list if spec.trusted_ip_list else '' + mgr_ip = self.mgr.get_mgr_ip() + if mgr_ip not in [s.strip() for s in trusted_ip_list.split(',')]: + if trusted_ip_list: + trusted_ip_list += ',' + trusted_ip_list += mgr_ip + return trusted_ip_list + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + + spec = cast(IscsiServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + igw_id = daemon_spec.daemon_id + + keyring = self.get_keyring_with_caps(self.get_auth_entity(igw_id), + ['mon', 'profile rbd, ' + 'allow command "osd blocklist", ' + 'allow command "config-key get" with "key" prefix "iscsi/"', + 'mgr', 'allow command "service status"', + 'osd', 'allow rwx']) + + if spec.ssl_cert: + if isinstance(spec.ssl_cert, list): + cert_data = '\n'.join(spec.ssl_cert) + else: + cert_data = spec.ssl_cert + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.crt', + 'val': cert_data, + }) + + if spec.ssl_key: + if isinstance(spec.ssl_key, list): + key_data = '\n'.join(spec.ssl_key) + else: + key_data = spec.ssl_key + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.key', + 'val': key_data, + }) + + trusted_ip_list = self.get_trusted_ips(spec) + + context = { + 'client_name': '{}.{}'.format(utils.name_to_config_section('iscsi'), igw_id), + 'trusted_ip_list': trusted_ip_list, + 'spec': spec + } + igw_conf = self.mgr.template.render('services/iscsi/iscsi-gateway.cfg.j2', context) + + daemon_spec.keyring = keyring + daemon_spec.extra_files = {'iscsi-gateway.cfg': igw_conf} + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + daemon_spec.deps = [trusted_ip_list] + return daemon_spec + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + def get_set_cmd_dicts(out: str) -> List[dict]: + gateways = json.loads(out)['gateways'] + cmd_dicts = [] + # TODO: fail, if we don't have a spec + spec = cast(IscsiServiceSpec, + self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None)) + if spec.api_secure and spec.ssl_cert and spec.ssl_key: + cmd_dicts.append({ + 'prefix': 'dashboard set-iscsi-api-ssl-verification', + 'value': "false" + }) + else: + cmd_dicts.append({ + 'prefix': 'dashboard set-iscsi-api-ssl-verification', + 'value': "true" + }) + for dd in daemon_descrs: + assert dd.hostname is not None + # todo: this can fail: + spec = cast(IscsiServiceSpec, + self.mgr.spec_store.all_specs.get(dd.service_name(), None)) + if not spec: + logger.warning('No ServiceSpec found for %s', dd) + continue + ip = utils.resolve_ip(self.mgr.inventory.get_addr(dd.hostname)) + # IPv6 URL encoding requires square brackets enclosing the ip + if type(ip_address(ip)) is IPv6Address: + ip = f'[{ip}]' + protocol = "http" + if spec.api_secure and spec.ssl_cert and spec.ssl_key: + protocol = "https" + service_url = 
'{}://{}:{}@{}:{}'.format(
+ protocol, spec.api_user or 'admin', spec.api_password or 'admin', ip, spec.api_port or '5000')
+ gw = gateways.get(dd.hostname)
+ if not gw or gw['service_url'] != service_url:
+ safe_service_url = '{}://{}:{}@{}:{}'.format(
+ protocol, '<api-user>', '<api-password>', ip, spec.api_port or '5000')
+ logger.info('Adding iSCSI gateway %s to Dashboard', safe_service_url)
+ cmd_dicts.append({
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'inbuf': service_url,
+ 'name': dd.hostname
+ })
+ return cmd_dicts
+
+ self._check_and_set_dashboard(
+ service_name='iSCSI',
+ get_cmd='dashboard iscsi-gateway-list',
+ get_set_cmd_dicts=get_set_cmd_dicts
+ )
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ # if only 1 iscsi, alert user (this is not passable with --force)
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ # if reached here, there is > 1 iscsi daemon. make sure none are down
+ warn_message = (
+ 'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one')
+ iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+ for i in iscsi_daemons:
+ if i.status != DaemonDescriptionStatus.running:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+ warn_message = f'It is presumed safe to stop {names}'
+ return HandleCommandResult(0, warn_message, '')
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ """
+ Called after the daemon is removed.
+ """
+ logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}')
+
+ # remove config for dashboard iscsi gateways
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'dashboard iscsi-gateway-rm',
+ 'name': daemon.hostname,
+ })
+ if not ret:
+ logger.info(f'{daemon.hostname} removed from iscsi gateways dashboard config')
+
+ # needed to know if we have ssl stuff for iscsi in ceph config
+ iscsi_config_dict = {}
+ ret, iscsi_config, err = self.mgr.mon_command({
+ 'prefix': 'config-key dump',
+ 'key': 'iscsi',
+ })
+ if iscsi_config:
+ iscsi_config_dict = json.loads(iscsi_config)
+
+ # remove iscsi cert and key from ceph config
+ for iscsi_key, value in iscsi_config_dict.items():
+ if f'iscsi/client.{daemon.name()}/' in iscsi_key:
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'config-key rm',
+ 'key': iscsi_key,
+ })
+ logger.info(f'{iscsi_key} removed from ceph config')
+
+ def purge(self, service_name: str) -> None:
+ """Removes configuration.
+ """
+ spec = cast(IscsiServiceSpec, self.mgr.spec_store[service_name].spec)
+ try:
+ # remove service configuration from the pool
+ try:
+ subprocess.run(['rados',
+ '-k', str(self.mgr.get_ceph_option('keyring')),
+ '-n', f'mgr.{self.mgr.get_mgr_id()}',
+ '-p', cast(str, spec.pool),
+ 'rm',
+ 'gateway.conf'],
+ check=True,
+ timeout=5)
+ logger.info(f'<gateway.conf> removed from {spec.pool}')
+ except subprocess.CalledProcessError as ex:
+ logger.error(f'Error executing <<{ex.cmd}>>: {ex.output}')
+ except subprocess.TimeoutExpired:
+ logger.error(f'timeout (5s) trying to remove <gateway.conf> from {spec.pool}')
+
+ except Exception:
+ logger.exception(f'failed to purge {service_name}')
diff --git a/src/pybind/mgr/cephadm/services/jaeger.py b/src/pybind/mgr/cephadm/services/jaeger.py
new file mode 100644
index 000000000..c136d20e6
--- /dev/null
+++ 
b/src/pybind/mgr/cephadm/services/jaeger.py @@ -0,0 +1,73 @@ +from typing import List, cast +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec +from ceph.deployment.service_spec import TracingSpec +from mgr_util import build_url + + +class ElasticSearchService(CephadmService): + TYPE = 'elasticsearch' + DEFAULT_SERVICE_PORT = 9200 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + return daemon_spec + + +class JaegerAgentService(CephadmService): + TYPE = 'jaeger-agent' + DEFAULT_SERVICE_PORT = 6799 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + collectors = [] + for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE): + # scrape jaeger-collector nodes + assert dd.hostname is not None + port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT + url = build_url(host=dd.hostname, port=port).lstrip('/') + collectors.append(url) + daemon_spec.final_config = {'collector_nodes': ",".join(collectors)} + return daemon_spec + + +class JaegerCollectorService(CephadmService): + TYPE = 'jaeger-collector' + DEFAULT_SERVICE_PORT = 14250 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + elasticsearch_nodes = get_elasticsearch_nodes(self, daemon_spec) + daemon_spec.final_config = {'elasticsearch_nodes': ",".join(elasticsearch_nodes)} + return daemon_spec + + +class JaegerQueryService(CephadmService): + TYPE = 'jaeger-query' + DEFAULT_SERVICE_PORT = 16686 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + elasticsearch_nodes = get_elasticsearch_nodes(self, daemon_spec) + daemon_spec.final_config = {'elasticsearch_nodes': ",".join(elasticsearch_nodes)} + return daemon_spec + + +def get_elasticsearch_nodes(service: CephadmService, daemon_spec: CephadmDaemonDeploySpec) -> List[str]: + elasticsearch_nodes = [] + for dd in service.mgr.cache.get_daemons_by_type(ElasticSearchService.TYPE): + assert dd.hostname is not None + addr = dd.ip if dd.ip else service.mgr.inventory.get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else ElasticSearchService.DEFAULT_SERVICE_PORT + url = build_url(host=addr, port=port).lstrip('/') + elasticsearch_nodes.append(f'http://{url}') + + if len(elasticsearch_nodes) == 0: + # takes elasticsearch address from TracingSpec + spec: TracingSpec = cast( + TracingSpec, service.mgr.spec_store.active_specs[daemon_spec.service_name]) + assert spec.es_nodes is not None + urls = spec.es_nodes.split(",") + for url in urls: + elasticsearch_nodes.append(f'http://{url}') + + return elasticsearch_nodes diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py new file mode 100644 index 000000000..114c84860 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -0,0 +1,688 @@ +import errno +import ipaddress +import logging +import os +import socket +from typing import List, Any, Tuple, Dict, Optional, cast +from urllib.parse import urlparse + +from mgr_module import HandleCommandResult + +from orchestrator import DaemonDescription +from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ + SNMPGatewaySpec, PrometheusSpec +from cephadm.services.cephadmservice import 
CephadmService, CephadmDaemonDeploySpec +from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash +from ceph.deployment.utils import wrap_ipv6 + +logger = logging.getLogger(__name__) + + +class GrafanaService(CephadmService): + TYPE = 'grafana' + DEFAULT_SERVICE_PORT = 3000 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + deps = [] # type: List[str] + if self.mgr.secure_monitoring_stack and prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + + prom_services = [] # type: List[str] + for dd in self.mgr.cache.get_daemons_by_service('prometheus'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else 9095 + protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + prom_services.append(build_url(scheme=protocol, host=addr, port=port)) + + deps.append(dd.name()) + + daemons = self.mgr.cache.get_daemons_by_service('loki') + loki_host = '' + for i, dd in enumerate(daemons): + assert dd.hostname is not None + if i == 0: + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + loki_host = build_url(scheme='http', host=addr, port=3100) + + deps.append(dd.name()) + + root_cert = self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() + oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()]) + grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2', + {'hosts': prom_services, + 'prometheus_user': prometheus_user, + 'prometheus_password': prometheus_password, + 'cephadm_root_ca': oneline_root_cert, + 'security_enabled': self.mgr.secure_monitoring_stack, + 'loki_host': loki_host}) + + spec: GrafanaSpec = cast( + GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name]) + grafana_ini = self.mgr.template.render( + 'services/grafana/grafana.ini.j2', { + 'anonymous_access': spec.anonymous_access, + 'initial_admin_password': spec.initial_admin_password, + 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT, + 'protocol': spec.protocol, + 'http_addr': daemon_spec.ip if daemon_spec.ip else '' + }) + + if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password: + self.mgr.check_mon_command( + {'prefix': 'dashboard set-grafana-api-password'}, inbuf=spec.initial_admin_password) + + cert, pkey = self.prepare_certificates(daemon_spec) + config_file = { + 'files': { + "grafana.ini": grafana_ini, + 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources, + 'certs/cert_file': '# generated by cephadm\n%s' % cert, + 'certs/cert_key': '# generated by cephadm\n%s' % pkey, + } + } + return config_file, sorted(deps) + + def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: + cert_path = f'{daemon_spec.host}/grafana_crt' + key_path = f'{daemon_spec.host}/grafana_key' + cert = self.mgr.get_store(cert_path) + pkey = 
self.mgr.get_store(key_path) + certs_present = (cert and pkey) + is_valid_certificate = False + (org, cn) = (None, None) + if certs_present: + try: + (org, cn) = get_cert_issuer_info(cert) + verify_tls(cert, pkey) + is_valid_certificate = True + except ServerConfigException as e: + logger.warning(f'Provided grafana TLS certificates are invalid: {e}') + + if is_valid_certificate: + # let's clear health error just in case it was set + self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') + return cert, pkey + + # certificate is not valid, to avoid overwriting user generated + # certificates we only re-generate in case of self signed certificates + # that were originally generated by cephadm or in case cert/key are empty. + if not certs_present or (org == 'Ceph' and cn == 'cephadm'): + logger.info('Regenerating cephadm self-signed grafana TLS certificates') + host_fqdn = socket.getfqdn(daemon_spec.host) + cert, pkey = create_self_signed_cert('Ceph', host_fqdn) + self.mgr.set_store(cert_path, cert) + self.mgr.set_store(key_path, pkey) + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.mgr.check_mon_command({ + 'prefix': 'dashboard set-grafana-api-ssl-verify', + 'value': 'false', + }) + self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') # clear if any + else: + # the certificate was not generated by cephadm, we cannot overwrite + # it by new self-signed ones. Let's warn the user to fix the issue + err_msg = """ + Detected invalid grafana certificates. Set mgr/cephadm/grafana_crt + and mgr/cephadm/grafana_key to valid certificates or reset their value + to an empty string in case you want cephadm to generate self-signed Grafana + certificates. + + Once done, run the following command to reconfig the daemon: + + > ceph orch daemon reconfig <grafana-daemon> + + """ + self.mgr.set_health_warning( + 'CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg]) + + return cert, pkey + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # Use the least-created one as the active daemon + if daemon_descrs: + return daemon_descrs[-1] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + # TODO: signed cert + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec) + service_url = build_url(scheme=spec.protocol, host=addr, port=port) + self._set_service_url_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-url', + 'dashboard set-grafana-api-url', + service_url + ) + + def pre_remove(self, daemon: DaemonDescription) -> None: + """ + Called before grafana daemon is removed. 
+ """ + if daemon.hostname is not None: + # delete cert/key entires for this grafana daemon + cert_path = f'{daemon.hostname}/grafana_crt' + key_path = f'{daemon.hostname}/grafana_key' + self.mgr.set_store(cert_path, None) + self.mgr.set_store(key_path, None) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class AlertmanagerService(CephadmService): + TYPE = 'alertmanager' + DEFAULT_SERVICE_PORT = 9093 + USER_CFG_KEY = 'alertmanager/web_user' + PASS_CFG_KEY = 'alertmanager/web_password' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + default_webhook_urls: List[str] = [] + + spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + try: + secure = spec.secure + except AttributeError: + secure = False + user_data = spec.user_data + if 'default_webhook_urls' in user_data and isinstance( + user_data['default_webhook_urls'], list): + default_webhook_urls.extend(user_data['default_webhook_urls']) + + # dashboard(s) + dashboard_urls: List[str] = [] + snmp_gateway_urls: List[str] = [] + mgr_map = self.mgr.get('mgr_map') + port = None + proto = None # http: or https: + url = mgr_map.get('services', {}).get('dashboard', None) + if url: + p_result = urlparse(url.rstrip('/')) + hostname = socket.getfqdn(p_result.hostname) + + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + pass + else: + if ip.version == 6: + hostname = f'[{hostname}]' + + dashboard_urls.append( + f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}') + proto = p_result.scheme + port = p_result.port + + # scan all mgrs to generate deps and to get standbys too. + # assume that they are all on the same port as the active mgr. + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider mgr a dep even if the dashboard is disabled + # in order to be consistent with _calc_daemon_deps(). 
+ deps.append(dd.name()) + if not port: + continue + if dd.daemon_id == self.mgr.get_mgr_id(): + continue + assert dd.hostname is not None + addr = self._inventory_get_fqdn(dd.hostname) + dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) + + for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): + assert dd.hostname is not None + assert dd.ports + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + deps.append(dd.name()) + + snmp_gateway_urls.append(build_url(scheme='http', host=addr, + port=dd.ports[0], path='/alerts')) + + context = { + 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, + 'dashboard_urls': dashboard_urls, + 'default_webhook_urls': default_webhook_urls, + 'snmp_gateway_urls': snmp_gateway_urls, + 'secure': secure, + } + yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context) + + peers = [] + port = 9094 + for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): + assert dd.hostname is not None + deps.append(dd.name()) + addr = self._inventory_get_fqdn(dd.hostname) + peers.append(build_url(host=addr, port=port).lstrip('/')) + + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + + if self.mgr.secure_monitoring_stack: + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert( + host_fqdn, node_ip) + context = { + 'alertmanager_web_user': alertmanager_user, + 'alertmanager_web_password': password_hash(alertmanager_password), + } + return { + "files": { + "alertmanager.yml": yml, + 'alertmanager.crt': cert, + 'alertmanager.key': key, + 'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context), + 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() + }, + 'peers': peers, + 'web_config': '/etc/alertmanager/web.yml' + }, sorted(deps) + else: + return { + "files": { + "alertmanager.yml": yml + }, + "peers": peers + }, sorted(deps) + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one? 
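
Note how ``generate_config()`` above folds the monitoring credentials into the dependency list as a bare ``hash()``. cephadm redeploys a daemon when its freshly computed dependency list no longer matches the stored one, so rotating a password surfaces as a dependency change without the secret itself being recorded. A rough sketch of the idea (the helper name is hypothetical)::

    from typing import List

    def monitoring_deps(user: str, password: str, secure: bool) -> List[str]:
        # The hash is only stable within the running mgr process (str
        # hashing is salted per process), which is sufficient here: the
        # comparison happens inside the same mgr that stored the value.
        return sorted([
            f'{hash(user + password)}',
            f'secure_monitoring_stack:{secure}',
        ])

    before = monitoring_deps('admin', 'old-secret', True)
    after = monitoring_deps('admin', 'new-secret', True)
    assert before != after  # credential rotation registers as a dep change
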
+ if daemon_descrs: + return daemon_descrs[0] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + service_url = build_url(scheme=protocol, host=addr, port=port) + self._set_service_url_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard set-alertmanager-api-host', + service_url + ) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class PrometheusService(CephadmService): + TYPE = 'prometheus' + DEFAULT_SERVICE_PORT = 9095 + DEFAULT_MGR_PROMETHEUS_PORT = 9283 + USER_CFG_KEY = 'prometheus/web_user' + PASS_CFG_KEY = 'prometheus/web_password' + + def config(self, spec: ServiceSpec) -> None: + # make sure module is enabled + mgr_map = self.mgr.get('mgr_map') + if 'prometheus' not in mgr_map.get('services', {}): + self.mgr.check_mon_command({ + 'prefix': 'mgr module enable', + 'module': 'prometheus' + }) + # we shouldn't get here (mon will tell the mgr to respawn), but no + # harm done if we do. + + def prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> Tuple[Dict[str, Any], List[str]]: + + assert self.TYPE == daemon_spec.daemon_type + spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + try: + retention_time = spec.retention_time if spec.retention_time else '15d' + except AttributeError: + retention_time = '15d' + + try: + retention_size = spec.retention_size if spec.retention_size else '0' + except AttributeError: + # default to disabled + retention_size = '0' + + # build service discovery end-point + port = self.mgr.service_discovery_port + mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip()) + protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?' 
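
Every scrape target below is derived from this single service-discovery endpoint, distinguished only by the ``service=`` query parameter. As a toy illustration, assuming a mgr reachable at 192.168.1.10 and a service discovery port of 8765 (both values are examples, the real ones come from the running mgr)::

    def sd_urls(mgr_addr: str, port: int, services: list) -> dict:
        # One endpoint, one query parameter per service type.
        base = f'http://{mgr_addr}:{port}/sd/prometheus/sd-config?'
        return {s: f'{base}service={s}' for s in services}

    urls = sd_urls('192.168.1.10', 8765, ['mgr-prometheus', 'node-exporter'])
    # urls['node-exporter'] ==
    #   'http://192.168.1.10:8765/sd/prometheus/sd-config?service=node-exporter'
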
+ + node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter')) + alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager')) + haproxy_cnt = len(self.mgr.cache.get_daemons_by_type('ingress')) + node_exporter_sd_url = f'{srv_end_point}service=node-exporter' if node_exporter_cnt > 0 else None + alertmanager_sd_url = f'{srv_end_point}service=alertmanager' if alertmgr_cnt > 0 else None + haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None + mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included + ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included + + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + + # generate the prometheus configuration + context = { + 'alertmanager_web_user': alertmanager_user, + 'alertmanager_web_password': alertmanager_password, + 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, + 'service_discovery_username': self.mgr.http_server.service_discovery.username, + 'service_discovery_password': self.mgr.http_server.service_discovery.password, + 'mgr_prometheus_sd_url': mgr_prometheus_sd_url, + 'node_exporter_sd_url': node_exporter_sd_url, + 'alertmanager_sd_url': alertmanager_sd_url, + 'haproxy_sd_url': haproxy_sd_url, + 'ceph_exporter_sd_url': ceph_exporter_sd_url + } + + web_context = { + 'prometheus_web_user': prometheus_user, + 'prometheus_web_password': password_hash(prometheus_password), + } + + if self.mgr.secure_monitoring_stack: + cfg_key = 'mgr/prometheus/root/cert' + cmd = {'prefix': 'config-key get', 'key': cfg_key} + ret, mgr_prometheus_rootca, err = self.mgr.mon_command(cmd) + if ret != 0: + logger.error(f'mon command to get config-key {cfg_key} failed: {err}') + else: + node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(host_fqdn, node_ip) + r: Dict[str, Any] = { + 'files': { + 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context), + 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(), + 'mgr_prometheus_cert.pem': mgr_prometheus_rootca, + 'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context), + 'prometheus.crt': cert, + 'prometheus.key': key, + }, + 'retention_time': retention_time, + 'retention_size': retention_size, + 'web_config': '/etc/prometheus/web.yml' + } + else: + r = { + 'files': { + 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context) + }, + 'retention_time': retention_time, + 'retention_size': retention_size + } + + # include alerts, if present in the container + if os.path.exists(self.mgr.prometheus_alerts_path): + with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f: + alerts = f.read() + r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts + + # Include custom alerts if present in key value store. This enables + # users to add custom alerts. Write the file in any case, so that if the + # content of the key value store changed, that file is overwritten + # (emptied in case the value has been removed from the key value + # store). This prevents the necessity to adapt the `cephadm` binary to + # remove the file. + # + # Don't use the template engine for it as + # + # 1.
the alerts are always static and + # 2. they are a template themselves for the Go template engine, which + # uses curly braces and escaping that is cumbersome and unnecessary + # for the user. + # + r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \ + self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '') + + return r, sorted(self.calculate_deps()) + + def calculate_deps(self) -> List[str]: + deps = [] # type: List[str] + port = cast(int, self.mgr.get_module_option_ex('prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT)) + deps.append(str(port)) + deps.append(str(self.mgr.service_discovery_port)) + # add an explicit dependency on the active manager. This will force a + # re-deploy of prometheus if the mgr has changed (e.g. due to a fail-over). + deps.append(self.mgr.get_active_mgr().name()) + if self.mgr.secure_monitoring_stack: + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + if prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + # add dependency on ceph-exporter daemons + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')] + deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)] + if len(self.mgr.cache.get_daemons_by_type('ingress')) > 0: + deps.append('ingress') + return deps + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one?
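
The ``custom_alerts.yml`` handling above deliberately writes the file on every (re)config, relying on the empty-string default of ``get_store()``: deleting the key from the key/value store empties the deployed file instead of leaving a stale copy behind. The same pattern in isolation (a plain dict stands in for the mgr key/value store)::

    def render_custom_alerts(store: dict) -> dict:
        files = {}
        # Always emit the file: if the key was removed, an empty file
        # overwrites whatever the daemon had before.
        files['/etc/prometheus/alerting/custom_alerts.yml'] = store.get(
            'services/prometheus/alerting/custom_alerts.yml', '')
        return files

    assert render_custom_alerts({})['/etc/prometheus/alerting/custom_alerts.yml'] == ''
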
+ if daemon_descrs: + return daemon_descrs[0] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + service_url = build_url(scheme=protocol, host=addr, port=port) + self._set_service_url_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-host', + 'dashboard set-prometheus-api-host', + service_url + ) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class NodeExporterService(CephadmService): + TYPE = 'node-exporter' + DEFAULT_SERVICE_PORT = 9100 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps = [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] + if self.mgr.secure_monitoring_stack: + node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert( + host_fqdn, node_ip) + r = { + 'files': { + 'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}), + 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(), + 'node_exporter.crt': cert, + 'node_exporter.key': key, + }, + 'web_config': '/etc/node-exporter/web.yml' + } + else: + r = {} + + return r, deps + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + # since node exporter runs on each host and cannot compromise data, no extra checks required + names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] + out = f'It is presumed safe to stop {names}' + return HandleCommandResult(0, out, '') + + +class LokiService(CephadmService): + TYPE = 'loki' + DEFAULT_SERVICE_PORT = 3100 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + + yml = self.mgr.template.render('services/loki.yml.j2') + return { + "files": { + "loki.yml": yml + } + }, sorted(deps) + + +class PromtailService(CephadmService): + TYPE = 'promtail' + DEFAULT_SERVICE_PORT = 9080 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return 
daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + + daemons = self.mgr.cache.get_daemons_by_service('loki') + loki_host = '' + for i, dd in enumerate(daemons): + assert dd.hostname is not None + if i == 0: + loki_host = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + + deps.append(dd.name()) + + context = { + 'client_hostname': loki_host, + } + + yml = self.mgr.template.render('services/promtail.yml.j2', context) + return { + "files": { + "promtail.yml": yml + } + }, sorted(deps) + + +class SNMPGatewayService(CephadmService): + TYPE = 'snmp-gateway' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + + spec = cast(SNMPGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec) + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + } + if spec.snmp_version == 'V2c': + community = spec.credentials.get('snmp_community', None) + assert community is not None + + config.update({ + "snmp_community": community + }) + else: + # SNMP v3 settings can be either authNoPriv or authPriv + auth_protocol = 'SHA' if not spec.auth_protocol else spec.auth_protocol + + auth_username = spec.credentials.get('snmp_v3_auth_username', None) + auth_password = spec.credentials.get('snmp_v3_auth_password', None) + assert auth_username is not None + assert auth_password is not None + assert spec.engine_id is not None + + config.update({ + "snmp_v3_auth_protocol": auth_protocol, + "snmp_v3_auth_username": auth_username, + "snmp_v3_auth_password": auth_password, + "snmp_v3_engine_id": spec.engine_id, + }) + # authPriv adds encryption + if spec.privacy_protocol: + priv_password = spec.credentials.get('snmp_v3_priv_password', None) + assert priv_password is not None + + config.update({ + "snmp_v3_priv_protocol": spec.privacy_protocol, + "snmp_v3_priv_password": priv_password, + }) + + logger.debug( + f"Generated configuration for '{self.TYPE}' service. 
Dependencies={deps}") + + return config, sorted(deps) diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py new file mode 100644 index 000000000..f94a00f5b --- /dev/null +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -0,0 +1,331 @@ +import errno +import ipaddress +import logging +import os +import subprocess +import tempfile +from typing import Dict, Tuple, Any, List, cast, Optional + +from mgr_module import HandleCommandResult +from mgr_module import NFS_POOL_NAME as POOL_NAME + +from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec + +from orchestrator import DaemonDescription + +from cephadm.services.cephadmservice import AuthEntity, CephadmDaemonDeploySpec, CephService + +logger = logging.getLogger(__name__) + + +class NFSService(CephService): + TYPE = 'nfs' + + def ranked(self) -> bool: + return True + + def fence(self, daemon_id: str) -> None: + logger.info(f'Fencing old nfs.{daemon_id}') + ret, out, err = self.mgr.mon_command({ + 'prefix': 'auth rm', + 'entity': f'client.nfs.{daemon_id}', + }) + + # TODO: block/fence this entity (in case it is still running somewhere) + + def fence_old_ranks(self, + spec: ServiceSpec, + rank_map: Dict[int, Dict[int, Optional[str]]], + num_ranks: int) -> None: + for rank, m in list(rank_map.items()): + if rank >= num_ranks: + for daemon_id in m.values(): + if daemon_id is not None: + self.fence(daemon_id) + del rank_map[rank] + nodeid = f'{spec.service_name()}.{rank}' + self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table') + self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid) + self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map) + else: + max_gen = max(m.keys()) + for gen, daemon_id in list(m.items()): + if gen < max_gen: + if daemon_id is not None: + self.fence(daemon_id) + del rank_map[rank][gen] + self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map) + + def config(self, spec: NFSServiceSpec) -> None: # type: ignore + from nfs.cluster import create_ganesha_pool + + assert self.TYPE == spec.service_type + create_ganesha_pool(self.mgr) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + + daemon_type = daemon_spec.daemon_type + daemon_id = daemon_spec.daemon_id + host = daemon_spec.host + spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + deps: List[str] = [] + + nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}' + + # create the RADOS recovery pool keyring + rados_user = f'{daemon_type}.{daemon_id}' + rados_keyring = self.create_keyring(daemon_spec) + + # ensure rank is known to ganesha + self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table') + self.run_grace_tool(spec, 'add', nodeid) + + # create the rados config object + self.create_rados_config_obj(spec) + + # create the RGW keyring + rgw_user = f'{rados_user}-rgw' + rgw_keyring = self.create_rgw_keyring(daemon_spec) + if spec.virtual_ip: + bind_addr = spec.virtual_ip + else: + bind_addr = daemon_spec.ip if daemon_spec.ip else '' + if not bind_addr: + logger.warning(f'Bind address in {daemon_type}.{daemon_id}\'s ganesha conf is defaulting to empty') + else: + logger.debug("using haproxy bind 
address: %r", bind_addr) + + # generate the ganesha config + def get_ganesha_conf() -> str: + context: Dict[str, Any] = { + "user": rados_user, + "nodeid": nodeid, + "pool": POOL_NAME, + "namespace": spec.service_id, + "rgw_user": rgw_user, + "url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}', + # fall back to default NFS port if not present in daemon_spec + "port": daemon_spec.ports[0] if daemon_spec.ports else 2049, + "bind_addr": bind_addr, + "haproxy_hosts": [], + } + if spec.enable_haproxy_protocol: + context["haproxy_hosts"] = self._haproxy_hosts() + logger.debug("selected haproxy_hosts: %r", context["haproxy_hosts"]) + return self.mgr.template.render('services/nfs/ganesha.conf.j2', context) + + # generate the cephadm config json + def get_cephadm_config() -> Dict[str, Any]: + config: Dict[str, Any] = {} + config['pool'] = POOL_NAME + config['namespace'] = spec.service_id + config['userid'] = rados_user + config['extra_args'] = ['-N', 'NIV_EVENT'] + config['files'] = { + 'ganesha.conf': get_ganesha_conf(), + } + config.update( + self.get_config_and_keyring( + daemon_type, daemon_id, + keyring=rados_keyring, + host=host + ) + ) + config['rgw'] = { + 'cluster': 'ceph', + 'user': rgw_user, + 'keyring': rgw_keyring, + } + logger.debug('Generated cephadm config-json: %s' % config) + return config + + return get_cephadm_config(), deps + + def create_rados_config_obj(self, + spec: NFSServiceSpec, + clobber: bool = False) -> None: + objname = spec.rados_config_name() + cmd = [ + 'rados', + '-n', f"mgr.{self.mgr.get_mgr_id()}", + '-k', str(self.mgr.get_ceph_option('keyring')), + '-p', POOL_NAME, + '--namespace', cast(str, spec.service_id), + ] + result = subprocess.run( + cmd + ['get', objname, '-'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + timeout=10) + if not result.returncode and not clobber: + logger.info('Rados config object exists: %s' % objname) + else: + logger.info('Creating rados config object: %s' % objname) + result = subprocess.run( + cmd + ['put', objname, '-'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + timeout=10) + if result.returncode: + self.mgr.log.warning( + f'Unable to create rados config object {objname}: {result.stderr.decode("utf-8")}' + ) + raise RuntimeError(result.stderr.decode("utf-8")) + + def create_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str: + daemon_id = daemon_spec.daemon_id + spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + entity: AuthEntity = self.get_auth_entity(daemon_id) + + osd_caps = 'allow rw pool=%s namespace=%s' % (POOL_NAME, spec.service_id) + + logger.info('Creating key for %s' % entity) + keyring = self.get_keyring_with_caps(entity, + ['mon', 'allow r', + 'osd', osd_caps]) + + return keyring + + def create_rgw_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str: + daemon_id = daemon_spec.daemon_id + entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw') + + logger.info('Creating key for %s' % entity) + keyring = self.get_keyring_with_caps(entity, + ['mon', 'allow r', + 'osd', 'allow rwx tag rgw *=*']) + + return keyring + + def run_grace_tool(self, + spec: NFSServiceSpec, + action: str, + nodeid: str) -> None: + # write a temp keyring and referencing config file. this is a kludge + # because the ganesha-grace-tool can only authenticate as a client (and + # not a mgr). Also, it doesn't allow you to pass a keyring location via + # the command line, nor does it parse the CEPH_ARGS env var. 
+ tmp_id = f'mgr.nfs.grace.{spec.service_name()}' + entity = AuthEntity(f'client.{tmp_id}') + keyring = self.get_keyring_with_caps( + entity, + ['mon', 'allow r', 'osd', f'allow rwx pool {POOL_NAME}'] + ) + tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring') + os.fchmod(tmp_keyring.fileno(), 0o600) + tmp_keyring.write(keyring) + tmp_keyring.flush() + tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf') + tmp_conf.write(self.mgr.get_minimal_ceph_conf()) + tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n') + tmp_conf.flush() + try: + cmd: List[str] = [ + 'ganesha-rados-grace', + '--cephconf', tmp_conf.name, + '--userid', tmp_id, + '--pool', POOL_NAME, + '--ns', cast(str, spec.service_id), + action, nodeid, + ] + self.mgr.log.debug(cmd) + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + timeout=10) + if result.returncode: + self.mgr.log.warning( + f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}' + ) + raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}') + + finally: + self.mgr.check_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + + def remove_rgw_keyring(self, daemon: DaemonDescription) -> None: + assert daemon.daemon_id is not None + daemon_id: str = daemon.daemon_id + entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw') + + logger.info(f'Removing key for {entity}') + self.mgr.check_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + self.remove_rgw_keyring(daemon) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + # if only 1 nfs, alert user (this is not bypassable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + # if reached here, there is > 1 nfs daemon. + if force: + return HandleCommandResult(0, warn_message, '') + + # if reached here, > 1 nfs daemon and no force flag. + # Provide warning + warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. " + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + def purge(self, service_name: str) -> None: + if service_name not in self.mgr.spec_store: + return + spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec) + + logger.info(f'Removing grace file for {service_name}') + cmd = [ + 'rados', + '-n', f"mgr.{self.mgr.get_mgr_id()}", + '-k', str(self.mgr.get_ceph_option('keyring')), + '-p', POOL_NAME, + '--namespace', cast(str, spec.service_id), + 'rm', 'grace', + ] + subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=10 + ) + + def _haproxy_hosts(self) -> List[str]: + # NB: Ideally, we would limit the list to IPs on hosts running + # haproxy/ingress only, but due to the nature of cephadm today + # we'd "only know the set of haproxy hosts after they've been + # deployed" (quoth @adk7398). As it is today we limit the list to + # hosts we know are managed by cephadm. That ought to be + # good enough to prevent accepting haproxy protocol messages + # from "rogue" systems that are not under our control. At + # least until we learn otherwise.
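
``_haproxy_hosts()`` (below) walks each host's known networks and collects at most one usable address per subnet, skipping loopback and link-local ranges. The subnet filtering on its own, using only the standard library (the sample data is made up)::

    import ipaddress

    def pick_extra_addrs(default_addr: str, nets: dict) -> list:
        # nets maps subnet -> {iface: [addrs]}, as in mgr.cache.networks
        picked = []
        for subnet, ifaces in nets.items():
            net = ipaddress.ip_network(subnet)
            if ipaddress.ip_address(default_addr) in net:
                continue  # the default address already covers this subnet
            if net.is_loopback or net.is_link_local:
                continue  # ignore special subnets
            addrs = sum(ifaces.values(), [])
            if addrs:
                picked.append(addrs[0])  # one address per subnet is enough
        return picked

    assert pick_extra_addrs('10.0.0.5', {
        '10.0.0.0/24': {'eth0': ['10.0.0.5']},        # skipped: default addr
        '127.0.0.0/8': {'lo': ['127.0.0.1']},         # skipped: loopback
        '192.168.50.0/24': {'eth1': ['192.168.50.7']},
    }) == ['192.168.50.7']
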
+ cluster_ips: List[str] = [] + for host in self.mgr.inventory.keys(): + default_addr = self.mgr.inventory.get_addr(host) + cluster_ips.append(default_addr) + nets = self.mgr.cache.networks.get(host) + if not nets: + continue + for subnet, iface in nets.items(): + ip_subnet = ipaddress.ip_network(subnet) + if ipaddress.ip_address(default_addr) in ip_subnet: + continue # already present + if ip_subnet.is_loopback or ip_subnet.is_link_local: + continue # ignore special subnets + addrs: List[str] = sum((addr_list for addr_list in iface.values()), []) + if addrs: + # one address per interface/subnet is enough + cluster_ips.append(addrs[0]) + return cluster_ips diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py new file mode 100644 index 000000000..7d2dd16cf --- /dev/null +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -0,0 +1,93 @@ +import errno +import logging +import json +from typing import List, cast, Optional + +from mgr_module import HandleCommandResult +from ceph.deployment.service_spec import NvmeofServiceSpec + +from orchestrator import DaemonDescription, DaemonDescriptionStatus +from .cephadmservice import CephadmDaemonDeploySpec, CephService +from .. import utils + +logger = logging.getLogger(__name__) + + +class NvmeofService(CephService): + TYPE = 'nvmeof' + + def config(self, spec: NvmeofServiceSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + assert spec.pool + self.mgr._check_pool_exists(spec.pool, spec.service_name()) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + + spec = cast(NvmeofServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + nvmeof_gw_id = daemon_spec.daemon_id + host_ip = self.mgr.inventory.get_addr(daemon_spec.host) + + keyring = self.get_keyring_with_caps(self.get_auth_entity(nvmeof_gw_id), + ['mon', 'profile rbd', + 'osd', 'allow all tag rbd *=*']) + + # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps + transport_tcp_options = json.dumps(spec.transport_tcp_options) if spec.transport_tcp_options else None + name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), nvmeof_gw_id) + rados_id = name[len('client.'):] if name.startswith('client.') else name + context = { + 'spec': spec, + 'name': name, + 'addr': host_ip, + 'port': spec.port, + 'log_level': 'WARN', + 'rpc_socket': '/var/tmp/spdk.sock', + 'transport_tcp_options': transport_tcp_options, + 'rados_id': rados_id + } + gw_conf = self.mgr.template.render('services/nvmeof/ceph-nvmeof.conf.j2', context) + + daemon_spec.keyring = keyring + daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf} + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + daemon_spec.deps = [] + return daemon_spec + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + # TODO: what integration do we need with the dashboard? + pass + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + # if only 1 nvmeof, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Nvmeof', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + # if reached here, there is > 1 nvmeof daemon. make sure none are down + warn_message = ('ALERT: 1 nvmeof daemon is already down. 
Please bring it back up before stopping this one') + nvmeof_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE) + for i in nvmeof_daemons: + if i.status != DaemonDescriptionStatus.running: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] + warn_message = f'It is presumed safe to stop {names}' + return HandleCommandResult(0, warn_message, '') + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + """ + Called after the daemon is removed. + """ + logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + # TODO: remove config for dashboard nvmeof gateways if any + # and any certificates being used for mTLS + + def purge(self, service_name: str) -> None: + """Removes configuration + """ + # TODO: what should we purge in this case (if any)? + pass diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py new file mode 100644 index 000000000..bfecc5723 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -0,0 +1,972 @@ +import json +import logging +from asyncio import gather +from threading import Lock +from typing import List, Dict, Any, Set, Tuple, cast, Optional, TYPE_CHECKING + +from ceph.deployment import translate +from ceph.deployment.drive_group import DriveGroupSpec +from ceph.deployment.drive_selection import DriveSelection +from ceph.deployment.inventory import Device +from ceph.utils import datetime_to_str, str_to_datetime + +from datetime import datetime +import orchestrator +from cephadm.serve import CephadmServe +from cephadm.utils import SpecialHostLabels +from ceph.utils import datetime_now +from orchestrator import OrchestratorError, DaemonDescription +from mgr_module import MonCommandFailed + +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + + +class OSDService(CephService): + TYPE = 'osd' + + def create_from_spec(self, drive_group: DriveGroupSpec) -> str: + logger.debug(f"Processing DriveGroup {drive_group}") + osd_id_claims = OsdIdClaims(self.mgr) + if osd_id_claims.get(): + logger.info( + f"Found osd claims for drivegroup {drive_group.service_id} -> {osd_id_claims.get()}") + + async def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]: + # skip this host if there has been no change in inventory + if not self.mgr.cache.osdspec_needs_apply(host, drive_group): + self.mgr.log.debug("skipping apply of %s on %s (no change)" % ( + host, drive_group)) + return None + # skip this host if we cannot schedule here + if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS): + return None + + osd_id_claims_for_host = osd_id_claims.filtered_by_host(host) + + cmds: List[str] = self.driveselection_to_ceph_volume(drive_selection, + osd_id_claims_for_host) + if not cmds: + logger.debug("No data_devices, skipping DriveGroup: {}".format( + drive_group.service_id)) + return None + + logger.debug('Applying service osd.%s on host %s...' 
% ( + drive_group.service_id, host + )) + start_ts = datetime_now() + env_vars: List[str] = [f"CEPH_VOLUME_OSDSPEC_AFFINITY={drive_group.service_id}"] + ret_msg = await self.create_single_host( + drive_group, host, cmds, + replace_osd_ids=osd_id_claims_for_host, env_vars=env_vars + ) + self.mgr.cache.update_osdspec_last_applied( + host, drive_group.service_name(), start_ts + ) + self.mgr.cache.save_host(host) + return ret_msg + + async def all_hosts() -> List[Optional[str]]: + futures = [create_from_spec_one(h, ds) + for h, ds in self.prepare_drivegroup(drive_group)] + return await gather(*futures) + + with self.mgr.async_timeout_handler('cephadm deploy (osd daemon)'): + ret = self.mgr.wait_async(all_hosts()) + return ", ".join(filter(None, ret)) + + async def create_single_host(self, + drive_group: DriveGroupSpec, + host: str, cmds: List[str], replace_osd_ids: List[str], + env_vars: Optional[List[str]] = None) -> str: + for cmd in cmds: + out, err, code = await self._run_ceph_volume_command(host, cmd, env_vars=env_vars) + if code == 1 and ', it is already prepared' in '\n'.join(err): + # HACK: when we create against an existing LV, ceph-volume + # returns an error and the above message. To make this + # command idempotent, tolerate this "error" and continue. + logger.debug('the device was already prepared; continuing') + code = 0 + if code: + raise RuntimeError( + 'cephadm exited with an error code: %d, stderr:%s' % ( + code, '\n'.join(err))) + return await self.deploy_osd_daemons_for_existing_osds(host, drive_group.service_name(), + replace_osd_ids) + + async def deploy_osd_daemons_for_existing_osds(self, host: str, service_name: str, + replace_osd_ids: Optional[List[str]] = None) -> str: + + if replace_osd_ids is None: + replace_osd_ids = OsdIdClaims(self.mgr).filtered_by_host(host) + assert replace_osd_ids is not None + + # check result: lvm + osds_elems: dict = await CephadmServe(self.mgr)._run_cephadm_json( + host, 'osd', 'ceph-volume', + [ + '--', + 'lvm', 'list', + '--format', 'json', + ]) + before_osd_uuid_map = self.mgr.get_osd_uuid_map(only_up=True) + fsid = self.mgr._cluster_fsid + osd_uuid_map = self.mgr.get_osd_uuid_map() + created = [] + for osd_id, osds in osds_elems.items(): + for osd in osds: + if osd['type'] == 'db': + continue + if osd['tags']['ceph.cluster_fsid'] != fsid: + logger.debug('mismatched fsid, skipping %s' % osd) + continue + if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids: + # if it exists but is part of the replacement operation, don't skip + continue + if self.mgr.cache.has_daemon(f'osd.{osd_id}', host): + # cephadm daemon instance already exists + logger.debug(f'osd id {osd_id} daemon already exists') + continue + if osd_id not in osd_uuid_map: + logger.debug('osd id {} does not exist in cluster'.format(osd_id)) + continue + if osd_uuid_map.get(osd_id) != osd['tags']['ceph.osd_fsid']: + logger.debug('mismatched osd uuid (cluster has %s, osd ' + 'has %s)' % ( + osd_uuid_map.get(osd_id), + osd['tags']['ceph.osd_fsid'])) + continue + + created.append(osd_id) + daemon_spec: CephadmDaemonDeploySpec = CephadmDaemonDeploySpec( + service_name=service_name, + daemon_id=str(osd_id), + host=host, + daemon_type='osd', + ) + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + await CephadmServe(self.mgr)._create_daemon( + daemon_spec, + osd_uuid_map=osd_uuid_map) + + # check result: raw + raw_elems: dict = await CephadmServe(self.mgr)._run_cephadm_json( + host, 'osd', 'ceph-volume', + [ + '--', + 'raw', 'list', + 
'--format', 'json', + ]) + for osd_uuid, osd in raw_elems.items(): + if osd.get('ceph_fsid') != fsid: + continue + osd_id = str(osd.get('osd_id', '-1')) + if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids: + # if it exists but is part of the replacement operation, don't skip + continue + if self.mgr.cache.has_daemon(f'osd.{osd_id}', host): + # cephadm daemon instance already exists + logger.debug(f'osd id {osd_id} daemon already exists') + continue + if osd_id not in osd_uuid_map: + logger.debug('osd id {} does not exist in cluster'.format(osd_id)) + continue + if osd_uuid_map.get(osd_id) != osd_uuid: + logger.debug('mismatched osd uuid (cluster has %s, osd ' + 'has %s)' % (osd_uuid_map.get(osd_id), osd_uuid)) + continue + if osd_id in created: + continue + + created.append(osd_id) + daemon_spec = CephadmDaemonDeploySpec( + service_name=service_name, + daemon_id=osd_id, + host=host, + daemon_type='osd', + ) + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + await CephadmServe(self.mgr)._create_daemon( + daemon_spec, + osd_uuid_map=osd_uuid_map) + + if created: + self.mgr.cache.invalidate_host_devices(host) + self.mgr.cache.invalidate_autotune(host) + return "Created osd(s) %s on host '%s'" % (','.join(created), host) + else: + return "Created no osd(s) on host %s; already created?" % host + + def prepare_drivegroup(self, drive_group: DriveGroupSpec) -> List[Tuple[str, DriveSelection]]: + # 1) use fn_filter to determine matching_hosts + matching_hosts = drive_group.placement.filter_matching_hostspecs( + self.mgr.cache.get_schedulable_hosts()) + # 2) Map the inventory to the InventoryHost object + host_ds_map = [] + + # set osd_id_claims + + def _find_inv_for_host(hostname: str, inventory_dict: dict) -> List[Device]: + # This is stupid and needs to be loaded with the host + for _host, _inventory in inventory_dict.items(): + if _host == hostname: + return _inventory + raise OrchestratorError("No inventory found for host: {}".format(hostname)) + + # 3) iterate over matching_host and call DriveSelection + logger.debug(f"Checking matching hosts -> {matching_hosts}") + for host in matching_hosts: + inventory_for_host = _find_inv_for_host(host, self.mgr.cache.devices) + logger.debug(f"Found inventory for host {inventory_for_host}") + + # List of Daemons on that host + dd_for_spec = self.mgr.cache.get_daemons_by_service(drive_group.service_name()) + dd_for_spec_and_host = [dd for dd in dd_for_spec if dd.hostname == host] + + drive_selection = DriveSelection(drive_group, inventory_for_host, + existing_daemons=len(dd_for_spec_and_host)) + logger.debug(f"Found drive selection {drive_selection}") + if drive_group.method and drive_group.method == 'raw': + # ceph-volume can currently only handle a 1:1 mapping + # of data/db/wal devices for raw mode osds. If db/wal devices + # are defined and the number does not match the number of data + # devices, we need to bail out + if drive_selection.data_devices() and drive_selection.db_devices(): + if len(drive_selection.data_devices()) != len(drive_selection.db_devices()): + raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to db devices. 
Found ' + f'{len(drive_selection.data_devices())} potential data device(s) and ' + f'{len(drive_selection.db_devices())} potential db device(s) on host {host}') + if drive_selection.data_devices() and drive_selection.wal_devices(): + if len(drive_selection.data_devices()) != len(drive_selection.wal_devices()): + raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to wal devices. Found ' + f'{len(drive_selection.data_devices())} potential data device(s) and ' + f'{len(drive_selection.wal_devices())} potential wal device(s) on host {host}') + host_ds_map.append((host, drive_selection)) + return host_ds_map + + @staticmethod + def driveselection_to_ceph_volume(drive_selection: DriveSelection, + osd_id_claims: Optional[List[str]] = None, + preview: bool = False) -> List[str]: + logger.debug(f"Translating DriveGroup <{drive_selection.spec}> to ceph-volume command") + cmds: List[str] = translate.to_ceph_volume(drive_selection, + osd_id_claims, preview=preview).run() + logger.debug(f"Resulting ceph-volume cmds: {cmds}") + return cmds + + def get_previews(self, host: str) -> List[Dict[str, Any]]: + # Find OSDSpecs that match host. + osdspecs = self.resolve_osdspecs_for_host(host) + return self.generate_previews(osdspecs, host) + + def generate_previews(self, osdspecs: List[DriveGroupSpec], for_host: str) -> List[Dict[str, Any]]: + """ + + The return should look like this: + + [ + {'data': {<metadata>}, + 'osdspec': <name of osdspec>, + 'host': <name of host>, + 'notes': <notes> + }, + + {'data': ..., + 'osdspec': .., + 'host': ..., + 'notes': ... + } + ] + + Note: One host can have multiple previews based on its assigned OSDSpecs. + """ + self.mgr.log.debug(f"Generating OSDSpec previews for {osdspecs}") + ret_all: List[Dict[str, Any]] = [] + if not osdspecs: + return ret_all + for osdspec in osdspecs: + + # populate osd_id_claims + osd_id_claims = OsdIdClaims(self.mgr) + + # prepare driveselection + for host, ds in self.prepare_drivegroup(osdspec): + if host != for_host: + continue + + # driveselection for host + cmds: List[str] = self.driveselection_to_ceph_volume(ds, + osd_id_claims.filtered_by_host( + host), + preview=True) + if not cmds: + logger.debug("No data_devices, skipping DriveGroup: {}".format( + osdspec.service_name())) + continue + + # get preview data from ceph-volume + for cmd in cmds: + with self.mgr.async_timeout_handler(host, f'cephadm ceph-volume -- {cmd}'): + out, err, code = self.mgr.wait_async(self._run_ceph_volume_command(host, cmd)) + if out: + try: + concat_out: Dict[str, Any] = json.loads(' '.join(out)) + except ValueError: + logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out)) + concat_out = {} + notes = [] + if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit: + found = len(concat_out) + limit = osdspec.data_devices.limit + notes.append( + f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})') + ret_all.append({'data': concat_out, + 'osdspec': osdspec.service_id, + 'host': host, + 'notes': notes}) + return ret_all + + def resolve_hosts_for_osdspecs(self, + specs: Optional[List[DriveGroupSpec]] = None + ) -> List[str]: + osdspecs = [] + if specs: + osdspecs = [cast(DriveGroupSpec, spec) for spec in specs] + if not osdspecs: + self.mgr.log.debug("No OSDSpecs found") + return [] + return sum([spec.placement.filter_matching_hostspecs(self.mgr.cache.get_schedulable_hosts()) for spec in osdspecs], []) + + def 
resolve_osdspecs_for_host(self, host: str, + specs: Optional[List[DriveGroupSpec]] = None) -> List[DriveGroupSpec]: + matching_specs = [] + self.mgr.log.debug(f"Finding OSDSpecs for host: <{host}>") + if not specs: + specs = [cast(DriveGroupSpec, spec) for (sn, spec) in self.mgr.spec_store.spec_preview.items() + if spec.service_type == 'osd'] + for spec in specs: + if host in spec.placement.filter_matching_hostspecs(self.mgr.cache.get_schedulable_hosts()): + self.mgr.log.debug(f"Found OSDSpecs for host: <{host}> -> <{spec}>") + matching_specs.append(spec) + return matching_specs + + async def _run_ceph_volume_command(self, host: str, + cmd: str, env_vars: Optional[List[str]] = None + ) -> Tuple[List[str], List[str], int]: + self.mgr.inventory.assert_host(host) + + # get bootstrap key + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get', + 'entity': 'client.bootstrap-osd', + }) + + j = json.dumps({ + 'config': self.mgr.get_minimal_ceph_conf(), + 'keyring': keyring, + }) + + split_cmd = cmd.split(' ') + _cmd = ['--config-json', '-', '--'] + _cmd.extend(split_cmd) + out, err, code = await CephadmServe(self.mgr)._run_cephadm( + host, 'osd', 'ceph-volume', + _cmd, + env_vars=env_vars, + stdin=j, + error_ok=True) + return out, err, code + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + # Do not remove the osd.N keyring, if we failed to deploy the OSD, because + # we cannot recover from it. The OSD keys are created by ceph-volume and not by + # us. + if not is_failed_deploy: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + + +class OsdIdClaims(object): + """ + Retrieve and provide osd ids that can be reused in the cluster + """ + + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr: "CephadmOrchestrator" = mgr + self.osd_host_map: Dict[str, List[str]] = dict() + self.refresh() + + def refresh(self) -> None: + try: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'osd tree', + 'states': ['destroyed'], + 'format': 'json' + }) + except MonCommandFailed as e: + logger.exception('osd tree failed') + raise OrchestratorError(str(e)) + try: + tree = json.loads(out) + except ValueError: + logger.exception(f'Cannot decode JSON: \'{out}\'') + return + + nodes = tree.get('nodes', {}) + for node in nodes: + if node.get('type') == 'host': + self.osd_host_map.update( + {node.get('name'): [str(_id) for _id in node.get('children', list())]} + ) + if self.osd_host_map: + self.mgr.log.info(f"Found osd claims -> {self.osd_host_map}") + + def get(self) -> Dict[str, List[str]]: + return self.osd_host_map + + def filtered_by_host(self, host: str) -> List[str]: + """ + Return the list of osd ids that can be reused in a host + + OSD id claims in CRUSH map are linked to the bare name of + the hostname. 
In case of FQDN hostnames the host is searched by the + bare name + """ + return self.osd_host_map.get(host.split(".")[0], []) + + +class RemoveUtil(object): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr: "CephadmOrchestrator" = mgr + + def get_osds_in_cluster(self) -> List[str]: + osd_map = self.mgr.get_osdmap() + return [str(x.get('osd')) for x in osd_map.dump().get('osds', [])] + + def osd_df(self) -> dict: + base_cmd = 'osd df' + ret, out, err = self.mgr.mon_command({ + 'prefix': base_cmd, + 'format': 'json' + }) + try: + return json.loads(out) + except ValueError: + logger.exception(f'Cannot decode JSON: \'{out}\'') + return {} + + def get_pg_count(self, osd_id: int, osd_df: Optional[dict] = None) -> int: + if not osd_df: + osd_df = self.osd_df() + osd_nodes = osd_df.get('nodes', []) + for osd_node in osd_nodes: + if osd_node.get('id') == int(osd_id): + return osd_node.get('pgs', -1) + return -1 + + def find_osd_stop_threshold(self, osds: List["OSD"]) -> Optional[List["OSD"]]: + """ + Cut osd_id list in half until it's ok-to-stop + + :param osds: list of osd_ids + :return: list of ods_ids that can be stopped at once + """ + if not osds: + return [] + while not self.ok_to_stop(osds): + if len(osds) <= 1: + # can't even stop one OSD, aborting + self.mgr.log.debug( + "Can't even stop one OSD. Cluster is probably busy. Retrying later..") + return [] + + # This potentially prolongs the global wait time. + self.mgr.event.wait(1) + # splitting osd_ids in half until ok_to_stop yields success + # maybe popping ids off one by one is better here..depends on the cluster size I guess.. + # There's a lot of room for micro adjustments here + osds = osds[len(osds) // 2:] + return osds + + # todo start draining + # return all([osd.start_draining() for osd in osds]) + + def ok_to_stop(self, osds: List["OSD"]) -> bool: + cmd_args = { + 'prefix': "osd ok-to-stop", + 'ids': [str(osd.osd_id) for osd in osds] + } + return self._run_mon_cmd(cmd_args, error_ok=True) + + def set_osd_flag(self, osds: List["OSD"], flag: str) -> bool: + base_cmd = f"osd {flag}" + self.mgr.log.debug(f"running cmd: {base_cmd} on ids {osds}") + ret, out, err = self.mgr.mon_command({ + 'prefix': base_cmd, + 'ids': [str(osd.osd_id) for osd in osds] + }) + if ret != 0: + self.mgr.log.error(f"Could not set {flag} flag for {osds}. <{err}>") + return False + self.mgr.log.info(f"{','.join([str(o) for o in osds])} now {flag}") + return True + + def get_weight(self, osd: "OSD") -> Optional[float]: + ret, out, err = self.mgr.mon_command({ + 'prefix': 'osd crush tree', + 'format': 'json', + }) + if ret != 0: + self.mgr.log.error(f"Could not dump crush weights. <{err}>") + return None + j = json.loads(out) + for n in j.get("nodes", []): + if n.get("name") == f"osd.{osd.osd_id}": + self.mgr.log.info(f"{osd} crush weight is {n.get('crush_weight')}") + return n.get("crush_weight") + return None + + def reweight_osd(self, osd: "OSD", weight: float) -> bool: + self.mgr.log.debug(f"running cmd: osd crush reweight on {osd}") + ret, out, err = self.mgr.mon_command({ + 'prefix': "osd crush reweight", + 'name': f"osd.{osd.osd_id}", + 'weight': weight, + }) + if ret != 0: + self.mgr.log.error(f"Could not reweight {osd} to {weight}. 
<{err}>") + return False + self.mgr.log.info(f"{osd} weight is now {weight}") + return True + + def zap_osd(self, osd: "OSD") -> str: + "Zaps all devices that are associated with an OSD" + if osd.hostname is not None: + cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if not osd.no_destroy: + cmd.append('--destroy') + with self.mgr.async_timeout_handler(osd.hostname, f'cephadm ceph-volume {" ".join(cmd)}'): + out, err, code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + osd.hostname, 'osd', 'ceph-volume', + cmd, + error_ok=True)) + self.mgr.cache.invalidate_host_devices(osd.hostname) + if code: + raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err)) + return '\n'.join(out + err) + raise OrchestratorError(f"Failed to zap OSD {osd.osd_id} because host was unknown") + + def safe_to_destroy(self, osd_ids: List[int]) -> bool: + """ Queries the safe-to-destroy flag for OSDs """ + cmd_args = {'prefix': 'osd safe-to-destroy', + 'ids': [str(x) for x in osd_ids]} + return self._run_mon_cmd(cmd_args, error_ok=True) + + def destroy_osd(self, osd_id: int) -> bool: + """ Destroys an OSD (forcefully) """ + cmd_args = {'prefix': 'osd destroy-actual', + 'id': int(osd_id), + 'yes_i_really_mean_it': True} + return self._run_mon_cmd(cmd_args) + + def purge_osd(self, osd_id: int) -> bool: + """ Purges an OSD from the cluster (forcefully) """ + cmd_args = { + 'prefix': 'osd purge-actual', + 'id': int(osd_id), + 'yes_i_really_mean_it': True + } + return self._run_mon_cmd(cmd_args) + + def _run_mon_cmd(self, cmd_args: dict, error_ok: bool = False) -> bool: + """ + Generic command to run mon_command and evaluate/log the results + """ + ret, out, err = self.mgr.mon_command(cmd_args) + if ret != 0: + self.mgr.log.debug(f"ran {cmd_args} with mon_command") + if not error_ok: + self.mgr.log.error( + f"cmd: {cmd_args.get('prefix')} failed with: {err}. (errno:{ret})") + return False + self.mgr.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}") + return True + + +class NotFoundError(Exception): + pass + + +class OSD: + + def __init__(self, + osd_id: int, + remove_util: RemoveUtil, + drain_started_at: Optional[datetime] = None, + process_started_at: Optional[datetime] = None, + drain_stopped_at: Optional[datetime] = None, + drain_done_at: Optional[datetime] = None, + draining: bool = False, + started: bool = False, + stopped: bool = False, + replace: bool = False, + force: bool = False, + hostname: Optional[str] = None, + zap: bool = False, + no_destroy: bool = False): + # the ID of the OSD + self.osd_id = osd_id + + # when did process (not the actual draining) start + self.process_started_at = process_started_at + + # when did the drain start + self.drain_started_at = drain_started_at + + # when did the drain stop + self.drain_stopped_at = drain_stopped_at + + # when did the drain finish + self.drain_done_at = drain_done_at + + # did the draining start + self.draining = draining + + # was the operation started + self.started = started + + # was the operation stopped + self.stopped = stopped + + # If this is a replace or remove operation + self.replace = replace + # If we wait for the osd to be drained + self.force = force + # The name of the node + self.hostname = hostname + + # mgr obj to make mgr/mon calls + self.rm_util: RemoveUtil = remove_util + + self.original_weight: Optional[float] = None + + # Whether devices associated with the OSD should be zapped (DATA ERASED) + self.zap = zap + # Whether all associated LV devices should be destroyed. 
+ self.no_destroy = no_destroy + + def start(self) -> None: + if self.started: + logger.debug(f"Already started draining {self}") + return None + self.started = True + self.stopped = False + + def start_draining(self) -> bool: + if self.stopped: + logger.debug(f"Won't start draining {self}. OSD draining is stopped.") + return False + if self.replace: + self.rm_util.set_osd_flag([self], 'out') + else: + self.original_weight = self.rm_util.get_weight(self) + self.rm_util.reweight_osd(self, 0.0) + self.drain_started_at = datetime.utcnow() + self.draining = True + logger.debug(f"Started draining {self}.") + return True + + def stop_draining(self) -> bool: + if self.replace: + self.rm_util.set_osd_flag([self], 'in') + else: + if self.original_weight: + self.rm_util.reweight_osd(self, self.original_weight) + self.drain_stopped_at = datetime.utcnow() + self.draining = False + logger.debug(f"Stopped draining {self}.") + return True + + def stop(self) -> None: + if self.stopped: + logger.debug(f"Already stopped draining {self}") + return None + self.started = False + self.stopped = True + self.stop_draining() + + @property + def is_draining(self) -> bool: + """ + Consider an OSD draining when it is + actively draining but not yet empty + """ + return self.draining and not self.is_empty + + @property + def is_ok_to_stop(self) -> bool: + return self.rm_util.ok_to_stop([self]) + + @property + def is_empty(self) -> bool: + if self.get_pg_count() == 0: + if not self.drain_done_at: + self.drain_done_at = datetime.utcnow() + self.draining = False + return True + return False + + def safe_to_destroy(self) -> bool: + return self.rm_util.safe_to_destroy([self.osd_id]) + + def down(self) -> bool: + return self.rm_util.set_osd_flag([self], 'down') + + def destroy(self) -> bool: + return self.rm_util.destroy_osd(self.osd_id) + + def do_zap(self) -> str: + return self.rm_util.zap_osd(self) + + def purge(self) -> bool: + return self.rm_util.purge_osd(self.osd_id) + + def get_pg_count(self) -> int: + return self.rm_util.get_pg_count(self.osd_id) + + @property + def exists(self) -> bool: + return str(self.osd_id) in self.rm_util.get_osds_in_cluster() + + def drain_status_human(self) -> str: + default_status = 'not started' + status = 'started' if self.started and not self.draining else default_status + status = 'draining' if self.draining else status + status = 'done, waiting for purge' if self.drain_done_at and not self.draining else status + return status + + def pg_count_str(self) -> str: + return 'n/a' if self.get_pg_count() < 0 else str(self.get_pg_count()) + + def to_json(self) -> dict: + out: Dict[str, Any] = dict() + out['osd_id'] = self.osd_id + out['started'] = self.started + out['draining'] = self.draining + out['stopped'] = self.stopped + out['replace'] = self.replace + out['force'] = self.force + out['zap'] = self.zap + out['hostname'] = self.hostname # type: ignore + + for k in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']: + if getattr(self, k): + out[k] = datetime_to_str(getattr(self, k)) + else: + out[k] = getattr(self, k) + return out + + @classmethod + def from_json(cls, inp: Optional[Dict[str, Any]], rm_util: RemoveUtil) -> Optional["OSD"]: + if not inp: + return None + for date_field in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']: + if inp.get(date_field): + inp.update({date_field: str_to_datetime(inp.get(date_field, ''))}) + inp.update({'remove_util': rm_util}) + if 'nodename' in inp: + hostname = inp.pop('nodename') + 
inp['hostname'] = hostname + return cls(**inp) + + def __hash__(self) -> int: + return hash(self.osd_id) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, OSD): + return NotImplemented + return self.osd_id == other.osd_id + + def __repr__(self) -> str: + return f"osd.{self.osd_id}{' (draining)' if self.draining else ''}" + + +class OSDRemovalQueue(object): + + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr: "CephadmOrchestrator" = mgr + self.osds: Set[OSD] = set() + self.rm_util = RemoveUtil(mgr) + + # locks multithreaded access to self.osds. Please avoid locking + # network calls, like mon commands. + self.lock = Lock() + + def process_removal_queue(self) -> None: + """ + Performs actions in the _serve() loop to remove an OSD + when criteria is met. + + we can't hold self.lock, as we're calling _remove_daemon in the loop + """ + + # make sure that we don't run on OSDs that are not in the cluster anymore. + self.cleanup() + + # find osds that are ok-to-stop and not yet draining + ready_to_drain_osds = self._ready_to_drain_osds() + if ready_to_drain_osds: + # start draining those + _ = [osd.start_draining() for osd in ready_to_drain_osds] + + all_osds = self.all_osds() + + logger.debug( + f"{self.queue_size()} OSDs are scheduled " + f"for removal: {all_osds}") + + # Check all osds for their state and take action (remove, purge etc) + new_queue: Set[OSD] = set() + for osd in all_osds: # type: OSD + if not osd.force: + # skip criteria + if not osd.is_empty: + logger.debug(f"{osd} is not empty yet. Waiting a bit more") + new_queue.add(osd) + continue + + if not osd.safe_to_destroy(): + logger.debug( + f"{osd} is not safe-to-destroy yet. Waiting a bit more") + new_queue.add(osd) + continue + + # abort criteria + if not osd.down(): + # also remove it from the remove_osd list and set a health_check warning? + raise orchestrator.OrchestratorError( + f"Could not mark {osd} down") + + # stop and remove daemon + assert osd.hostname is not None + + if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'): + CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname) + logger.info(f"Successfully removed {osd} on {osd.hostname}") + else: + logger.info(f"Daemon {osd} on {osd.hostname} was already removed") + + if osd.replace: + # mark destroyed in osdmap + if not osd.destroy(): + raise orchestrator.OrchestratorError( + f"Could not destroy {osd}") + logger.info( + f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement") + else: + # purge from osdmap + if not osd.purge(): + raise orchestrator.OrchestratorError(f"Could not purge {osd}") + logger.info(f"Successfully purged {osd} on {osd.hostname}") + + if osd.zap: + # throws an exception if the zap fails + logger.info(f"Zapping devices for {osd} on {osd.hostname}") + osd.do_zap() + logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + + logger.debug(f"Removing {osd} from the queue.") + + # self could change while this is processing (osds get added from the CLI) + # The new set is: 'an intersection of all osds that are still not empty/removed (new_queue) and + # osds that were added while this method was executed' + with self.lock: + self.osds.intersection_update(new_queue) + self._save_to_store() + + def cleanup(self) -> None: + # OSDs can always be cleaned up manually. 
This ensures that we run on existing OSDs + with self.lock: + for osd in self._not_in_cluster(): + self.osds.remove(osd) + + def _ready_to_drain_osds(self) -> List["OSD"]: + """ + Returns OSDs that are ok to stop and not yet draining. Only returns as many OSDs as can + be accommodated by the 'max_osd_draining_count' config value, considering the number of OSDs + that are already draining. + """ + draining_limit = max(1, self.mgr.max_osd_draining_count) + num_already_draining = len(self.draining_osds()) + num_to_start_draining = max(0, draining_limit - num_already_draining) + stoppable_osds = self.rm_util.find_osd_stop_threshold(self.idling_osds()) + return [] if stoppable_osds is None else stoppable_osds[:num_to_start_draining] + + def _save_to_store(self) -> None: + osd_queue = [osd.to_json() for osd in self.osds] + logger.debug(f"Saving {osd_queue} to store") + self.mgr.set_store('osd_remove_queue', json.dumps(osd_queue)) + + def load_from_store(self) -> None: + with self.lock: + for k, v in self.mgr.get_store_prefix('osd_remove_queue').items(): + for osd in json.loads(v): + logger.debug(f"Loading osd ->{osd} from store") + osd_obj = OSD.from_json(osd, rm_util=self.rm_util) + if osd_obj is not None: + self.osds.add(osd_obj) + + def as_osd_ids(self) -> List[int]: + with self.lock: + return [osd.osd_id for osd in self.osds] + + def queue_size(self) -> int: + with self.lock: + return len(self.osds) + + def draining_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if osd.is_draining] + + def idling_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if not osd.is_draining and not osd.is_empty] + + def empty_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if osd.is_empty] + + def all_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds] + + def _not_in_cluster(self) -> List["OSD"]: + return [osd for osd in self.osds if not osd.exists] + + def enqueue(self, osd: "OSD") -> None: + if not osd.exists: + raise NotFoundError() + with self.lock: + self.osds.add(osd) + osd.start() + + def rm(self, osd: "OSD") -> None: + if not osd.exists: + raise NotFoundError() + osd.stop() + with self.lock: + try: + logger.debug(f'Removing {osd} from the queue.') + self.osds.remove(osd) + except KeyError: + logger.debug(f"Could not find {osd} in queue.") + raise KeyError + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, OSDRemovalQueue): + return False + with self.lock: + return self.osds == other.osds diff --git a/src/pybind/mgr/cephadm/ssh.py b/src/pybind/mgr/cephadm/ssh.py new file mode 100644 index 000000000..d17cc0fcc --- /dev/null +++ b/src/pybind/mgr/cephadm/ssh.py @@ -0,0 +1,369 @@ +import logging +import os +import asyncio +from tempfile import NamedTemporaryFile +from threading import Thread +from contextlib import contextmanager +from io import StringIO +from shlex import quote +from typing import TYPE_CHECKING, Optional, List, Tuple, Dict, Iterator, TypeVar, Awaitable, Union +from orchestrator import OrchestratorError + +try: + import asyncssh +except ImportError: + asyncssh = None # type: ignore + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + from asyncssh.connection import SSHClientConnection + +T = TypeVar('T') + + +logger = logging.getLogger(__name__) + +asyncssh_logger = logging.getLogger('asyncssh') +asyncssh_logger.propagate = False + + +class HostConnectionError(OrchestratorError): + def __init__(self, message: str, hostname: str, addr: 
str) -> None: + super().__init__(message) + self.hostname = hostname + self.addr = addr + + +DEFAULT_SSH_CONFIG = """ +Host * + User root + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + ConnectTimeout=30 +""" + + +class EventLoopThread(Thread): + + def __init__(self) -> None: + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + + super().__init__(target=self._loop.run_forever) + self.start() + + def get_result(self, coro: Awaitable[T], timeout: Optional[int] = None) -> T: + # useful to note: This "run_coroutine_threadsafe" returns a + # concurrent.futures.Future, rather than an asyncio.Future. They are + # fairly similar but have a few differences, notably in our case + # that the result function of a concurrent.futures.Future accepts + # a timeout argument + future = asyncio.run_coroutine_threadsafe(coro, self._loop) + try: + return future.result(timeout) + except asyncio.TimeoutError: + # try to cancel the task before raising the exception further up + future.cancel() + raise + + +class SSHManager: + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + self.cons: Dict[str, "SSHClientConnection"] = {} + + async def _remote_connection(self, + host: str, + addr: Optional[str] = None, + ) -> "SSHClientConnection": + if not self.cons.get(host) or host not in self.mgr.inventory: + if not addr and host in self.mgr.inventory: + addr = self.mgr.inventory.get_addr(host) + + if not addr: + raise OrchestratorError("host address is empty") + + assert self.mgr.ssh_user + n = self.mgr.ssh_user + '@' + addr + logger.debug("Opening connection to {} with ssh options '{}'".format( + n, self.mgr._ssh_options)) + + asyncssh.set_log_level('DEBUG') + asyncssh.set_debug_level(3) + + with self.redirect_log(host, addr): + try: + ssh_options = asyncssh.SSHClientConnectionOptions( + keepalive_interval=7, keepalive_count_max=3) + conn = await asyncssh.connect(addr, username=self.mgr.ssh_user, client_keys=[self.mgr.tkey.name], + known_hosts=None, config=[self.mgr.ssh_config_fname], + preferred_auth=['publickey'], options=ssh_options) + except OSError: + raise + except asyncssh.Error: + raise + except Exception: + raise + self.cons[host] = conn + + self.mgr.offline_hosts_remove(host) + + return self.cons[host] + + @contextmanager + def redirect_log(self, host: str, addr: str) -> Iterator[None]: + log_string = StringIO() + ch = logging.StreamHandler(log_string) + ch.setLevel(logging.INFO) + asyncssh_logger.addHandler(ch) + + try: + yield + except OSError as e: + self.mgr.offline_hosts.add(host) + log_content = log_string.getvalue() + msg = f"Can't communicate with remote host `{addr}`, possibly because the host is not reachable or python3 is not installed on the host. {str(e)}" + logger.exception(msg) + raise HostConnectionError(msg, host, addr) + except asyncssh.Error as e: + self.mgr.offline_hosts.add(host) + log_content = log_string.getvalue() + msg = f'Failed to connect to {host} ({addr}). 
{str(e)}' + '\n' + f'Log: {log_content}'
+            logger.debug(msg)
+            raise HostConnectionError(msg, host, addr)
+        except Exception as e:
+            self.mgr.offline_hosts.add(host)
+            log_content = log_string.getvalue()
+            logger.exception(str(e))
+            raise HostConnectionError(
+                f'Failed to connect to {host} ({addr}): {repr(e)}' + '\n' f'Log: {log_content}', host, addr)
+        finally:
+            log_string.flush()
+            asyncssh_logger.removeHandler(ch)
+
+    def remote_connection(self,
+                          host: str,
+                          addr: Optional[str] = None,
+                          ) -> "SSHClientConnection":
+        with self.mgr.async_timeout_handler(host, f'ssh {host} (addr {addr})'):
+            return self.mgr.wait_async(self._remote_connection(host, addr))
+
+    async def _execute_command(self,
+                               host: str,
+                               cmd_components: List[str],
+                               stdin: Optional[str] = None,
+                               addr: Optional[str] = None,
+                               log_command: Optional[bool] = True,
+                               ) -> Tuple[str, str, int]:
+
+        conn = await self._remote_connection(host, addr)
+        sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else ""
+        cmd = sudo_prefix + " ".join(quote(x) for x in cmd_components)
+        try:
+            address = addr or self.mgr.inventory.get_addr(host)
+        except Exception:
+            address = host
+        if log_command:
+            logger.debug(f'Running command: {cmd}')
+        try:
+            r = await conn.run(f'{sudo_prefix}true', check=True, timeout=5)  # host quick check
+            r = await conn.run(cmd, input=stdin)
+            # Handle these exceptions explicitly; otherwise you might get a weird error like
+            # TypeError: __init__() missing 1 required positional argument: 'reason'
+            # (due to the asyncssh error interacting with raise_if_exception)
+        except asyncssh.ChannelOpenError as e:
+            # SSH connection closed or broken; a new connection will be created on the next call
+            logger.debug(f'Connection to {host} failed. {str(e)}')
+            await self._reset_con(host)
+            self.mgr.offline_hosts.add(host)
+            raise HostConnectionError(f'Unable to reach remote host {host}. {str(e)}', host, address)
+        except asyncssh.ProcessError as e:
+            msg = f"Cannot execute the command '{cmd}' on host {host}. {str(e.stderr)}."
+            logger.debug(msg)
+            await self._reset_con(host)
+            self.mgr.offline_hosts.add(host)
+            raise HostConnectionError(msg, host, address)
+        except Exception as e:
+            msg = f"Generic error while executing command '{cmd}' on host {host}. {str(e)}."
+            logger.debug(msg)
+            await self._reset_con(host)
+            self.mgr.offline_hosts.add(host)
+            raise HostConnectionError(msg, host, address)
+
+        def _rstrip(v: Union[bytes, str, None]) -> str:
+            if not v:
+                return ''
+            if isinstance(v, str):
+                return v.rstrip('\n')
+            if isinstance(v, bytes):
+                return v.decode().rstrip('\n')
+            raise OrchestratorError(
+                f'Unable to parse ssh output with type {type(v)} from remote host {host}')
+
+        out = _rstrip(r.stdout)
+        err = _rstrip(r.stderr)
+        rc = r.returncode if r.returncode else 0
+
+        return out, err, rc
+
+    def execute_command(self,
+                        host: str,
+                        cmd: List[str],
+                        stdin: Optional[str] = None,
+                        addr: Optional[str] = None,
+                        log_command: Optional[bool] = True
+                        ) -> Tuple[str, str, int]:
+        with self.mgr.async_timeout_handler(host, " ".join(cmd)):
+            return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr, log_command))
+
+    async def _check_execute_command(self,
+                                     host: str,
+                                     cmd: List[str],
+                                     stdin: Optional[str] = None,
+                                     addr: Optional[str] = None,
+                                     log_command: Optional[bool] = True
+                                     ) -> str:
+        out, err, code = await self._execute_command(host, cmd, stdin, addr, log_command)
+        if code != 0:
+            msg = f'Command {cmd} failed. 
{err}' + logger.debug(msg) + raise OrchestratorError(msg) + return out + + def check_execute_command(self, + host: str, + cmd: List[str], + stdin: Optional[str] = None, + addr: Optional[str] = None, + log_command: Optional[bool] = True, + ) -> str: + with self.mgr.async_timeout_handler(host, " ".join(cmd)): + return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr, log_command)) + + async def _write_remote_file(self, + host: str, + path: str, + content: bytes, + mode: Optional[int] = None, + uid: Optional[int] = None, + gid: Optional[int] = None, + addr: Optional[str] = None, + ) -> None: + try: + cephadm_tmp_dir = f"/tmp/cephadm-{self.mgr._cluster_fsid}" + dirname = os.path.dirname(path) + await self._check_execute_command(host, ['mkdir', '-p', dirname], addr=addr) + await self._check_execute_command(host, ['mkdir', '-p', cephadm_tmp_dir + dirname], addr=addr) + tmp_path = cephadm_tmp_dir + path + '.new' + await self._check_execute_command(host, ['touch', tmp_path], addr=addr) + if self.mgr.ssh_user != 'root': + assert self.mgr.ssh_user + await self._check_execute_command(host, ['chown', '-R', self.mgr.ssh_user, cephadm_tmp_dir], addr=addr) + await self._check_execute_command(host, ['chmod', str(644), tmp_path], addr=addr) + with NamedTemporaryFile(prefix='cephadm-write-remote-file-') as f: + os.fchmod(f.fileno(), 0o600) + f.write(content) + f.flush() + conn = await self._remote_connection(host, addr) + async with conn.start_sftp_client() as sftp: + await sftp.put(f.name, tmp_path) + if uid is not None and gid is not None and mode is not None: + # shlex quote takes str or byte object, not int + await self._check_execute_command(host, ['chown', '-R', str(uid) + ':' + str(gid), tmp_path], addr=addr) + await self._check_execute_command(host, ['chmod', oct(mode)[2:], tmp_path], addr=addr) + await self._check_execute_command(host, ['mv', tmp_path, path], addr=addr) + except Exception as e: + msg = f"Unable to write {host}:{path}: {e}" + logger.exception(msg) + raise OrchestratorError(msg) + + def write_remote_file(self, + host: str, + path: str, + content: bytes, + mode: Optional[int] = None, + uid: Optional[int] = None, + gid: Optional[int] = None, + addr: Optional[str] = None, + ) -> None: + with self.mgr.async_timeout_handler(host, f'writing file {path}'): + self.mgr.wait_async(self._write_remote_file( + host, path, content, mode, uid, gid, addr)) + + async def _reset_con(self, host: str) -> None: + conn = self.cons.get(host) + if conn: + logger.debug(f'_reset_con close {host}') + conn.close() + del self.cons[host] + + def reset_con(self, host: str) -> None: + with self.mgr.async_timeout_handler(cmd=f'resetting ssh connection to {host}'): + self.mgr.wait_async(self._reset_con(host)) + + def _reset_cons(self) -> None: + for host, conn in self.cons.items(): + logger.debug(f'_reset_cons close {host}') + conn.close() + self.cons = {} + + def _reconfig_ssh(self) -> None: + temp_files = [] # type: list + ssh_options = [] # type: List[str] + + # ssh_config + self.mgr.ssh_config_fname = self.mgr.ssh_config_file + ssh_config = self.mgr.get_store("ssh_config") + if ssh_config is not None or self.mgr.ssh_config_fname is None: + if not ssh_config: + ssh_config = DEFAULT_SSH_CONFIG + f = NamedTemporaryFile(prefix='cephadm-conf-') + os.fchmod(f.fileno(), 0o600) + f.write(ssh_config.encode('utf-8')) + f.flush() # make visible to other processes + temp_files += [f] + self.mgr.ssh_config_fname = f.name + if self.mgr.ssh_config_fname: + 
self.mgr.validate_ssh_config_fname(self.mgr.ssh_config_fname) + ssh_options += ['-F', self.mgr.ssh_config_fname] + self.mgr.ssh_config = ssh_config + + # identity + ssh_key = self.mgr.get_store("ssh_identity_key") + ssh_pub = self.mgr.get_store("ssh_identity_pub") + ssh_cert = self.mgr.get_store("ssh_identity_cert") + self.mgr.ssh_pub = ssh_pub + self.mgr.ssh_key = ssh_key + self.mgr.ssh_cert = ssh_cert + if ssh_key: + self.mgr.tkey = NamedTemporaryFile(prefix='cephadm-identity-') + self.mgr.tkey.write(ssh_key.encode('utf-8')) + os.fchmod(self.mgr.tkey.fileno(), 0o600) + self.mgr.tkey.flush() # make visible to other processes + temp_files += [self.mgr.tkey] + if ssh_pub: + tpub = open(self.mgr.tkey.name + '.pub', 'w') + os.fchmod(tpub.fileno(), 0o600) + tpub.write(ssh_pub) + tpub.flush() # make visible to other processes + temp_files += [tpub] + if ssh_cert: + tcert = open(self.mgr.tkey.name + '-cert.pub', 'w') + os.fchmod(tcert.fileno(), 0o600) + tcert.write(ssh_cert) + tcert.flush() # make visible to other processes + temp_files += [tcert] + ssh_options += ['-i', self.mgr.tkey.name] + + self.mgr._temp_files = temp_files + if ssh_options: + self.mgr._ssh_options = ' '.join(ssh_options) + else: + self.mgr._ssh_options = None + + if self.mgr.mode == 'root': + self.mgr.ssh_user = self.mgr.get_store('ssh_user', default='root') + elif self.mgr.mode == 'cephadm-package': + self.mgr.ssh_user = 'cephadm' + + self._reset_cons() diff --git a/src/pybind/mgr/cephadm/ssl_cert_utils.py b/src/pybind/mgr/cephadm/ssl_cert_utils.py new file mode 100644 index 000000000..fcc6f00ea --- /dev/null +++ b/src/pybind/mgr/cephadm/ssl_cert_utils.py @@ -0,0 +1,156 @@ + +from typing import Any, Tuple, IO +import ipaddress +import tempfile +import logging + +from datetime import datetime, timedelta +from cryptography import x509 +from cryptography.x509.oid import NameOID +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives import hashes, serialization +from cryptography.hazmat.backends import default_backend +from mgr_util import verify_tls_files + +from orchestrator import OrchestratorError + + +logger = logging.getLogger(__name__) + + +class SSLConfigException(Exception): + pass + + +class SSLCerts: + def __init__(self) -> None: + self.root_cert: Any + self.root_key: Any + self.key_file: IO[bytes] + self.cert_file: IO[bytes] + + def generate_root_cert(self, addr: str) -> Tuple[str, str]: + self.root_key = rsa.generate_private_key( + public_exponent=65537, key_size=4096, backend=default_backend()) + root_public_key = self.root_key.public_key() + root_builder = x509.CertificateBuilder() + root_builder = root_builder.subject_name(x509.Name([ + x509.NameAttribute(NameOID.COMMON_NAME, u'cephadm-root'), + ])) + root_builder = root_builder.issuer_name(x509.Name([ + x509.NameAttribute(NameOID.COMMON_NAME, u'cephadm-root'), + ])) + root_builder = root_builder.not_valid_before(datetime.now()) + root_builder = root_builder.not_valid_after(datetime.now() + timedelta(days=(365 * 10 + 3))) + root_builder = root_builder.serial_number(x509.random_serial_number()) + root_builder = root_builder.public_key(root_public_key) + root_builder = root_builder.add_extension( + x509.SubjectAlternativeName( + [x509.IPAddress(ipaddress.IPv4Address(addr))] + ), + critical=False + ) + root_builder = root_builder.add_extension( + x509.BasicConstraints(ca=True, path_length=None), critical=True, + ) + + self.root_cert = root_builder.sign( + private_key=self.root_key, algorithm=hashes.SHA256(), 
backend=default_backend() + ) + + cert_str = self.root_cert.public_bytes(encoding=serialization.Encoding.PEM).decode('utf-8') + key_str = self.root_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption() + ).decode('utf-8') + + return (cert_str, key_str) + + def generate_cert(self, host: str, addr: str) -> Tuple[str, str]: + have_ip = True + try: + ip = x509.IPAddress(ipaddress.IPv4Address(addr)) + except Exception: + try: + ip = x509.IPAddress(ipaddress.IPv6Address(addr)) + except Exception: + have_ip = False + + private_key = rsa.generate_private_key( + public_exponent=65537, key_size=4096, backend=default_backend()) + public_key = private_key.public_key() + + builder = x509.CertificateBuilder() + builder = builder.subject_name(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, addr), ])) + builder = builder.issuer_name( + x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, u'cephadm-root'), ])) + builder = builder.not_valid_before(datetime.now()) + builder = builder.not_valid_after(datetime.now() + timedelta(days=(365 * 10 + 3))) + builder = builder.serial_number(x509.random_serial_number()) + builder = builder.public_key(public_key) + if have_ip: + builder = builder.add_extension( + x509.SubjectAlternativeName( + [ip, x509.DNSName(host)] + ), + critical=False + ) + else: + builder = builder.add_extension( + x509.SubjectAlternativeName( + [x509.DNSName(host)] + ), + critical=False + ) + builder = builder.add_extension(x509.BasicConstraints( + ca=False, path_length=None), critical=True,) + + cert = builder.sign(private_key=self.root_key, + algorithm=hashes.SHA256(), backend=default_backend()) + cert_str = cert.public_bytes(encoding=serialization.Encoding.PEM).decode('utf-8') + key_str = private_key.private_bytes(encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption() + ).decode('utf-8') + + return (cert_str, key_str) + + def generate_cert_files(self, host: str, addr: str) -> Tuple[str, str]: + cert, key = self.generate_cert(host, addr) + + self.cert_file = tempfile.NamedTemporaryFile() + self.cert_file.write(cert.encode('utf-8')) + self.cert_file.flush() # cert_tmp must not be gc'ed + + self.key_file = tempfile.NamedTemporaryFile() + self.key_file.write(key.encode('utf-8')) + self.key_file.flush() # pkey_tmp must not be gc'ed + + verify_tls_files(self.cert_file.name, self.key_file.name) + return self.cert_file.name, self.key_file.name + + def get_root_cert(self) -> str: + try: + return self.root_cert.public_bytes(encoding=serialization.Encoding.PEM).decode('utf-8') + except AttributeError: + return '' + + def get_root_key(self) -> str: + try: + return self.root_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption(), + ).decode('utf-8') + except AttributeError: + return '' + + def load_root_credentials(self, cert: str, priv_key: str) -> None: + given_cert = x509.load_pem_x509_certificate(cert.encode('utf-8'), backend=default_backend()) + tz = given_cert.not_valid_after.tzinfo + if datetime.now(tz) >= given_cert.not_valid_after: + raise OrchestratorError('Given cert is expired') + self.root_cert = given_cert + self.root_key = serialization.load_pem_private_key( + data=priv_key.encode('utf-8'), backend=default_backend(), password=None) diff --git a/src/pybind/mgr/cephadm/template.py 
b/src/pybind/mgr/cephadm/template.py new file mode 100644 index 000000000..0d62e587c --- /dev/null +++ b/src/pybind/mgr/cephadm/template.py @@ -0,0 +1,109 @@ +import copy +from typing import Optional, TYPE_CHECKING + +from jinja2 import Environment, PackageLoader, select_autoescape, StrictUndefined +from jinja2 import exceptions as j2_exceptions + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + + +class TemplateError(Exception): + pass + + +class UndefinedError(TemplateError): + pass + + +class TemplateNotFoundError(TemplateError): + pass + + +class TemplateEngine: + def render(self, name: str, context: Optional[dict] = None) -> str: + raise NotImplementedError() + + +class Jinja2Engine(TemplateEngine): + def __init__(self) -> None: + self.env = Environment( + loader=PackageLoader('cephadm', 'templates'), + autoescape=select_autoescape(['html', 'xml'], default_for_string=False), + trim_blocks=True, + lstrip_blocks=True, + undefined=StrictUndefined + ) + + def render(self, name: str, context: Optional[dict] = None) -> str: + try: + template = self.env.get_template(name) + if context is None: + return template.render() + return template.render(context) + except j2_exceptions.UndefinedError as e: + raise UndefinedError(e.message) + except j2_exceptions.TemplateNotFound as e: + raise TemplateNotFoundError(e.message) + + def render_plain(self, source: str, context: Optional[dict]) -> str: + try: + template = self.env.from_string(source) + if context is None: + return template.render() + return template.render(context) + except j2_exceptions.UndefinedError as e: + raise UndefinedError(e.message) + except j2_exceptions.TemplateNotFound as e: + raise TemplateNotFoundError(e.message) + + +class TemplateMgr: + def __init__(self, mgr: "CephadmOrchestrator"): + self.engine = Jinja2Engine() + self.base_context = { + 'cephadm_managed': 'This file is generated by cephadm.' + } + self.mgr = mgr + + def render(self, name: str, + context: Optional[dict] = None, + managed_context: bool = True, + host: Optional[str] = None) -> str: + """Render a string from a template with context. + + :param name: template name. e.g. services/nfs/ganesha.conf.j2 + :type name: str + :param context: a dictionary that contains values to be used in the template, defaults + to None + :type context: Optional[dict], optional + :param managed_context: to inject default context like managed header or not, defaults + to True + :type managed_context: bool, optional + :param host: The host name used to build the key to access + the module's persistent key-value store. + :type host: Optional[str], optional + :return: the templated string + :rtype: str + """ + ctx = {} + if managed_context: + ctx = copy.deepcopy(self.base_context) + if context is not None: + ctx = {**ctx, **context} + + # Check if the given name exists in the module's persistent + # key-value store, e.g. 
+ # - blink_device_light_cmd + # - <host>/blink_device_light_cmd + # - services/nfs/ganesha.conf + store_name = name.rstrip('.j2') + custom_template = self.mgr.get_store(store_name, None) + if host and custom_template is None: + store_name = '{}/{}'.format(host, store_name) + custom_template = self.mgr.get_store(store_name, None) + + if custom_template: + return self.engine.render_plain(custom_template, ctx) + else: + return self.engine.render(name, ctx) diff --git a/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 new file mode 100644 index 000000000..dab115833 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 @@ -0,0 +1 @@ +lsmcli local-disk-{{ ident_fault }}-led-{{'on' if on else 'off'}} --path '{{ path or dev }}' diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 new file mode 100644 index 000000000..b34a1fc17 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 @@ -0,0 +1,51 @@ +# {{ cephadm_managed }} +# See https://prometheus.io/docs/alerting/configuration/ for documentation. + +global: + resolve_timeout: 5m +{% if not secure %} + http_config: + tls_config: +{% if secure_monitoring_stack %} + ca_file: root_cert.pem +{% else %} + insecure_skip_verify: true +{% endif %} +{% endif %} + +route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' +{% if snmp_gateway_urls %} + continue: true + - receiver: 'snmp-gateway' + repeat_interval: 1h + group_interval: 10s + group_by: ['alertname'] + match_re: + oid: "(1.3.6.1.4.1.50495.).*" +{% endif %} + +receivers: +- name: 'default' + webhook_configs: +{% for url in default_webhook_urls %} + - url: '{{ url }}' +{% endfor %} +- name: 'ceph-dashboard' + webhook_configs: +{% for url in dashboard_urls %} + - url: '{{ url }}/api/prometheus_receiver' +{% endfor %} +{% if snmp_gateway_urls %} +- name: 'snmp-gateway' + webhook_configs: +{% for url in snmp_gateway_urls %} + - url: '{{ url }}' +{% endfor %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 new file mode 100644 index 000000000..ef4f0b4c7 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 @@ -0,0 +1,5 @@ +tls_server_config: + cert_file: alertmanager.crt + key_file: alertmanager.key +basic_auth_users: + {{ alertmanager_web_user }}: {{ alertmanager_web_password }} diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 new file mode 100644 index 000000000..46aea864f --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 @@ -0,0 +1,39 @@ +# {{ cephadm_managed }} +apiVersion: 1 + +deleteDatasources: +{% for host in hosts %} + - name: 'Dashboard{{ loop.index }}' + orgId: 1 +{% endfor %} + +datasources: +{% for host in hosts %} + - name: 'Dashboard{{ loop.index }}' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: '{{ host }}' + basicAuth: {{ 'true' if security_enabled else 'false' }} + isDefault: {{ 'true' if loop.first else 'false' }} + editable: false +{% if security_enabled %} + basicAuthUser: {{ prometheus_user }} + jsonData: + graphiteVersion: "1.1" + 
tlsAuth: false + tlsAuthWithCACert: true + tlsSkipVerify: false + secureJsonData: + basicAuthPassword: {{ prometheus_password }} + tlsCACert: "{{ cephadm_root_ca }}" +{% endif %} +{% endfor %} + + - name: 'Loki' + type: 'loki' + access: 'proxy' + url: '{{ loki_host }}' + basicAuth: false + isDefault: false + editable: false diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 new file mode 100644 index 000000000..e6c7bce15 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -0,0 +1,28 @@ +# {{ cephadm_managed }} +[users] + default_theme = light +{% if anonymous_access %} +[auth.anonymous] + enabled = true + org_name = 'Main Org.' + org_role = 'Viewer' +{% endif %} +[server] + domain = 'bootstrap.storage.lab' + protocol = {{ protocol }} + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = {{ http_port }} + http_addr = {{ http_addr }} +[snapshots] + external_enabled = false +[security] +{% if not initial_admin_password %} + disable_initial_admin_creation = true +{% else %} + admin_user = admin + admin_password = {{ initial_admin_password }} +{% endif %} + cookie_secure = true + cookie_samesite = none + allow_embedding = true diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 new file mode 100644 index 000000000..100acce40 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 @@ -0,0 +1,90 @@ +# {{ cephadm_managed }} +global + log 127.0.0.1 local2 + chroot /var/lib/haproxy + pidfile /var/lib/haproxy/haproxy.pid + maxconn 8000 + daemon + stats socket /var/lib/haproxy/stats +{% if spec.ssl_cert %} + {% if spec.ssl_dh_param %} + tune.ssl.default-dh-param {{ spec.ssl_dh_param }} + {% endif %} + {% if spec.ssl_ciphers %} + ssl-default-bind-ciphers {{ spec.ssl_ciphers | join(':') }} + {% endif %} + {% if spec.ssl_options %} + ssl-default-bind-options {{ spec.ssl_options | join(' ') }} + {% endif %} +{% endif %} + +defaults + mode {{ mode }} + log global +{% if mode == 'http' %} + option httplog + option dontlognull + option http-server-close + option forwardfor except 127.0.0.0/8 + option redispatch + retries 3 + timeout queue 20s + timeout connect 5s + timeout http-request 1s + timeout http-keep-alive 5s + timeout client 30s + timeout server 30s + timeout check 5s +{% endif %} +{% if mode == 'tcp' %} + timeout queue 1m + timeout connect 10s + timeout client 1m + timeout server 1m + timeout check 10s +{% endif %} + maxconn 8000 + +frontend stats + mode http + bind {{ ip }}:{{ monitor_port }} + bind {{ local_host_ip }}:{{ monitor_port }} + stats enable + stats uri /stats + stats refresh 10s + stats auth {{ user }}:{{ password }} + http-request use-service prometheus-exporter if { path /metrics } + monitor-uri /health + +frontend frontend +{% if spec.ssl_cert %} + bind {{ ip }}:{{ frontend_port }} ssl crt /var/lib/haproxy/haproxy.pem +{% else %} + bind {{ ip }}:{{ frontend_port }} +{% endif %} + default_backend backend + +backend backend +{% if mode == 'http' %} + option forwardfor +{% if backend_spec.ssl %} + default-server ssl + default-server verify none +{% endif %} + balance static-rr + option httpchk HEAD / HTTP/1.0 + {% for server in servers %} + server {{ server.name }} {{ server.ip }}:{{ server.port }} check weight 100 + {% endfor %} +{% endif %} +{% if mode == 'tcp' %} + mode tcp + balance source + 
hash-type consistent +{% if default_server_opts %} + default-server {{ default_server_opts|join(" ") }} +{% endif %} + {% for server in servers %} + server {{ server.name }} {{ server.ip }}:{{ server.port }} + {% endfor %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 new file mode 100644 index 000000000..e19f556c6 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 @@ -0,0 +1,36 @@ +# {{ cephadm_managed }} +vrrp_script check_backend { + script "{{ script }}" + weight -20 + interval 2 + rise 2 + fall 2 +} + +{% for x in range(virtual_ips|length) %} +vrrp_instance VI_{{ x }} { + state {{ states[x] }} + priority {{ priorities[x] }} + interface {{ vrrp_interfaces[x] }} + virtual_router_id {{ first_virtual_router_id + x }} + advert_int 1 + authentication { + auth_type PASS + auth_pass {{ password }} + } +{% if not spec.use_keepalived_multicast %} + unicast_src_ip {{ host_ips[x] }} + unicast_peer { + {% for ip in other_ips[x] %} + {{ ip }} + {% endfor %} + } +{% endif %} + virtual_ipaddress { + {{ virtual_ips[x] }} dev {{ interfaces[x] }} + } + track_script { + check_backend + } +} +{% endfor %} diff --git a/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 new file mode 100644 index 000000000..c2582ace7 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 @@ -0,0 +1,13 @@ +# {{ cephadm_managed }} +[config] +cluster_client_name = {{ client_name }} +pool = {{ spec.pool }} +trusted_ip_list = {{ trusted_ip_list|default("''", true) }} +minimum_gateways = 1 +api_port = {{ spec.api_port|default("''", true) }} +api_user = {{ spec.api_user|default("''", true) }} +api_password = {{ spec.api_password|default("''", true) }} +api_secure = {{ spec.api_secure|default('False', true) }} +log_to_stderr = True +log_to_stderr_prefix = debug +log_to_file = False diff --git a/src/pybind/mgr/cephadm/templates/services/loki.yml.j2 b/src/pybind/mgr/cephadm/templates/services/loki.yml.j2 new file mode 100644 index 000000000..271437231 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/loki.yml.j2 @@ -0,0 +1,28 @@ +# {{ cephadm_managed }} +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 8080 + +common: + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h diff --git a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 new file mode 100644 index 000000000..ab8df7192 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 @@ -0,0 +1,38 @@ +# {{ cephadm_managed }} +NFS_CORE_PARAM { + Enable_NLM = false; + Enable_RQUOTA = false; + Protocols = 4; + NFS_Port = {{ port }}; +{% if bind_addr %} + Bind_addr = {{ bind_addr }}; +{% endif %} +{% if haproxy_hosts %} + HAProxy_Hosts = {{ haproxy_hosts|join(", ") }}; +{% endif %} +} + +NFSv4 { + Delegations = false; + RecoveryBackend = 'rados_cluster'; + Minor_Versions = 1, 2; +} + +RADOS_KV { + UserId = "{{ user }}"; + nodeid = "{{ nodeid }}"; + pool = "{{ pool 
}}"; + namespace = "{{ namespace }}"; +} + +RADOS_URLS { + UserId = "{{ user }}"; + watch_url = "{{ url }}"; +} + +RGW { + cluster = "ceph"; + name = "client.{{ rgw_user }}"; +} + +%url {{ url }} diff --git a/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 new file mode 100644 index 000000000..1c1220345 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 @@ -0,0 +1,3 @@ +tls_server_config: + cert_file: node_exporter.crt + key_file: node_exporter.key diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 new file mode 100644 index 000000000..69b8332cd --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -0,0 +1,34 @@ +# {{ cephadm_managed }} +[gateway] +name = {{ name }} +group = {{ spec.group }} +addr = {{ addr }} +port = {{ port }} +enable_auth = {{ spec.enable_auth }} +state_update_notify = True +state_update_interval_sec = 5 + +[ceph] +pool = {{ spec.pool }} +config_file = /etc/ceph/ceph.conf +id = {{ rados_id }} + +[mtls] +server_key = {{ spec.server_key }} +client_key = {{ spec.client_key }} +server_cert = {{ spec.server_cert }} +client_cert = {{ spec.client_cert }} + +[spdk] +tgt_path = {{ spec.tgt_path }} +rpc_socket = {{ rpc_socket }} +timeout = {{ spec.timeout }} +log_level = {{ log_level }} +conn_retries = {{ spec.conn_retries }} +transports = {{ spec.transports }} +{% if transport_tcp_options %} +transport_tcp_options = {{ transport_tcp_options }} +{% endif %} +{% if spec.tgt_cmd_extra_args %} +tgt_cmd_extra_args = {{ spec.tgt_cmd_extra_args }} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 new file mode 100644 index 000000000..b56843994 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -0,0 +1,109 @@ +# {{ cephadm_managed }} +global: + scrape_interval: 10s + evaluation_interval: 10s +rule_files: + - /etc/prometheus/alerting/* + +{% if alertmanager_sd_url %} +alerting: + alertmanagers: +{% if secure_monitoring_stack %} + - scheme: https + basic_auth: + username: {{ alertmanager_web_user }} + password: {{ alertmanager_web_password }} + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ alertmanager_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + - scheme: http + http_sd_configs: + - url: {{ alertmanager_sd_url }} +{% endif %} +{% endif %} + +scrape_configs: + - job_name: 'ceph' +{% if secure_monitoring_stack %} + scheme: https + tls_config: + ca_file: mgr_prometheus_cert.pem + honor_labels: true + http_sd_configs: + - url: {{ mgr_prometheus_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + honor_labels: true + http_sd_configs: + - url: {{ mgr_prometheus_sd_url }} +{% endif %} + +{% if node_exporter_sd_url %} + - job_name: 'node' +{% if secure_monitoring_stack %} + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ node_exporter_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem 
+{% else %} + http_sd_configs: + - url: {{ node_exporter_sd_url }} +{% endif %} +{% endif %} + +{% if haproxy_sd_url %} + - job_name: 'haproxy' +{% if secure_monitoring_stack %} + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ haproxy_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + http_sd_configs: + - url: {{ haproxy_sd_url }} +{% endif %} +{% endif %} + +{% if ceph_exporter_sd_url %} + - job_name: 'ceph-exporter' +{% if secure_monitoring_stack %} + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ ceph_exporter_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + honor_labels: true + http_sd_configs: + - url: {{ ceph_exporter_sd_url }} +{% endif %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 new file mode 100644 index 000000000..da3c3d724 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 @@ -0,0 +1,5 @@ +tls_server_config: + cert_file: prometheus.crt + key_file: prometheus.key +basic_auth_users: + {{ prometheus_web_user }}: {{ prometheus_web_password }} diff --git a/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 b/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 new file mode 100644 index 000000000..5ce7a3103 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 @@ -0,0 +1,17 @@ +# {{ cephadm_managed }} +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://{{ client_hostname }}:3100/loki/api/v1/push + +scrape_configs: +- job_name: system + static_configs: + - labels: + job: Cluster Logs + __path__: /var/log/ceph/**/*.log
\ No newline at end of file diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/__init__.py diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py new file mode 100644 index 000000000..e8add2c7b --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + +from cephadm.services.osd import RemoveUtil, OSD +from tests import mock + +from .fixtures import with_cephadm_module + + +@pytest.fixture() +def cephadm_module(): + with with_cephadm_module({}) as m: + yield m + + +@pytest.fixture() +def rm_util(): + with with_cephadm_module({}) as m: + r = RemoveUtil.__new__(RemoveUtil) + r.__init__(m) + yield r + + +@pytest.fixture() +def osd_obj(): + with mock.patch("cephadm.services.osd.RemoveUtil"): + o = OSD(0, mock.MagicMock()) + yield o diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py new file mode 100644 index 000000000..6281283d7 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -0,0 +1,200 @@ +import fnmatch +import asyncio +import sys +from tempfile import NamedTemporaryFile +from contextlib import contextmanager + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm.serve import CephadmServe, cephadmNoImage + +try: + from typing import Any, Iterator, List, Callable, Dict +except ImportError: + pass + +from cephadm import CephadmOrchestrator +from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus +from tests import mock + + +def async_side_effect(result): + async def side_effect(*args, **kwargs): + return result + return side_effect + + +def get_ceph_option(_, key): + return __file__ + + +def get_module_option_ex(_, module, key, default=None): + if module == 'prometheus': + if key == 'server_port': + return 9283 + return None + + +def _run_cephadm(ret): + async def foo(s, host, entity, cmd, e, **kwargs): + if cmd == 'gather-facts': + return '{}', '', 0 + return [ret], '', 0 + return foo + + +def match_glob(val, pat): + ok = fnmatch.fnmatchcase(val, pat) + if not ok: + assert pat in val + + +class MockEventLoopThread: + def get_result(self, coro, timeout): + if sys.version_info >= (3, 7): + return asyncio.run(coro) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + asyncio.set_event_loop(None) + + +def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None: + to_update: Dict[str, Callable[[str, Any], None]] = { + 'ls': m._process_ls_output, + 'gather-facts': m.cache.update_host_facts, + 'list-networks': m.cache.update_host_networks, + } + if ops: + for op in ops: + out = m.wait_async(CephadmServe(m)._run_cephadm_json(host, cephadmNoImage, op, [])) + to_update[op](host, out) + m.cache.last_daemon_update[host] = datetime_now() + m.cache.last_facts_update[host] = datetime_now() + m.cache.last_network_update[host] = datetime_now() + m.cache.metadata_up_to_date[host] = True + + +def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None: + for host in m.cache.get_hosts(): + receive_agent_metadata(m, host) + + +@contextmanager +def with_cephadm_module(module_options=None, store=None): + """ + :param module_options: Set opts as if they were set before module.__init__ is 
called + :param store: Set the store before module.__init__ is called + """ + with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option), \ + mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \ + mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \ + mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \ + mock.patch("cephadm.module.CephadmOrchestrator.remote"), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \ + mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \ + mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \ + mock.patch('cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles'), \ + mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'), \ + mock.patch('cephadm.http_server.CephadmHttpServer.run'): + + m = CephadmOrchestrator.__new__(CephadmOrchestrator) + if module_options is not None: + for k, v in module_options.items(): + m._ceph_set_module_option('cephadm', k, v) + if store is None: + store = {} + if '_ceph_get/mon_map' not in store: + m.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + if '_ceph_get/mgr_map' not in store: + m.mock_store_set('_ceph_get', 'mgr_map', { + 'services': { + 'dashboard': 'http://[::1]:8080', + 'prometheus': 'http://[::1]:8081' + }, + 'modules': ['dashboard', 'prometheus'], + }) + for k, v in store.items(): + m._ceph_set_store(k, v) + + m.__init__('cephadm', 0, 0) + m._cluster_fsid = "fsid" + + m.event_loop = MockEventLoopThread() + m.tkey = NamedTemporaryFile(prefix='test-cephadm-identity-') + + yield m + + +def wait(m: CephadmOrchestrator, c: OrchResult) -> Any: + return raise_if_exception(c) + + +@contextmanager +def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True): + with mock.patch("cephadm.utils.resolve_ip", return_value=addr): + wait(m, m.add_host(HostSpec(hostname=name))) + if refresh_hosts: + CephadmServe(m)._refresh_hosts_and_daemons() + receive_agent_metadata(m, name) + yield + wait(m, m.remove_host(name, force=rm_with_force)) + + +def assert_rm_service(cephadm: CephadmOrchestrator, srv_name): + mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr') + if mon_or_mgr: + assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name)) + return + assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}' + assert cephadm.spec_store[srv_name].deleted is not None + CephadmServe(cephadm)._check_daemons() + CephadmServe(cephadm)._apply_all_services() + assert cephadm.spec_store[srv_name].deleted + unmanaged = cephadm.spec_store[srv_name].spec.unmanaged + CephadmServe(cephadm)._purge_deleted_services() + if not unmanaged: # cause then we're not deleting daemons + assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}' + + +@contextmanager +def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]: + if spec.placement.is_empty() and host: + spec.placement = PlacementSpec(hosts=[host], count=1) + if meth is not None: + c = meth(cephadm_module, spec) + assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...' 
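+        # (the meth() path schedules a single service, so wait() returns one status
+        #  string here, whereas the apply() path below returns a list of them)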
+ else: + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...'] + + specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] + assert spec in specs + + CephadmServe(cephadm_module)._apply_all_services() + + if status_running: + make_daemons_running(cephadm_module, spec.service_name()) + + dds = wait(cephadm_module, cephadm_module.list_daemons()) + own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()] + if host and spec.service_type != 'osd': + assert own_dds + + yield [dd.name() for dd in own_dds] + + assert_rm_service(cephadm_module, spec.service_name()) + + +def make_daemons_running(cephadm_module, service_name): + own_dds = cephadm_module.cache.get_daemons_by_service(service_name) + for dd in own_dds: + dd.status = DaemonDescriptionStatus.running # We're changing the reference diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py new file mode 100644 index 000000000..524da9c00 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_autotune.py @@ -0,0 +1,69 @@ +# Disable autopep8 for this file: + +# fmt: off + +import pytest + +from cephadm.autotune import MemoryAutotuner +from orchestrator import DaemonDescription + + +@pytest.mark.parametrize("total,daemons,config,result", + [ # noqa: E128 + ( + 128 * 1024 * 1024 * 1024, + [], + {}, + None, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 64 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + DaemonDescription('osd', '3', 'host1'), + ], + { + 'osd.3': 16 * 1024 * 1024 * 1024, + }, + 56 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 62 * 1024 * 1024 * 1024, + ) + ]) +def test_autotune(total, daemons, config, result): + def fake_getter(who, opt): + if opt == 'osd_memory_target_autotune': + if who in config: + return False + else: + return True + if opt == 'osd_memory_target': + return config.get(who, 4 * 1024 * 1024 * 1024) + if opt == 'mds_cache_memory_limit': + return 16 * 1024 * 1024 * 1024 + + a = MemoryAutotuner( + total_mem=total, + daemons=daemons, + config_get=fake_getter, + ) + val, osds = a.tune() + assert val == result diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py new file mode 100644 index 000000000..24fcb0280 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -0,0 +1,2709 @@ +import asyncio +import json +import logging + +from contextlib import contextmanager + +import pytest + +from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection +from cephadm.serve import CephadmServe +from cephadm.inventory import HostCacheStatus, ClientKeyringSpec +from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims +from cephadm.utils import SpecialHostLabels + +try: + from typing import List +except ImportError: + pass + +from ceph.deployment.service_spec import ( + CustomConfig, + CustomContainerSpec, + HostPlacementSpec, + IscsiServiceSpec, + MDSSpec, + NFSServiceSpec, + PlacementSpec, + RGWSpec, + ServiceSpec, +) +from ceph.deployment.drive_selection.selector import DriveSelection +from ceph.deployment.inventory import Devices, Device +from ceph.utils 
import datetime_to_str, datetime_now, str_to_datetime
+from orchestrator import DaemonDescription, InventoryHost, \
+    HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent
+from tests import mock
+from .fixtures import wait, _run_cephadm, match_glob, with_host, \
+    with_cephadm_module, with_service, make_daemons_running, async_side_effect
+from cephadm.module import CephadmOrchestrator
+
+"""
+TODOs:
+    There is really room for improvement here. I just quickly assembled these tests.
+    In general, everything should be tested in Teuthology as well. The reason for
+    also testing this here is the development round-trip time.
+"""
+
+
+def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host):
+    dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host))
+    d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)]
+    assert d_names
+    # there should only be one daemon (if not, match_glob will throw a mismatch)
+    assert len(d_names) == 1
+
+    c = cephadm.remove_daemons(d_names)
+    [out] = wait(cephadm, c)
+    # picking the 1st element is needed, rather than passing the whole list, when the
+    # daemon name contains a '-' char. Otherwise the '-' is treated as a range, i.e.
+    # cephadm-exporter is treated like an m-e range, which is invalid. rbd-mirror (d-m)
+    # and node-exporter (e-e) are valid, so they pass without incident! Also, match_glob
+    # acts on strings anyway!
+    match_glob(out, f"Removed {d_names[0]}* from host '{host}'")
+
+
+@contextmanager
+def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str):
+    spec.placement = PlacementSpec(hosts=[host], count=1)
+
+    c = cephadm_module.add_daemon(spec)
+    [out] = wait(cephadm_module, c)
+    match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")
+
+    dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
+    for dd in dds:
+        if dd.hostname == host:
+            yield dd.daemon_id
+            assert_rm_daemon(cephadm_module, spec.service_name(), host)
+            return
+
+    assert False, 'Daemon not found'
+
+
+@contextmanager
+def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None):
+    cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+        'osds': [
+            {
+                'osd': 1,
+                'up_from': 0,
+                'up': True,
+                'uuid': 'uuid'
+            }
+        ]
+    })
+
+    _run_cephadm.reset_mock(return_value=True, side_effect=True)
+    if ceph_volume_lvm_list:
+        _run_cephadm.side_effect = ceph_volume_lvm_list
+    else:
+        async def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+            logging.info(f'ceph-volume cmd: {cmd}')
+            if 'raw' in cmd:
+                return json.dumps({
+                    "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+                        "ceph_fsid": cephadm_module._cluster_fsid,
+                        "device": "/dev/loop0",
+                        "osd_id": 21,
+                        "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+                        "type": "bluestore"
+                    },
+                }), '', 0
+            if 'lvm' in cmd:
+                return json.dumps({
+                    str(osd_id): [{
+                        'tags': {
+                            'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+                            'ceph.osd_fsid': 'uuid'
+                        },
+                        'type': 'data'
+                    }]
+                }), '', 0
+            return '{}', '', 0
+
+        _run_cephadm.side_effect = _ceph_volume_list
+
+    assert cephadm_module._osd_activate(
+        [host]).stdout == f"Created osd(s) 1 on host '{host}'"
+    assert _run_cephadm.mock_calls == [
+        mock.call(host, 'osd', 'ceph-volume',
+                  ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True),
+        mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY),
+        mock.call(host, 'osd', 'ceph-volume',
+                  ['--', 'raw', 'list', '--format', 'json'], no_fsid=False,
error_ok=False, image='', log_output=True), + ] + dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host) + assert dd.name() == f'osd.{osd_id}' + yield dd + cephadm_module._remove_daemons([(f'osd.{osd_id}', host)]) + + +class TestCephadm(object): + + def test_get_unique_name(self, cephadm_module): + # type: (CephadmOrchestrator) -> None + existing = [ + DaemonDescription(daemon_type='mon', daemon_id='a') + ] + new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing) + match_glob(new_mon, 'myhost') + new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing) + match_glob(new_mgr, 'myhost.*') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host(self, cephadm_module): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + with with_host(cephadm_module, 'test'): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + + # Be careful with backward compatibility when changing things here: + assert json.loads(cephadm_module.get_store('inventory')) == \ + {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}} + + with with_host(cephadm_module, 'second', '1.2.3.5'): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '1::4'), + HostSpec('second', '1.2.3.5') + ] + + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + @mock.patch("cephadm.utils.resolve_ip") + def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module): + resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1'] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + cephadm_module._add_host(HostSpec('test', '192.168.122.1')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '192.168.122.1')] + cephadm_module._add_host(HostSpec('test')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '192.168.122.1')] + with pytest.raises(OrchestratorError): + cephadm_module._add_host(HostSpec('test2')) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_service_ls(self, cephadm_module): + with with_host(cephadm_module, 'test'): + c = cephadm_module.list_daemons(refresh=True) + assert wait(cephadm_module, c) == [] + with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \ + with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _: + + c = cephadm_module.list_daemons() + + def remove_id_events(dd): + out = dd.to_json() + del out['daemon_id'] + del out['events'] + del out['daemon_name'] + return out + + assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [ + { + 'service_name': 'mds.name', + 'daemon_type': 'mds', + 'hostname': 'test', + 'status': 2, + 'status_desc': 'starting', + 'is_active': False, + 'ports': [], + } + ] + + with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), + CephadmOrchestrator.apply_rgw, 'test', status_running=True): + make_daemons_running(cephadm_module, 'mds.name') + + c = cephadm_module.describe_service() + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + expected = [ + { + 'placement': {'count': 2}, + 'service_id': 'name', + 'service_name': 'mds.name', + 'service_type': 'mds', + 'status': {'created': mock.ANY, 'running': 1, 'size': 2}, + 'unmanaged': True + }, + { + 'placement': { + 
'count': 1,
+                        'hosts': ["test"]
+                    },
+                    'service_id': 'r.z',
+                    'service_name': 'rgw.r.z',
+                    'service_type': 'rgw',
+                    'status': {'created': mock.ANY, 'running': 1, 'size': 1,
+                               'ports': [80]},
+                }
+            ]
+            for o in out:
+                if 'events' in o:
+                    del o['events']  # delete it, as it contains a timestamp
+            assert out == expected
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_service_ls_service_type_flag(self, cephadm_module):
+        with with_host(cephadm_module, 'host1'):
+            with with_host(cephadm_module, 'host2'):
+                with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)),
+                                  CephadmOrchestrator.apply_mgr, '', status_running=True):
+                    with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)),
+                                      CephadmOrchestrator.apply_mds, '', status_running=True):
+
+                        # with no service-type; should provide info for both services
+                        c = cephadm_module.describe_service()
+                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+                        expected = [
+                            {
+                                'placement': {'count': 2},
+                                'service_name': 'mgr',
+                                'service_type': 'mgr',
+                                'status': {'created': mock.ANY,
+                                           'running': 2,
+                                           'size': 2}
+                            },
+                            {
+                                'placement': {'count': 2},
+                                'service_id': 'test-id',
+                                'service_name': 'mds.test-id',
+                                'service_type': 'mds',
+                                'status': {'created': mock.ANY,
+                                           'running': 2,
+                                           'size': 2}
+                            },
+                        ]
+
+                        for o in out:
+                            if 'events' in o:
+                                del o['events']  # delete it, as it contains a timestamp
+                        assert out == expected
+
+                        # with service-type; should provide info for only mds
+                        c = cephadm_module.describe_service(service_type='mds')
+                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+                        expected = [
+                            {
+                                'placement': {'count': 2},
+                                'service_id': 'test-id',
+                                'service_name': 'mds.test-id',
+                                'service_type': 'mds',
+                                'status': {'created': mock.ANY,
+                                           'running': 2,
+                                           'size': 2}
+                            },
+                        ]
+
+                        for o in out:
+                            if 'events' in o:
+                                del o['events']  # delete it, as it contains a timestamp
+                        assert out == expected
+
+                        # service-type should not match service names
+                        c = cephadm_module.describe_service(service_type='mds.test-id')
+                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+                        assert out == []
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_device_ls(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            c = cephadm_module.get_inventory()
+            assert wait(cephadm_module, c) == [InventoryHost('test')]
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+        json.dumps([
+            dict(
+                name='rgw.myrgw.foobar',
+                style='cephadm',
+                fsid='fsid',
+                container_id='container_id',
+                version='version',
+                state='running',
+            ),
+            dict(
+                name='something.foo.bar',
+                style='cephadm',
+                fsid='fsid',
+            ),
+            dict(
+                name='haproxy.test.bar',
+                style='cephadm',
+                fsid='fsid',
+            ),
+
+        ])
+    ))
+    def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
+        cephadm_module.service_cache_timeout = 10
+        with with_host(cephadm_module, 'test'):
+            CephadmServe(cephadm_module)._refresh_host_daemons('test')
+            dds = wait(cephadm_module, cephadm_module.list_daemons())
+            assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'}
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_daemon_action(self, cephadm_module: CephadmOrchestrator):
+        cephadm_module.service_cache_timeout = 10
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + + d_name = 'rgw.' + daemon_id + + c = cephadm_module.daemon_action('redeploy', d_name) + assert wait(cephadm_module, + c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'" + + for what in ('start', 'stop', 'restart'): + c = cephadm_module.daemon_action(what, d_name) + assert wait(cephadm_module, + c) == F"Scheduled to {what} {d_name} on host 'test'" + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module._store['_ceph_get/mon_map'] = { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + } + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + assert cephadm_module.events.get_for_daemon(d_name) == [ + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"Deployed {d_name} on host \'test\'"), + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"stop {d_name} from host \'test\'"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \ + with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command: + + _ceph_send_command.side_effect = Exception("myerror") + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + evs = [e.message for e in cephadm_module.events.get_for_daemon( + f'rgw.{daemon_id}')] + + assert 'myerror' in ''.join(evs) + + @pytest.mark.parametrize( + "action", + [ + 'start', + 'stop', + 'restart', + 'reconfig', + 'redeploy' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.HostCache.save_host") + def test_daemon_check(self, _save_host, cephadm_module: CephadmOrchestrator, action): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names: + [daemon_name] = d_names + + cephadm_module._schedule_daemon_action(daemon_name, action) + + assert cephadm_module.cache.get_scheduled_daemon_action( + 'test', daemon_name) == action + + CephadmServe(cephadm_module)._check_daemons() + + assert _save_host.called_with('test') + assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + + # Also testing deploying mons without explicit network placement + cephadm_module.check_mon_command({ + 'prefix': 'config set', + 'who': 'mon', + 'name': 'public_network', + 'value': '127.0.0.0/8' + }) + + cephadm_module.cache.update_host_networks( + 'test', + { + "127.0.0.0/8": [ + "127.0.0.1" + ], + } + ) + + with with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names: + [daemon_name] 
= d_names + + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', + 'mon.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "mon.test", + "image": '', + "deploy_arguments": [], + "params": { + 'reconfig': True, + }, + "meta": { + 'service_name': 'mon', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "[mon]\nk=v\n[mon.test]\npublic network = 127.0.0.0/8\n", + "keyring": "", + "files": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n" + }, + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_mon_crush_location_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + cephadm_module.check_mon_command({ + 'prefix': 'config set', + 'who': 'mon', + 'name': 'public_network', + 'value': '127.0.0.0/8' + }) + + cephadm_module.cache.update_host_networks( + 'test', + { + "127.0.0.0/8": [ + "127.0.0.1" + ], + } + ) + + with with_service(cephadm_module, ServiceSpec(service_type='mon', crush_locations={'test': ['datacenter=a', 'rack=2']}), CephadmOrchestrator.apply_mon, 'test'): + _run_cephadm.assert_called_with( + 'test', + 'mon.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "mon.test", + "image": '', + "deploy_arguments": [], + "params": {}, + "meta": { + 'service_name': 'mon', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n", + "keyring": "", + "files": { + "config": "[mon.test]\npublic network = 127.0.0.0/8\n", + }, + "crush_location": "datacenter=a", + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash): + _run_cephadm.assert_called_with( + 'test', + 'crash.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "crash.test", + "image": '', + "deploy_arguments": [], + "params": { + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + }, + "meta": { + 'service_name': 'crash', + 'ports': [], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "config": "", + "keyring": "[client.crash.test]\nkey = None\n", + }, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg']), + CephadmOrchestrator.apply_node_exporter): + 
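+                # the entrypoint args are expected twice in the payload below: once
+                # under 'params' (part of the deploy request) and once under 'meta'
+                # (recorded alongside the daemon)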
_run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_and_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--collector.textfile.directory=/var/lib/node_exporter/textfile_collector', '--some-other-arg'], + extra_container_args=['--cpus=2', '--quiet']), + CephadmOrchestrator.apply_node_exporter): + _run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus=2", + "--quiet", + ], + 'extra_entrypoint_args': [ + "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector", + "--some-other-arg", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_entrypoint_and_container_args_with_spaces(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='node-exporter', + extra_entrypoint_args=['--entrypoint-arg-with-value value', '--some-other-arg 3'], + extra_container_args=['--cpus 2', '--container-arg-with-value value']), + CephadmOrchestrator.apply_node_exporter): + _run_cephadm.assert_called_with( + 'test', + 'node-exporter.test', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "node-exporter.test", + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + 'extra_container_args': [ + "--cpus", + "2", + "--container-arg-with-value", + "value", + ], + 'extra_entrypoint_args': [ + "--entrypoint-arg-with-value", + "value", + "--some-other-arg", + "3", + ], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': [ + "--cpus 2", + "--container-arg-with-value value", + ], + 'extra_entrypoint_args': [ + "--entrypoint-arg-with-value value", + "--some-other-arg 3", + ], + }, + "config_blobs": {}, + }), + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + 
def test_custom_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        test_cert = ['-----BEGIN PRIVATE KEY-----',
+                     'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+                     'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+                     '-----END PRIVATE KEY-----',
+                     '-----BEGIN CERTIFICATE-----',
+                     'YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg',
+                     'ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=',
+                     '-----END CERTIFICATE-----']
+        configs = [
+            CustomConfig(content='something something something',
+                         mount_path='/etc/test.conf'),
+            CustomConfig(content='\n'.join(test_cert), mount_path='/usr/share/grafana/thing.crt')
+        ]
+        tc_joined = '\n'.join(test_cert)
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, ServiceSpec(service_type='crash', custom_configs=configs), CephadmOrchestrator.apply_crash):
+                _run_cephadm.assert_called_with(
+                    'test',
+                    'crash.test',
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps({
+                        "fsid": "fsid",
+                        "name": "crash.test",
+                        "image": "",
+                        "deploy_arguments": [],
+                        "params": {},
+                        "meta": {
+                            "service_name": "crash",
+                            "ports": [],
+                            "ip": None,
+                            "deployed_by": [],
+                            "rank": None,
+                            "rank_generation": None,
+                            "extra_container_args": None,
+                            "extra_entrypoint_args": None,
+                        },
+                        "config_blobs": {
+                            "config": "",
+                            "keyring": "[client.crash.test]\nkey = None\n",
+                            "custom_config_files": [
+                                {
+                                    "content": "something something something",
+                                    "mount_path": "/etc/test.conf",
+                                },
+                                {
+                                    "content": tc_joined,
+                                    "mount_path": "/usr/share/grafana/thing.crt",
+                                },
+                            ]
+                        }
+                    }),
+                )
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'):
+
+                # Make sure _check_daemons does a redeploy due to monmap change:
+                cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+                    'modified': datetime_to_str(datetime_now()),
+                    'fsid': 'foobar',
+                })
+                cephadm_module.notify('mon_map', None)
+                cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+                    'modules': ['dashboard']
+                })
+
+                with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+                    CephadmServe(cephadm_module)._check_daemons()
+                    _mon_cmd.assert_any_call(
+                        {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'},
+                        None)
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
+    def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
+        # https://tracker.ceph.com/issues/52866
+        with with_host(cephadm_module, 'test1'):
+            with with_host(cephadm_module, 'test2'):
+                with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):
+
+                    CephadmServe(cephadm_module)._apply_all_services()
+                    assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
+
+                    # get daemons from the post-action list (ARRGH, sets!!)
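+                    # requires_post_actions is a plain set, so pop() order is arbitrary;
+                    # work on a copy so the real set stays intact, and let the host
+                    # check below sort out which daemon lives on which host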
+                    tempset = cephadm_module.requires_post_actions.copy()
+                    tempdaemon1 = tempset.pop()
+                    tempdaemon2 = tempset.pop()
+
+                    # make sure the post-action list has 2 daemons in it
+                    assert len(cephadm_module.requires_post_actions) == 2
+
+                    # replicate a host cache that is not in sync when check_daemons is called
+                    tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1)
+                    tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2)
+                    host = 'test1'
+                    if 'test1' not in tempdaemon1:
+                        host = 'test2'
+                    cephadm_module.cache.rm_daemon(host, tempdaemon1)
+
+                    # Make sure _check_daemons does a redeploy due to monmap change:
+                    cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+                        'modified': datetime_to_str(datetime_now()),
+                        'fsid': 'foobar',
+                    })
+                    cephadm_module.notify('mon_map', None)
+                    cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+                        'modules': ['dashboard']
+                    })
+
+                    with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
+                        CephadmServe(cephadm_module)._check_daemons()
+                        _cfg_db.assert_called_once_with([tempdd2])
+
+                        # the post-action list still has the other daemon in it and will
+                        # run on the next _check_daemons
+                        assert len(cephadm_module.requires_post_actions) == 1
+
+                        # the post action was missed for a daemon
+                        assert tempdaemon1 in cephadm_module.requires_post_actions
+
+                        # put the daemon back in the cache
+                        cephadm_module.cache.add_daemon(host, tempdd1)
+
+                        _cfg_db.reset_mock()
+                        # replicate serve loop running again
+                        CephadmServe(cephadm_module)._check_daemons()
+
+                        # the post action should have been called again
+                        _cfg_db.assert_called()
+
+                        # the post-action list is now empty
+                        assert len(cephadm_module.requires_post_actions) == 0
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_mon_add(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)):
+                ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+                c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+                assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"]
+
+                with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"):
+                    ps = PlacementSpec(hosts=['test'], count=1)
+                    c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+                    wait(cephadm_module, c)
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+    def test_mgr_update(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+            r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+            assert r
+
+            assert_rm_daemon(cephadm_module, 'mgr.a', 'test')
+
+    @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+    def test_find_destroyed_osds(self, _mon_cmd, cephadm_module):
+        dict_out = {
+            "nodes": [
+                {
+                    "id": -1,
+                    "name": "default",
+                    "type": "root",
+                    "type_id": 11,
+                    "children": [
+                        -3
+                    ]
+                },
+                {
+                    "id": -3,
+                    "name": "host1",
+                    "type": "host",
+                    "type_id": 1,
+                    "pool_weights": {},
+                    "children": [
+                        0
+                    ]
+                },
+                {
+                    "id": 0,
+                    "device_class": "hdd",
+                    "name": "osd.0",
+                    "type": "osd",
+                    "type_id": 0,
+                    "crush_weight": 0.0243988037109375,
+                    "depth": 2,
+                    "pool_weights": {},
+                    "exists": 1,
+                    "status": "destroyed",
+                    "reweight": 1,
+                    "primary_affinity": 1
+                }
+            ],
+            "stray": []
+        }
+        json_out = json.dumps(dict_out)
+        _mon_cmd.return_value = (0, json_out, '')
+        osd_claims = OsdIdClaims(cephadm_module)
+        assert osd_claims.get() == {'host1': ['0']}
+        assert
osd_claims.filtered_by_host('host1') == ['0'] + assert osd_claims.filtered_by_host('host1.domain.com') == ['0'] + + @ pytest.mark.parametrize( + "ceph_services, cephadm_daemons, strays_expected, metadata", + # [ ([(daemon_type, daemon_id), ... ], [...], [...]), ... ] + [ + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0')], + [('mgr', 'x')], + {}, + ), + # https://tracker.ceph.com/issues/49573 + ( + [('rgw-nfs', '14649')], + [], + [('nfs', 'foo-rgw.host1')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')], + [], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1')], + [('nfs', 'foo2.host2')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ] + ) + def test_check_for_stray_daemons( + self, + cephadm_module, + ceph_services, + cephadm_daemons, + strays_expected, + metadata + ): + # mock ceph service-map + services = [] + for service in ceph_services: + s = {'type': service[0], 'id': service[1]} + services.append(s) + ls = [{'hostname': 'host1', 'services': services}] + + with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers: + list_servers.return_value = ls + list_servers.__iter__.side_effect = ls.__iter__ + + # populate cephadm daemon cache + dm = {} + for daemon_type, daemon_id in cephadm_daemons: + dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id) + dm[dd.name()] = dd + cephadm_module.cache.update_host_daemons('host1', dm) + + def get_metadata_mock(svc_type, svc_id, default): + return metadata[svc_id] + + with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock): + + # test + CephadmServe(cephadm_module)._check_for_strays() + + # verify + strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON') + if not strays: + assert len(strays_expected) == 0 + else: + for dt, di in strays_expected: + name = '%s.%s' % (dt, di) + for detail in strays['detail']: + if name in detail: + strays['detail'].remove(detail) + break + assert name in detail + assert len(strays['detail']) == 0 + assert strays['count'] == len(strays_expected) + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module): + _mon_cmd.return_value = (1, "", "fail_msg") + with pytest.raises(OrchestratorError): + OsdIdClaims(cephadm_module) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='foo', + placement=PlacementSpec( + host_pattern='*', + ), + data_devices=DeviceSelection( + all=True + ) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo update...'] + + inventory = Devices([ + Device( + '/dev/sdb', + available=True + ), + ]) + + cephadm_module.cache.update_host_devices('test', inventory.devices) + + _run_cephadm.side_effect = async_side_effect((['{}'], '', 0)) + + 
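+            # a single serve-loop pass should turn the saved drive group into the
+            # 'ceph-volume lvm batch' call asserted below, alongside the usual
+            # 'lvm list' and 'raw list' inventory refreshes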
assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, + stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='noncollocated', + placement=PlacementSpec( + hosts=['test'] + ), + data_devices=DeviceSelection(paths=['/dev/sdb']), + db_devices=DeviceSelection(paths=['/dev/sdc']), + wal_devices=DeviceSelection(paths=['/dev/sdd']) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...'] + + inventory = Devices([ + Device('/dev/sdb', available=True), + Device('/dev/sdc', available=True), + Device('/dev/sdd', available=True) + ]) + + cephadm_module.cache.update_host_devices('test', inventory.devices) + + _run_cephadm.side_effect = async_side_effect((['{}'], '', 0)) + + assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc', + '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'], + error_ok=True, stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, error_ok=False, log_output=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.SpecStore.save") + def test_apply_osd_save_placement(self, _save_spec, cephadm_module): + with with_host(cephadm_module, 'test'): + json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'}, + 'service_id': 'foo', 'data_devices': {'all': True}} + spec = ServiceSpec.from_json(json_spec) + assert isinstance(spec, DriveGroupSpec) + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo update...'] + _save_spec.assert_called_with(spec) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_create_osds(self, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(paths=[''])) + c = cephadm_module.create_osds(dg) + out = wait(cephadm_module, c) + assert out == "Created no osd(s) on host test; already created?" 
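+            # a drive group whose placement matches no host in the inventory
+            # must be rejected with a clear error rather than silently ignored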
+            bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_host'),
+                                    data_devices=DeviceSelection(paths=['']))
+            c = cephadm_module.create_osds(bad_dg)
+            out = wait(cephadm_module, c)
+            assert "Invalid 'host:device' spec: host not found in cluster" in out
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_create_noncollocated_osd(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+                                data_devices=DeviceSelection(paths=['']))
+            c = cephadm_module.create_osds(dg)
+            out = wait(cephadm_module, c)
+            assert out == "Created no osd(s) on host test; already created?"
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command')
+    @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
+    @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
+    @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
+    def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+                                data_devices=DeviceSelection(limit=5, rotational=1),
+                                service_id='not_enough')
+
+            disks_found = [
+                '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
+            d_to_cv.return_value = 'foo'
+            _run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+            preview = cephadm_module.osd_service.generate_previews([dg], 'test')
+
+            for osd in preview:
+                assert 'notes' in osd
+                assert osd['notes'] == [
+                    'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_prepare_drivegroup(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+                                data_devices=DeviceSelection(paths=['']))
+            out = cephadm_module.osd_service.prepare_drivegroup(dg)
+            assert len(out) == 1
+            f1 = out[0]
+            assert f1[0] == 'test'
+            assert isinstance(f1[1], DriveSelection)
+
+    @pytest.mark.parametrize(
+        "devices, preview, exp_commands",
+        [
+            # no preview and only one disk, prepare is used due to the hack that is in place.
+            (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]),
+            # no preview and multiple disks, uses batch
+            (['/dev/sda', '/dev/sdb'], False,
+             ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]),
+            # preview and only one disk needs to use batch again to generate the preview
+            (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]),
+            # preview and multiple disks work the same
+            (['/dev/sda', '/dev/sdb'], True,
+             ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]),
+        ]
+    )
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+        with with_host(cephadm_module, 'test'):
+            dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(
+                host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+            ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+            preview = preview
+            out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+            assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+                       for cmd in out), f'Expected cmds from {out} in {exp_commands}'
+
+    @pytest.mark.parametrize(
+        "devices, preview, exp_commands",
+        [
+            # one data device, no preview
+            (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]),
+            # multiple data devices, no preview
+            (['/dev/sda', '/dev/sdb'], False,
+             ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]),
+            # one data device, preview
+            (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]),
+            # multiple data devices, preview
+            (['/dev/sda', '/dev/sdb'], True,
+             ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]),
+        ]
+    )
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+        with with_host(cephadm_module, 'test'):
+            dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec(
+                host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+            ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+            preview = preview
+            out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+            assert all(any(cmd in exp_cmd for exp_cmd in exp_commands)
+                       for cmd in out), f'Expected cmds from {out} in {exp_commands}'
+
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+        json.dumps([
+            dict(
+                name='osd.0',
+                style='cephadm',
+                fsid='fsid',
+                container_id='container_id',
+                version='version',
+                state='running',
+            )
+        ])
+    ))
+    @mock.patch("cephadm.services.osd.OSD.exists", True)
+    @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
+    def test_remove_osds(self, cephadm_module):
+        with with_host(cephadm_module, 'test'):
+            CephadmServe(cephadm_module)._refresh_host_daemons('test')
+            c = cephadm_module.list_daemons()
+            wait(cephadm_module, c)
+
+            c = cephadm_module.remove_daemons(['osd.0'])
+            out = wait(cephadm_module, c)
+            assert out == ["Removed osd.0 from host 'test'"]
+
+            cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0,
+                                                      replace=False,
+                                                      force=False,
+                                                      hostname='test',
+                                                      process_started_at=datetime_now(),
remove_util=cephadm_module.to_remove_osds.rm_util + )) + cephadm_module.to_remove_osds.process_removal_queue() + assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module) + + c = cephadm_module.remove_osds_status() + out = wait(cephadm_module, c) + assert out == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_update(self, cephadm_module): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)): + ps = PlacementSpec(hosts=['host1'], count=1) + c = cephadm_module.add_daemon( + RGWSpec(service_id="foo", placement=ps)) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed rgw.foo.* on host 'host1'") + + ps = PlacementSpec(hosts=['host1', 'host2'], count=2) + r = CephadmServe(cephadm_module)._apply_service( + RGWSpec(service_id="foo", placement=ps)) + assert r + + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1') + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='rgw.myrgw.myhost.myid', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ) + ]) + )) + def test_remove_daemon(self, cephadm_module): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + c = cephadm_module.list_daemons() + wait(cephadm_module, c) + c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid']) + out = wait(cephadm_module, c) + assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription + with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription + CephadmServe(cephadm_module)._check_for_moved_osds() + # both are in status "starting" + assert len(cephadm_module.cache.get_daemons()) == 2 + + dd1.status = DaemonDescriptionStatus.running + dd2.status = DaemonDescriptionStatus.error + cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1}) + cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2}) + CephadmServe(cephadm_module)._check_for_moved_osds() + assert len(cephadm_module.cache.get_daemons()) == 1 + + assert cephadm_module.events.get_for_daemon('osd.1') == [ + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host1'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host2'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Removed duplicated daemon on host 'host2'"), + ] + + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @pytest.mark.parametrize( + "spec", + [ + ServiceSpec('crash'), + ServiceSpec('prometheus'), + ServiceSpec('grafana'), + ServiceSpec('node-exporter'), + ServiceSpec('alertmanager'), + ServiceSpec('rbd-mirror'), + ServiceSpec('cephfs-mirror'), + ServiceSpec('mds', service_id='fsname'), + 
RGWSpec(rgw_realm='realm', rgw_zone='zone'), + RGWSpec(service_id="foo"), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_add(self, spec: ServiceSpec, cephadm_module): + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, unmanaged_spec): + with with_daemon(cephadm_module, spec, 'test'): + pass + + @pytest.mark.parametrize( + "entity,success,spec", + [ + ('mgr.x', True, ServiceSpec( + service_type='mgr', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.rgw.x', True, ServiceSpec( + service_type='rgw', + service_id='id', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.nfs.x', True, ServiceSpec( + service_type='nfs', + service_id='id', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('mon.', False, ServiceSpec( + service_type='mon', + placement=PlacementSpec( + hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.side_effect = OrchestratorError('fail') + with pytest.raises(OrchestratorError): + wait(cephadm_module, cephadm_module.add_daemon(spec)) + if success: + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + else: + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + assert cephadm_module.events.get_for_service(spec.service_name()) == [ + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO', + "service was created"), + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR', + "fail"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + _run_cephadm.side_effect = OrchestratorError('fail') + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1 + assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['summary'] + assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['detail'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with 
with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._apply_all_services() + ps = PlacementSpec(hosts=['fail'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.apply_spec_fails + assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1 + assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks[ + 'CEPHADM_APPLY_SPEC_FAIL']['summary'] + + @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.save_host_devices") + def test_invalid_config_option_health_warning(self, _save_devs, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator): + _save_devs.return_value = None + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + get_foreign_ceph_option.side_effect = KeyError + CephadmServe(cephadm_module)._apply_service_config( + ServiceSpec('mgr', placement=ps, config={'test': 'foo'})) + assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None + assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1 + assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['summary'] + assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['detail'] + + @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_save_devices(self, _set_store, _run_cephadm, _get_foreign_ceph_option, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + entry_size = 65536 # default 64k size + _get_foreign_ceph_option.return_value = entry_size + + class FakeDev(): + def __init__(self, c: str = 'a'): + # using 1015 here makes the serialized string exactly 1024 bytes if c is one char + self.content = {c: c * 1015} + self.path = 'dev/vdc' + + def to_json(self): + return self.content + + def from_json(self, stuff): + return json.loads(stuff) + + def byte_len(s): + return len(s.encode('utf-8')) + + with with_host(cephadm_module, 'test'): + fake_devices = [FakeDev()] * 100 # should be ~100k + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 34], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 34]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 32]})), + ] + _set_store.assert_has_calls(expected_calls) + + fake_devices = [FakeDev()] * 300 # should be ~300k + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size * 4 + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 5 + cephadm_module.cache.update_host_devices('test', 
fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50], 'entries': 6})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.3', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.4', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + mock.call('host.test.devices.5', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 50]})), + ] + _set_store.assert_has_calls(expected_calls) + + fake_devices = [FakeDev()] * 62 # should be ~62k, just under cache size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 62], 'entries': 1})), + ] + _set_store.assert_has_calls(expected_calls) + + # should be ~64k but just over so it requires more entries + fake_devices = [FakeDev()] * 64 + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 22], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 22]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev()] * 20]})), + ] + _set_store.assert_has_calls(expected_calls) + + # test for actual content being correct using differing devices + entry_size = 3072 + _get_foreign_ceph_option.return_value = entry_size + fake_devices = [FakeDev('a'), FakeDev('b'), FakeDev('c'), FakeDev('d'), FakeDev('e')] + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) > entry_size + assert byte_len(json.dumps([d.to_json() for d in fake_devices])) < entry_size * 2 + cephadm_module.cache.update_host_devices('test', fake_devices) + cephadm_module.cache.save_host_devices('test') + expected_calls = [ + mock.call('host.test.devices.0', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('a'), FakeDev('b')]], 'entries': 3})), + mock.call('host.test.devices.1', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('c'), FakeDev('d')]]})), + mock.call('host.test.devices.2', json.dumps( + {'devices': [d.to_json() for d in [FakeDev('e')]]})), + ] + _set_store.assert_has_calls(expected_calls) + + @mock.patch("cephadm.module.CephadmOrchestrator.get_store") + def test_load_devices(self, _get_store, cephadm_module: CephadmOrchestrator): + def _fake_store(key): + if key == 'host.test.devices.0': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 9], 'entries': 3}) + elif key == 'host.test.devices.1': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 7]}) + elif key == 'host.test.devices.2': + return json.dumps({'devices': [d.to_json() for d in [Device('/path')] * 4]}) + else: + raise Exception(f'Get store with unexpected value {key}') + + 
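+        # chunk 0 advertises the total chunk count ('entries': 3); load_host_devices
+        # is expected to stitch chunks 0-2 back together into 9 + 7 + 4 = 20 devices,
+        # as asserted below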
_get_store.side_effect = _fake_store + devs = cephadm_module.cache.load_host_devices('test') + assert devs == [Device('/path')] * 20 + + @mock.patch("cephadm.module.Inventory.__contains__") + def test_check_stray_host_cache_entry(self, _contains, cephadm_module: CephadmOrchestrator): + def _fake_inv(key): + if key in ['host1', 'node02', 'host.something.com']: + return True + return False + + _contains.side_effect = _fake_inv + assert cephadm_module.cache._get_host_cache_entry_status('host1') == HostCacheStatus.host + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.something.com') == HostCacheStatus.host + assert cephadm_module.cache._get_host_cache_entry_status( + 'node02.devices.37') == HostCacheStatus.devices + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.something.com.devices.0') == HostCacheStatus.devices + assert cephadm_module.cache._get_host_cache_entry_status('hostXXX') == HostCacheStatus.stray + assert cephadm_module.cache._get_host_cache_entry_status( + 'host.nothing.com') == HostCacheStatus.stray + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_nfs(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = NFSServiceSpec( + service_id='name', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed nfs.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock()) + @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + def test_iscsi(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed iscsi.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test') + + @pytest.mark.parametrize( + "on_bool", + [ + True, + False + ] + ) + @pytest.mark.parametrize( + "fault_ident", + [ + 'fault', + 'ident' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')]) + on_off = 'on' if on_bool else 'off' + assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True) + + 
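The two custom blink-command tests that follow feed ``blink_device_light_cmd`` a Jinja-style template (note the ``{{ ident_fault }}``, ``{{'on' if on else 'off'}}`` and ``{{ path or dev }}`` placeholders in the per-host test). As a rough standalone sketch of how such a template expands, here rendered with the ``jinja2`` package directly, which is an assumption made purely for illustration and not necessarily the module's own rendering path::

    from jinja2 import Template

    # template string taken from test_blink_device_light_custom_per_host below
    cmd = ("xyz --foo --{{ ident_fault }}={{'on' if on else 'off'}} "
           "'{{ path or dev }}'")
    rendered = Template(cmd).render(
        ident_fault='fault',  # 'fault' or 'ident', matching the test parameters
        on=True,              # True renders as 'on', False as 'off'
        path='',              # empty here, so '{{ path or dev }}' falls back to dev
        dev='SanDisk_X400_M.2_2280_512GB_162924424784',
    )
    assert rendered == "xyz --foo --fault=on 'SanDisk_X400_M.2_2280_512GB_162924424784'"

This is consistent with the shell-split argument list the test asserts, where the rendered command arrives as ``['--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784']`` with the quotes stripped.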
@mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + cephadm_module.set_store('blink_device_light_cmd', 'echo hello') + c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')]) + assert wait(cephadm_module, c) == ['Set ident light for test: on'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'echo', 'hello'], error_ok=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'mgr0'): + cephadm_module.set_store('mgr0/blink_device_light_cmd', + 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'') + c = cephadm_module.blink_device_light( + 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')]) + assert wait(cephadm_module, c) == [ + 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on'] + _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [ + '--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784' + ], error_ok=True) + + @pytest.mark.parametrize( + "spec, meth", + [ + (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr), + (ServiceSpec('crash'), CephadmOrchestrator.apply_crash), + (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus), + (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana), + (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter), + (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager), + (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds), + (ServiceSpec( + 'mds', service_id='fsname', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='fsname', + network='' + )] + ) + ), CephadmOrchestrator.apply_mds), + (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw), + (RGWSpec( + service_id='bar', + rgw_realm='realm', rgw_zone='zone', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='bar', + network='' + )] + ) + ), CephadmOrchestrator.apply_rgw), + (NFSServiceSpec( + service_id='name', + ), CephadmOrchestrator.apply_nfs), + (IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password' + ), CephadmOrchestrator.apply_iscsi), + (CustomContainerSpec( + service_id='hello-world', + image='docker.io/library/hello-world:latest', + uid=65534, + gid=65534, + dirs=['foo/bar'], + files={ + 'foo/bar/xyz.conf': 'aaa\nbbb' + }, + bind_mounts=[[ + 'type=bind', + 'source=lib/modules', + 'destination=/lib/modules', + 'ro=true' + ]], + volume_mounts={ + 'foo/bar': '/foo/bar:Z' + }, + args=['--no-healthcheck'], + envs=['SECRET=password'], + ports=[8080, 8443] + ), CephadmOrchestrator.apply_container), + ] + ) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + 
@mock.patch("subprocess.run", mock.MagicMock()) + def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, meth, 'test'): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator): + spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'}) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, host='test'): + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert out == 'fsname' + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert not out + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop") + def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator): + spec = MDSSpec( + 'mds', + service_id='fsname', + placement=PlacementSpec(hosts=['host1', 'host2']), + config={'test': 'foo'} + ) + with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'): + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname update...") + CephadmServe(cephadm_module)._apply_all_services() + + [daemon] = cephadm_module.cache.daemons['host1'].keys() + + spec.placement.set_hosts(['host2']) + + ok_to_stop.side_effect = False + + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname update...") + CephadmServe(cephadm_module)._apply_all_services() + + ok_to_stop.assert_called_with([daemon[4:]], force=True) + + assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop + assert_rm_daemon(cephadm_module, spec.service_name(), 'host2') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module): + # test daemons on offline/maint hosts not removed when applying specs + # test daemons not added to hosts in maint/offline state + with with_host(cephadm_module, 'test1'): + with with_host(cephadm_module, 'test2'): + with with_host(cephadm_module, 'test3'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))): + # should get a mgr on all 3 hosts + # CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + + # put one host in offline state and one host in maintenance state + cephadm_module.offline_hosts = {'test2'} + cephadm_module.inventory._inventory['test3']['status'] = 'maintenance' + cephadm_module.inventory.save() + + # being in offline/maint mode should disqualify hosts from being + # candidates for scheduling + assert cephadm_module.cache.is_host_schedulable('test2') + assert cephadm_module.cache.is_host_schedulable('test3') + + assert cephadm_module.cache.is_host_unreachable('test2') + assert cephadm_module.cache.is_host_unreachable('test3') + + with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))): + # re-apply services. 
No mgr should be removed from maint/offline hosts + # crash daemon should only be on host not in maint/offline mode + CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1 + + cephadm_module.offline_hosts = {} + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nsuccess - systemd target xxx disabled'], 0)) + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + # should not raise an error + retval = cephadm_module.enter_host_maintenance(hostname) + assert retval.result_str().startswith('Daemons for Ceph cluster') + assert not retval.exception_str + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - disable the target'], 0)) + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + + with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'): + cephadm_module.enter_host_maintenance(hostname) + + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_i_really_mean_it(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + err_str = 'some kind of error' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - disable the target'], 0)) + _host_ok.return_value = 1, err_str + _get_daemon_types.return_value = ['mon'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + + with pytest.raises(OrchestratorError, match=err_str): + cephadm_module.enter_host_maintenance(hostname) + assert not cephadm_module.inventory._inventory[hostname]['status'] + + with pytest.raises(OrchestratorError, match=err_str): + cephadm_module.enter_host_maintenance(hostname, force=True) + assert not cephadm_module.inventory._inventory[hostname]['status'] + + retval = cephadm_module.enter_host_maintenance(hostname, force=True, yes_i_really_mean_it=True) + assert retval.result_str().startswith('Daemons for Ceph cluster') + assert not retval.exception_str + assert 
cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect(([''], [ + 'something\nsuccess - systemd target xxx enabled and started'], 0)) + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + # should not raise an error + retval = cephadm_module.exit_host_maintenance(hostname) + assert retval.result_str().startswith('Ceph cluster') + assert not retval.exception_str + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.side_effect = async_side_effect( + ([''], ['something\nfailed - unable to enable the target'], 0)) + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + + with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'): + cephadm_module.exit_host_maintenance(hostname) + + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.ssh.SSHManager._remote_connection") + @mock.patch("cephadm.ssh.SSHManager._execute_command") + @mock.patch("cephadm.ssh.SSHManager._check_execute_command") + @mock.patch("cephadm.ssh.SSHManager._write_remote_file") + def test_etc_ceph(self, _write_file, check_execute_command, execute_command, remote_connection, cephadm_module): + _write_file.side_effect = async_side_effect(None) + check_execute_command.side_effect = async_side_effect('') + execute_command.side_effect = async_side_effect(('{}', '', 0)) + remote_connection.side_effect = async_side_effect(mock.Mock()) + + assert cephadm_module.manage_etc_ceph_ceph_conf is False + + with with_host(cephadm_module, 'test'): + assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test') + + with with_host(cephadm_module, 'test'): + cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True) + cephadm_module.config_notify() + assert cephadm_module.manage_etc_ceph_ceph_conf is True + + CephadmServe(cephadm_module)._write_all_client_files() + # Make sure both ceph conf locations (default and per fsid) are called + _write_file.assert_has_calls([mock.call('test', '/etc/ceph/ceph.conf', b'', + 0o644, 0, 0, None), + mock.call('test', '/var/lib/ceph/fsid/config/ceph.conf', b'', + 0o644, 0, 0, None)] + ) + ceph_conf_files = cephadm_module.cache.get_host_client_files('test') + assert len(ceph_conf_files) == 2 + assert '/etc/ceph/ceph.conf' in ceph_conf_files + assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files + + # set extra config and expect that we deploy another ceph.conf + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + CephadmServe(cephadm_module)._write_all_client_files() + _write_file.assert_has_calls([mock.call('test', + 
'/etc/ceph/ceph.conf', + b'[mon]\nk=v\n', 0o644, 0, 0, None), + mock.call('test', + '/var/lib/ceph/fsid/config/ceph.conf', + b'[mon]\nk=v\n', 0o644, 0, 0, None)]) + # reload + cephadm_module.cache.last_client_files = {} + cephadm_module.cache.load() + + ceph_conf_files = cephadm_module.cache.get_host_client_files('test') + assert len(ceph_conf_files) == 2 + assert '/etc/ceph/ceph.conf' in ceph_conf_files + assert '/var/lib/ceph/fsid/config/ceph.conf' in ceph_conf_files + + # Make sure, _check_daemons does a redeploy due to monmap change: + f1_before_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + f2_before_digest = cephadm_module.cache.get_host_client_files( + 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0] + cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2') + CephadmServe(cephadm_module)._write_all_client_files() + f1_after_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + f2_after_digest = cephadm_module.cache.get_host_client_files( + 'test')['/var/lib/ceph/fsid/config/ceph.conf'][0] + assert f1_before_digest != f1_after_digest + assert f2_before_digest != f2_after_digest + + @mock.patch("cephadm.inventory.HostCache.get_host_client_files") + def test_dont_write_client_files_to_unreachable_hosts(self, _get_client_files, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) # online + cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) # maintenance + cephadm_module.inventory.add_host(HostSpec('host3', '1.2.3.3')) # offline + + # mark host2 as maintenance and host3 as offline + cephadm_module.inventory._inventory['host2']['status'] = 'maintenance' + cephadm_module.offline_hosts.add('host3') + + # verify host2 and host3 are correctly marked as unreachable but host1 is not + assert not cephadm_module.cache.is_host_unreachable('host1') + assert cephadm_module.cache.is_host_unreachable('host2') + assert cephadm_module.cache.is_host_unreachable('host3') + + _get_client_files.side_effect = Exception('Called _get_client_files') + + # with the online host, should call _get_client_files which + # we have setup to raise an Exception + with pytest.raises(Exception, match='Called _get_client_files'): + CephadmServe(cephadm_module)._write_client_files({}, 'host1') + + # for the maintenance and offline host, _get_client_files should + # not be called and it should just return immediately with nothing + # having been raised + CephadmServe(cephadm_module)._write_client_files({}, 'host2') + CephadmServe(cephadm_module)._write_client_files({}, 'host3') + + def test_etc_ceph_init(self): + with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m: + assert m.manage_etc_ceph_ceph_conf is True + + @mock.patch("cephadm.CephadmOrchestrator.check_mon_command") + @mock.patch("cephadm.CephadmOrchestrator.extra_ceph_conf") + def test_extra_ceph_conf(self, _extra_ceph_conf, _check_mon_cmd, cephadm_module: CephadmOrchestrator): + # settings put into the [global] section in the extra conf + # need to be appended to existing [global] section in given + # minimal ceph conf, but anything in another section (e.g. 
[mon]) + # needs to continue to be its own section + + # this is the conf "ceph generate-minimal-conf" will return in this test + _check_mon_cmd.return_value = (0, """[global] +global_k1 = global_v1 +global_k2 = global_v2 +[mon] +mon_k1 = mon_v1 +[osd] +osd_k1 = osd_v1 +osd_k2 = osd_v2 +""", '') + + # test with extra ceph conf that has some of the sections from minimal conf + _extra_ceph_conf.return_value = CephadmOrchestrator.ExtraCephConf(conf="""[mon] +mon_k2 = mon_v2 +[global] +global_k3 = global_v3 +""", last_modified=datetime_now()) + + expected_combined_conf = """[global] +global_k1 = global_v1 +global_k2 = global_v2 +global_k3 = global_v3 + +[mon] +mon_k1 = mon_v1 +mon_k2 = mon_v2 + +[osd] +osd_k1 = osd_v1 +osd_k2 = osd_v2 +""" + + assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf + + def test_client_keyrings_special_host_labels(self, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1'])) + cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS])) + cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING])) + # hosts need to be marked as having had refresh to be available for placement + # so "refresh" with empty daemon list + cephadm_module.cache.update_host_daemons('host1', {}) + cephadm_module.cache.update_host_daemons('host2', {}) + cephadm_module.cache.update_host_daemons('host3', {}) + + assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()] + + assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()] + + cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1'))) + + with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + _mon_cmd.return_value = (0, 'real-keyring', '') + client_files = CephadmServe(cephadm_module)._calc_client_files() + assert 'host1' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys() + assert 'host2' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys() + assert 'host3' not in client_files.keys() + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + def check_registry_credentials(url, username, password): + assert json.loads(cephadm_module.get_store('registry_credentials')) == { + 'url': url, 'username': username, 'password': password} + + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + # test successful login with valid args + code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password') + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login attempt with invalid args + code, out, err = cephadm_module.registry_login('bad-args') + assert err == ("Invalid arguments. 
Please provide arguments <url> <username> <password> " + "or -i <login credentials json file>") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login using invalid json file + code, out, err = cephadm_module.registry_login( + None, None, None, '{"bad-json": "bad-json"}') + assert err == ("json provided for custom registry login did not include all necessary fields. " + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test good login using valid json file + good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", " + " \"password\": \"" + "json-pass" + "\"}") + code, out, err = cephadm_module.registry_login(None, None, None, good_json) + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + # test bad login where args are valid but login command fails + _run_cephadm.side_effect = async_side_effect(('{}', 'error', 1)) + code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password') + assert err == 'Host test failed to login to fail-url as fail-user with given password' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['image@repo_digest'], + }))) + @pytest.mark.parametrize("use_repo_digest", + [ + False, + True + ]) + def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator): + cephadm_module.use_repo_digest = use_repo_digest + + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.set_container_image('global', 'image') + + if use_repo_digest: + + CephadmServe(cephadm_module).convert_tags_to_repo_digest() + + _, image, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': 'global', + 'key': 'container_image', + }) + if use_repo_digest: + assert image == 'image@repo_digest' + else: + assert image == 'image' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch +Traceback (most recent call last): + File "<stdin>", line 6112, in <module> + File "<stdin>", line 1299, in _infer_fsid + File "<stdin>", line 1382, in _infer_image + File "<stdin>", line 3612, in command_ceph_volume + File "<stdin>", line 1061, in call_throws""" + + with with_host(cephadm_module, 'test'): + _run_cephadm.reset_mock() + _run_cephadm.side_effect = OrchestratorError(error_message) + + s = CephadmServe(cephadm_module)._refresh_host_devices('test') + assert s == 'host test `cephadm ceph-volume` failed: ' + error_message + + assert _run_cephadm.mock_calls == [ + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='', + no_fsid=False, error_ok=False, log_output=False), + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty'], image='', + 
no_fsid=False, error_ok=False, log_output=False), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.mock_store_set('_ceph_get', 'osd_map', { + 'osds': [ + { + 'osd': 1, + 'up_from': 0, + 'uuid': 'uuid' + } + ] + }) + + ceph_volume_lvm_list = { + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }] + } + _run_cephadm.reset_mock(return_value=True, side_effect=True) + + async def _r_c(*args, **kwargs): + if 'ceph-volume' in args: + return (json.dumps(ceph_volume_lvm_list), '', 0) + else: + assert ['_orch', 'deploy'] in args + raise OrchestratorError("let's fail somehow") + _run_cephadm.side_effect = _r_c + assert cephadm_module._osd_activate( + ['test']).stderr == "let's fail somehow" + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + + async def _ceph_volume_list(s, host, entity, cmd, **kwargs): + logging.info(f'ceph-volume cmd: {cmd}') + if 'raw' in cmd: + return json.dumps({ + "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": { + "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8", + "device": "/dev/loop0", + "osd_id": 21, + "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5", + "type": "bluestore" + }, + }), '', 0 + if 'lvm' in cmd: + return json.dumps({ + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }, { + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'db' + }] + }), '', 0 + return '{}', '', 0 + + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_service(cephadm_module, dg, host='test'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1): + assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1 + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator): + with pytest.raises(OrchestratorError): + with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN) + pass + assert False + with with_host(cephadm_module, 'test1', 
refresh_hosts=False, rm_with_force=True): + with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN) + + @pytest.mark.parametrize("facts, settings, expected_value", + [ + # All options are available on all hosts + ( + { + "host1": + { + "sysctl_options": + { + 'opt1': 'val1', + 'opt2': 'val2', + } + }, + "host2": + { + "sysctl_options": + { + 'opt1': '', + 'opt2': '', + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': [], 'host2': []} # expected_value + ), + # opt1 is missing on host 1, opt2 is missing on host2 + ({ + "host1": + { + "sysctl_options": + { + 'opt2': '', + 'optX': '', + } + }, + "host2": + { + "sysctl_options": + { + 'opt1': '', + 'opt3': '', + 'opt4': '', + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': ['opt1'], 'host2': ['opt2']} # expected_value + ), + # All options are missing on all hosts + ({ + "host1": + { + "sysctl_options": + { + } + }, + "host2": + { + "sysctl_options": + { + } + }, + }, + {'opt1', 'opt2'}, # settings + {'host1': ['opt1', 'opt2'], 'host2': [ + 'opt1', 'opt2']} # expected_value + ), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_tuned_profiles_settings_validation(self, facts, settings, expected_value, cephadm_module): + with with_host(cephadm_module, 'test'): + spec = mock.Mock() + spec.settings = sorted(settings) + spec.placement.filter_matching_hostspecs = mock.Mock() + spec.placement.filter_matching_hostspecs.return_value = ['host1', 'host2'] + cephadm_module.cache.facts = facts + assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_tuned_profiles_validation(self, cephadm_module): + with with_host(cephadm_module, 'test'): + + with pytest.raises(OrchestratorError, match="^Invalid placement specification.+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(hosts=[]) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="Invalid spec: settings section cannot be empty."): + spec = mock.Mock() + spec.settings = {} + spec.placement = PlacementSpec(hosts=['host1', 'host2']) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Placement 'count' field is no supported .+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(count=1) + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Placement 'count_per_host' field is no supported .+"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(count_per_host=1, label='foo') + cephadm_module._validate_tuned_profile_spec(spec) + + with pytest.raises(OrchestratorError, match="^Found invalid host"): + spec = mock.Mock() + spec.settings = {'a': 'b'} + spec.placement = PlacementSpec(hosts=['host1', 'host2']) + cephadm_module.inventory = mock.Mock() + cephadm_module.inventory.all_specs = mock.Mock( + return_value=[mock.Mock().hostname, mock.Mock().hostname]) + cephadm_module._validate_tuned_profile_spec(spec) + + def test_set_unmanaged(self, cephadm_module): + cephadm_module.spec_store._specs['crash'] = ServiceSpec('crash', unmanaged=False) + assert not cephadm_module.spec_store._specs['crash'].unmanaged + cephadm_module.spec_store.set_unmanaged('crash', True) + assert 
cephadm_module.spec_store._specs['crash'].unmanaged + cephadm_module.spec_store.set_unmanaged('crash', False) + assert not cephadm_module.spec_store._specs['crash'].unmanaged + + def test_inventory_known_hostnames(self, cephadm_module): + cephadm_module.inventory.add_host(HostSpec('host1', '1.2.3.1')) + cephadm_module.inventory.add_host(HostSpec('host2', '1.2.3.2')) + cephadm_module.inventory.add_host(HostSpec('host3.domain', '1.2.3.3')) + cephadm_module.inventory.add_host(HostSpec('host4.domain', '1.2.3.4')) + cephadm_module.inventory.add_host(HostSpec('host5', '1.2.3.5')) + + # update_known_hostname expects args to be <hostname, shortname, fqdn> + # as are gathered from cephadm gather-facts. Although, passing the + # names in the wrong order should actually have no effect on functionality + cephadm_module.inventory.update_known_hostnames('host1', 'host1', 'host1.domain') + cephadm_module.inventory.update_known_hostnames('host2.domain', 'host2', 'host2.domain') + cephadm_module.inventory.update_known_hostnames('host3', 'host3', 'host3.domain') + cephadm_module.inventory.update_known_hostnames('host4.domain', 'host4', 'host4.domain') + cephadm_module.inventory.update_known_hostnames('host5', 'host5', 'host5') + + assert 'host1' in cephadm_module.inventory + assert 'host1.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host1') == '1.2.3.1' + assert cephadm_module.inventory.get_addr('host1.domain') == '1.2.3.1' + + assert 'host2' in cephadm_module.inventory + assert 'host2.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host2') == '1.2.3.2' + assert cephadm_module.inventory.get_addr('host2.domain') == '1.2.3.2' + + assert 'host3' in cephadm_module.inventory + assert 'host3.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host3') == '1.2.3.3' + assert cephadm_module.inventory.get_addr('host3.domain') == '1.2.3.3' + + assert 'host4' in cephadm_module.inventory + assert 'host4.domain' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host4') == '1.2.3.4' + assert cephadm_module.inventory.get_addr('host4.domain') == '1.2.3.4' + + assert 'host4.otherdomain' not in cephadm_module.inventory + with pytest.raises(OrchestratorError): + cephadm_module.inventory.get_addr('host4.otherdomain') + + assert 'host5' in cephadm_module.inventory + assert cephadm_module.inventory.get_addr('host5') == '1.2.3.5' + with pytest.raises(OrchestratorError): + cephadm_module.inventory.get_addr('host5.domain') + + def test_async_timeout_handler(self, cephadm_module): + cephadm_module.default_cephadm_command_timeout = 900 + + async def _timeout(): + raise asyncio.TimeoutError + + with pytest.raises(OrchestratorError, match=r'Command timed out \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler(): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command timed out on host hostA \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler('hostA'): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "testing" timed out \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler(cmd='testing'): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "testing" timed out on host hostB \(default 900 second timeout\)'): + with cephadm_module.async_timeout_handler('hostB', 'testing'): + cephadm_module.wait_async(_timeout()) + + 
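+        # An explicit timeout changes the wording from '(default ... second
+        # timeout)' to '(non-default ... second timeout)':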
with pytest.raises(OrchestratorError, match=r'Command timed out \(non-default 111 second timeout\)'): + with cephadm_module.async_timeout_handler(timeout=111): + cephadm_module.wait_async(_timeout()) + + with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'): + with cephadm_module.async_timeout_handler('hostC', 'very slow', 999): + cephadm_module.wait_async(_timeout()) + + @mock.patch("cephadm.CephadmOrchestrator.remove_osds") + @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None) + @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: []) + def test_host_drain_zap(self, _rm_osds, cephadm_module): + # pass force=true in these tests to bypass _admin label check + cephadm_module.drain_host('host1', force=True, zap_osd_devices=False) + assert _rm_osds.called_with([], zap=False) + + cephadm_module.drain_host('host1', force=True, zap_osd_devices=True) + assert _rm_osds.called_with([], zap=True) + + def test_process_ls_output(self, cephadm_module): + sample_ls_output = """[ + { + "style": "cephadm:v1", + "name": "mon.vm-00", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mon.vm-00", + "enabled": true, + "state": "running", + "service_name": "mon", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "b170b964a6e2918955362eb36195627c6086d3f859d4ebce2ee13f3ee4738733", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 56214159, + "cpu_percentage": "2.32%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:31:11.752300Z", + "created": "2023-09-22T22:15:24.121387Z", + "deployed": "2023-09-22T22:31:10.383431Z", + "configured": "2023-09-22T22:31:11.859440Z" + }, + { + "style": "cephadm:v1", + "name": "mgr.vm-00.mpexeg", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@mgr.vm-00.mpexeg", + "enabled": true, + "state": "running", + "service_name": "mgr", + "ports": [ + 8443, + 9283, + 8765 + ], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "6e7756cef553a25a2a84227e8755d3d25046b9cd8758b23c698d34b3af895242", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 529740595, + "cpu_percentage": "8.35%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:30:18.587021Z", + "created": 
"2023-09-22T22:15:29.101409Z", + "deployed": "2023-09-22T22:30:17.339114Z", + "configured": "2023-09-22T22:30:18.758122Z" + }, + { + "style": "cephadm:v1", + "name": "agent.vm-00", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@agent.vm-00", + "enabled": true, + "state": "running", + "service_name": "agent", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "container_id": null, + "container_image_name": null, + "container_image_id": null, + "container_image_digests": null, + "version": null, + "started": null, + "created": "2023-09-22T22:33:34.708289Z", + "deployed": null, + "configured": "2023-09-22T22:33:34.722289Z" + }, + { + "style": "cephadm:v1", + "name": "osd.0", + "fsid": "588f83ba-5995-11ee-9e94-52540057a206", + "systemd_unit": "ceph-588f83ba-5995-11ee-9e94-52540057a206@osd.0", + "enabled": true, + "state": "running", + "service_name": "osd.foo", + "ports": [], + "ip": null, + "deployed_by": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "rank": null, + "rank_generation": null, + "extra_container_args": null, + "extra_entrypoint_args": null, + "memory_request": null, + "memory_limit": null, + "container_id": "93f71c60820b86901a45b3b1fe3dba3e3e677b37fd22310b7e7da3f67bb8ccd6", + "container_image_name": "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3", + "container_image_id": "674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55", + "container_image_digests": [ + "quay.io/adk3798/ceph@sha256:ff374767a4568f6d11a941ab763e7732cd7e071362328f7b6a7891bc4852a3a3" + ], + "memory_usage": 73410805, + "cpu_percentage": "6.54%", + "version": "18.0.0-5185-g7b3a4f2b", + "started": "2023-09-22T22:41:29.019587Z", + "created": "2023-09-22T22:41:03.615080Z", + "deployed": "2023-09-22T22:41:24.965222Z", + "configured": "2023-09-22T22:41:29.119250Z" + } +]""" + + now = str_to_datetime('2023-09-22T22:45:29.119250Z') + cephadm_module._cluster_fsid = '588f83ba-5995-11ee-9e94-52540057a206' + with mock.patch("cephadm.module.datetime_now", lambda: now): + cephadm_module._process_ls_output('vm-00', json.loads(sample_ls_output)) + assert 'vm-00' in cephadm_module.cache.daemons + assert 'mon.vm-00' in cephadm_module.cache.daemons['vm-00'] + assert 'mgr.vm-00.mpexeg' in cephadm_module.cache.daemons['vm-00'] + assert 'agent.vm-00' in cephadm_module.cache.daemons['vm-00'] + assert 'osd.0' in cephadm_module.cache.daemons['vm-00'] + + daemons = cephadm_module.cache.get_daemons_by_host('vm-00') + c_img_ids = [dd.container_image_id for dd in daemons if dd.daemon_type != 'agent'] + assert all(c_img_id == '674eb38037f1555bb7884ede5db47f1749486e7f12ecb416e34ada87c9934e55' for c_img_id in c_img_ids) + last_refreshes = [dd.last_refresh for dd in daemons] + assert all(lrf == now for lrf in last_refreshes) + versions = [dd.version for dd in daemons if dd.daemon_type != 'agent'] + assert all(version == '18.0.0-5185-g7b3a4f2b' for version in versions) + + osd = cephadm_module.cache.get_daemons_by_type('osd', 'vm-00')[0] + assert osd.cpu_percentage == '6.54%' + assert osd.memory_usage == 73410805 + assert osd.created == str_to_datetime('2023-09-22T22:41:03.615080Z') diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py 
b/src/pybind/mgr/cephadm/tests/test_completion.py new file mode 100644 index 000000000..327c12d2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_completion.py @@ -0,0 +1,40 @@ +import pytest + +from ..module import forall_hosts + + +class TestCompletion(object): + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map(self, input, expected, cephadm_module): + @forall_hosts + def run_forall(*args): + return str(args) + assert run_forall(input) == expected + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map_self(self, input, expected, cephadm_module): + class Run(object): + def __init__(self): + self.attr = 1 + + @forall_hosts + def run_forall(self, *args): + assert self.attr == 1 + return str(args) + + assert Run().run_forall(input) == expected diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py b/src/pybind/mgr/cephadm/tests/test_configchecks.py new file mode 100644 index 000000000..3cae0a27d --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py @@ -0,0 +1,668 @@ +import copy +import json +import logging +import ipaddress +import pytest +import uuid + +from time import time as now + +from ..configchecks import CephadmConfigChecks +from ..inventory import HostCache +from ..upgrade import CephadmUpgrade, UpgradeState +from orchestrator import DaemonDescription + +from typing import List, Dict, Any, Optional + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +host_sample = { + "arch": "x86_64", + "bios_date": "04/01/2014", + "bios_version": "F2", + "cpu_cores": 16, + "cpu_count": 2, + "cpu_load": { + "15min": 0.0, + "1min": 0.01, + "5min": 0.01 + }, + "cpu_model": "Intel® Xeon® Processor E5-2698 v3", + "cpu_threads": 64, + "flash_capacity": "4.0TB", + "flash_capacity_bytes": 4000797868032, + "flash_count": 2, + "flash_list": [ + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sda", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sdb", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + ], + "hdd_capacity": "16.0TB", + "hdd_capacity_bytes": 16003148120064, + "hdd_count": 4, + "hdd_list": [ + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdc", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdd", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sde", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdf", + 
"disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + ], + "hostname": "dummy", + "interfaces": { + "eth0": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.17.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth1": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.18.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth2": { + "driver": "r8169", + "iftype": "physical", + "ipv4_address": "10.7.19.1/24", + "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64", + "lower_devs_list": [], + "mtu": 1500, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [] + }, + }, + "kernel": "4.18.0-240.10.1.el8_3.x86_64", + "kernel_parameters": { + "net.ipv4.ip_nonlocal_bind": "0", + }, + "kernel_security": { + "SELINUX": "enforcing", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(enforcing, targeted)", + "type": "SELinux" + }, + "memory_available_kb": 19489212, + "memory_free_kb": 245164, + "memory_total_kb": 32900916, + "model": "StorageHeavy", + "nic_count": 3, + "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)", + "subscribed": "Yes", + "system_uptime": 777600.0, + "timestamp": now(), + "vendor": "Ceph Servers Inc", +} + + +def role_list(n: int) -> List[str]: + if n == 1: + return ['mon', 'mgr', 'osd'] + if n in [2, 3]: + return ['mon', 'mds', 'osd'] + + return ['osd'] + + +def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'): + # public network = eth0, cluster_network = eth1 + assert count > 3 + assert public_network + num_disks = host_sample['hdd_count'] + hosts = {} + daemons = {} + daemon_to_host = {} + osd_num = 0 + public_netmask = public_network.split('/')[1] + cluster_ip_list = [] + cluster_netmask = '' + + public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())] + if cluster_network: + cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())] + cluster_netmask = cluster_network.split('/')[1] + + for n in range(1, count + 1, 1): + + new_host = copy.deepcopy(host_sample) + hostname = f"node-{n}.ceph.com" + + new_host['hostname'] = hostname + new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}" + if cluster_ip_list: + new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}" + else: + new_host['interfaces']['eth1']['ipv4_address'] = '' + + hosts[hostname] = new_host + daemons[hostname] = {} + for r in role_list(n): + name = '' + if r == 'osd': + for n in range(num_disks): + osd = DaemonDescription( + hostname=hostname, daemon_type='osd', daemon_id=osd_num) + name = f"osd.{osd_num}" + daemons[hostname][name] = osd + daemon_to_host[name] = hostname + osd_num += 1 + else: + name = f"{r}.{hostname}" + daemons[hostname][name] = DaemonDescription( + hostname=hostname, daemon_type=r, daemon_id=hostname) + daemon_to_host[name] = hostname + + logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}") + return hosts, daemons, daemon_to_host + + +@pytest.fixture() +def mgr(): + """Provide a fake ceph mgr object preloaded with 
a configuration""" + mgr = FakeMgr() + mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \ + generate_testdata(public_network='10.9.64.0/24', cluster_network='') + mgr.module_option.update({ + "config_checks_enabled": True, + }) + yield mgr + + +class FakeMgr: + + def __init__(self): + self.datastore = {} + self.module_option = {} + self.health_checks = {} + self.default_version = 'quincy' + self.version_overrides = {} + self.daemon_to_host = {} + + self.cache = HostCache(self) + self.upgrade = CephadmUpgrade(self) + + def set_health_checks(self, checks: dict): + return + + def get_module_option(self, keyname: str) -> Optional[str]: + return self.module_option.get(keyname, None) + + def set_module_option(self, keyname: str, value: str) -> None: + return None + + def get_store(self, keyname: str, default=None) -> Optional[str]: + return self.datastore.get(keyname, None) + + def set_store(self, keyname: str, value: str) -> None: + self.datastore[keyname] = value + return None + + def _ceph_get_server(self) -> None: + pass + + def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]: + key = f"{daemon_type}.{daemon_id}" + if key in self.version_overrides: + logger.debug(f"override applied for {key}") + version_str = self.version_overrides[key] + else: + version_str = self.default_version + + return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]} + + def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]: + num_disks = host_sample['hdd_count'] + osd_num = 0 + service_map = [] + + for hostname in self.cache.facts: + + host_num = int(hostname.split('.')[0].split('-')[1]) + svc_list = [] + for r in role_list(host_num): + if r == 'osd': + for _n in range(num_disks): + svc_list.append({ + "type": "osd", + "id": osd_num, + }) + osd_num += 1 + else: + svc_list.append({ + "type": r, + "id": hostname, + }) + + service_map.append({"services": svc_list}) + logger.debug(f"services map - {json.dumps(service_map)}") + return service_map + + def use_repo_digest(self) -> None: + return None + + +class TestConfigCheck: + + def test_to_json(self, mgr): + checker = CephadmConfigChecks(mgr) + out = checker.to_json() + assert out + assert len(out) == len(checker.health_checks) + + def test_lookup_check(self, mgr): + checker = CephadmConfigChecks(mgr) + check = checker.lookup_check('osd_mtu_size') + logger.debug(json.dumps(check.to_json())) + assert check + assert check.healthcheck_name == "CEPHADM_CHECK_MTU" + + def test_old_checks_removed(self, mgr): + mgr.datastore.update({ + "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", ' + '"kernel_security": "enabled", "public_network": "enabled", ' + '"kernel_version": "enabled", "network_missing": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", ' + '"os_subscription": "enabled", "ceph_release": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert "bogus_one" not in checks + assert "bogus_two" not in checks + assert len(checks) == len(checker.health_checks) + + def test_new_checks(self, mgr): + mgr.datastore.update({ + "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert len(checks) == len(checker.health_checks) + + def test_no_issues(self, mgr): + checker = CephadmConfigChecks(mgr) + 
checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + + assert not mgr.health_checks + + def test_no_public_network(self, mgr): + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24" + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + logger.debug(mgr.health_checks) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \ + 'node-1.ceph.com does not have an interface on any public network' + + def test_missing_networks(self, mgr): + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.66.0/24'] + checker.run_checks() + + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \ + "10.9.66.0/24 not found on any host in the cluster" + + def test_bad_mtu_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \ + 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000' + + def test_bad_mtu_multiple(self, mgr): + + for n in [1, 5]: + bad_node = mgr.cache.facts[f'node-{n}.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2 + + def test_bad_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 100 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \ + 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000' + + def test_super_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 10000 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert not mgr.health_checks + + def test_release_mismatch_single(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } 
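+        # version_overrides feeds FakeMgr.get_metadata, so osd.1 reports
+        # 'pacific' while every other daemon reports the default 'quincy'.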
+ + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \ + 'osd.1 is running pacific (majority of cluster is using quincy)' + + def test_release_mismatch_multi(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + "osd.5": "octopus", + } + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2 + + def test_kernel_mismatch(self, mgr): + + bad_host = mgr.cache.facts['node-1.ceph.com'] + bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \ + "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18" + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1 + + def test_inconsistent_subscription(self, mgr): + + bad_host = mgr.cache.facts['node-5.ceph.com'] + bad_host['subscribed'] = "no" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \ + "node-5.ceph.com does not have an active subscription" + + def test_kernel_security_inconsistent(self, mgr): + + bad_node = mgr.cache.facts['node-3.ceph.com'] + bad_node['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \ + "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster" + + def test_release_and_bad_mtu(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 2 + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks + + def 
test_release_mtu_LSM(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 3 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks + + def test_release_mtu_LSM_subscription(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node1['subscribed'] = "no" + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 4 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \ + "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + + def test_skip_release_during_upgrade(self, mgr): + mgr.upgrade.upgrade_state = UpgradeState.from_json({ + 'target_name': 'wah', + 'progress_id': str(uuid.uuid4()), + 'target_id': 'wah', + 'error': '', + 'paused': False, + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}") + assert checker.skipped_checks_count == 1 + assert 'ceph_release' in checker.skipped_checks + + def test_skip_when_disabled(self, mgr): + mgr.module_option.update({ + "config_checks_enabled": "false" + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert checker.active_checks_count == 0 + + def test_skip_mtu_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 7 + + def test_skip_mtu_lsm_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + 
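+        # 'config_checks' holds a JSON object mapping check name to
+        # 'enabled'/'disabled'; run_checks() drops every disabled entry from
+        # the active set. A minimal sketch of that filtering step (the real
+        # logic lives in CephadmConfigChecks; this only illustrates the data
+        # format set up above):
+        overrides = json.loads(mgr.datastore['config_checks'])
+        disabled = {name for name, state in overrides.items()
+                    if state == 'disabled'}
+        assert disabled == {'osd_mtu_size', 'kernel_security'}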
checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks and \ + 'kernel_security' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 6 + assert not mgr.health_checks diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py new file mode 100644 index 000000000..7838ee5d4 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_facts.py @@ -0,0 +1,31 @@ +from ..import CephadmOrchestrator + +from .fixtures import wait + +from tests import mock + + +def test_facts(cephadm_module: CephadmOrchestrator): + facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}} + cephadm_module.cache.facts = facts + ret_facts = cephadm_module.get_facts('node-1.ceph.com') + assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}] + + +@mock.patch("cephadm.inventory.Inventory.update_known_hostnames") +def test_known_hostnames(_update_known_hostnames, cephadm_module: CephadmOrchestrator): + host_facts = {'hostname': 'host1.domain', + 'shortname': 'host1', + 'fqdn': 'host1.domain', + 'memory_free_kb': 37383384, + 'memory_total_kb': 40980612, + 'nic_count': 2} + cephadm_module.cache.update_host_facts('host1', host_facts) + _update_known_hostnames.assert_called_with('host1.domain', 'host1', 'host1.domain') + + host_facts = {'hostname': 'host1.domain', + 'memory_free_kb': 37383384, + 'memory_total_kb': 40980612, + 'nic_count': 2} + cephadm_module.cache.update_host_facts('host1', host_facts) + _update_known_hostnames.assert_called_with('host1.domain', '', '') diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py new file mode 100644 index 000000000..1f1d32e8b --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_migration.py @@ -0,0 +1,340 @@ +import json +import pytest + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm import CephadmOrchestrator +from cephadm.inventory import SPEC_STORE_PREFIX +from cephadm.migrations import LAST_MIGRATION +from cephadm.tests.fixtures import _run_cephadm, wait, with_host, receive_agent_metadata_all_hosts +from cephadm.serve import CephadmServe +from tests import mock + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_scheduler(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1', refresh_hosts=False): + with with_host(cephadm_module, 'host2', refresh_hosts=False): + + # emulate the old scheduler: + c = cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."): + CephadmServe(cephadm_module)._apply_all_services() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + # assert we need all daemons. 
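+            # (daemon info has not been refreshed for the hosts yet, so the
+            # migration must not advance past 0 until that happens)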
+ assert cephadm_module.migration_current == 0 + + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + receive_agent_metadata_all_hosts(cephadm_module) + cephadm_module.migration.migrate() + + CephadmServe(cephadm_module)._apply_all_services() + + out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())} + assert out == {'host1', 'host2'} + + c = cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # Sorry, for this hack, but I need to make sure, Migration thinks, + # we have updated all daemons already. + cephadm_module.cache.last_daemon_update['host1'] = datetime_now() + cephadm_module.cache.last_daemon_update['host2'] = datetime_now() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + out = [o.spec.placement for o in wait( + cephadm_module, cephadm_module.describe_service())] + assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec( + hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])] + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(hosts=['host1']) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'placement': { + 'count': 5, + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 2 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(count=5) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mds_one(cephadm_module: 
CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({ + 'spec': { + 'service_type': 'mds', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + # there is nothing to migrate, as the spec is gone now. + assert len(cephadm_module.spec_store.all_specs) == 0 + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'ganesha-foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['ganesha-foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator): + assert 'client.admin' not in cephadm_module.keys.keys + + cephadm_module.migration_current = 3 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator): + cephadm_module.migration_current = 0 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = LAST_MIGRATION + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = None + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = LAST_MIGRATION + 1 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = None + ongoing = cephadm_module.migration.is_migration_ongoing() + assert 
not ongoing
+    assert cephadm_module.migration_current == LAST_MIGRATION
+
+    cephadm_module.migration_current = LAST_MIGRATION + 1
+    ongoing = cephadm_module.migration.is_migration_ongoing()
+    assert ongoing
+    assert cephadm_module.migration_current == 0
+
+
+@pytest.mark.parametrize(
+    "rgw_spec_store_entry, should_migrate",
+    [
+        ({
+            'spec': {
+                'service_type': 'rgw',
+                'service_name': 'rgw.foo',
+                'service_id': 'foo',
+                'placement': {
+                    'hosts': ['host1']
+                },
+                'spec': {
+                    'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512',
+                    'rgw_frontend_port': '5000',
+                },
+            },
+            'created': datetime_to_str(datetime_now()),
+        }, True),
+        ({
+            'spec': {
+                'service_type': 'rgw',
+                'service_name': 'rgw.foo',
+                'service_id': 'foo',
+                'placement': {
+                    'hosts': ['host1']
+                },
+            },
+            'created': datetime_to_str(datetime_now()),
+        }, False),
+    ]
+)
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_entry, should_migrate):
+    with with_host(cephadm_module, 'host1'):
+        cephadm_module.set_store(
+            SPEC_STORE_PREFIX + 'rgw',
+            json.dumps(rgw_spec_store_entry, sort_keys=True),
+        )
+
+        # make sure rgw_migration_queue is populated accordingly
+        cephadm_module.migration_current = 1
+        cephadm_module.spec_store.load()
+        ls = json.loads(cephadm_module.get_store('rgw_migration_queue'))
+        assert 'rgw' == ls[0]['spec']['service_type']
+
+        # shortcut rgw_migration_queue loading by directly assigning
+        # ls output to rgw_migration_queue list
+        cephadm_module.migration.rgw_migration_queue = ls
+
+        # skip other migrations and go directly to 5_6 migration (RGW spec)
+        cephadm_module.migration_current = 5
+        cephadm_module.migration.migrate()
+        assert cephadm_module.migration_current == LAST_MIGRATION
+
+        if should_migrate:
+            # make sure the spec has been migrated and the param=value entries
+            # that were part of the rgw_frontend_type are now in the new
+            # 'rgw_frontend_extra_args' list
+            assert 'rgw.foo' in cephadm_module.spec_store.all_specs
+            rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo']
+            assert dict(rgw_spec.to_json()) == {'service_type': 'rgw',
+                                                'service_id': 'foo',
+                                                'service_name': 'rgw.foo',
+                                                'placement': {'hosts': ['host1']},
+                                                'spec': {
+                                                    'rgw_frontend_extra_args': ['tcp_nodelay=1',
+                                                                                'request_timeout_ms=65000',
+                                                                                'rgw_thread_pool_size=512'],
+                                                    'rgw_frontend_port': '5000',
+                                                    'rgw_frontend_type': 'beast',
+                                                }}
+        else:
+            # in a real environment, we still expect the spec to be there,
+            # just untouched by the migration. 
For this test specifically + # though, the spec will only have ended up in the spec store + # if it was migrated, so we can use this to test the spec + # was untouched + assert 'rgw.foo' not in cephadm_module.spec_store.all_specs diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py new file mode 100644 index 000000000..6685fcb2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py @@ -0,0 +1,298 @@ +import json + +from cephadm.services.osd import OSDRemovalQueue, OSD +import pytest +from tests import mock +from .fixtures import with_cephadm_module +from datetime import datetime + + +class MockOSD: + + def __init__(self, osd_id): + self.osd_id = osd_id + + +class TestOSDRemoval: + + @pytest.mark.parametrize( + "osd_id, osd_df, expected", + [ + # missing 'nodes' key + (1, dict(nodes=[]), -1), + # missing 'pgs' key + (1, dict(nodes=[dict(id=1)]), -1), + # id != osd_id + (1, dict(nodes=[dict(id=999, pgs=1)]), -1), + # valid + (1, dict(nodes=[dict(id=1, pgs=1)]), 1), + ] + ) + def test_get_pg_count(self, rm_util, osd_id, osd_df, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df): + assert rm_util.get_pg_count(osd_id) == expected + + @pytest.mark.parametrize( + "osds, ok_to_stop, expected", + [ + # no osd_ids provided + ([], [False], []), + # all osds are ok_to_stop + ([1, 2], [True], [1, 2]), + # osds are ok_to_stop after the second iteration + ([1, 2], [False, True], [2]), + # osds are never ok_to_stop, (taking the sample size `(len(osd_ids))` into account), + # expected to get False + ([1, 2], [False, False], []), + ] + ) + def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + assert rm_util.find_osd_stop_threshold(osds) == expected + + def test_process_removal_queue(self, rm_util): + # TODO: ! 
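+        # The queue asks 'osd ok-to-stop' for the whole candidate set and, as
+        # test_find_stop_threshold above exercises, retries with a smaller
+        # sample until the answer is yes. A minimal sketch of that back-off
+        # loop (hypothetical ok_to_stop predicate, not the real RemoveUtil):
+        def find_stop_threshold(osds, ok_to_stop):
+            while osds:
+                if ok_to_stop(osds):
+                    return osds
+                # halve the candidate list and try again
+                osds = osds[len(osds) // 2:]
+            return []
+
+        assert find_stop_threshold([1, 2], lambda batch: len(batch) < 2) == [2]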
+ # rm_util.process_removal_queue() + pass + + @pytest.mark.parametrize( + "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected", + [ + # drain one at a time, one already draining + (1, [1], [1], [True], 0), + # drain one at a time, none draining yet + (1, [], [1, 2, 3], [True, True, True], 1), + # drain one at a time, one already draining, none ok-to-stop + (1, [1], [1], [False], 0), + # drain one at a time, none draining, one ok-to-stop + (1, [], [1, 2, 3], [False, False, True], 1), + # drain three at a time, one already draining, all ok-to-stop + (3, [1], [1, 2, 3], [True, True, True], 2), + # drain two at a time, none already draining, none ok-to-stop + (2, [], [1, 2, 3], [False, False, False], 0), + # drain two at a time, none already draining, none idling + (2, [], [], [], 0), + ] + ) + def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected): + with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m: + with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds): + with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + removal_queue = OSDRemovalQueue(m) + assert len(removal_queue._ready_to_drain_osds()) == expected + + def test_ok_to_stop(self, rm_util): + rm_util.ok_to_stop([MockOSD(1)]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']}, + error_ok=True) + + def test_safe_to_destroy(self, rm_util): + rm_util.safe_to_destroy([1]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy', + 'ids': ['1']}, error_ok=True) + + def test_destroy_osd(self, rm_util): + rm_util.destroy_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_purge_osd(self, rm_util): + rm_util.purge_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_load(self, cephadm_module, rm_util): + data = json.dumps([ + { + "osd_id": 35, + "started": True, + "draining": True, + "stopped": False, + "replace": False, + "force": False, + "zap": False, + "nodename": "node2", + "drain_started_at": "2020-09-14T11:41:53.960463", + "drain_stopped_at": None, + "drain_done_at": None, + "process_started_at": "2020-09-14T11:41:52.245832" + } + ]) + cephadm_module.set_store('osd_remove_queue', data) + cephadm_module.to_remove_osds.load_from_store() + + expected = OSDRemovalQueue(cephadm_module) + expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True)) + assert cephadm_module.to_remove_osds == expected + + +class TestOSD: + + def test_start(self, osd_obj): + assert osd_obj.started is False + osd_obj.start() + assert osd_obj.started is True + assert osd_obj.stopped is False + + def test_start_draining_purge(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + ret = osd_obj.start_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0) + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is False + assert ret is True + + def test_start_draining_replace(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + osd_obj.replace = True + ret = osd_obj.start_draining() + 
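+        # in 'replace' mode draining only marks the OSD 'out' instead of
+        # reweighting it to 0, so the OSD id keeps its CRUSH position for the
+        # replacement disk (contrast with test_start_draining_purge above)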
osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out') + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is True + assert ret is True + + def test_start_draining_stopped(self, osd_obj): + osd_obj.stopped = True + ret = osd_obj.start_draining() + assert osd_obj.drain_started_at is None + assert ret is False + assert osd_obj.draining is False + + def test_stop_draining_replace(self, osd_obj): + osd_obj.replace = True + ret = osd_obj.stop_draining() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in') + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + def test_stop_draining_purge(self, osd_obj): + osd_obj.original_weight = 1.0 + ret = osd_obj.stop_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0) + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + @mock.patch('cephadm.services.osd.OSD.stop_draining') + def test_stop(self, stop_draining_mock, osd_obj): + osd_obj.stop() + assert osd_obj.started is False + assert osd_obj.stopped is True + stop_draining_mock.assert_called_once() + + @pytest.mark.parametrize( + "draining, empty, expected", + [ + # must be !draining! and !not empty! to yield True + (True, not True, True), + # not draining and not empty + (False, not True, False), + # not draining and empty + (False, True, False), + # draining and empty + (True, True, False), + ] + ) + def test_is_draining(self, osd_obj, draining, empty, expected): + with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)): + osd_obj.draining = draining + assert osd_obj.is_draining is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop") + def test_is_ok_to_stop(self, _, osd_obj): + osd_obj.is_ok_to_stop + osd_obj.rm_util.ok_to_stop.assert_called_once() + + @pytest.mark.parametrize( + "pg_count, expected", + [ + (0, True), + (1, False), + (9999, False), + (-1, False), + ] + ) + def test_is_empty(self, osd_obj, pg_count, expected): + with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count): + assert osd_obj.is_empty is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy") + def test_safe_to_destroy(self, _, osd_obj): + osd_obj.safe_to_destroy() + osd_obj.rm_util.safe_to_destroy.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag") + def test_down(self, _, osd_obj): + osd_obj.down() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down') + + @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd") + def test_destroy_osd(self, _, osd_obj): + osd_obj.destroy() + osd_obj.rm_util.destroy_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd") + def test_purge(self, _, osd_obj): + osd_obj.purge() + osd_obj.rm_util.purge_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count") + def test_pg_count(self, _, osd_obj): + osd_obj.get_pg_count() + osd_obj.rm_util.get_pg_count.assert_called_once() + + def test_drain_status_human_not_started(self, osd_obj): + assert osd_obj.drain_status_human() == 'not started' + + def test_drain_status_human_started(self, osd_obj): + osd_obj.started = True + assert osd_obj.drain_status_human() == 'started' + + def test_drain_status_human_draining(self, osd_obj): + osd_obj.started = True + osd_obj.draining = True + assert 
osd_obj.drain_status_human() == 'draining'
+
+    def test_drain_status_human_done(self, osd_obj):
+        osd_obj.started = True
+        osd_obj.draining = False
+        osd_obj.drain_done_at = datetime.utcnow()
+        assert osd_obj.drain_status_human() == 'done, waiting for purge'
+
+
+class TestOSDRemovalQueue:
+
+    def test_queue_size(self, osd_obj):
+        q = OSDRemovalQueue(mock.Mock())
+        assert q.queue_size() == 0
+        q.osds.add(osd_obj)
+        assert q.queue_size() == 1
+
+    @mock.patch("cephadm.services.osd.OSD.start")
+    @mock.patch("cephadm.services.osd.OSD.exists")
+    def test_enqueue(self, exist, start, osd_obj):
+        q = OSDRemovalQueue(mock.Mock())
+        q.enqueue(osd_obj)
+        osd_obj.start.assert_called_once()
+
+    @mock.patch("cephadm.services.osd.OSD.stop")
+    @mock.patch("cephadm.services.osd.OSD.exists")
+    def test_rm_raise(self, exist, stop, osd_obj):
+        q = OSDRemovalQueue(mock.Mock())
+        with pytest.raises(KeyError):
+            q.rm(osd_obj)
+        osd_obj.stop.assert_called_once()
+
+    @mock.patch("cephadm.services.osd.OSD.stop")
+    @mock.patch("cephadm.services.osd.OSD.exists")
+    def test_rm(self, exist, stop, osd_obj):
+        q = OSDRemovalQueue(mock.Mock())
+        q.osds.add(osd_obj)
+        q.rm(osd_obj)
+        osd_obj.stop.assert_called_once()
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
new file mode 100644
index 000000000..067cd5028
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -0,0 +1,1699 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+from typing import NamedTuple, List, Dict, Optional
+import pytest
+
+from ceph.deployment.hostspec import HostSpec
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec
+from ceph.deployment.hostspec import SpecValidationError
+
+from cephadm.module import HostAssignment
+from cephadm.schedule import DaemonPlacement
+from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError
+
+
+def wrapper(func):
+    # a helper that reverses the argument order: wrapped(*args)(expected)
+    # ends up calling func(expected, *args)
+    def inner(*args):
+        def inner2(expected):
+            func(expected, *args)
+        return inner2
+    return inner
+
+
+@wrapper
+def none(expected):
+    assert expected == []
+
+
+@wrapper
+def one_of(expected, *hosts):
+    if not isinstance(expected, list):
+        assert False, str(expected)
+    assert len(expected) == 1, f'one_of failed len({expected}) != 1'
+    assert expected[0] in hosts
+
+
+@wrapper
+def two_of(expected, *hosts):
+    if not isinstance(expected, list):
+        assert False, str(expected)
+    assert len(expected) == 2, f'two_of failed len({expected}) != 2'
+    matches = 0
+    for h in hosts:
+        matches += int(h in expected)
+    if matches != 2:
+        assert False, f'two of {hosts} not in {expected}'
+
+
+@wrapper
+def exactly(expected, *hosts):
+    assert expected == list(hosts)
+
+
+@wrapper
+def error(expected, kind, match):
+    assert isinstance(expected, kind), (str(expected), match)
+    assert str(expected) == match, (str(expected), match)
+
+
+@wrapper
+def _or(expected, *inners):
+    def catch(inner):
+        try:
+            inner(expected)
+        except AssertionError as e:
+            return e
+    result = [catch(i) for i in inners]
+    if None not in result:
+        assert False, f"_or failed: {expected}"
+
+
+def _always_true(_):
+    pass
+
+
+def k(s):
+    return [e for e in s.split(' ') if e]
+
+
+def get_result(key, results):
+    def match(one):
+        for o, k in zip(one, key):
+            if o != k and o != '*':
+                return False
+        return True
+    return [v for k, v in results if match(k)][0]
+
+
+def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count):
+
+    if spec_section == 
'hosts':
+        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
+            hosts=explicit,
+            count=count,
+        ))
+    elif spec_section == 'label':
+        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
+            label='mylabel',
+            count=count,
+        ))
+    elif spec_section == 'host_pattern':
+        pattern = {
+            'e': 'notfound',
+            '1': '1',
+            '12': '[1-2]',
+            '123': '*',
+        }[explicit_key]
+        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
+            host_pattern=pattern,
+            count=count,
+        ))
+    else:
+        assert False
+
+    hosts = [
+        HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h)
+        for h in hosts
+    ]
+
+    return mk_spec, hosts
+
+
+def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
+    key = ' '.join('N' if e is None else str(e) for e in key_elems)
+    try:
+        assert_res = get_result(k(key), results)
+    except IndexError:
+        try:
+            spec = mk_spec()
+            host_res, to_add, to_remove = HostAssignment(
+                spec=spec,
+                hosts=hosts,
+                unreachable_hosts=[],
+                draining_hosts=[],
+                daemons=daemons,
+            ).place()
+            if isinstance(host_res, list):
+                e = ', '.join(repr(h.hostname) for h in host_res)
+                assert False, f'`(k("{key}"), exactly({e})),` not found'
+            assert False, f'`(k("{key}"), ...),` not found'
+        except OrchestratorError as e:
+            assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found'
+
+    for _ in range(10):  # scheduler has a random component
+        try:
+            spec = mk_spec()
+            host_res, to_add, to_remove = HostAssignment(
+                spec=spec,
+                hosts=hosts,
+                unreachable_hosts=[],
+                draining_hosts=[],
+                daemons=daemons
+            ).place()
+
+            assert_res(sorted([h.hostname for h in host_res]))
+        except Exception as e:
+            assert_res(e)
+
+
+@pytest.mark.parametrize("dp,n,result",
+    [   # noqa: E128
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+            0,
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+        ),
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+            2,
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]),
+        ),
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]),
+            2,
+            DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]),
+        ),
+    ])
+def test_daemon_placement_renumber(dp, n, result):
+    assert dp.renumber_ports(n) == result
+
+
+@pytest.mark.parametrize(
+    'dp,dd,result',
+    [
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1'),
+            DaemonDescription('mgr', 'a', 'host1'),
+            True
+        ),
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+            DaemonDescription('mgr', 'a', 'host1'),
+            True
+        ),
+        (
+            DaemonPlacement(daemon_type='mon', hostname='host1', name='a'),
+            DaemonDescription('mgr', 'a', 'host1'),
+            False
+        ),
+        (
+            DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+            DaemonDescription('mgr', 'b', 'host1'),
+            False
+        ),
+    ])
+def test_daemon_placement_match(dp, dd, result):
+    assert dp.matches_daemon(dd) == result
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# |  + hosts used for explicit placement (explicit_key)
+# |  |   + count
+# |  |   | + section (host, label, pattern)
+# |  |   | |  + expected result
+# |  |   | |  |
+test_explicit_scheduler_results = [
+    (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')),
+    (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
+    (k("* e N p"), error(OrchestratorValidationError, 
'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')), + (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')), + (k("* e * *"), none), + (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")), + (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")), + (k("1 * * *"), exactly('1')), + (k("12 1 * *"), exactly('1')), + (k("12 12 1 *"), one_of('1', '2')), + (k("12 12 * *"), exactly('1', '2')), + (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")), + (k("12 123 1 *"), one_of('1', '2', '3')), + (k("12 123 * *"), two_of('1', '2', '3')), + (k("123 1 * *"), exactly('1')), + (k("123 12 1 *"), one_of('1', '2')), + (k("123 12 * *"), exactly('1', '2')), + (k("123 123 1 *"), one_of('1', '2', '3')), + (k("123 123 2 *"), two_of('1', '2', '3')), + (k("123 123 * *"), exactly('1', '2', '3')), +] + + +@pytest.mark.parametrize("spec_section_key,spec_section", + [ # noqa: E128 + ('h', 'hosts'), + ('l', 'label'), + ('p', 'host_pattern'), + ]) +@pytest.mark.parametrize("count", + [ # noqa: E128 + None, + 0, + 1, + 2, + 3, + ]) +@pytest.mark.parametrize("explicit_key, explicit", + [ # noqa: E128 + ('e', []), + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("host_key, hosts", + [ # noqa: E128 + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +def test_explicit_scheduler(host_key, hosts, + explicit_key, explicit, + count, + spec_section_key, spec_section): + + mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count) + run_scheduler_test( + results=test_explicit_scheduler_results, + mk_spec=mk_spec, + hosts=hosts, + daemons=[], + key_elems=(host_key, explicit_key, count, spec_section_key) + ) + + +# * first match from the top wins +# * where e=[], *=any +# +# + list of known hosts available for scheduling (host_key) +# | + hosts used for explicit placement (explicit_key) +# | | + count +# | | | + existing daemons +# | | | | + section (host, label, pattern) +# | | | | | + expected result +# | | | | | | +test_scheduler_daemons_results = [ + (k("* 1 * * *"), exactly('1')), + (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')), + (k("1 123 * * *"), exactly('1')), + (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')), + (k("12 123 N * *"), exactly('1', '2')), + (k("12 123 1 * *"), one_of('1', '2')), + (k("12 123 2 * *"), exactly('1', '2')), + (k("12 123 3 * *"), exactly('1', '2')), + (k("123 123 N * *"), exactly('1', '2', '3')), + (k("123 123 1 e *"), one_of('1', '2', '3')), + (k("123 123 1 1 *"), exactly('1')), + (k("123 123 1 3 *"), exactly('3')), + (k("123 123 1 12 *"), one_of('1', '2')), + (k("123 123 1 112 *"), one_of('1', '2')), + (k("123 123 1 23 *"), one_of('2', '3')), + (k("123 123 1 123 *"), one_of('1', '2', '3')), + (k("123 123 2 e *"), two_of('1', '2', '3')), + (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))), + (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))), + (k("123 123 2 12 *"), exactly('1', '2')), + (k("123 123 2 112 *"), exactly('1', '2')), + (k("123 123 2 23 *"), exactly('2', '3')), + (k("123 123 2 123 *"), two_of('1', '2', '3')), + 
(k("123 123 3 * *"), exactly('1', '2', '3')), +] + + +@pytest.mark.parametrize("spec_section_key,spec_section", + [ # noqa: E128 + ('h', 'hosts'), + ('l', 'label'), + ('p', 'host_pattern'), + ]) +@pytest.mark.parametrize("daemons_key, daemons", + [ # noqa: E128 + ('e', []), + ('1', ['1']), + ('3', ['3']), + ('12', ['1', '2']), + ('112', ['1', '1', '2']), # deal with existing co-located daemons + ('23', ['2', '3']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("count", + [ # noqa: E128 + None, + 1, + 2, + 3, + ]) +@pytest.mark.parametrize("explicit_key, explicit", + [ # noqa: E128 + ('1', ['1']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("host_key, hosts", + [ # noqa: E128 + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +def test_scheduler_daemons(host_key, hosts, + explicit_key, explicit, + count, + daemons_key, daemons, + spec_section_key, spec_section): + mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count) + dds = [ + DaemonDescription('mgr', d, d) + for d in daemons + ] + run_scheduler_test( + results=test_scheduler_daemons_results, + mk_spec=mk_spec, + hosts=hosts, + daemons=dds, + key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key) + ) + + +# ========================= + + +class NodeAssignmentTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + expected: List[str] + expected_add: List[str] + expected_remove: List[DaemonDescription] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove", + [ # noqa: E128 + # just hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(hosts=['smithi060']), + ['smithi060'], + [], + None, None, + ['mgr:smithi060'], ['mgr:smithi060'], [] + ), + # all_hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(host_pattern='*'), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host3'], + [] + ), + # all_hosts + count_per_host + NodeAssignmentTest( + 'mds', + PlacementSpec(host_pattern='*', count_per_host=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mds', 'a', 'host1'), + DaemonDescription('mds', 'b', 'host2'), + ], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # count that is bigger than the amount of hosts. Truncate to len(hosts) + # mgr should not be co-located to each other. + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # count that is bigger than the amount of hosts; wrap around. 
+        NodeAssignmentTest(
+            'mds',
+            PlacementSpec(count=6),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+            []
+        ),
+        # count + partial host list
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=3, hosts=['host3']),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+            ],
+            None, None,
+            ['mgr:host3'],
+            ['mgr:host3'],
+            ['mgr.a', 'mgr.b']
+        ),
+        # count + partial host list (with colo)
+        NodeAssignmentTest(
+            'mds',
+            PlacementSpec(count=3, hosts=['host3']),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mds', 'a', 'host1'),
+                DaemonDescription('mds', 'b', 'host2'),
+            ],
+            None, None,
+            ['mds:host3', 'mds:host3', 'mds:host3'],
+            ['mds:host3', 'mds:host3', 'mds:host3'],
+            ['mds.a', 'mds.b']
+        ),
+        # count 1 + partial host list
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=1, hosts=['host3']),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+            ],
+            None, None,
+            ['mgr:host3'],
+            ['mgr:host3'],
+            ['mgr.a', 'mgr.b']
+        ),
+        # count + partial host list + existing
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=2, hosts=['host3']),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+            ],
+            None, None,
+            ['mgr:host3'],
+            ['mgr:host3'],
+            ['mgr.a']
+        ),
+        # count + partial host list + existing (deterministic)
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=2, hosts=['host1']),
+            'host1 host2'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+            ],
+            None, None,
+            ['mgr:host1'],
+            [],
+            []
+        ),
+        # count + partial host list + existing (deterministic)
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=2, hosts=['host1']),
+            'host1 host2'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host2'),
+            ],
+            None, None,
+            ['mgr:host1'],
+            ['mgr:host1'],
+            ['mgr.a']
+        ),
+        # label only
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(label='foo'),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+            ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+            []
+        ),
+        # label + count (truncate to host list)
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=4, label='foo'),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+            ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+            []
+        ),
+        # label + count (with colo)
+        NodeAssignmentTest(
+            'mds',
+            PlacementSpec(count=6, label='foo'),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+            []
+        ),
+        # label only + count_per_host
+        NodeAssignmentTest(
+            'mds',
+            PlacementSpec(label='foo', count_per_host=3),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+             'mds:host1', 'mds:host2', 'mds:host3'],
+            ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+             'mds:host1', 'mds:host2', 'mds:host3'],
+            []
+        ),
+        # host_pattern
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(host_pattern='mgr*'),
+            'mgrhost1 mgrhost2 datahost'.split(),
+            [],
+            None, None,
+            ['mgr:mgrhost1', 'mgr:mgrhost2'],
+            ['mgr:mgrhost1', 'mgr:mgrhost2'],
+            []
+        ),
+        # host_pattern + count_per_host
+        NodeAssignmentTest(
'mds',
+            PlacementSpec(host_pattern='mds*', count_per_host=3),
+            'mdshost1 mdshost2 datahost'.split(),
+            [],
+            None, None,
+            ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+            ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+            []
+        ),
+        # label + count_per_host + ports
+        NodeAssignmentTest(
+            'rgw',
+            PlacementSpec(count=6, label='foo'),
+            'host1 host2 host3'.split(),
+            [],
+            None, None,
+            ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+             'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+            ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+             'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+            []
+        ),
+        # label + count_per_host + ports (+ existing)
+        NodeAssignmentTest(
+            'rgw',
+            PlacementSpec(count=6, label='foo'),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('rgw', 'a', 'host1', ports=[81]),
+                DaemonDescription('rgw', 'b', 'host2', ports=[80]),
+                DaemonDescription('rgw', 'c', 'host1', ports=[82]),
+            ],
+            None, None,
+            ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+             'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+            ['rgw:host1(*:80)', 'rgw:host3(*:80)',
+             'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+            ['rgw.c']
+        ),
+        # cephadm.py teuth case
+        NodeAssignmentTest(
+            'mgr',
+            PlacementSpec(count=3, hosts=['host1=y', 'host2=x']),
+            'host1 host2'.split(),
+            [
+                DaemonDescription('mgr', 'y', 'host1'),
+                DaemonDescription('mgr', 'x', 'host2'),
+            ],
+            None, None,
+            ['mgr:host1(name=y)', 'mgr:host2(name=x)'],
+            [], []
+        ),
+
+        # note: host -> rank mapping is pseudo-random based on svc name, so these
+        # host/rank pairs may seem random but they match the nfs.mynfs seed used by
+        # the test.
+
+        # ranked, fresh
+        NodeAssignmentTest(
+            'nfs',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            [],
+            {},
+            {0: {0: None}, 1: {0: None}, 2: {0: None}},
+            ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+            ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+            []
+        ),
+        # 21: ranked, exist
+        NodeAssignmentTest(
+            'nfs',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+            ],
+            {0: {1: '0.1'}},
+            {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}},
+            ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+            ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+            []
+        ),
+        # ranked, exist, different ranks
+        NodeAssignmentTest(
+            'nfs',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+                DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+            ],
+            {0: {1: '0.1'}, 1: {1: '1.1'}},
+            {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+            ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'],
+            ['nfs:host3(rank=2.0)'],
+            []
+        ),
+        # ranked, exist, different ranks (2)
+        NodeAssignmentTest(
+            'nfs',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+                DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1),
+            ],
+            {0: {1: '0.1'}, 1: {1: '1.1'}},
+            {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+            ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'],
+            ['nfs:host2(rank=2.0)'],
+            []
+        ),
+        # ranked, exist, extra ranks
+        NodeAssignmentTest(
+            'nfs',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            [
DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + ['nfs.4.5'] + ), + # 25: ranked, exist, extra ranks (scale down: kill off high rank) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host3 host2 host1'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'], + [], + ['nfs.2.5'] + ), + # ranked, exist, extra ranks (scale down hosts) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'], + ['nfs:host3(rank=1.6)'], + ['nfs.2.5', 'nfs.1.5'] + ), + # ranked, exist, duplicate rank + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2), + ], + {0: {0: '0.0'}, 1: {2: '1.2'}}, + {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + ['nfs.1.1'] + ), + # 28: ranked, all gens stale (failure during update cycle) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}}, + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'], + ['nfs:host3(rank=1.4)'], + ['nfs.1.2'] + ), + # ranked, not enough hosts + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}}, + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, scale down + NodeAssignmentTest( + 'nfs', + PlacementSpec(hosts=['host2']), + 'host1 host2'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + ['nfs:host2(rank=0.3)'], + ['nfs:host2(rank=0.3)'], + ['nfs.0.2', 'nfs.1.2', 
'nfs.2.2'] + ), + + ]) +def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map, + expected, expected_add, expected_remove): + spec = None + service_id = None + allow_colo = False + if service_type == 'rgw': + service_id = 'realm.zone' + allow_colo = True + elif service_type == 'mds': + service_id = 'myfs' + allow_colo = True + elif service_type == 'nfs': + service_id = 'mynfs' + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + if not spec: + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + all_slots, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + allow_colo=allow_colo, + rank_map=rank_map, + ).place() + + assert rank_map == post_rank_map + + got = [str(p) for p in all_slots] + num_wildcard = 0 + for i in expected: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + got = [str(p) for p in to_add] + num_wildcard = 0 + for i in expected_add: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + assert sorted([d.name() for d in to_remove]) == sorted(expected_remove) + + +class NodeAssignmentTest5(NamedTuple): + service_type: str + placement: PlacementSpec + available_hosts: List[str] + candidates_hosts: List[str] + + +@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates", + [ # noqa: E128 + NodeAssignmentTest5( + 'alertmanager', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host1 host4 host2'.split(), + ), + NodeAssignmentTest5( + 'prometheus', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host2 host4 host1'.split(), + ), + NodeAssignmentTest5( + 'grafana', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host2 host4 host3'.split(), + ), + NodeAssignmentTest5( + 'mgr', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host2 host1 host3'.split(), + ), + NodeAssignmentTest5( + 'mon', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host4 host2'.split(), + ), + NodeAssignmentTest5( + 'rgw', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host2 host4'.split(), + ), + NodeAssignmentTest5( + 'cephfs-mirror', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host3 host1 host2'.split(), + ), + ]) +def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates): + spec = None + service_id = None + allow_colo = False + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + candidates = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in available_hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=[], + allow_colo=allow_colo, + ).get_candidates() + + candidates_hosts = [h.hostname for h in candidates] + assert candidates_hosts == expected_candidates + + +class NodeAssignmentTest2(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: 
List[DaemonDescription] + expected_len: int + in_set: List[str] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set", + [ # noqa: E128 + # just count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'], + ), + + # hosts + (smaller) count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2'.split()), + 'host1 host2'.split(), + [], + 1, + ['host1', 'host2'], + ), + # hosts + (smaller) count, existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [DaemonDescription('mgr', 'mgr.a', 'host1')], + 1, + ['host1', 'host2', 'host3'], + ), + # hosts + (smaller) count, (more) existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + 1, + ['host1', 'host2'] + ), + # count + partial host list + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'] + ), + # label + count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, label='foo'), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'] + ), + ]) +def test_node_assignment2(service_type, placement, hosts, + daemons, expected_len, in_set): + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert len(hosts) == expected_len + for h in [h.hostname for h in hosts]: + assert h in in_set + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have", + [ # noqa: E128 + # hosts + (smaller) count, (more) existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=3, hosts='host3'.split()), + 'host1 host2 host3'.split(), + [], + 1, + ['host3'] + ), + # count + partial host list + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [], + 1, + ['host3'] + ), + ]) +def test_node_assignment3(service_type, placement, hosts, + daemons, expected_len, must_have): + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert len(hosts) == expected_len + for h in must_have: + assert h in [h.hostname for h in hosts] + + +class NodeAssignmentTest4(NamedTuple): + spec: ServiceSpec + networks: Dict[str, Dict[str, Dict[str, List[str]]]] + daemons: List[DaemonDescription] + expected: List[str] + expected_add: List[str] + expected_remove: List[DaemonDescription] + + +@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove", + [ # noqa: E128 + NodeAssignmentTest4( + ServiceSpec( + service_type='rgw', + service_id='foo', + placement=PlacementSpec(count=6, label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}}, + }, + [], + ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)', + 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)', + 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'], + 
['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)', + 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)', + 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'], + [] + ), + NodeAssignmentTest4( + IngressSpec( + service_type='ingress', + service_id='rgw.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='rgw.foo', + placement=PlacementSpec(label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}}, + }, + [], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + [] + ), + NodeAssignmentTest4( + IngressSpec( + service_type='ingress', + service_id='rgw.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='rgw.foo', + placement=PlacementSpec(label='foo'), + networks=['10.0.0.0/8'], + ), + { + 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}}, + 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}}, + 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}}, + }, + [ + DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1', + ports=[443, 8888]), + DaemonDescription('keepalived', 'b', 'host2'), + DaemonDescription('keepalived', 'c', 'host3'), + ], + ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1', 'keepalived:host2'], + ['haproxy:host2(10.0.0.2:443,8888)', + 'keepalived:host1'], + ['keepalived.c'] + ), + ]) +def test_node_assignment4(spec, networks, daemons, + expected, expected_add, expected_remove): + all_slots, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + allow_colo=True, + networks=networks, + primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type, + per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None, + ).place() + + got = [str(p) for p in all_slots] + num_wildcard = 0 + for i in expected: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + got = [str(p) for p in to_add] + num_wildcard = 0 + for i in expected_add: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + assert sorted([d.name() for d in to_remove]) == sorted(expected_remove) + + +@pytest.mark.parametrize("placement", + [ # noqa: E128 + ('1 *'), + ('* label:foo'), + ('* host1 host2'), + ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'), # > 63 chars + ]) +def test_bad_placements(placement): + try: + PlacementSpec.from_string(placement.split(' ')) + assert False + except SpecValidationError: + pass + + +class NodeAssignmentTestBadSpec(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + expected: str + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected", + [ # noqa: E128 + # unknown host + NodeAssignmentTestBadSpec( + 'mgr', + PlacementSpec(hosts=['unknownhost']), + ['knownhost'], + [], + "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts" + ), + # unknown host pattern + NodeAssignmentTestBadSpec( + 'mgr', + 
PlacementSpec(host_pattern='unknownhost'), + ['knownhost'], + [], + "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts" + ), + # unknown label + NodeAssignmentTestBadSpec( + 'mgr', + PlacementSpec(label='unknownlabel'), + [], + [], + "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel" + ), + ]) +def test_bad_specs(service_type, placement, hosts, daemons, expected): + with pytest.raises(OrchestratorValidationError) as e: + hosts, to_add, to_remove = HostAssignment( + spec=ServiceSpec(service_type, placement=placement), + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], + draining_hosts=[], + daemons=daemons, + ).place() + assert str(e.value) == expected + + +class ActiveAssignmentTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + expected: List[List[str]] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove", + [ + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host1', 'host2'], ['host1', 'host3']], + [[]], + [['mgr.b'], ['mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1', 'host3'], ['host2', 'host3']], + [[]], + [['mgr.a'], ['mgr.b']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3'), + ], + [['host2']], + [[]], + [['mgr.a', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host3']], + [[]], + [['mgr.a', 'mgr.b']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1'], ['host3']], + [[]], + [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host2', 'host3']], + [[]], + [['mgr.a']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'b', 'host2', is_active=True), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [['host1'], ['host2'], ['host3']], + [[]], + [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']] + ), + ActiveAssignmentTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1', is_active=True), + DaemonDescription('mgr', 'a2', 'host1'), + 
DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3'),
+            ],
+            [['host1']],
+            [[]],
+            [['mgr.a2', 'mgr.b', 'mgr.c']]
+        ),
+        ActiveAssignmentTest(
+            'mgr',
+            PlacementSpec(count=1),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                DaemonDescription('mgr', 'a2', 'host1', is_active=True),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3'),
+            ],
+            [['host1']],
+            [[]],
+            [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']]
+        ),
+        ActiveAssignmentTest(
+            'mgr',
+            PlacementSpec(count=2),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                DaemonDescription('mgr', 'a2', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3', is_active=True),
+            ],
+            [['host1', 'host3']],
+            [[]],
+            [['mgr.a2', 'mgr.b']]
+        ),
+        # Explicit placement should override preference for active daemon
+        ActiveAssignmentTest(
+            'mgr',
+            PlacementSpec(count=1, hosts=['host1']),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3', is_active=True),
+            ],
+            [['host1']],
+            [[]],
+            [['mgr.b', 'mgr.c']]
+        ),
+
+    ])
+def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove):
+
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=None,
+                       placement=placement)
+
+    hosts, to_add, to_remove = HostAssignment(
+        spec=spec,
+        hosts=[HostSpec(h) for h in hosts],
+        unreachable_hosts=[],
+        draining_hosts=[],
+        daemons=daemons,
+    ).place()
+    assert sorted([h.hostname for h in hosts]) in expected
+    assert sorted([h.hostname for h in to_add]) in expected_add
+    assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+    service_type: str
+    placement: PlacementSpec
+    hosts: List[str]
+    unreachable_hosts: List[str]
+    daemons: List[DaemonDescription]
+    expected_add: List[List[str]]
+    expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+    [
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(count=3),
+            'host1 host2 host3'.split(),
+            ['host2'],
+            [],
+            [['host1', 'host3']],
+            [[]],
+        ),
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(hosts=['host3']),
+            'host1 host2 host3'.split(),
+            ['host1'],
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3', is_active=True),
+            ],
+            [[]],
+            [['mgr.b']],
+        ),
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(count=3),
+            'host1 host2 host3 host4'.split(),
+            ['host1'],
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3', is_active=True),
+            ],
+            [[]],
+            [[]],
+        ),
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(count=1),
+            'host1 host2 host3 host4'.split(),
+            'host1 host3'.split(),
+            [
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mgr', 'b', 'host2'),
+                DaemonDescription('mgr', 'c', 'host3', is_active=True),
+            ],
+            [[]],
+            [['mgr.b']],
+        ),
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(count=3),
+            'host1 host2 host3 host4'.split(),
+            ['host2'],
+            [],
+            [['host1', 'host3', 'host4']],
+            [[]],
+        ),
+        UnreachableHostsTest(
+            'mgr',
+            PlacementSpec(count=3),
+            'host1 host2 host3 host4'.split(),
+            'host1 host4'.split(),
+            [],
+            [['host2', 'host3']],
+            [[]],
+        ),
+
+ ]) +def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove): + + spec = ServiceSpec(service_type=service_type, + service_id=None, + placement=placement) + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[HostSpec(h) for h in unreachable_hosts], + draining_hosts=[], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove + + +class RescheduleFromOfflineTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + maintenance_hosts: List[str] + offline_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove", + [ + RescheduleFromOfflineTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [], + ['host2'], + [ + DaemonDescription('nfs', 'a', 'host1'), + DaemonDescription('nfs', 'b', 'host2'), + ], + [['host3']], + [[]], + ), + RescheduleFromOfflineTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + ['host2'], + [], + [ + DaemonDescription('nfs', 'a', 'host1'), + DaemonDescription('nfs', 'b', 'host2'), + ], + [[]], + [[]], + ), + RescheduleFromOfflineTest( + 'mon', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [], + ['host2'], + [ + DaemonDescription('mon', 'a', 'host1'), + DaemonDescription('mon', 'b', 'host2'), + ], + [[]], + [[]], + ), + RescheduleFromOfflineTest( + 'ingress', + PlacementSpec(count=1), + 'host1 host2'.split(), + [], + ['host2'], + [ + DaemonDescription('haproxy', 'b', 'host2'), + DaemonDescription('keepalived', 'b', 'host2'), + ], + [['host1']], + [[]], + ), + ]) +def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove): + + if service_type == 'ingress': + spec = \ + IngressSpec( + service_type='ingress', + service_id='nfs-ha.foo', + frontend_port=443, + monitor_port=8888, + virtual_ip='10.0.0.20/8', + backend_service='nfs-ha.foo', + placement=placement, + ) + else: + spec = \ + ServiceSpec( + service_type=service_type, + service_id='test', + placement=placement, + ) + + host_specs = [HostSpec(h) for h in hosts] + for h in host_specs: + if h.hostname in offline_hosts: + h.status = 'offline' + if h.hostname in maintenance_hosts: + h.status = 'maintenance' + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=host_specs, + unreachable_hosts=[h for h in host_specs if h.status], + draining_hosts=[], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove + + +class DrainExplicitPlacementTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + maintenance_hosts: List[str] + offline_hosts: List[str] + draining_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,draining_hosts,daemons,expected_add,expected_remove", + [ + DrainExplicitPlacementTest( + 'crash', + PlacementSpec(hosts='host1 host2 host3'.split()), + 'host1 host2 host3 host4'.split(), + [], + [], + ['host3'], + [ + DaemonDescription('crash', 
'host1', 'host1'),
+                DaemonDescription('crash', 'host2', 'host2'),
+                DaemonDescription('crash', 'host3', 'host3'),
+            ],
+            [[]],
+            [['crash.host3']],
+        ),
+        DrainExplicitPlacementTest(
+            'crash',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            [],
+            [],
+            ['host1', 'host4'],
+            [
+                DaemonDescription('crash', 'host1', 'host1'),
+                DaemonDescription('crash', 'host3', 'host3'),
+            ],
+            [['host2']],
+            [['crash.host1']],
+        ),
+    ])
+def test_drain_from_explicit_placement(service_type, placement, hosts, maintenance_hosts, offline_hosts, draining_hosts, daemons, expected_add, expected_remove):
+
+    spec = ServiceSpec(service_type=service_type,
+                       service_id='test',
+                       placement=placement)
+
+    host_specs = [HostSpec(h) for h in hosts]
+    draining_host_specs = [HostSpec(h) for h in draining_hosts]
+    for h in host_specs:
+        if h.hostname in offline_hosts:
+            h.status = 'offline'
+        if h.hostname in maintenance_hosts:
+            h.status = 'maintenance'
+
+    hosts, to_add, to_remove = HostAssignment(
+        spec=spec,
+        hosts=host_specs,
+        unreachable_hosts=[h for h in host_specs if h.status],
+        draining_hosts=draining_host_specs,
+        daemons=daemons,
+    ).place()
+    assert sorted([h.hostname for h in to_add]) in expected_add
+    assert sorted([h.name() for h in to_remove]) in expected_remove
diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
new file mode 100644
index 000000000..ff98a1388
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
@@ -0,0 +1,178 @@
+from unittest.mock import MagicMock
+from cephadm.service_discovery import Root
+
+
+class FakeDaemonDescription:
+    def __init__(self, ip, ports, hostname, service_name='', daemon_type=''):
+        self.ip = ip
+        self.ports = ports
+        self.hostname = hostname
+        self._service_name = service_name
+        self.daemon_type = daemon_type
+
+    def service_name(self):
+        return self._service_name
+
+
+class FakeCache:
+    def get_daemons_by_service(self, service_type):
+        if service_type == 'ceph-exporter':
+            return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
+                    FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
+
+        return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
+                FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
+
+    def get_daemons_by_type(self, daemon_type):
+        return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'),
+                FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')]
+
+
+class FakeInventory:
+    def get_addr(self, name: str):
+        return '1.2.3.4'
+
+
+class FakeServiceSpec:
+    def __init__(self, port):
+        self.monitor_port = port
+
+
+class FakeSpecDescription:
+    def __init__(self, port):
+        self.spec = FakeServiceSpec(port)
+
+
+class FakeSpecStore:
+    def __init__(self, mgr):
+        self.mgr = mgr
+        self._specs = {'ingress': FakeSpecDescription(9049)}
+
+    def __contains__(self, name):
+        return name in self._specs
+
+    def __getitem__(self, name):
+        # the test fixture only ever looks up the 'ingress' spec
+        return self._specs['ingress']
+
+
+class FakeMgr:
+    def __init__(self):
+        self.config = ''
+        self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+        self.mon_command = MagicMock(side_effect=self._check_mon_command)
+        self.template = MagicMock()
+        self.log = MagicMock()
+        self.inventory = FakeInventory()
+        self.cache = FakeCache()
+        self.spec_store = FakeSpecStore(self)
+
+    def get_mgr_id(self):
+        return 'mgr-1'
+
+    def list_servers(self):
+
+        servers = [
+            {'hostname': 'node0',
+             'ceph_version': '16.2',
+             'services': [{'type': 'mgr', 'id':
'mgr-1'}, {'type': 'mon'}]}, + {'hostname': 'node1', + 'ceph_version': '16.2', + 'services': [{'type': 'mgr', 'id': 'mgr-2'}, {'type': 'mon'}]} + ] + + return servers + + def _check_mon_command(self, cmd_dict, inbuf=None): + prefix = cmd_dict.get('prefix') + if prefix == 'get-cmd': + return 0, self.config, '' + if prefix == 'set-cmd': + self.config = cmd_dict.get('value') + return 0, 'value set', '' + return -1, '', 'error' + + def get_module_option_ex(self, module, option, default_value): + return "9283" + + +class TestServiceDiscovery: + + def test_get_sd_config_prometheus(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('mgr-prometheus') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['node0:9283'] + + def test_get_sd_config_node_exporter(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('node-exporter') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9100'] + assert cfg[0]['labels'] == {'instance': 'node0'} + assert cfg[1]['targets'] == ['1.2.3.5:9200'] + assert cfg[1]['labels'] == {'instance': 'node1'} + + def test_get_sd_config_alertmgr(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('alertmanager') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9100', '1.2.3.5:9200'] + + def test_get_sd_config_haproxy(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('haproxy') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9049'] + assert cfg[0]['labels'] == {'instance': 'ingress'} + + def test_get_sd_config_ceph_exporter(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('ceph-exporter') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9926'] + + def test_get_sd_config_invalid_service(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('invalid-service') + assert cfg == [] diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py new file mode 100644 index 000000000..2300b288d --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -0,0 +1,2725 @@ +from textwrap import dedent +import json +import urllib.parse +import yaml +from mgr_util import build_url + +import pytest + +from unittest.mock import MagicMock, call, patch, ANY + +from cephadm.serve import CephadmServe +from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ + RbdMirrorService, CrashService, CephadmDaemonDeploySpec +from cephadm.services.iscsi import IscsiService +from cephadm.services.nfs import NFSService +from cephadm.services.nvmeof import NvmeofService +from cephadm.services.osd import OSDService +from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ + NodeExporterService, LokiService, PromtailService +from cephadm.module import 
CephadmOrchestrator +from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \ + ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, TracingSpec, \ + PrometheusSpec, CephExporterSpec, NFSServiceSpec, NvmeofServiceSpec +from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect + +from ceph.utils import datetime_now + +from orchestrator import OrchestratorError +from orchestrator._interface import DaemonDescription + +from typing import Dict, List + +grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" + +grafana_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" + + +class FakeInventory: + def get_addr(self, name: str) -> str: + return '1.2.3.4' + + +class FakeMgr: + def __init__(self): + self.config = '' + self.set_mon_crush_locations: 
Dict[str, List[str]] = {} + self.check_mon_command = MagicMock(side_effect=self._check_mon_command) + self.mon_command = MagicMock(side_effect=self._check_mon_command) + self.template = MagicMock() + self.log = MagicMock() + self.inventory = FakeInventory() + + def _check_mon_command(self, cmd_dict, inbuf=None): + prefix = cmd_dict.get('prefix') + if prefix == 'get-cmd': + return 0, self.config, '' + if prefix == 'set-cmd': + self.config = cmd_dict.get('value') + return 0, 'value set', '' + if prefix in ['auth get']: + return 0, '[foo]\nkeyring = asdf\n', '' + if prefix == 'quorum_status': + # actual quorum status output from testing + # note in this output all of the mons have blank crush locations + return 0, """{"election_epoch": 14, "quorum": [0, 1, 2], "quorum_names": ["vm-00", "vm-01", "vm-02"], "quorum_leader_name": "vm-00", "quorum_age": 101, "features": {"quorum_con": "4540138322906710015", "quorum_mon": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"]}, "monmap": {"epoch": 3, "fsid": "9863e1b8-6f24-11ed-8ad8-525400c13ad2", "modified": "2022-11-28T14:00:29.972488Z", "created": "2022-11-28T13:57:55.847497Z", "min_mon_release": 18, "min_mon_release_name": "reef", "election_strategy": 1, "disallowed_leaders: ": "", "stretch_mode": false, "tiebreaker_mon": "", "features": {"persistent": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus", "octopus", "pacific", "elector-pinging", "quincy", "reef"], "optional": []}, "mons": [{"rank": 0, "name": "vm-00", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.61:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.61:6789", "nonce": 0}]}, "addr": "192.168.122.61:6789/0", "public_addr": "192.168.122.61:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 1, "name": "vm-01", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.63:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.63:6789", "nonce": 0}]}, "addr": "192.168.122.63:6789/0", "public_addr": "192.168.122.63:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}, {"rank": 2, "name": "vm-02", "public_addrs": {"addrvec": [{"type": "v2", "addr": "192.168.122.82:3300", "nonce": 0}, {"type": "v1", "addr": "192.168.122.82:6789", "nonce": 0}]}, "addr": "192.168.122.82:6789/0", "public_addr": "192.168.122.82:6789/0", "priority": 0, "weight": 0, "crush_location": "{}"}]}}""", '' + if prefix == 'mon set_location': + self.set_mon_crush_locations[cmd_dict.get('name')] = cmd_dict.get('args') + return 0, '', '' + return -1, '', 'error' + + def get_minimal_ceph_conf(self) -> str: + return '' + + def get_mgr_ip(self) -> str: + return '1.2.3.4' + + +class TestCephadmService: + def test_set_service_url_on_dashboard(self): + # pylint: disable=protected-access + mgr = FakeMgr() + service_url = 'http://svc:1000' + service = GrafanaService(mgr) + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + assert mgr.config == service_url + + # set-cmd should not be called if value doesn't change + mgr.check_mon_command.reset_mock() + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'}) + + def _get_services(self, mgr): + # services: + osd_service = OSDService(mgr) + nfs_service = NFSService(mgr) + mon_service = MonService(mgr) + mgr_service = MgrService(mgr) + mds_service = MdsService(mgr) + rgw_service = RgwService(mgr) + rbd_mirror_service = 
RbdMirrorService(mgr) + grafana_service = GrafanaService(mgr) + alertmanager_service = AlertmanagerService(mgr) + prometheus_service = PrometheusService(mgr) + node_exporter_service = NodeExporterService(mgr) + loki_service = LokiService(mgr) + promtail_service = PromtailService(mgr) + crash_service = CrashService(mgr) + iscsi_service = IscsiService(mgr) + nvmeof_service = NvmeofService(mgr) + cephadm_services = { + 'mon': mon_service, + 'mgr': mgr_service, + 'osd': osd_service, + 'mds': mds_service, + 'rgw': rgw_service, + 'rbd-mirror': rbd_mirror_service, + 'nfs': nfs_service, + 'grafana': grafana_service, + 'alertmanager': alertmanager_service, + 'prometheus': prometheus_service, + 'node-exporter': node_exporter_service, + 'loki': loki_service, + 'promtail': promtail_service, + 'crash': crash_service, + 'iscsi': iscsi_service, + 'nvmeof': nvmeof_service, + } + return cephadm_services + + def test_get_auth_entity(self): + mgr = FakeMgr() + cephadm_services = self._get_services(mgr) + + for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]: + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1", "host") + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1", "") + assert "client.%s.id1" % (daemon_type) == \ + cephadm_services[daemon_type].get_auth_entity("id1") + + assert "client.crash.host" == \ + cephadm_services["crash"].get_auth_entity("id1", "host") + with pytest.raises(OrchestratorError): + cephadm_services["crash"].get_auth_entity("id1", "") + cephadm_services["crash"].get_auth_entity("id1") + + assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host") + assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "") + assert "mon." 
== cephadm_services["mon"].get_auth_entity("id1") + + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host") + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "") + assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1") + + for daemon_type in ["osd", "mds"]: + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1", "host") + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1", "") + assert "%s.id1" % daemon_type == \ + cephadm_services[daemon_type].get_auth_entity("id1") + + # services based on CephadmService shouldn't have get_auth_entity + with pytest.raises(AttributeError): + for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']: + cephadm_services[daemon_type].get_auth_entity("id1", "host") + cephadm_services[daemon_type].get_auth_entity("id1", "") + cephadm_services[daemon_type].get_auth_entity("id1") + + +class TestISCSIService: + + mgr = FakeMgr() + iscsi_service = IscsiService(mgr) + + iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a") + iscsi_spec.daemon_type = "iscsi" + iscsi_spec.daemon_id = "a" + iscsi_spec.spec = MagicMock() + iscsi_spec.spec.daemon_type = "iscsi" + iscsi_spec.spec.ssl_cert = '' + iscsi_spec.api_user = "user" + iscsi_spec.api_password = "password" + iscsi_spec.api_port = 5000 + iscsi_spec.api_secure = False + iscsi_spec.ssl_cert = "cert" + iscsi_spec.ssl_key = "key" + + mgr.spec_store = MagicMock() + mgr.spec_store.all_specs.get.return_value = iscsi_spec + + def test_iscsi_client_caps(self): + + iscsi_daemon_spec = CephadmDaemonDeploySpec( + host='host', daemon_id='a', service_name=self.iscsi_spec.service_name()) + + self.iscsi_service.prepare_create(iscsi_daemon_spec) + + expected_caps = ['mon', + 'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"', + 'mgr', 'allow command "service status"', + 'osd', 'allow rwx'] + + expected_call = call({'prefix': 'auth get-or-create', + 'entity': 'client.iscsi.a', + 'caps': expected_caps}) + expected_call2 = call({'prefix': 'auth caps', + 'entity': 'client.iscsi.a', + 'caps': expected_caps}) + expected_call3 = call({'prefix': 'auth get', + 'entity': 'client.iscsi.a'}) + + assert expected_call in self.mgr.mon_command.mock_calls + assert expected_call2 in self.mgr.mon_command.mock_calls + assert expected_call3 in self.mgr.mon_command.mock_calls + + @patch('cephadm.utils.resolve_ip') + def test_iscsi_dashboard_config(self, mock_resolve_ip): + + self.mgr.check_mon_command = MagicMock() + self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '') + + # Case 1: use IPV4 address + id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1", + daemon_id="a", ip='192.168.1.1') + daemon_list = [id1] + mock_resolve_ip.return_value = '192.168.1.1' + + self.iscsi_service.config_dashboard(daemon_list) + + dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add', + 'name': 'testhost1'}, + 'http://user:password@192.168.1.1:5000') + + assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls + + # Case 2: use IPV6 address + self.mgr.check_mon_command.reset_mock() + + id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1", + daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210') + mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210' + + self.iscsi_service.config_dashboard(daemon_list) + + dashboard_expected_call = 
call({'prefix': 'dashboard iscsi-gateway-add',
+                                        'name': 'testhost1'},
+                                       'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+        assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+        # Case 3: IPV6 address, secure protocol
+        self.mgr.check_mon_command.reset_mock()
+
+        self.iscsi_spec.api_secure = True
+
+        self.iscsi_service.config_dashboard(daemon_list)
+
+        dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+                                        'name': 'testhost1'},
+                                       'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+        assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+    @patch("cephadm.services.iscsi.IscsiService.get_trusted_ips")
+    def test_iscsi_config(self, _get_trusted_ips, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+        iscsi_daemon_id = 'testpool.test.qwert'
+        trusted_ips = '1.1.1.1,2.2.2.2'
+        api_port = 3456
+        api_user = 'test-user'
+        api_password = 'test-password'
+        pool = 'testpool'
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        _get_name.return_value = iscsi_daemon_id
+        _get_trusted_ips.return_value = trusted_ips
+
+        iscsi_gateway_conf = f"""# This file is generated by cephadm.
+[config]
+cluster_client_name = client.iscsi.{iscsi_daemon_id}
+pool = {pool}
+trusted_ip_list = {trusted_ips}
+minimum_gateways = 1
+api_port = {api_port}
+api_user = {api_user}
+api_password = {api_password}
+api_secure = False
+log_to_stderr = True
+log_to_stderr_prefix = debug
+log_to_file = False"""
+
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, IscsiServiceSpec(service_id=pool,
+                                                               api_port=api_port,
+                                                               api_user=api_user,
+                                                               api_password=api_password,
+                                                               pool=pool,
+                                                               trusted_ip_list=trusted_ips)):
+                _run_cephadm.assert_called_with(
+                    'test',
+                    f'iscsi.{iscsi_daemon_id}',
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps({
+                        "fsid": "fsid",
+                        "name": f'iscsi.{iscsi_daemon_id}',
+                        "image": '',
+                        "deploy_arguments": [],
+                        "params": {
+                            'tcp_ports': [api_port],
+                        },
+                        "meta": {
+                            'service_name': f'iscsi.{pool}',
+                            'ports': [api_port],
+                            'ip': None,
+                            'deployed_by': [],
+                            'rank': None,
+                            'rank_generation': None,
+                            'extra_container_args': None,
+                            'extra_entrypoint_args': None,
+                        },
+                        "config_blobs": {
+                            "config": "",
+                            "keyring": f"[client.iscsi.{iscsi_daemon_id}]\nkey = None\n",
+                            "files": {
+                                "iscsi-gateway.cfg": iscsi_gateway_conf,
+                            },
+                        }
+                    }),
+                )
+
+
+class TestNVMEOFService:
+
+    mgr = FakeMgr()
+    nvmeof_service = NvmeofService(mgr)
+
+    nvmeof_spec = NvmeofServiceSpec(service_type='nvmeof', service_id="a")
+    nvmeof_spec.daemon_type = 'nvmeof'
+    nvmeof_spec.daemon_id = "a"
+    nvmeof_spec.spec = MagicMock()
+    nvmeof_spec.spec.daemon_type = 'nvmeof'
+
+    mgr.spec_store = MagicMock()
+    mgr.spec_store.all_specs.get.return_value = nvmeof_spec
+
+    def test_nvmeof_client_caps(self):
+        pass
+
+    @patch('cephadm.utils.resolve_ip')
+    def test_nvmeof_dashboard_config(self, mock_resolve_ip):
+        pass
+
+    @patch("cephadm.inventory.Inventory.get_addr", lambda _, __: '192.168.100.100')
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
+    def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+        nvmeof_daemon_id = 'testpool.test.qwert'
+        pool = 'testpool'
+        tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288'
+        default_port = 5500
+        group = 'mygroup'
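+
+        # _run_cephadm is stubbed via async_side_effect to return an empty
+        # (out, err, rc) result, so nothing is really deployed; the test
+        # only captures and checks the JSON payload that would be piped to
+        # `cephadm _orch deploy` on stdin.
+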
_run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + _get_name.return_value = nvmeof_daemon_id + + nvmeof_gateway_conf = f"""# This file is generated by cephadm. +[gateway] +name = client.nvmeof.{nvmeof_daemon_id} +group = {group} +addr = 192.168.100.100 +port = {default_port} +enable_auth = False +state_update_notify = True +state_update_interval_sec = 5 + +[ceph] +pool = {pool} +config_file = /etc/ceph/ceph.conf +id = nvmeof.{nvmeof_daemon_id} + +[mtls] +server_key = ./server.key +client_key = ./client.key +server_cert = ./server.crt +client_cert = ./client.crt + +[spdk] +tgt_path = /usr/local/bin/nvmf_tgt +rpc_socket = /var/tmp/spdk.sock +timeout = 60 +log_level = WARN +conn_retries = 10 +transports = tcp +transport_tcp_options = {{"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}} +tgt_cmd_extra_args = {tgt_cmd_extra_args}\n""" + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool, + tgt_cmd_extra_args=tgt_cmd_extra_args, + group=group, + pool=pool)): + _run_cephadm.assert_called_with( + 'test', + f'nvmeof.{nvmeof_daemon_id}', + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": "nvmeof.testpool.test.qwert", + "image": "", + "deploy_arguments": [], + "params": { + "tcp_ports": [5500, 4420, 8009] + }, + "meta": { + "service_name": "nvmeof.testpool", + "ports": [5500, 4420, 8009], + "ip": None, + "deployed_by": [], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "extra_entrypoint_args": None + }, + "config_blobs": { + "config": "", + "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n", + "files": { + "ceph-nvmeof.conf": nvmeof_gateway_conf + } + } + }), + ) + + +class TestMonitoring: + def _get_config(self, url: str) -> str: + + return f""" + # This file is generated by cephadm. + # See https://prometheus.io/docs/alerting/configuration/ for documentation. 
+ + global: + resolve_timeout: 5m + http_config: + tls_config: + insecure_skip_verify: true + + route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' + + receivers: + - name: 'default' + webhook_configs: + - name: 'ceph-dashboard' + webhook_configs: + - url: '{url}/api/prometheus_receiver' + """ + + @pytest.mark.parametrize( + "dashboard_url,expected_yaml_url", + [ + # loopback address + ("http://[::1]:8080", "http://localhost:8080"), + # IPv6 + ( + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + ), + # IPv6 to FQDN + ( + "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080", + "http://mgr.fqdn.test:8080", + ), + # IPv4 + ( + "http://192.168.0.123:8080", + "http://192.168.0.123:8080", + ), + # IPv4 to FQDN + ( + "http://192.168.0.123:8080", + "http://mgr.fqdn.test:8080", + ), + ], + ) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("mgr_module.MgrModule.get") + @patch("socket.getfqdn") + def test_alertmanager_config( + self, + mock_getfqdn, + mock_get, + _run_cephadm, + cephadm_module: CephadmOrchestrator, + dashboard_url, + expected_yaml_url, + ): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + mock_get.return_value = {"services": {"dashboard": dashboard_url}} + purl = urllib.parse.urlparse(expected_yaml_url) + mock_getfqdn.return_value = purl.hostname + + with with_host(cephadm_module, "test"): + with with_service(cephadm_module, AlertManagerSpec()): + y = dedent(self._get_config(expected_yaml_url)).lstrip() + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9093, 9094], + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [9093, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "alertmanager.yml": y, + }, + "peers": [], + } + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("socket.getfqdn") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash') + def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + fqdn = 'host1.test' + _get_fqdn.return_value = fqdn + + def gen_cert(host, addr): + return ('mycert', 'mykey') + + def get_root_cert(): + return 'my_root_cert' + + with with_host(cephadm_module, 'test'): + cephadm_module.secure_monitoring_stack = True + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert) + cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert) + with with_service(cephadm_module, AlertManagerSpec()): + + y = dedent(f""" + # This file is generated by cephadm. + # See https://prometheus.io/docs/alerting/configuration/ for documentation. 
+ + global: + resolve_timeout: 5m + http_config: + tls_config: + ca_file: root_cert.pem + + route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' + + receivers: + - name: 'default' + webhook_configs: + - name: 'ceph-dashboard' + webhook_configs: + - url: 'http://{fqdn}:8080/api/prometheus_receiver' + """).lstrip() + + web_config = dedent(""" + tls_server_config: + cert_file: alertmanager.crt + key_file: alertmanager.key + basic_auth_users: + alertmanager_user: alertmanager_password_hash""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9093, 9094], + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [9093, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "alertmanager.yml": y, + 'alertmanager.crt': 'mycert', + 'alertmanager.key': 'mykey', + 'web.yml': web_config, + 'root_cert.pem': 'my_root_cert' + }, + 'peers': [], + 'web_config': '/etc/alertmanager/web.yml', + } + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') + with with_host(cephadm_module, 'test'): + # host "test" needs to have networks for keepalive to be placed + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ + with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \ + with_service(cephadm_module, s) as _, \ + with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \ + with_service(cephadm_module, IngressSpec(service_id='ingress', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip="1.2.3.4/32", + backend_service='rgw.foo')) as _, \ + with_service(cephadm_module, PrometheusSpec('prometheus')) as _: + + y = dedent(""" + # This file is generated by cephadm. 
+ global: + scrape_interval: 10s + evaluation_interval: 10s + rule_files: + - /etc/prometheus/alerting/* + + alerting: + alertmanagers: + - scheme: http + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager + + scrape_configs: + - job_name: 'ceph' + honor_labels: true + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + + - job_name: 'node' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + + - job_name: 'haproxy' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy + + - job_name: 'ceph-exporter' + honor_labels: true + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter + """).lstrip() + + _run_cephadm.assert_called_with( + 'test', + "prometheus.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'prometheus.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9095], + }, + "meta": { + 'service_name': 'prometheus', + 'ports': [9095], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "prometheus.yml": y, + "/etc/prometheus/alerting/custom_alerts.yml": "", + }, + 'retention_time': '15d', + 'retention_size': '0', + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash') + def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') + + def gen_cert(host, addr): + return ('mycert', 'mykey') + + with with_host(cephadm_module, 'test'): + cephadm_module.secure_monitoring_stack = True + cephadm_module.set_store(PrometheusService.USER_CFG_KEY, 'prometheus_user') + cephadm_module.set_store(PrometheusService.PASS_CFG_KEY, 'prometheus_plain_password') + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + cephadm_module.http_server.service_discovery.username = 'sd_user' + cephadm_module.http_server.service_discovery.password = 'sd_password' + cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock( + side_effect=gen_cert) + # host "test" needs to have networks for keepalive to be placed + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ + with_service(cephadm_module, s) as _, \ + with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \ + with_service(cephadm_module, IngressSpec(service_id='ingress', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip="1.2.3.4/32", + backend_service='rgw.foo')) as _, \ + with_service(cephadm_module, PrometheusSpec('prometheus')) as _: + + web_config = dedent(""" + tls_server_config: + cert_file: prometheus.crt + key_file: prometheus.key + basic_auth_users: + prometheus_user: prometheus_password_hash""").lstrip() + + y = dedent(""" + # This file is 
generated by cephadm. + global: + scrape_interval: 10s + evaluation_interval: 10s + rule_files: + - /etc/prometheus/alerting/* + + alerting: + alertmanagers: + - scheme: https + basic_auth: + username: alertmanager_user + password: alertmanager_plain_password + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + scrape_configs: + - job_name: 'ceph' + scheme: https + tls_config: + ca_file: mgr_prometheus_cert.pem + honor_labels: true + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'node' + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'haproxy' + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + + - job_name: 'ceph-exporter' + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + """).lstrip() + + _run_cephadm.assert_called_with( + 'test', + "prometheus.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'prometheus.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9095], + }, + "meta": { + 'service_name': 'prometheus', + 'ports': [9095], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + 'files': { + 'prometheus.yml': y, + 'root_cert.pem': '', + 'mgr_prometheus_cert.pem': '', + 'web.yml': web_config, + 'prometheus.crt': 'mycert', + 'prometheus.key': 'mykey', + "/etc/prometheus/alerting/custom_alerts.yml": "", + }, + 'retention_time': '15d', + 'retention_size': '0', + 'web_config': '/etc/prometheus/web.yml', + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, MonitoringSpec('loki')) as _: + + y = dedent(""" + # This file is generated by cephadm. 
+ auth_enabled: false + + server: + http_listen_port: 3100 + grpc_listen_port: 8080 + + common: + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + + schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "loki.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'loki.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3100], + }, + "meta": { + 'service_name': 'loki', + 'ports': [3100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "loki.yml": y + }, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, MonitoringSpec('promtail')) as _: + + y = dedent(""" + # This file is generated by cephadm. + server: + http_listen_port: 9080 + grpc_listen_port: 0 + + positions: + filename: /tmp/positions.yaml + + clients: + - url: http://:3100/loki/api/v1/push + + scrape_configs: + - job_name: system + static_configs: + - labels: + job: Cluster Logs + __path__: /var/log/ceph/**/*.log""").lstrip() + + _run_cephadm.assert_called_with( + 'test', + "promtail.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'promtail.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9080], + }, + "meta": { + 'service_name': 'promtail', + 'ports': [9080], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "promtail.yml": y + }, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) + def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + + with with_host(cephadm_module, "test"): + cephadm_module.set_store("test/grafana_crt", grafana_cert) + cephadm_module.set_store("test/grafana_key", grafana_key) + with with_service( + cephadm_module, PrometheusSpec("prometheus") + ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( + cephadm_module, GrafanaSpec("grafana") + ) as _: + files = { + 'grafana.ini': dedent(""" + # This file is generated by cephadm. + [users] + default_theme = light + [auth.anonymous] + enabled = true + org_name = 'Main Org.' 
+ org_role = 'Viewer' + [server] + domain = 'bootstrap.storage.lab' + protocol = https + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = 3000 + http_addr = + [snapshots] + external_enabled = false + [security] + disable_initial_admin_creation = true + cookie_secure = true + cookie_samesite = none + allow_embedding = true""").lstrip(), # noqa: W291 + 'provisioning/datasources/ceph-dashboard.yml': dedent(""" + # This file is generated by cephadm. + apiVersion: 1 + + deleteDatasources: + - name: 'Dashboard1' + orgId: 1 + + datasources: + - name: 'Dashboard1' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: 'http://[1::4]:9095' + basicAuth: false + isDefault: true + editable: false + + - name: 'Loki' + type: 'loki' + access: 'proxy' + url: '' + basicAuth: false + isDefault: false + editable: false""").lstrip(), + 'certs/cert_file': dedent(f""" + # generated by cephadm\n{grafana_cert}""").lstrip(), + 'certs/cert_key': dedent(f""" + # generated by cephadm\n{grafana_key}""").lstrip(), + } + + _run_cephadm.assert_called_with( + 'test', + "grafana.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'grafana.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3000], + }, + "meta": { + 'service_name': 'grafana', + 'ports': [3000], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": files, + }, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')): + out = cephadm_module.cephadm_services['grafana'].generate_config( + CephadmDaemonDeploySpec('test', 'daemon', 'grafana')) + assert out == ( + { + 'files': + { + 'grafana.ini': + '# This file is generated by cephadm.\n' + '[users]\n' + ' default_theme = light\n' + '[auth.anonymous]\n' + ' enabled = true\n' + " org_name = 'Main Org.'\n" + " org_role = 'Viewer'\n" + '[server]\n' + " domain = 'bootstrap.storage.lab'\n" + ' protocol = https\n' + ' cert_file = /etc/grafana/certs/cert_file\n' + ' cert_key = /etc/grafana/certs/cert_key\n' + ' http_port = 3000\n' + ' http_addr = \n' + '[snapshots]\n' + ' external_enabled = false\n' + '[security]\n' + ' admin_user = admin\n' + ' admin_password = secure\n' + ' cookie_secure = true\n' + ' cookie_samesite = none\n' + ' allow_embedding = true', + 'provisioning/datasources/ceph-dashboard.yml': + "# This file is generated by cephadm.\n" + "apiVersion: 1\n\n" + 'deleteDatasources:\n\n' + 'datasources:\n\n' + " - name: 'Loki'\n" + " type: 'loki'\n" + " access: 'proxy'\n" + " url: ''\n" + ' basicAuth: false\n' + ' isDefault: false\n' + ' editable: false', + 'certs/cert_file': ANY, + 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False']) + + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_grafana_no_anon_access(self, cephadm_module: CephadmOrchestrator): + # with anonymous_access set to False, expecting the [auth.anonymous] section + # to not be present in the grafana config. 
Note that we require an initial_admin_password + # to be provided when anonymous_access is False + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('mgr')) as _, \ + with_service(cephadm_module, GrafanaSpec(anonymous_access=False, initial_admin_password='secure')): + out = cephadm_module.cephadm_services['grafana'].generate_config( + CephadmDaemonDeploySpec('test', 'daemon', 'grafana')) + assert out == ( + { + 'files': + { + 'grafana.ini': + '# This file is generated by cephadm.\n' + '[users]\n' + ' default_theme = light\n' + '[server]\n' + " domain = 'bootstrap.storage.lab'\n" + ' protocol = https\n' + ' cert_file = /etc/grafana/certs/cert_file\n' + ' cert_key = /etc/grafana/certs/cert_key\n' + ' http_port = 3000\n' + ' http_addr = \n' + '[snapshots]\n' + ' external_enabled = false\n' + '[security]\n' + ' admin_user = admin\n' + ' admin_password = secure\n' + ' cookie_secure = true\n' + ' cookie_samesite = none\n' + ' allow_embedding = true', + 'provisioning/datasources/ceph-dashboard.yml': + "# This file is generated by cephadm.\n" + "apiVersion: 1\n\n" + 'deleteDatasources:\n\n' + 'datasources:\n\n' + " - name: 'Loki'\n" + " type: 'loki'\n" + " access: 'proxy'\n" + " url: ''\n" + ' basicAuth: false\n' + ' isDefault: false\n' + ' editable: false', + 'certs/cert_file': ANY, + 'certs/cert_key': ANY}}, ['secure_monitoring_stack:False']) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + + yaml_str = """service_type: alertmanager +service_name: alertmanager +placement: + count: 1 +spec: + port: 4200 +""" + yaml_file = yaml.safe_load(yaml_str) + spec = ServiceSpec.from_json(yaml_file) + + with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])): + with with_service(cephadm_module, spec): + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [4200, 9094], + 'reconfig': True, + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [4200, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": {}, + }), + ) + + +class TestRGWService: + + @pytest.mark.parametrize( + "frontend, ssl, extra_args, expected", + [ + ('beast', False, ['tcp_nodelay=1'], + 'beast endpoint=[fd00:fd00:fd00:3000::1]:80 tcp_nodelay=1'), + ('beast', True, ['tcp_nodelay=0', 'max_header_size=65536'], + 'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo tcp_nodelay=0 max_header_size=65536'), + ('civetweb', False, [], 'civetweb port=[fd00:fd00:fd00:3000::1]:80'), + ('civetweb', True, None, + 'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'), + ] + ) + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_update(self, frontend, ssl, extra_args, expected, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.cache.update_host_networks('host1', { + 'fd00:fd00:fd00:3000::/64': { + 'if0': ['fd00:fd00:fd00:3000::1'] + } + }) + s = RGWSpec(service_id="foo", + 
networks=['fd00:fd00:fd00:3000::/64'], + ssl=ssl, + rgw_frontend_type=frontend, + rgw_frontend_extra_args=extra_args) + with with_service(cephadm_module, s) as dds: + _, f, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': f'client.{dds[0]}', + 'key': 'rgw_frontends', + }) + assert f == expected + + +class TestMonService: + + def test_set_crush_locations(self, cephadm_module: CephadmOrchestrator): + mgr = FakeMgr() + mon_service = MonService(mgr) + mon_spec = ServiceSpec(service_type='mon', crush_locations={'vm-00': ['datacenter=a', 'rack=1'], 'vm-01': ['datacenter=a'], 'vm-02': ['datacenter=b', 'rack=3']}) + + mon_daemons = [ + DaemonDescription(daemon_type='mon', daemon_id='vm-00', hostname='vm-00'), + DaemonDescription(daemon_type='mon', daemon_id='vm-01', hostname='vm-01'), + DaemonDescription(daemon_type='mon', daemon_id='vm-02', hostname='vm-02') + ] + mon_service.set_crush_locations(mon_daemons, mon_spec) + assert 'vm-00' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-00'] == ['datacenter=a', 'rack=1'] + assert 'vm-01' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-01'] == ['datacenter=a'] + assert 'vm-02' in mgr.set_mon_crush_locations + assert mgr.set_mon_crush_locations['vm-02'] == ['datacenter=b', 'rack=3'] + + +class TestSNMPGateway: + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V2c', + snmp_destination='192.168.1.1:162', + credentials={ + 'snmp_community': 'public' + }) + + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + "snmp_community": spec.credentials.get('snmp_community') + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V2c', + snmp_destination='192.168.1.1:162', + credentials={ + 'snmp_community': 'public' + }, + port=9465) + + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + "snmp_community": spec.credentials.get('snmp_community') + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9465], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9465], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + 
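+ # The two V2c tests above differ only in the port: an explicit port=9465 on
+ # the SNMPGatewaySpec replaces the default 9464 in both params['tcp_ports']
+ # and meta['ports']. A rough sketch of a spec a user might apply to get the
+ # non-default port (illustrative only; it mirrors the constructor arguments
+ # used above, not necessarily the exact YAML schema):
+ #
+ #   service_type: snmp-gateway
+ #   spec:
+ #     snmp_version: V2c
+ #     snmp_destination: 192.168.1.1:162
+ #     port: 9465
+ #     credentials:
+ #       snmp_community: public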
@patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V3', + snmp_destination='192.168.1.1:162', + engine_id='8000C53F00000000', + credentials={ + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword' + }) + + config = { + 'destination': spec.snmp_destination, + 'snmp_version': spec.snmp_version, + 'snmp_v3_auth_protocol': 'SHA', + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_engine_id': '8000C53F00000000' + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = SNMPGatewaySpec( + snmp_version='V3', + snmp_destination='192.168.1.1:162', + engine_id='8000C53F00000000', + auth_protocol='MD5', + privacy_protocol='AES', + credentials={ + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_priv_password': 'mysecret', + }) + + config = { + 'destination': spec.snmp_destination, + 'snmp_version': spec.snmp_version, + 'snmp_v3_auth_protocol': 'MD5', + 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'), + 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'), + 'snmp_v3_engine_id': '8000C53F00000000', + 'snmp_v3_priv_protocol': spec.privacy_protocol, + 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'), + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "snmp-gateway.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'snmp-gateway.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9464], + }, + "meta": { + 'service_name': 'snmp-gateway', + 'ports': [9464], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + +class TestIngressService: + + @pytest.mark.parametrize( + "enable_haproxy_protocol", + [False, True], + ) + @patch("cephadm.inventory.Inventory.get_addr") + @patch("cephadm.utils.resolve_ip") + @patch("cephadm.inventory.HostCache.get_daemons_by_service") + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_nfs_multiple_nfs_same_rank( + self, + _run_cephadm, + _get_daemons_by_service, + _resolve_ip, _get_addr, + cephadm_module: CephadmOrchestrator, + enable_haproxy_protocol: bool, + ): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + def fake_resolve_ip(hostname: str) -> str: + if hostname == 'host1': + return '192.168.122.111' + elif hostname == 'host2': + return '192.168.122.222' + else: + 
return 'xxx.xxx.xxx.xxx' + _resolve_ip.side_effect = fake_resolve_ip + + def fake_get_addr(hostname: str) -> str: + return hostname + _get_addr.side_effect = fake_get_addr + + nfs_service = NFSServiceSpec( + service_id="foo", + placement=PlacementSpec( + count=1, + hosts=['host1', 'host2']), + port=12049, + enable_haproxy_protocol=enable_haproxy_protocol, + ) + + ispec = IngressSpec( + service_type='ingress', + service_id='nfs.foo', + backend_service='nfs.foo', + frontend_port=2049, + monitor_port=9049, + virtual_ip='192.168.122.100/24', + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + enable_haproxy_protocol=enable_haproxy_protocol, + ) + + cephadm_module.spec_store._specs = { + 'nfs.foo': nfs_service, + 'ingress.nfs.foo': ispec + } + cephadm_module.spec_store.spec_created = { + 'nfs.foo': datetime_now(), + 'ingress.nfs.foo': datetime_now() + } + + # in both test cases we'll do here, we want only the ip + # for the host1 nfs daemon as we'll end up giving that + # one higher rank_generation but the same rank as the one + # on host2 + haproxy_txt = ( + '# This file is generated by cephadm.\n' + 'global\n' + ' log 127.0.0.1 local2\n' + ' chroot /var/lib/haproxy\n' + ' pidfile /var/lib/haproxy/haproxy.pid\n' + ' maxconn 8000\n' + ' daemon\n' + ' stats socket /var/lib/haproxy/stats\n\n' + 'defaults\n' + ' mode tcp\n' + ' log global\n' + ' timeout queue 1m\n' + ' timeout connect 10s\n' + ' timeout client 1m\n' + ' timeout server 1m\n' + ' timeout check 10s\n' + ' maxconn 8000\n\n' + 'frontend stats\n' + ' mode http\n' + ' bind 192.168.122.100:9049\n' + ' bind host1:9049\n' + ' stats enable\n' + ' stats uri /stats\n' + ' stats refresh 10s\n' + ' stats auth admin:12345\n' + ' http-request use-service prometheus-exporter if { path /metrics }\n' + ' monitor-uri /health\n\n' + 'frontend frontend\n' + ' bind 192.168.122.100:2049\n' + ' default_backend backend\n\n' + 'backend backend\n' + ' mode tcp\n' + ' balance source\n' + ' hash-type consistent\n' + ) + if enable_haproxy_protocol: + haproxy_txt += ' default-server send-proxy-v2\n' + haproxy_txt += ' server nfs.foo.0 192.168.122.111:12049\n' + haproxy_expected_conf = { + 'files': {'haproxy.cfg': haproxy_txt} + } + + # verify we get the same cfg regardless of the order in which the nfs daemons are returned + # in this case both nfs are rank 0, so it should only take the one with rank_generation 1 a.k.a + # the one on host1 + nfs_daemons = [ + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]), + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]) + ] + _get_daemons_by_service.return_value = nfs_daemons + + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name())) + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + # swapping order now, should still pick out the one with the higher rank_generation + # in this case both nfs are rank 0, so it should only take the one with rank_generation 1 a.k.a + # the one on host1 + nfs_daemons = [ + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]), + DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]) + ] + 
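+ # Rough sketch (not executed by the test) of the per-rank selection assumed
+ # here: among daemons sharing a rank, keep only the one with the highest
+ # rank_generation, which is what makes the result independent of the order
+ # in which the daemons are returned:
+ #
+ #   newest = {}
+ #   for d in nfs_daemons:
+ #       cur = newest.get(d.rank)
+ #       if cur is None or (d.rank_generation or 0) > (cur.rank_generation or 0):
+ #           newest[d.rank] = d
+ #   # -> only foo.0.1.host1.qwerty (rank 0, generation 1) remains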
_get_daemons_by_service.return_value = nfs_daemons + + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name())) + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4'] + } + }) + + # the ingress backend + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ip="1.2.3.4/32") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.4\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind 1.2.3.4:8999\n ' + 'bind 1.2.3.7:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind 1.2.3.4:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # the ingress backend + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast', rgw_frontend_port=443, ssl=True) + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ip="1.2.3.4/32") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://[1::4]:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind 1.2.3.4:8999\n ' + 'bind 1::4:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind 1.2.3.4:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'default-server ssl\n ' + 'default-server verify none\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1::4:443 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + virtual_ips_list=["1.2.3.4/32"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + # Test with only 1 IP on the list, as it will fail with more VIPS but only one host. 
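+ # Note the difference in the expected haproxy.cfg below compared to the
+ # single-VIP test above:
+ #
+ #   virtual_ip:        bind 1.2.3.4:8089   (bind the exact VIP)
+ #   virtual_ips_list:  bind *:8089         (bind any address; the active VIP
+ #                                           may float between the listed IPs)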
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' + '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 30s\n ' + 'timeout server 30s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind *:8999\n ' + 'bind 1.2.3.7:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind *:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1.2.3.7:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_haproxy_port_ips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4/32'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ip = '1.2.3.100' + frontend_port = 8089 + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=frontend_port, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip=f"{ip}/24") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the haproxy conf based on the specified spec + haproxy_daemon_spec = 
cephadm_module.cephadm_services['ingress'].prepare_create( + CephadmDaemonDeploySpec( + host='test', + daemon_type='haproxy', + daemon_id='ingress', + service_name=ispec.service_name())) + + assert haproxy_daemon_spec.port_ips == {str(frontend_port): ip} + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.1'): + with with_host(cephadm_module, 'test2', addr='1.2.3.2'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.1'] + } + }) + cephadm_module.cache.update_host_networks('test2', { + '1.2.3.0/24': { + 'if0': ['1.2.3.2'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.2'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + placement=PlacementSpec(hosts=['test', 'test2']), + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://1.2.3.1:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '1.2.3.2\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.100/24 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + 'vrrp_instance VI_1 {\n ' + 'state BACKUP\n ' + 'priority 90\n ' + 'interface if1\n ' + 'virtual_router_id 51\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 100.100.100.1\n ' + 'unicast_peer {\n ' + '100.100.100.2\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '100.100.100.100/24 dev if1\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_keepalive_interface_host_filtering(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + # we need to make sure keepalive daemons will have an interface + # on the hosts we deploy them on in order to set up their VIP. 
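+ # A rough sketch of the eligibility rule this test assumes (hypothetical
+ # helper, not cephadm API): a host qualifies only if it has an interface in
+ # the subnet of every configured VIP:
+ #
+ #   import ipaddress
+ #   def host_has_all_vip_networks(host_networks, vips):
+ #       return all(
+ #           any(ipaddress.ip_interface(vip).network == ipaddress.ip_network(net)
+ #               for net in host_networks)
+ #           for vip in vips)
+ #
+ # Under that rule 'test' and 'test4' qualify, while 'test2' (only one
+ # matching subnet) and 'test3' (none) are filtered out, matching the
+ # assertions below.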
+ _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.1'): + with with_host(cephadm_module, 'test2', addr='1.2.3.2'): + with with_host(cephadm_module, 'test3', addr='1.2.3.3'): + with with_host(cephadm_module, 'test4', addr='1.2.3.3'): + # setup "test" and "test4" to have all the necessary interfaces, + # "test2" to have one of them (should still be filtered) + # and "test3" to have none of them + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.1'] + } + }) + cephadm_module.cache.update_host_networks('test2', { + '1.2.3.0/24': { + 'if0': ['1.2.3.2'] + }, + }) + cephadm_module.cache.update_host_networks('test4', { + '1.2.3.0/24': { + 'if0': ['1.2.3.4'] + }, + '100.100.100.0/24': { + 'if1': ['100.100.100.4'] + } + }) + + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + placement=PlacementSpec(hosts=['test', 'test2', 'test3', 'test4']), + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ips_list=["1.2.3.100/24", "100.100.100.100/24"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # since we're never actually going to refresh the host here, + # check the tmp daemons to see what was placed during the apply + daemons = cephadm_module.cache._get_tmp_daemons() + keepalive_daemons = [d for d in daemons if d.daemon_type == 'keepalived'] + hosts_deployed_on = [d.hostname for d in keepalive_daemons] + assert 'test' in hosts_deployed_on + assert 'test2' not in hosts_deployed_on + assert 'test3' not in hosts_deployed_on + assert 'test4' in hosts_deployed_on + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock()) + @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock()) + @patch("cephadm.services.nfs.NFSService.purge", MagicMock()) + @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock()) + def test_keepalive_only_nfs_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + with with_host(cephadm_module, 'test', addr='1.2.3.7'): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + } + }) + + # Check the ingress with multiple VIPs + s = NFSServiceSpec(service_id="foo", placement=PlacementSpec(count=1), + virtual_ip='1.2.3.0/24') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='nfs.foo', + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_ip='1.2.3.0/24', + keepalive_only=True) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + nfs_generated_conf, _ = cephadm_module.cephadm_services['nfs'].generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='foo.test.0.0', service_name=s.service_name())) + ganesha_conf = nfs_generated_conf['files']['ganesha.conf'] + assert "Bind_addr = 1.2.3.0/24" in ganesha_conf + + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + 
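+ # In keepalive_only mode there is no haproxy frontend to health-check, so
+ # the expected config below falls back to '/usr/bin/false' as the vrrp_script
+ # and the NFS daemon itself binds the virtual IP (the Bind_addr asserted
+ # above).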
keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/false"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1.2.3.1\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.0/24 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock()) + @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock()) + @patch("cephadm.services.nfs.NFSService.purge", MagicMock()) + @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock()) + @patch("cephadm.inventory.Inventory.keys") + @patch("cephadm.inventory.Inventory.get_addr") + @patch("cephadm.utils.resolve_ip") + @patch("cephadm.inventory.HostCache.get_daemons_by_service") + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_nfs_proxy_protocol( + self, + _run_cephadm, + _get_daemons_by_service, + _resolve_ip, + _get_addr, + _inventory_keys, + cephadm_module: CephadmOrchestrator, + ): + """Verify that setting enable_haproxy_protocol for both ingress and + nfs services sets the desired configuration parameters in both + the haproxy config and nfs ganesha config. + """ + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + def fake_resolve_ip(hostname: str) -> str: + if hostname in ('host1', "192.168.122.111"): + return '192.168.122.111' + elif hostname in ('host2', '192.168.122.222'): + return '192.168.122.222' + else: + raise KeyError(hostname) + _resolve_ip.side_effect = fake_resolve_ip + _get_addr.side_effect = fake_resolve_ip + + def fake_keys(): + return ['host1', 'host2'] + _inventory_keys.side_effect = fake_keys + + nfs_service = NFSServiceSpec( + service_id="foo", + placement=PlacementSpec( + count=1, + hosts=['host1', 'host2']), + port=12049, + enable_haproxy_protocol=True, + ) + + ispec = IngressSpec( + service_type='ingress', + service_id='nfs.foo', + backend_service='nfs.foo', + frontend_port=2049, + monitor_port=9049, + virtual_ip='192.168.122.100/24', + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + enable_haproxy_protocol=True, + ) + + cephadm_module.spec_store._specs = { + 'nfs.foo': nfs_service, + 'ingress.nfs.foo': ispec + } + cephadm_module.spec_store.spec_created = { + 'nfs.foo': datetime_now(), + 'ingress.nfs.foo': datetime_now() + } + + haproxy_txt = ( + '# This file is generated by cephadm.\n' + 'global\n' + ' log 127.0.0.1 local2\n' + ' chroot /var/lib/haproxy\n' + ' pidfile /var/lib/haproxy/haproxy.pid\n' + ' maxconn 8000\n' + ' daemon\n' + ' stats socket /var/lib/haproxy/stats\n\n' + 'defaults\n' + ' mode tcp\n' + ' log global\n' + ' timeout queue 1m\n' + ' timeout connect 10s\n' + ' timeout client 1m\n' + ' timeout server 1m\n' + ' timeout check 10s\n' + ' maxconn 8000\n\n' + 'frontend stats\n' + ' mode http\n' + ' bind 192.168.122.100:9049\n' + ' bind 192.168.122.111:9049\n' + ' stats enable\n' + ' stats uri /stats\n' + ' stats refresh 10s\n' + ' stats auth admin:12345\n' + ' http-request use-service prometheus-exporter if { path /metrics }\n' + ' monitor-uri 
/health\n\n' + 'frontend frontend\n' + ' bind 192.168.122.100:2049\n' + ' default_backend backend\n\n' + 'backend backend\n' + ' mode tcp\n' + ' balance source\n' + ' hash-type consistent\n' + ' default-server send-proxy-v2\n' + ' server nfs.foo.0 192.168.122.111:12049\n' + ) + haproxy_expected_conf = { + 'files': {'haproxy.cfg': haproxy_txt} + } + + nfs_ganesha_txt = ( + "# This file is generated by cephadm.\n" + 'NFS_CORE_PARAM {\n' + ' Enable_NLM = false;\n' + ' Enable_RQUOTA = false;\n' + ' Protocols = 4;\n' + ' NFS_Port = 2049;\n' + ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n' + '}\n' + '\n' + 'NFSv4 {\n' + ' Delegations = false;\n' + " RecoveryBackend = 'rados_cluster';\n" + ' Minor_Versions = 1, 2;\n' + '}\n' + '\n' + 'RADOS_KV {\n' + ' UserId = "nfs.foo.test.0.0";\n' + ' nodeid = "nfs.foo.None";\n' + ' pool = ".nfs";\n' + ' namespace = "foo";\n' + '}\n' + '\n' + 'RADOS_URLS {\n' + ' UserId = "nfs.foo.test.0.0";\n' + ' watch_url = ' + '"rados://.nfs/foo/conf-nfs.foo";\n' + '}\n' + '\n' + 'RGW {\n' + ' cluster = "ceph";\n' + ' name = "client.nfs.foo.test.0.0-rgw";\n' + '}\n' + '\n' + "%url rados://.nfs/foo/conf-nfs.foo" + ) + nfs_expected_conf = { + 'files': {'ganesha.conf': nfs_ganesha_txt}, + 'config': '', + 'extra_args': ['-N', 'NIV_EVENT'], + 'keyring': ( + '[client.nfs.foo.test.0.0]\n' + 'key = None\n' + ), + 'namespace': 'foo', + 'pool': '.nfs', + 'rgw': { + 'cluster': 'ceph', + 'keyring': ( + '[client.nfs.foo.test.0.0-rgw]\n' + 'key = None\n' + ), + 'user': 'nfs.foo.test.0.0-rgw', + }, + 'userid': 'nfs.foo.test.0.0', + } + + nfs_daemons = [ + DaemonDescription( + daemon_type='nfs', + daemon_id='foo.0.1.host1.qwerty', + hostname='host1', + rank=0, + rank_generation=1, + ports=[12049], + ), + DaemonDescription( + daemon_type='nfs', + daemon_id='foo.0.0.host2.abcdef', + hostname='host2', + rank=0, + rank_generation=0, + ports=[12049], + ), + ] + _get_daemons_by_service.return_value = nfs_daemons + + ingress_svc = cephadm_module.cephadm_services['ingress'] + nfs_svc = cephadm_module.cephadm_services['nfs'] + + # add host network info to one host to test the behavior of + # adding all known-good addresses of the host to the list. 
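+ # Rough sketch (hypothetical, for illustration) of how the HAProxy_Hosts
+ # value in the expected ganesha.conf above is assumed to be assembled from
+ # the host networks injected below:
+ #
+ #   addrs = ['192.168.122.111']            # resolved address of the ingress host
+ #   for net, ifaces in host1_networks.items():
+ #       for ip in sum(ifaces.values(), []):
+ #           if ip.startswith('fe80'):      # drop link-local addresses
+ #               continue
+ #           if ip not in addrs:            # drop the redundant primary address
+ #               addrs.append(ip)
+ #   addrs.append('192.168.122.222')        # the other NFS placement host
+ #   # -> "192.168.122.111, 10.10.2.20, 192.168.122.222"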
+ cephadm_module.cache.update_host_networks('host1', { + # this one is additional + '10.10.2.0/24': { + 'eth1': ['10.10.2.20'] + }, + # this is redundant and will be skipped + '192.168.122.0/24': { + 'eth0': ['192.168.122.111'] + }, + # this is a link-local address and will be ignored + "fe80::/64": { + "veth0": [ + "fe80::8cf5:25ff:fe1c:d963" + ], + "eth0": [ + "fe80::c7b:cbff:fef6:7370" + ], + "eth1": [ + "fe80::7201:25a7:390b:d9a7" + ] + }, + }) + + haproxy_generated_conf, _ = ingress_svc.haproxy_generate_config( + CephadmDaemonDeploySpec( + host='host1', + daemon_id='ingress', + service_name=ispec.service_name(), + ), + ) + assert haproxy_generated_conf == haproxy_expected_conf + + nfs_generated_conf, _ = nfs_svc.generate_config( + CephadmDaemonDeploySpec( + host='test', + daemon_id='foo.test.0.0', + service_name=nfs_service.service_name(), + ), + ) + assert nfs_generated_conf == nfs_expected_conf + + +class TestCephFsMirror: + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('cephfs-mirror')): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'mgr module enable', + 'module': 'mirroring' + }) + + +class TestJaeger: + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_query(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + spec = TracingSpec(es_nodes="192.168.0.1:9200", + service_type="jaeger-query") + + config = {"elasticsearch_nodes": "http://192.168.0.1:9200"} + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + "jaeger-query.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-query.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [16686], + }, + "meta": { + 'service_name': 'jaeger-query', + 'ports': [16686], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_collector_es_deploy(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + collector_spec = TracingSpec(service_type="jaeger-collector") + es_spec = TracingSpec(service_type="elasticsearch") + es_config = {} + + with with_host(cephadm_module, 'test'): + collector_config = { + "elasticsearch_nodes": f'http://{build_url(host=cephadm_module.inventory.get_addr("test"), port=9200).lstrip("/")}'} + with with_service(cephadm_module, es_spec): + _run_cephadm.assert_called_with( + "test", + "elasticsearch.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'elasticsearch.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9200], + }, + "meta": { + 'service_name': 'elasticsearch', + 'ports': [9200], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": es_config, + }), + ) + with with_service(cephadm_module, collector_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-collector.test", + ['_orch', 'deploy'], + [], + 
stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-collector.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [14250], + }, + "meta": { + 'service_name': 'jaeger-collector', + 'ports': [14250], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": collector_config, + }), + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_jaeger_agent(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + collector_spec = TracingSpec(service_type="jaeger-collector", es_nodes="192.168.0.1:9200") + collector_config = {"elasticsearch_nodes": "http://192.168.0.1:9200"} + + agent_spec = TracingSpec(service_type="jaeger-agent") + agent_config = {"collector_nodes": "test:14250"} + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, collector_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-collector.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-collector.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [14250], + }, + "meta": { + 'service_name': 'jaeger-collector', + 'ports': [14250], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": collector_config, + }), + ) + with with_service(cephadm_module, agent_spec): + _run_cephadm.assert_called_with( + "test", + "jaeger-agent.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'jaeger-agent.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [6799], + }, + "meta": { + 'service_name': 'jaeger-agent', + 'ports': [6799], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": agent_config, + }), + ) diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py new file mode 100644 index 000000000..78a2d7311 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -0,0 +1,590 @@ +# Disable autopep8 for this file: + +# fmt: off + +import json + +import pytest + +from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \ + IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec +from orchestrator import DaemonDescription, OrchestratorError + + +@pytest.mark.parametrize( + "spec_json", + json.loads("""[ +{ + "placement": { + "count": 1 + }, + "service_type": "alertmanager" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "crash" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "grafana", + "protocol": "https" +}, +{ + "placement": { + "count": 2 + }, + "service_type": "mgr" +}, +{ + "placement": { + "count": 5 + }, + "service_type": "mon" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "node-exporter" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "prometheus" +}, +{ + "placement": { + "hosts": [ + { + "hostname": "ceph-001", + "network": "", + "name": "" + } + ] + }, + "service_type": "rgw", + "service_id": "default-rgw-realm.eu-central-1.1", + "rgw_realm": "default-rgw-realm", + "rgw_zone": "eu-central-1" +}, +{ + "service_type": "osd", + "service_id": "osd_spec_default", + "placement": { + "host_pattern": "*" + }, + 
"data_devices": { + "model": "MC-55-44-XZ" + }, + "db_devices": { + "model": "SSD-123-foo" + }, + "wal_devices": { + "model": "NVME-QQQQ-987" + } +} +] +""") +) +def test_spec_octopus(spec_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. + + spec = ServiceSpec.from_json(spec_json) + + # just some verification that we can sill read old octopus specs + def convert_to_old_style_json(j): + j_c = dict(j.copy()) + j_c.pop('service_name', None) + if 'spec' in j_c: + spec = j_c.pop('spec') + j_c.update(spec) + if 'placement' in j_c: + if 'hosts' in j_c['placement']: + j_c['placement']['hosts'] = [ + { + 'hostname': HostPlacementSpec.parse(h).hostname, + 'network': HostPlacementSpec.parse(h).network, + 'name': HostPlacementSpec.parse(h).name + } + for h in j_c['placement']['hosts'] + ] + j_c.pop('objectstore', None) + j_c.pop('filter_logic', None) + j_c.pop('anonymous_access', None) + return j_c + + assert spec_json == convert_to_old_style_json(spec.to_json()) + + +@pytest.mark.parametrize( + "dd_json", + json.loads("""[ + { + "hostname": "ceph-001", + "container_id": "d94d7969094d", + "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0", + "container_image_name": "docker.io/prom/alertmanager:latest", + "daemon_id": "ceph-001", + "daemon_type": "alertmanager", + "version": "0.20.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725856", + "created": "2020-04-02T19:23:08.829543", + "started": "2020-04-03T07:29:16.932838", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "c4b036202241", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "crash", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725903", + "created": "2020-04-02T19:23:11.390694", + "started": "2020-04-03T07:29:16.910897", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "5b7b94b48f31", + "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4", + "container_image_name": "docker.io/ceph/ceph-grafana:latest", + "daemon_id": "ceph-001", + "daemon_type": "grafana", + "version": "6.6.2", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725950", + "created": "2020-04-02T19:23:52.025088", + "started": "2020-04-03T07:29:16.847972", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "9ca007280456", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001.gkjwqp", + "daemon_type": "mgr", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725807", + "created": "2020-04-02T19:22:18.648584", + "started": "2020-04-03T07:29:16.856153", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "3d1ba9a2b697", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "mon", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725715", + "created": "2020-04-02T19:22:13.863300", + "started": 
"2020-04-03T07:29:17.206024", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "36d026c68ba1", + "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87", + "container_image_name": "docker.io/prom/node-exporter:latest", + "daemon_id": "ceph-001", + "daemon_type": "node-exporter", + "version": "0.18.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725996", + "created": "2020-04-02T19:23:53.880197", + "started": "2020-04-03T07:29:16.880044", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "faf76193cbfe", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "0", + "daemon_type": "osd", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726088", + "created": "2020-04-02T20:35:02.991435", + "started": "2020-04-03T07:29:19.373956", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "f82505bae0f1", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "1", + "daemon_type": "osd", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726134", + "created": "2020-04-02T20:35:17.142272", + "started": "2020-04-03T07:29:19.374002", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "2708d84cd484", + "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653", + "container_image_name": "docker.io/prom/prometheus:latest", + "daemon_id": "ceph-001", + "daemon_type": "prometheus", + "version": "2.17.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726042", + "created": "2020-04-02T19:24:10.281163", + "started": "2020-04-03T07:29:16.926292", + "is_active": false + }, + { + "hostname": "ceph-001", + "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo", + "daemon_type": "rgw", + "status": 1, + "status_desc": "starting", + "is_active": false + } +]""") +) +def test_dd_octopus(dd_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. + + # Convert datetime properties to old style. 
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292 + def convert_to_old_style_json(j): + for k in ['last_refresh', 'created', 'started', 'last_deployed', + 'last_configured']: + if k in j: + j[k] = j[k].rstrip('Z') + del j['daemon_name'] + return j + + assert dd_json == convert_to_old_style_json( + DaemonDescription.from_json(dd_json).to_json()) + + +@pytest.mark.parametrize("spec,dd,valid", +[ # noqa: E128 + # https://tracker.ceph.com/issues/44934 + ( + RGWSpec( + service_id="foo", + rgw_realm="default-rgw-realm", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm + RGWSpec( + service_id="foo.bar", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.bar.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm or zone + RGWSpec( + service_id="bar", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="bar.host.domain.tld.ytywjo", + hostname="host.domain.tld", + ), + True + ), + ( + # explicit naming + RGWSpec( + service_id="realm.zone", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="realm.zone.a", + hostname="smithi028", + ), + True + ), + ( + # without host + RGWSpec( + service_type='rgw', + service_id="foo", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # without host (2) + RGWSpec( + service_type='rgw', + service_id="default-rgw-realm.eu-central-1.1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # service_id contains hostname + # (sort of) https://tracker.ceph.com/issues/45294 + RGWSpec( + service_id="default.rgw.realm.ceph.001", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo", + hostname="ceph.001", + ), + True + ), + + # https://tracker.ceph.com/issues/45293 + ( + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + ServiceSpec( + service_type='mds', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45617 + ( + # daemon_id does not contain hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a", + hostname="host1", + ), + True + ), + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="host1", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="host1", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45399 + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1.site", + ), + True + ), + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1.site", + ), + True + ), + + # https://tracker.ceph.com/issues/45293 + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1", + ), + True + ), + ( + # service_id contains a '.' 
char + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1", + hostname="host1", + ), + True + ), + ( + # trailing chars after hostname + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # chars after hostname without '.' + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1abc123", + hostname="host1", + ), + False + ), + ( + # chars before hostname without '.' + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="ahost1.abc123", + hostname="host1", + ), + False + ), + + # https://tracker.ceph.com/issues/45293 + ( + IscsiServiceSpec( + service_type='iscsi', + service_id="a", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + IscsiServiceSpec( + service_type='iscsi', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # fixed daemon id for teuthology. + IscsiServiceSpec( + service_type='iscsi', + service_id='iscsi', + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="iscsi.a", + hostname="host1", + ), + True + ), + + ( + CustomContainerSpec( + service_type='container', + service_id='hello-world', + image='docker.io/library/hello-world:latest', + ), + DaemonDescription( + daemon_type='container', + daemon_id='hello-world.mgr0', + hostname='mgr0', + ), + True + ), + +]) +def test_daemon_description_service_name(spec: ServiceSpec, + dd: DaemonDescription, + valid: bool): + if valid: + assert spec.service_name() == dd.service_name() + else: + with pytest.raises(OrchestratorError): + dd.service_name() diff --git a/src/pybind/mgr/cephadm/tests/test_ssh.py b/src/pybind/mgr/cephadm/tests/test_ssh.py new file mode 100644 index 000000000..29f01b6c7 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_ssh.py @@ -0,0 +1,105 @@ +import asyncssh +from asyncssh.process import SSHCompletedProcess +from unittest import mock +try: + # AsyncMock was not added until python 3.8 + from unittest.mock import AsyncMock +except ImportError: + from asyncmock import AsyncMock +except ImportError: + AsyncMock = None +import pytest + + +try: + from asyncssh.misc import ConnectionLost +except ImportError: + ConnectionLost = None + +from ceph.deployment.hostspec import HostSpec + +from cephadm import CephadmOrchestrator +from cephadm.serve import CephadmServe +from cephadm.tests.fixtures import with_host, wait, async_side_effect +from orchestrator import OrchestratorError + + +@pytest.mark.skipif(ConnectionLost is None, reason='no asyncssh') +class TestWithSSH: + @mock.patch("cephadm.ssh.SSHManager._execute_command") + @mock.patch("cephadm.ssh.SSHManager._check_execute_command") + def test_offline(self, check_execute_command, execute_command, cephadm_module): + check_execute_command.side_effect = async_side_effect('') + execute_command.side_effect = async_side_effect(('', '', 0)) + + if not AsyncMock: + # can't run this test if we could not import AsyncMock + return + mock_connect = AsyncMock(return_value='') + with mock.patch("asyncssh.connect", new=mock_connect) as asyncssh_connect: + with with_host(cephadm_module, 'test'): + asyncssh_connect.side_effect = ConnectionLost('reason') + code, out, err = cephadm_module.check_host('test') + assert out == '' + assert 
"Failed to connect to test at address (1::4)" in err + + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4', status='Offline').to_json() + + asyncssh_connect.return_value = mock.MagicMock() + asyncssh_connect.side_effect = None + assert CephadmServe(cephadm_module)._check_host('test') is None + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4').to_json() + + def test_ssh_remote_cmds_execution(self, cephadm_module): + + if not AsyncMock: + # can't run this test if we could not import AsyncMock + return + + class FakeConn: + def __init__(self, exception=None, returncode=0): + self.exception = exception + self.returncode = returncode + + async def run(self, *args, **kwargs): + if self.exception: + raise self.exception + else: + return SSHCompletedProcess(returncode=self.returncode, stdout="", stderr="") + + async def close(self): + pass + + def run_test(host, conn, expected_error): + mock_connect = AsyncMock(return_value=conn) + with pytest.raises(OrchestratorError, match=expected_error): + with mock.patch("asyncssh.connect", new=mock_connect): + with with_host(cephadm_module, host): + CephadmServe(cephadm_module)._check_host(host) + + # Test case 1: command failure + run_test('test1', FakeConn(returncode=1), "Command .+ failed") + + # Test case 2: connection error + run_test('test2', FakeConn(exception=asyncssh.ChannelOpenError(1, "", "")), "Unable to reach remote host test2.") + + # Test case 3: asyncssh ProcessError + stderr = "my-process-stderr" + run_test('test3', FakeConn(exception=asyncssh.ProcessError(returncode=3, + env="", + command="", + subsystem="", + exit_status="", + exit_signal="", + stderr=stderr, + stdout="")), f"Cannot execute the command.+{stderr}") + # Test case 4: generic error + run_test('test4', FakeConn(exception=Exception), "Generic error while executing command.+") + + +@pytest.mark.skipif(ConnectionLost is not None, reason='asyncssh') +class TestWithoutSSH: + def test_can_run(self, cephadm_module: CephadmOrchestrator): + assert cephadm_module.can_run() == (False, "loading asyncssh library:No module named 'asyncssh'") diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py new file mode 100644 index 000000000..f67304348 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_template.py @@ -0,0 +1,33 @@ +import pathlib + +import pytest + +from cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError + + +def test_render(cephadm_module, fs): + template_base = (pathlib.Path(__file__).parent / '../templates').resolve() + fake_template = template_base / 'foo/bar' + fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}') + + template_mgr = TemplateMgr(cephadm_module) + value = 'test' + + # with base context + expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value) + assert template_mgr.render('foo/bar', {'var': value}) == expected_text + + # without base context + with pytest.raises(UndefinedError): + template_mgr.render('foo/bar', {'var': value}, managed_context=False) + + # override the base context + context = { + 'cephadm_managed': 'abc', + 'var': value + } + assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value) + + # template not found + with pytest.raises(TemplateNotFoundError): + template_mgr.render('foo/bar/2', {}) diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py 
b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py new file mode 100644 index 000000000..66feaee31 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py @@ -0,0 +1,256 @@ +import pytest +import json +from tests import mock +from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR +from cephadm.inventory import TunedProfileStore +from ceph.utils import datetime_now +from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec +from cephadm.ssh import SSHManager +from orchestrator import HostSpec + +from typing import List, Dict + + +class SaveError(Exception): + pass + + +class FakeCache: + def __init__(self, + hosts, + schedulable_hosts, + unreachable_hosts): + self.hosts = hosts + self.unreachable_hosts = [HostSpec(h) for h in unreachable_hosts] + self.schedulable_hosts = [HostSpec(h) for h in schedulable_hosts] + self.last_tuned_profile_update = {} + + def get_hosts(self): + return self.hosts + + def get_schedulable_hosts(self): + return self.schedulable_hosts + + def get_unreachable_hosts(self): + return self.unreachable_hosts + + def get_draining_hosts(self): + return [] + + def is_host_unreachable(self, hostname: str): + return hostname in [h.hostname for h in self.get_unreachable_hosts()] + + def is_host_schedulable(self, hostname: str): + return hostname in [h.hostname for h in self.get_schedulable_hosts()] + + def is_host_draining(self, hostname: str): + return hostname in [h.hostname for h in self.get_draining_hosts()] + + @property + def networks(self): + return {h: {'a': {'b': ['c']}} for h in self.hosts} + + def host_needs_tuned_profile_update(self, host, profile_name): + return profile_name == 'p2' + + +class FakeMgr: + def __init__(self, + hosts: List[str], + schedulable_hosts: List[str], + unreachable_hosts: List[str], + profiles: Dict[str, TunedProfileSpec]): + self.cache = FakeCache(hosts, schedulable_hosts, unreachable_hosts) + self.tuned_profiles = TunedProfileStore(self) + self.tuned_profiles.profiles = profiles + self.ssh = SSHManager(self) + self.offline_hosts = [] + self.log_refresh_metadata = False + + def set_store(self, what: str, value: str): + raise SaveError(f'{what}: {value}') + + def get_store(self, what: str): + if what == 'tuned_profiles': + return json.dumps({'x': TunedProfileSpec('x', + PlacementSpec(hosts=['x']), + {'x': 'x'}).to_json(), + 'y': TunedProfileSpec('y', + PlacementSpec(hosts=['y']), + {'y': 'y'}).to_json()}) + return '' + + +class TestTunedProfiles: + tspec1 = TunedProfileSpec('p1', + PlacementSpec(hosts=['a', 'b', 'c']), + {'setting1': 'value1', + 'setting2': 'value2', + 'setting with space': 'value with space'}) + tspec2 = TunedProfileSpec('p2', + PlacementSpec(hosts=['a', 'c']), + {'something': 'something_else', + 'high': '5'}) + tspec3 = TunedProfileSpec('p3', + PlacementSpec(hosts=['c']), + {'wow': 'wow2', + 'setting with space': 'value with space', + 'down': 'low'}) + + def profiles_to_calls(self, tp: TunedProfileUtils, profiles: List[TunedProfileSpec]) -> List[Dict[str, str]]: + # this function takes a list of tuned profiles and returns a mapping from + # profile names to the string that will be written to the actual config file on the host. 
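+ # Concretely, the return value is a list of single-entry dicts, one per
+ # profile. Illustrative doctest-style sketch (hypothetical profile; the
+ # output format follows TunedProfileUtils._profile_to_str in
+ # cephadm/tuned_profiles.py):
+ #   >>> spec = TunedProfileSpec('p1', PlacementSpec(hosts=['a']), {'fs.file-max': '100000'})
+ #   >>> tp._profile_to_str(spec)
+ #   '# created by cephadm\n# tuned profile "p1"\n\nfs.file-max = 100000\n'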
+ res = []
+ for p in profiles:
+ p_str = tp._profile_to_str(p)
+ res.append({p.profile_name: p_str})
+ return res
+
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._remove_stray_tuned_profiles")
+ @mock.patch("cephadm.tuned_profiles.TunedProfileUtils._write_tuned_profiles")
+ def test_write_all_tuned_profiles(self, _write_profiles, _rm_profiles):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._write_all_tuned_profiles()
+ # need to check that _write_tuned_profiles is correctly called with the
+ # profiles that match the tuned profile placements and with the correct
+ # strings that should be generated from the settings the profiles have.
+ # the profiles_to_calls helper allows us to generate the input we
+ # should check against
+ calls = [
+ mock.call('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])),
+ mock.call('b', self.profiles_to_calls(tp, [self.tspec1])),
+ mock.call('c', self.profiles_to_calls(tp, [self.tspec1, self.tspec2, self.tspec3]))
+ ]
+ _write_profiles.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ def test_rm_stray_tuned_profiles(self, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test, going to use host "a" and put 4 cephadm generated
+ # profiles "p1", "p2", "p3" and "who", only two of which ("p1", "p2") should be there,
+ # as well as a file not generated by cephadm. Only the "p3" and "who"
+ # profiles should be removed from the host. This should add up to 4
+ # calls to check_execute_command: 1 "ls", 2 "rm", and 1 "sysctl --system"
+ _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf',
+ 'p2-cephadm-tuned-profile.conf',
+ 'p3-cephadm-tuned-profile.conf',
+ 'who-cephadm-tuned-profile.conf',
+ 'dont-touch-me'])
+ mgr = FakeMgr(['a', 'b', 'c'],
+ ['a', 'b', 'c'],
+ [],
+ profiles)
+ tp = TunedProfileUtils(mgr)
+ tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
+ calls = [
+ mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
+ mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
+ mock.call('a', ['sysctl', '--system'])
+ ]
+ _check_execute_command.assert_has_calls(calls, any_order=True)
+
+ @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+ @mock.patch('cephadm.ssh.SSHManager.write_remote_file')
+ def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command):
+ profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
+ # for this test we will use host "a" and have it so host_needs_tuned_profile_update
+ # returns True for p2 and False for p1 (see FakeCache class). 
So we should see + # 2 ssh calls, one to write p2, one to run sysctl --system + _check_execute_command.return_value = 'success' + _write_remote_file.return_value = 'success' + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + profiles) + tp = TunedProfileUtils(mgr) + tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) + _check_execute_command.assert_called_with('a', ['sysctl', '--system']) + _write_remote_file.assert_called_with( + 'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8')) + + def test_dont_write_to_unreachable_hosts(self): + profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} + + # list host "a" and "b" as hosts that exist, "a" will be + # a normal, schedulable host and "b" is considered unreachable + mgr = FakeMgr(['a', 'b'], + ['a'], + ['b'], + profiles) + tp = TunedProfileUtils(mgr) + + assert 'a' not in tp.mgr.cache.last_tuned_profile_update + assert 'b' not in tp.mgr.cache.last_tuned_profile_update + + # with an online host, should proceed as normal. Providing + # no actual profiles here though so the only actual action taken + # is updating the entry in the last_tuned_profile_update dict + tp._write_tuned_profiles('a', {}) + assert 'a' in tp.mgr.cache.last_tuned_profile_update + + # trying to write to an unreachable host should be a no-op + # and return immediately. No entry for 'b' should be added + # to the last_tuned_profile_update dict + tp._write_tuned_profiles('b', {}) + assert 'b' not in tp.mgr.cache.last_tuned_profile_update + + def test_store(self): + mgr = FakeMgr(['a', 'b', 'c'], + ['a', 'b', 'c'], + [], + {}) + tps = TunedProfileStore(mgr) + save_str_p1 = 'tuned_profiles: ' + json.dumps({'p1': self.tspec1.to_json()}) + tspec1_updated = self.tspec1.copy() + tspec1_updated.settings.update({'new-setting': 'new-value'}) + save_str_p1_updated = 'tuned_profiles: ' + json.dumps({'p1': tspec1_updated.to_json()}) + save_str_p1_updated_p2 = 'tuned_profiles: ' + \ + json.dumps({'p1': tspec1_updated.to_json(), 'p2': self.tspec2.to_json()}) + tspec2_updated = self.tspec2.copy() + tspec2_updated.settings.pop('something') + save_str_p1_updated_p2_updated = 'tuned_profiles: ' + \ + json.dumps({'p1': tspec1_updated.to_json(), 'p2': tspec2_updated.to_json()}) + save_str_p2_updated = 'tuned_profiles: ' + json.dumps({'p2': tspec2_updated.to_json()}) + with pytest.raises(SaveError) as e: + tps.add_profile(self.tspec1) + assert str(e.value) == save_str_p1 + assert 'p1' in tps + with pytest.raises(SaveError) as e: + tps.add_setting('p1', 'new-setting', 'new-value') + assert str(e.value) == save_str_p1_updated + assert 'new-setting' in tps.list_profiles()[0].settings + with pytest.raises(SaveError) as e: + tps.add_profile(self.tspec2) + assert str(e.value) == save_str_p1_updated_p2 + assert 'p2' in tps + assert 'something' in tps.list_profiles()[1].settings + with pytest.raises(SaveError) as e: + tps.rm_setting('p2', 'something') + assert 'something' not in tps.list_profiles()[1].settings + assert str(e.value) == save_str_p1_updated_p2_updated + with pytest.raises(SaveError) as e: + tps.rm_profile('p1') + assert str(e.value) == save_str_p2_updated + assert 'p1' not in tps + assert 'p2' in tps + assert len(tps.list_profiles()) == 1 + assert tps.list_profiles()[0].profile_name == 'p2' + + cur_last_updated = tps.last_updated('p2') + new_last_updated = datetime_now() + assert cur_last_updated != new_last_updated + tps.set_last_updated('p2', new_last_updated) + assert 
tps.last_updated('p2') == new_last_updated + + # check FakeMgr get_store func to see what is expected to be found in Key Store here + tps.load() + assert 'x' in tps + assert 'y' in tps + assert [p for p in tps.list_profiles() if p.profile_name == 'x'][0].settings == {'x': 'x'} + assert [p for p in tps.list_profiles() if p.profile_name == 'y'][0].settings == {'y': 'y'} diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py new file mode 100644 index 000000000..3b5c305b5 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -0,0 +1,481 @@ +import json +from unittest import mock + +import pytest + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec +from cephadm import CephadmOrchestrator +from cephadm.upgrade import CephadmUpgrade, UpgradeState +from cephadm.ssh import HostConnectionError +from cephadm.utils import ContainerInspectInfo +from orchestrator import OrchestratorError, DaemonDescription +from .fixtures import _run_cephadm, wait, with_host, with_service, \ + receive_agent_metadata, async_side_effect + +from typing import List, Tuple, Optional + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_start(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True): + assert wait(cephadm_module, cephadm_module.upgrade_start( + 'image_id', None)) == 'Initiating upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_status() + ).target_image == 'image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_pause() + ) == 'Paused upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_resume() + ) == 'Resumed upgrade to image_id' + + assert wait(cephadm_module, cephadm_module.upgrade_stop() + ) == 'Stopped upgrade to image_id' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.offline_hosts = set(['test2']) + with pytest.raises(OrchestratorError, match=r"Upgrade aborted - Some host\(s\) are currently offline: {'test2'}"): + cephadm_module.upgrade_start('image_id', None) + cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_daemons_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0) + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", side_effect=HostConnectionError('connection failure reason', 'test2', '192.168.122.1')): + _to_upgrade = [(DaemonDescription(daemon_type='crash', daemon_id='test2', hostname='test2'), True)] + with pytest.raises(HostConnectionError, match=r"connection failure reason"): + cephadm_module.upgrade._upgrade_daemons(_to_upgrade, 'target_image', ['digest1']) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_do_upgrade_offline_hosts(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + 
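+ # simulate an in-progress upgrade; the positional args are
+ # UpgradeState(target_name='target_image', progress_id=0)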
cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0) + cephadm_module.offline_hosts = set(['test2']) + with pytest.raises(HostConnectionError, match=r"Host\(s\) were marked offline: {'test2'}"): + cephadm_module.upgrade._do_upgrade() + cephadm_module.offline_hosts = set([]) # so remove_host doesn't fail when leaving the with_host block + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.module.CephadmOrchestrator.remove_health_warning") +def test_upgrade_resume_clear_health_warnings(_rm_health_warning, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0, paused=True) + _rm_health_warning.return_value = None + assert wait(cephadm_module, cephadm_module.upgrade_resume() + ) == 'Resumed upgrade to target_image' + calls_list = [mock.call(alert_id) for alert_id in cephadm_module.upgrade.UPGRADE_ERRORS] + _rm_health_warning.assert_has_calls(calls_list, any_order=True) + + +@mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', lambda _: (17, 2, 6)) +@mock.patch("cephadm.serve.CephadmServe._get_container_image_info") +def test_upgrade_check_with_ceph_version(_get_img_info, cephadm_module: CephadmOrchestrator): + # This test was added to avoid screwing up the image base so that + # when the version was added to it it made an incorrect image + # The issue caused the image to come out as + # quay.io/ceph/ceph:v18:v18.2.0 + # see https://tracker.ceph.com/issues/63150 + _img = '' + + def _fake_get_img_info(img_name): + nonlocal _img + _img = img_name + return ContainerInspectInfo( + 'image_id', + '18.2.0', + 'digest' + ) + + _get_img_info.side_effect = _fake_get_img_info + cephadm_module.upgrade_check('', '18.2.0') + assert _img == 'quay.io/ceph/ceph:v18.2.0' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@pytest.mark.parametrize("use_repo_digest", + [ + False, + True + ]) +def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + cephadm_module.set_container_image('global', 'from_image') + cephadm_module.use_repo_digest = use_repo_digest + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True), \ + mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name", + return_value='foo'), \ + mock.patch("cephadm.module.CephadmOrchestrator.version", + new_callable=mock.PropertyMock) as version_mock, \ + mock.patch("cephadm.module.CephadmOrchestrator.get", + return_value={ + # capture fields in both mon and osd maps + "require_osd_release": "pacific", + "min_mon_release": 16, + }): + version_mock.return_value = 'ceph version 18.2.1 (somehash)' + assert wait(cephadm_module, cephadm_module.upgrade_start( + 'to_image', None)) == 'Initiating upgrade to to_image' + + assert wait(cephadm_module, cephadm_module.upgrade_status() + ).target_image == 'to_image' + + def _versions_mock(cmd): + return json.dumps({ + 'mgr': { + 'ceph version 1.2.3 (asdf) blah': 1 + } + }) + + cephadm_module._mon_command_mock_versions = _versions_mock + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['to_image@repo_digest'], + 'ceph_version': 'ceph version 18.2.3 (hash)', + }))): + + 
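+ # with image_id/repo_digests/ceph_version mocked above, this
+ # _do_upgrade() pass can resolve the target image and start
+ # converging daemons (sketch of the test's intent)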
cephadm_module.upgrade._do_upgrade() + + assert cephadm_module.upgrade_status is not None + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name=list(cephadm_module.cache.daemons['host1'].keys())[0], + style='cephadm', + fsid='fsid', + container_id='container_id', + container_image_name='to_image', + container_image_id='image_id', + container_image_digests=['to_image@repo_digest'], + deployed_by=['to_image@repo_digest'], + version='version', + state='running', + ) + ]) + )): + receive_agent_metadata(cephadm_module, 'host1', ['ls']) + receive_agent_metadata(cephadm_module, 'host2', ['ls']) + + with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['to_image@repo_digest'], + 'ceph_version': 'ceph version 18.2.3 (hash)', + }))): + cephadm_module.upgrade._do_upgrade() + + _, image, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': 'global', + 'key': 'container_image', + }) + if use_repo_digest: + assert image == 'to_image@repo_digest' + else: + assert image == 'to_image' + + +def test_upgrade_state_null(cephadm_module: CephadmOrchestrator): + # This test validates https://tracker.ceph.com/issues/47580 + cephadm_module.set_store('upgrade_state', 'null') + CephadmUpgrade(cephadm_module) + assert CephadmUpgrade(cephadm_module).upgrade_state is None + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''): + with pytest.raises(OrchestratorError): + wait(cephadm_module, cephadm_module.upgrade_start('image_id', None)) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.CephadmOrchestrator.check_mon_command") +def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator): + # only 2 monitors, not enough for ok-to-stop to ever pass + check_mon_command.return_value = ( + 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '') + assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop() + + # 3 monitors, ok-to-stop should work fine + check_mon_command.return_value = ( + 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '') + assert cephadm_module.upgrade._enough_mons_for_ok_to_stop() + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +@mock.patch("cephadm.module.HostCache.get_daemons_by_service") +@mock.patch("cephadm.CephadmOrchestrator.get") +def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator): + get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription()]] + assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test')) + + get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]] + assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test')) + + get.side_effect = [{'filesystems': [{'mdsmap': 
{'fs_name': 'myfs.test', 'max_mds': 1}}]}] + get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]] + assert cephadm_module.upgrade._enough_mds_for_ok_to_stop( + DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test')) + + +@pytest.mark.parametrize("current_version, use_tags, show_all_versions, tags, result", + [ + # several candidate versions (from different major versions) + ( + (16, 1, '16.1.0'), + False, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v16.2.5', + 'v16.1.4', + 'v16.1.3', + 'v15.2.0', + ], + ['17.1.0', '16.2.7', '16.2.6', '16.2.5', '16.1.4', '16.1.3'] + ), + # candidate minor versions are available + ( + (16, 1, '16.1.0'), + False, # use_tags + False, # show_all_versions + [ + 'v16.2.2', + 'v16.2.1', + 'v16.1.6', + ], + ['16.2.2', '16.2.1', '16.1.6'] + ), + # all versions are less than the current version + ( + (17, 2, '17.2.0'), + False, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + ], + [] + ), + # show all versions (regardless of the current version) + ( + (16, 1, '16.1.0'), + False, # use_tags + True, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v15.1.0', + 'v14.2.0', + ], + ['17.1.0', '16.2.7', '16.2.6', '15.1.0', '14.2.0'] + ), + # show all tags (regardless of the current version and show_all_versions flag) + ( + (16, 1, '16.1.0'), + True, # use_tags + False, # show_all_versions + [ + 'v17.1.0', + 'v16.2.7', + 'v16.2.6', + 'v16.2.5', + 'v16.1.4', + 'v16.1.3', + 'v15.2.0', + ], + ['v15.2.0', 'v16.1.3', 'v16.1.4', 'v16.2.5', + 'v16.2.6', 'v16.2.7', 'v17.1.0'] + ), + ]) +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_ls(current_version, use_tags, show_all_versions, tags, result, cephadm_module: CephadmOrchestrator): + with mock.patch('cephadm.upgrade.Registry.get_tags', return_value=tags): + with mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', return_value=current_version): + out = cephadm_module.upgrade.upgrade_ls(None, use_tags, show_all_versions) + if use_tags: + assert out['tags'] == result + else: + assert out['versions'] == result + + +@pytest.mark.parametrize( + "upgraded, not_upgraded, daemon_types, hosts, services, should_block", + # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... 
] + [ + ( # valid, upgrade mgr daemons + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + ['mgr'], + None, + None, + False + ), + ( # invalid, can't upgrade mons until mgr is upgraded + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + ['mon'], + None, + None, + True + ), + ( # invalid, can't upgrade mon service until all mgr daemons are upgraded + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + None, + None, + ['mon'], + True + ), + ( # valid, upgrade mgr service + [], + [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')], + None, + None, + ['mgr'], + False + ), + ( # valid, mgr is already upgraded so can upgrade mons + [('mgr', 'a', 'a.x')], + [('mon', 'a', 'a')], + ['mon'], + None, + None, + False + ), + ( # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a + [], + [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['a'], + None, + True + ), + ( # valid, only daemon on b is a mgr + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['b'], + None, + False + ), + ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['a'], + None, + True + ), + ( # valid, only upgrading the mgr on a + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + ['mgr'], + ['a'], + None, + False + ), + ( # valid, mgr daemon not on b are upgraded + [('mgr', 'a', 'a.x')], + [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + None, + ['b'], + None, + False + ), + ( # valid, all the necessary hosts are covered, mgr on c is already upgraded + [('mgr', 'c', 'c.z')], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')], + None, + ['a', 'b'], + None, + False + ), + ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded + [], + [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')], + ['mgr', 'mon'], + ['a'], + None, + True + ), + ( # valid, only mon not on "b" is upgraded already. 
Case hit while making teuthology test + [('mon', 'a', 'a')], + [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')], + ['mon', 'osd'], + ['b'], + None, + False + ), + ] +) +@mock.patch("cephadm.module.HostCache.get_daemons") +@mock.patch("cephadm.serve.CephadmServe._get_container_image_info") +@mock.patch('cephadm.module.SpecStore.__getitem__') +def test_staggered_upgrade_validation( + get_spec, + get_image_info, + get_daemons, + upgraded: List[Tuple[str, str, str]], + not_upgraded: List[Tuple[str, str, str, str]], + daemon_types: Optional[str], + hosts: Optional[str], + services: Optional[str], + should_block: bool, + cephadm_module: CephadmOrchestrator, +): + def to_dds(ts: List[Tuple[str, str]], upgraded: bool) -> List[DaemonDescription]: + dds = [] + digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest' + for t in ts: + dds.append(DaemonDescription(daemon_type=t[0], + hostname=t[1], + daemon_id=t[2], + container_image_digests=[digest], + deployed_by=[digest],)) + return dds + get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False) + get_image_info.side_effect = async_side_effect( + ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest'])) + + class FakeSpecDesc(): + def __init__(self, spec): + self.spec = spec + + def _get_spec(s): + return FakeSpecDesc(ServiceSpec(s)) + + get_spec.side_effect = _get_spec + if should_block: + with pytest.raises(OrchestratorError): + cephadm_module.upgrade._validate_upgrade_filters( + 'new_image_name', daemon_types, hosts, services) + else: + cephadm_module.upgrade._validate_upgrade_filters( + 'new_image_name', daemon_types, hosts, services) diff --git a/src/pybind/mgr/cephadm/tuned_profiles.py b/src/pybind/mgr/cephadm/tuned_profiles.py new file mode 100644 index 000000000..8ec30bd53 --- /dev/null +++ b/src/pybind/mgr/cephadm/tuned_profiles.py @@ -0,0 +1,103 @@ +import logging +from typing import Dict, List, TYPE_CHECKING +from ceph.utils import datetime_now +from .schedule import HostAssignment +from ceph.deployment.service_spec import ServiceSpec, TunedProfileSpec + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + +SYSCTL_DIR = '/etc/sysctl.d' + + +class TunedProfileUtils(): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + self.mgr = mgr + + def _profile_to_str(self, p: TunedProfileSpec) -> str: + p_str = f'# created by cephadm\n# tuned profile "{p.profile_name}"\n\n' + for k, v in p.settings.items(): + p_str += f'{k} = {v}\n' + return p_str + + def _write_all_tuned_profiles(self) -> None: + host_profile_mapping: Dict[str, List[Dict[str, str]]] = {} + for host in self.mgr.cache.get_hosts(): + host_profile_mapping[host] = [] + + for profile in self.mgr.tuned_profiles.list_profiles(): + p_str = self._profile_to_str(profile) + ha = HostAssignment( + spec=ServiceSpec( + 'crash', placement=profile.placement), + hosts=self.mgr.cache.get_schedulable_hosts(), + unreachable_hosts=self.mgr.cache.get_unreachable_hosts(), + draining_hosts=self.mgr.cache.get_draining_hosts(), + daemons=[], + networks=self.mgr.cache.networks, + ) + all_slots, _, _ = ha.place() + for host in {s.hostname for s in all_slots}: + host_profile_mapping[host].append({profile.profile_name: p_str}) + + for host, profiles in host_profile_mapping.items(): + self._remove_stray_tuned_profiles(host, profiles) + self._write_tuned_profiles(host, profiles) + + def _remove_stray_tuned_profiles(self, host: str, profiles: List[Dict[str, str]]) -> None: 
+ """
+ this function looks at the contents of /etc/sysctl.d/ for profiles we have written
+ that should now be removed. It assumes any file with "-cephadm-tuned-profile.conf" in
+ its name was written by us and that any file without it was not. Only files written by
+ us are considered candidates for removal. The "profiles" parameter is a list of
+ dictionaries that map profile names to the file contents that will actually be written
+ to /etc/sysctl.d/<profile-name>-cephadm-tuned-profile.conf. For example
+ [
+ {
+ 'profile1': 'setting1 = value1\nsetting2 = value2'
+ },
+ {
+ 'profile2': 'setting3 = value3'
+ }
+ ]
+ We go through the keys of the dicts and append
+ -cephadm-tuned-profile.conf to the profile names to build our list of profile files that
+ SHOULD be on the host. Then if we see any file names that don't match this, but
+ DO include "-cephadm-tuned-profile.conf" (implying they're from us), remove them.
+ """
+ if self.mgr.cache.is_host_unreachable(host):
+ return
+ cmd = ['ls', SYSCTL_DIR]
+ found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n')
+ found_files = [s.strip() for s in found_files]
+ profile_names: List[str] = sum([[*p] for p in profiles], [])  # extract all profile names
+ profile_names = list(set(profile_names))  # remove duplicates
+ expected_files = [p + '-cephadm-tuned-profile.conf' for p in profile_names]
+ updated = False
+ for file in found_files:
+ if '-cephadm-tuned-profile.conf' not in file:
+ continue
+ if file not in expected_files:
+ logger.info(f'Removing stray tuned profile file {file}')
+ cmd = ['rm', '-f', f'{SYSCTL_DIR}/{file}']
+ self.mgr.ssh.check_execute_command(host, cmd)
+ updated = True
+ if updated:
+ self.mgr.ssh.check_execute_command(host, ['sysctl', '--system'])
+
+ def _write_tuned_profiles(self, host: str, profiles: List[Dict[str, str]]) -> None:
+ if self.mgr.cache.is_host_unreachable(host):
+ return
+ updated = False
+ for p in profiles:
+ for profile_name, content in p.items():
+ if self.mgr.cache.host_needs_tuned_profile_update(host, profile_name):
+ logger.info(f'Writing tuned profile {profile_name} to host {host}')
+ profile_filename: str = f'{SYSCTL_DIR}/{profile_name}-cephadm-tuned-profile.conf'
+ self.mgr.ssh.write_remote_file(host, profile_filename, content.encode('utf-8'))
+ updated = True
+ if updated:
+ self.mgr.ssh.check_execute_command(host, ['sysctl', '--system'])
+ self.mgr.cache.last_tuned_profile_update[host] = datetime_now() diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py new file mode 100644 index 000000000..eeae37580 --- /dev/null +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -0,0 +1,1294 @@ +import json
+import logging
+import time
+import uuid
+from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any, cast
+
+import orchestrator
+from cephadm.registry import Registry
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
+from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, \
+ CEPH_TYPES, NON_CEPH_IMAGE_TYPES, GATEWAY_TYPES
+from cephadm.ssh import HostConnectionError
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service
+
+if TYPE_CHECKING:
+ from .module import CephadmOrchestrator
+
+
+logger = logging.getLogger(__name__)
+
+# from ceph_fs.h
+CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)
+CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
+
+
+def 
normalize_image_digest(digest: str, default_registry: str) -> str: + """ + Normal case: + >>> normalize_image_digest('ceph/ceph', 'docker.io') + 'docker.io/ceph/ceph' + + No change: + >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') + 'quay.ceph.io/ceph/ceph' + + >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') + 'docker.io/ubuntu' + + >>> normalize_image_digest('localhost/ceph', 'docker.io') + 'localhost/ceph' + """ + known_shortnames = [ + 'ceph/ceph', + 'ceph/daemon', + 'ceph/daemon-base', + ] + for image in known_shortnames: + if digest.startswith(image): + return f'{default_registry}/{digest}' + return digest + + +class UpgradeState: + def __init__(self, + target_name: str, + progress_id: str, + target_id: Optional[str] = None, + target_digests: Optional[List[str]] = None, + target_version: Optional[str] = None, + error: Optional[str] = None, + paused: Optional[bool] = None, + fail_fs: bool = False, + fs_original_max_mds: Optional[Dict[str, int]] = None, + fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None, + daemon_types: Optional[List[str]] = None, + hosts: Optional[List[str]] = None, + services: Optional[List[str]] = None, + total_count: Optional[int] = None, + remaining_count: Optional[int] = None, + ): + self._target_name: str = target_name # Use CephadmUpgrade.target_image instead. + self.progress_id: str = progress_id + self.target_id: Optional[str] = target_id + self.target_digests: Optional[List[str]] = target_digests + self.target_version: Optional[str] = target_version + self.error: Optional[str] = error + self.paused: bool = paused or False + self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds + self.fs_original_allow_standby_replay: Optional[Dict[str, + bool]] = fs_original_allow_standby_replay + self.fail_fs = fail_fs + self.daemon_types = daemon_types + self.hosts = hosts + self.services = services + self.total_count = total_count + self.remaining_count = remaining_count + + def to_json(self) -> dict: + return { + 'target_name': self._target_name, + 'progress_id': self.progress_id, + 'target_id': self.target_id, + 'target_digests': self.target_digests, + 'target_version': self.target_version, + 'fail_fs': self.fail_fs, + 'fs_original_max_mds': self.fs_original_max_mds, + 'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay, + 'error': self.error, + 'paused': self.paused, + 'daemon_types': self.daemon_types, + 'hosts': self.hosts, + 'services': self.services, + 'total_count': self.total_count, + 'remaining_count': self.remaining_count, + } + + @classmethod + def from_json(cls, data: dict) -> Optional['UpgradeState']: + valid_params = UpgradeState.__init__.__code__.co_varnames + if data: + c = {k: v for k, v in data.items() if k in valid_params} + if 'repo_digest' in c: + c['target_digests'] = [c.pop('repo_digest')] + return cls(**c) + else: + return None + + +class CephadmUpgrade: + UPGRADE_ERRORS = [ + 'UPGRADE_NO_STANDBY_MGR', + 'UPGRADE_FAILED_PULL', + 'UPGRADE_REDEPLOY_DAEMON', + 'UPGRADE_BAD_TARGET_VERSION', + 'UPGRADE_EXCEPTION', + 'UPGRADE_OFFLINE_HOST' + ] + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr = mgr + + t = self.mgr.get_store('upgrade_state') + if t: + self.upgrade_state: Optional[UpgradeState] = UpgradeState.from_json(json.loads(t)) + else: + self.upgrade_state = None + self.upgrade_info_str: str = '' + + @property + def target_image(self) -> str: + assert self.upgrade_state + if not self.mgr.use_repo_digest: + return self.upgrade_state._target_name + 
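+ # prefer a pinned repo digest when we have one; otherwise fall back
+ # to the configured target name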
if not self.upgrade_state.target_digests:
+ return self.upgrade_state._target_name
+
+ # FIXME: we assume the first digest is the best one to use
+ return self.upgrade_state.target_digests[0]
+
+ def upgrade_status(self) -> orchestrator.UpgradeStatusSpec:
+ r = orchestrator.UpgradeStatusSpec()
+ if self.upgrade_state:
+ r.target_image = self.target_image
+ r.in_progress = True
+ r.progress, r.services_complete = self._get_upgrade_info()
+ r.is_paused = self.upgrade_state.paused
+
+ if self.upgrade_state.daemon_types is not None:
+ which_str = f'Upgrading daemons of type(s) {",".join(self.upgrade_state.daemon_types)}'
+ if self.upgrade_state.hosts is not None:
+ which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}'
+ elif self.upgrade_state.services is not None:
+ which_str = f'Upgrading daemons in service(s) {",".join(self.upgrade_state.services)}'
+ if self.upgrade_state.hosts is not None:
+ which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}'
+ elif self.upgrade_state.hosts is not None:
+ which_str = f'Upgrading all daemons on host(s) {",".join(self.upgrade_state.hosts)}'
+ else:
+ which_str = 'Upgrading all daemon types on all hosts'
+ if self.upgrade_state.total_count is not None and self.upgrade_state.remaining_count is not None:
+ which_str += f'. Upgrade limited to {self.upgrade_state.total_count} daemons ({self.upgrade_state.remaining_count} remaining).'
+ r.which = which_str
+
+ # accessing self.upgrade_info_str will throw an exception if it
+ # has not been set in _do_upgrade yet
+ try:
+ r.message = self.upgrade_info_str
+ except AttributeError:
+ pass
+ if self.upgrade_state.error:
+ r.message = 'Error: ' + self.upgrade_state.error
+ elif self.upgrade_state.paused:
+ r.message = 'Upgrade paused'
+ return r
+
+ def _get_upgrade_info(self) -> Tuple[str, List[str]]:
+ if not self.upgrade_state or not self.upgrade_state.target_digests:
+ return '', []
+
+ daemons = self._get_filtered_daemons()
+
+ if any(not d.container_image_digests for d in daemons if d.daemon_type == 'mgr'):
+ return '', []
+
+ completed_daemons = [(d.daemon_type, any(d in self.upgrade_state.target_digests for d in (
+ d.container_image_digests or []))) for d in daemons if d.daemon_type]
+
+ done = len([True for completion in completed_daemons if completion[1]])
+
+ completed_types = list(set([completion[0] for completion in completed_daemons if all(
+ c[1] for c in completed_daemons if c[0] == completion[0])]))
+
+ return '%s/%s daemons upgraded' % (done, len(daemons)), completed_types
+
+ def _get_filtered_daemons(self) -> List[DaemonDescription]:
+ # Return the set of daemons set to be upgraded with our current
+ # filtering parameters (or all daemons in upgrade order if no filtering
+ # parameters are set). 
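+ # For example (hypothetical filters): daemon_types=['mgr'] returns only
+ # mgr daemons; services=['rgw.foo'] returns that service's daemons; and
+ # hosts=['host1'] further narrows either result to daemons on host1.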
+ assert self.upgrade_state is not None + if self.upgrade_state.daemon_types is not None: + daemons = [d for d in self.mgr.cache.get_daemons( + ) if d.daemon_type in self.upgrade_state.daemon_types] + elif self.upgrade_state.services is not None: + daemons = [] + for service in self.upgrade_state.services: + daemons += self.mgr.cache.get_daemons_by_service(service) + else: + daemons = [d for d in self.mgr.cache.get_daemons( + ) if d.daemon_type in CEPH_UPGRADE_ORDER] + if self.upgrade_state.hosts is not None: + daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts] + return daemons + + def _get_current_version(self) -> Tuple[int, int, str]: + current_version = self.mgr.version.split('ceph version ')[1] + (current_major, current_minor, _) = current_version.split('-')[0].split('.', 2) + return (int(current_major), int(current_minor), current_version) + + def _check_target_version(self, version: str) -> Optional[str]: + try: + v = version.split('.', 2) + (major, minor) = (int(v[0]), int(v[1])) + assert minor >= 0 + # patch might be a number or {number}-g{sha1} + except ValueError: + return 'version must be in the form X.Y.Z (e.g., 15.2.3)' + if major < 15 or (major == 15 and minor < 2): + return 'cephadm only supports octopus (15.2.0) or later' + + # to far a jump? + (current_major, current_minor, current_version) = self._get_current_version() + if current_major < major - 2: + return f'ceph can only upgrade 1 or 2 major versions at a time; {current_version} -> {version} is too big a jump' + if current_major > major: + return f'ceph cannot downgrade major versions (from {current_version} to {version})' + if current_major == major: + if current_minor > minor: + return f'ceph cannot downgrade to a {"rc" if minor == 1 else "dev"} release' + + # check mon min + monmap = self.mgr.get("mon_map") + mon_min = monmap.get("min_mon_release", 0) + if mon_min < major - 2: + return f'min_mon_release ({mon_min}) < target {major} - 2; first complete an upgrade to an earlier release' + + # check osd min + osdmap = self.mgr.get("osd_map") + osd_min_name = osdmap.get("require_osd_release", "argonaut") + osd_min = ceph_release_to_major(osd_min_name) + if osd_min < major - 2: + return f'require_osd_release ({osd_min_name} or {osd_min}) < target {major} - 2; first complete an upgrade to an earlier release' + + return None + + def upgrade_ls(self, image: Optional[str], tags: bool, show_all_versions: Optional[bool]) -> Dict: + if not image: + image = self.mgr.container_image_base + reg_name, bare_image = image.split('/', 1) + if ':' in bare_image: + # for our purposes, we don't want to use the tag here + bare_image = bare_image.split(':')[0] + reg = Registry(reg_name) + (current_major, current_minor, _) = self._get_current_version() + versions = [] + r: Dict[Any, Any] = { + "image": image, + "registry": reg_name, + "bare_image": bare_image, + } + + try: + ls = reg.get_tags(bare_image) + except ValueError as e: + raise OrchestratorError(f'{e}') + if not tags: + for t in ls: + if t[0] != 'v': + continue + v = t[1:].split('.') + if len(v) != 3: + continue + if '-' in v[2]: + continue + v_major = int(v[0]) + v_minor = int(v[1]) + candidate_version = (v_major > current_major + or (v_major == current_major and v_minor >= current_minor)) + if show_all_versions or candidate_version: + versions.append('.'.join(v)) + r["versions"] = sorted( + versions, + key=lambda k: list(map(int, k.split('.'))), + reverse=True + ) + else: + r["tags"] = sorted(ls) + return r + + def upgrade_start(self, image: str, version: 
str, daemon_types: Optional[List[str]] = None, + hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + fail_fs_value = cast(bool, self.mgr.get_module_option_ex( + 'orchestrator', 'fail_fs', False)) + if self.mgr.mode != 'root': + raise OrchestratorError('upgrade is not supported in %s mode' % ( + self.mgr.mode)) + if version: + version_error = self._check_target_version(version) + if version_error: + raise OrchestratorError(version_error) + target_name = self.mgr.container_image_base + ':v' + version + elif image: + target_name = normalize_image_digest(image, self.mgr.default_registry) + else: + raise OrchestratorError('must specify either image or version') + + if daemon_types is not None or services is not None or hosts is not None: + self._validate_upgrade_filters(target_name, daemon_types, hosts, services) + + if self.upgrade_state: + if self.upgrade_state._target_name != target_name: + raise OrchestratorError( + 'Upgrade to %s (not %s) already in progress' % + (self.upgrade_state._target_name, target_name)) + if self.upgrade_state.paused: + self.upgrade_state.paused = False + self._save_upgrade_state() + return 'Resumed upgrade to %s' % self.target_image + return 'Upgrade to %s in progress' % self.target_image + + running_mgr_count = len([daemon for daemon in self.mgr.cache.get_daemons_by_type( + 'mgr') if daemon.status == DaemonDescriptionStatus.running]) + + if running_mgr_count < 2: + raise OrchestratorError('Need at least 2 running mgr daemons for upgrade') + + self.mgr.log.info('Upgrade: Started with target %s' % target_name) + self.upgrade_state = UpgradeState( + target_name=target_name, + progress_id=str(uuid.uuid4()), + fail_fs=fail_fs_value, + daemon_types=daemon_types, + hosts=hosts, + services=services, + total_count=limit, + remaining_count=limit, + ) + self._update_upgrade_progress(0.0) + self._save_upgrade_state() + self._clear_upgrade_health_checks() + self.mgr.event.set() + return 'Initiating upgrade to %s' % (target_name) + + def _validate_upgrade_filters(self, target_name: str, daemon_types: Optional[List[str]] = None, hosts: Optional[List[str]] = None, services: Optional[List[str]] = None) -> None: + def _latest_type(dtypes: List[str]) -> str: + # [::-1] gives the list in reverse + for daemon_type in CEPH_UPGRADE_ORDER[::-1]: + if daemon_type in dtypes: + return daemon_type + return '' + + def _get_earlier_daemons(dtypes: List[str], candidates: List[DaemonDescription]) -> List[DaemonDescription]: + # this function takes a list of daemon types and first finds the daemon + # type from that list that is latest in our upgrade order. Then, from + # that latest type, it filters the list of candidate daemons received + # for daemons with types earlier in the upgrade order than the latest + # type found earlier. That filtered list of daemons is returned. The + # purpose of this function is to help in finding daemons that must have + # already been upgraded for the given filtering parameters (--daemon-types, + # --services, --hosts) to be valid. 
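+ # e.g. (mirroring the example below): for dtypes=['osd', 'mds'] the
+ # latest type in upgrade order is 'mds', so any not-yet-upgraded
+ # daemons of earlier types (such as mgr or mon) among the candidates
+ # are returned as blockers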
+ latest = _latest_type(dtypes) + if not latest: + return [] + earlier_types = '|'.join(CEPH_UPGRADE_ORDER).split(latest)[0].split('|')[:-1] + earlier_types = [t for t in earlier_types if t not in dtypes] + return [d for d in candidates if d.daemon_type in earlier_types] + + if self.upgrade_state: + raise OrchestratorError( + 'Cannot set values for --daemon-types, --services or --hosts when upgrade already in progress.') + try: + with self.mgr.async_timeout_handler('cephadm inspect-image'): + target_id, target_version, target_digests = self.mgr.wait_async( + CephadmServe(self.mgr)._get_container_image_info(target_name)) + except OrchestratorError as e: + raise OrchestratorError(f'Failed to pull {target_name}: {str(e)}') + # what we need to do here is build a list of daemons that must already be upgraded + # in order for the user's selection of daemons to upgrade to be valid. for example, + # if they say --daemon-types 'osd,mds' but mons have not been upgraded, we block. + daemons = [d for d in self.mgr.cache.get_daemons( + ) if d.daemon_type not in NON_CEPH_IMAGE_TYPES] + err_msg_base = 'Cannot start upgrade. ' + # "dtypes" will later be filled in with the types of daemons that will be upgraded with the given parameters + dtypes = [] + if daemon_types is not None: + dtypes = daemon_types + if hosts is not None: + dtypes = [_latest_type(dtypes)] + other_host_daemons = [ + d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons(dtypes, other_host_daemons) + else: + daemons = _get_earlier_daemons(dtypes, daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than given types need upgrading.\n' + elif services is not None: + # for our purposes here we can effectively convert our list of services into the + # set of daemon types the services contain. This works because we don't allow --services + # and --daemon-types at the same time and we only allow services of the same type + sspecs = [ + self.mgr.spec_store[s].spec for s in services if self.mgr.spec_store[s].spec is not None] + stypes = list(set([s.service_type for s in sspecs])) + if len(stypes) != 1: + raise OrchestratorError('Doing upgrade by service only support services of one type at ' + f'a time. Found service types: {stypes}') + for stype in stypes: + dtypes += orchestrator.service_to_daemon_types(stype) + dtypes = list(set(dtypes)) + if hosts is not None: + other_host_daemons = [ + d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons(dtypes, other_host_daemons) + else: + daemons = _get_earlier_daemons(dtypes, daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than daemons from given services need upgrading.\n' + elif hosts is not None: + # hosts must be handled a bit differently. For this, we really need to find all the daemon types + # that reside on hosts in the list of hosts we will upgrade. Then take the type from + # that list that is latest in the upgrade order and check if any daemons on hosts not in the + # provided list of hosts have a daemon with a type earlier in the upgrade order that is not upgraded. 
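+ # e.g. (matching a parametrized case in test_upgrade.py): asking to
+ # upgrade everything on hosts=['a'] where host a runs a mon is blocked
+ # while an un-upgraded mgr still sits on host b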
+ dtypes = list( + set([d.daemon_type for d in daemons if d.daemon_type is not None and d.hostname in hosts])) + other_hosts_daemons = [ + d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons([_latest_type(dtypes)], other_hosts_daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than daemons on given host need upgrading.\n' + need_upgrade_self, n1, n2, _ = self._detect_need_upgrade(daemons, target_digests, target_name) + if need_upgrade_self and ('mgr' not in dtypes or (daemon_types is None and services is None)): + # also report active mgr as needing to be upgraded. It is not included in the resulting list + # by default as it is treated special and handled via the need_upgrade_self bool + n1.insert(0, (self.mgr.mgr_service.get_active_daemon( + self.mgr.cache.get_daemons_by_type('mgr')), True)) + if n1 or n2: + raise OrchestratorError(f'{err_msg_base}Please first upgrade ' + f'{", ".join(list(set([d[0].name() for d in n1] + [d[0].name() for d in n2])))}\n' + f'NOTE: Enforced upgrade order is: {" -> ".join(CEPH_TYPES + GATEWAY_TYPES)}') + + def upgrade_pause(self) -> str: + if not self.upgrade_state: + raise OrchestratorError('No upgrade in progress') + if self.upgrade_state.paused: + return 'Upgrade to %s already paused' % self.target_image + self.upgrade_state.paused = True + self.mgr.log.info('Upgrade: Paused upgrade to %s' % self.target_image) + self._save_upgrade_state() + return 'Paused upgrade to %s' % self.target_image + + def upgrade_resume(self) -> str: + if not self.upgrade_state: + raise OrchestratorError('No upgrade in progress') + if not self.upgrade_state.paused: + return 'Upgrade to %s not paused' % self.target_image + self.upgrade_state.paused = False + self.upgrade_state.error = '' + self.mgr.log.info('Upgrade: Resumed upgrade to %s' % self.target_image) + self._save_upgrade_state() + self.mgr.event.set() + for alert_id in self.UPGRADE_ERRORS: + self.mgr.remove_health_warning(alert_id) + return 'Resumed upgrade to %s' % self.target_image + + def upgrade_stop(self) -> str: + if not self.upgrade_state: + return 'No upgrade in progress' + if self.upgrade_state.progress_id: + self.mgr.remote('progress', 'complete', + self.upgrade_state.progress_id) + target_image = self.target_image + self.mgr.log.info('Upgrade: Stopped') + self.upgrade_state = None + self._save_upgrade_state() + self._clear_upgrade_health_checks() + self.mgr.event.set() + return 'Stopped upgrade to %s' % target_image + + def continue_upgrade(self) -> bool: + """ + Returns false, if nothing was done. + :return: + """ + if self.upgrade_state and not self.upgrade_state.paused: + try: + self._do_upgrade() + except HostConnectionError as e: + self._fail_upgrade('UPGRADE_OFFLINE_HOST', { + 'severity': 'error', + 'summary': f'Upgrade: Failed to connect to host {e.hostname} at addr ({e.addr})', + 'count': 1, + 'detail': [f'SSH connection failed to {e.hostname} at addr ({e.addr}): {str(e)}'], + }) + return False + except Exception as e: + self._fail_upgrade('UPGRADE_EXCEPTION', { + 'severity': 'error', + 'summary': 'Upgrade: failed due to an unexpected exception', + 'count': 1, + 'detail': [f'Unexpected exception occurred during upgrade process: {str(e)}'], + }) + return False + return True + return False + + def _wait_for_ok_to_stop( + self, s: DaemonDescription, + known: Optional[List[str]] = None, # NOTE: output argument! 
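+ # (ok_to_stop() appends the names of any peer daemons it finds safe to
+ # stop, so callers like _to_upgrade() can skip re-checking daemons
+ # already reported safe)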
+ ) -> bool: + # only wait a little bit; the service might go away for something + assert s.daemon_type is not None + assert s.daemon_id is not None + tries = 4 + while tries > 0: + if not self.upgrade_state or self.upgrade_state.paused: + return False + + # setting force flag to retain old functionality. + # note that known is an output argument for ok_to_stop() + r = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)].ok_to_stop([ + s.daemon_id], known=known, force=True) + + if not r.retval: + logger.info(f'Upgrade: {r.stdout}') + return True + logger.info(f'Upgrade: {r.stderr}') + + time.sleep(15) + tries -= 1 + return False + + def _clear_upgrade_health_checks(self) -> None: + for k in self.UPGRADE_ERRORS: + if k in self.mgr.health_checks: + del self.mgr.health_checks[k] + self.mgr.set_health_checks(self.mgr.health_checks) + + def _fail_upgrade(self, alert_id: str, alert: dict) -> None: + assert alert_id in self.UPGRADE_ERRORS + if not self.upgrade_state: + # this could happen if the user canceled the upgrade while we + # were doing something + return + + logger.error('Upgrade: Paused due to %s: %s' % (alert_id, + alert['summary'])) + self.upgrade_state.error = alert_id + ': ' + alert['summary'] + self.upgrade_state.paused = True + self._save_upgrade_state() + self.mgr.health_checks[alert_id] = alert + self.mgr.set_health_checks(self.mgr.health_checks) + + def _update_upgrade_progress(self, progress: float) -> None: + if not self.upgrade_state: + assert False, 'No upgrade in progress' + + if not self.upgrade_state.progress_id: + self.upgrade_state.progress_id = str(uuid.uuid4()) + self._save_upgrade_state() + self.mgr.remote('progress', 'update', self.upgrade_state.progress_id, + ev_msg='Upgrade to %s' % ( + self.upgrade_state.target_version or self.target_image + ), + ev_progress=progress, + add_to_ceph_s=True) + + def _save_upgrade_state(self) -> None: + if not self.upgrade_state: + self.mgr.set_store('upgrade_state', None) + return + self.mgr.set_store('upgrade_state', json.dumps(self.upgrade_state.to_json())) + + def get_distinct_container_image_settings(self) -> Dict[str, str]: + # get all distinct container_image settings + image_settings = {} + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config dump', + 'format': 'json', + }) + config = json.loads(out) + for opt in config: + if opt['name'] == 'container_image': + image_settings[opt['section']] = opt['value'] + return image_settings + + def _prepare_for_mds_upgrade( + self, + target_major: str, + need_upgrade: List[DaemonDescription] + ) -> bool: + # scale down all filesystems to 1 MDS + assert self.upgrade_state + if not self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds = {} + if not self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay = {} + fsmap = self.mgr.get("fs_map") + continue_upgrade = True + for fs in fsmap.get('filesystems', []): + fscid = fs["id"] + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + # disable allow_standby_replay? 
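+ # (flag check: CEPH_MDSMAP_ALLOW_STANDBY_REPLAY is 1 << 5 == 32, so this
+ # tests whether bit 5 is set in the filesystem's mdsmap flags)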
+ if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY: + self.mgr.log.info('Upgrade: Disabling standby-replay for filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay[fscid] = True + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '0', + }) + continue_upgrade = False + continue + + # scale down this filesystem? + if mdsmap["max_mds"] > 1: + if self.upgrade_state.fail_fs: + if not (mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) and \ + len(mdsmap['up']) > 0: + self.mgr.log.info(f'Upgrade: failing fs {fs_name} for ' + f'rapid multi-rank mds upgrade') + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs fail', + 'fs_name': fs_name + }) + if ret != 0: + continue_upgrade = False + continue + else: + self.mgr.log.info('Upgrade: Scaling down filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds[fscid] = \ + mdsmap['max_mds'] + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'max_mds', + 'val': '1', + }) + continue_upgrade = False + continue + + if not self.upgrade_state.fail_fs: + if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1): + self.mgr.log.info( + 'Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % ( + fs_name)) + time.sleep(10) + continue_upgrade = False + continue + + if len(mdsmap['up']) == 0: + self.mgr.log.warning( + "Upgrade: No mds is up; continuing upgrade procedure to poke things in the right direction") + # This can happen because the current version MDS have + # incompatible compatsets; the mons will not do any promotions. + # We must upgrade to continue. + elif len(mdsmap['up']) > 0: + mdss = list(mdsmap['info'].values()) + assert len(mdss) == 1 + lone_mds = mdss[0] + if lone_mds['state'] != 'up:active': + self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % ( + lone_mds['name'], + lone_mds['state'], + )) + time.sleep(10) + continue_upgrade = False + continue + else: + assert False + + return continue_upgrade + + def _enough_mons_for_ok_to_stop(self) -> bool: + # type () -> bool + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'quorum_status', + }) + try: + j = json.loads(out) + except Exception: + raise OrchestratorError('failed to parse quorum status') + + mons = [m['name'] for m in j['monmap']['mons']] + return len(mons) > 2 + + def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool: + # type (DaemonDescription) -> bool + + # find fs this mds daemon belongs to + fsmap = self.mgr.get("fs_map") + for fs in fsmap.get('filesystems', []): + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + assert mds_daemon.daemon_id + if fs_name != mds_daemon.service_name().split('.', 1)[1]: + # wrong fs for this mds daemon + continue + + # get number of mds daemons for this fs + mds_count = len( + [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())]) + + # standby mds daemons for this fs? 
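+ # e.g. max_mds == 1 with two mds daemons in the service means a standby
+ # exists, so stopping one daemon is acceptable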
+            if mdsmap["max_mds"] < mds_count:
+                return True
+            return False
+
+        return True  # if mds has no fs it should pass ok-to-stop
+
+    def _detect_need_upgrade(self, daemons: List[DaemonDescription], target_digests: Optional[List[str]] = None, target_name: Optional[str] = None) -> Tuple[bool, List[Tuple[DaemonDescription, bool]], List[Tuple[DaemonDescription, bool]], int]:
+        # this function takes a list of daemons and the target container digests.
+        # It goes through each daemon and checks whether its current container
+        # digests match the target digests; a daemon is considered upgraded to a
+        # certain container image based on the digests it reports, so comparing
+        # the current digests against the targets tells us which daemons still
+        # need to be upgraded
+        need_upgrade_self = False
+        need_upgrade: List[Tuple[DaemonDescription, bool]] = []
+        need_upgrade_deployer: List[Tuple[DaemonDescription, bool]] = []
+        done = 0
+        if target_digests is None:
+            target_digests = []
+        if target_name is None:
+            target_name = ''
+        for d in daemons:
+            assert d.daemon_type is not None
+            assert d.daemon_id is not None
+            assert d.hostname is not None
+            if self.mgr.use_agent and not self.mgr.cache.host_metadata_up_to_date(d.hostname):
+                continue
+            correct_image = False
+            # if "use_repo_digest" is true, check whether the daemon's container
+            # digests include one of the digests we're upgrading to; if it is
+            # false, check whether the daemon's image name matches the target
+            # image name. Either way, the idea is to check whether the daemon is
+            # already using the image we're upgrading to. Additionally, since
+            # monitoring stack daemons are included in the upgrade process but
+            # don't use the ceph image, we assume any monitoring stack daemon is
+            # on the "correct" image already
+            if (
+                (self.mgr.use_repo_digest and d.matches_digests(target_digests))
+                or (not self.mgr.use_repo_digest and d.matches_image_name(target_name))
+                or (d.daemon_type in NON_CEPH_IMAGE_TYPES)
+            ):
+                logger.debug('daemon %s.%s on correct image' % (
+                    d.daemon_type, d.daemon_id))
+                correct_image = True
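+            # illustrative example (the digest value is made up): a daemon whose
+            # deployed_by list includes
+            #     quay.io/ceph/ceph@sha256:abc123...
+            # counts as "deployed by the correct version" when target_digests
+            # contains that same repo digest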
+            # do the deployed_by check using digests no matter what; we don't
+            # care which repo the image used to deploy the daemon came from, as
+            # long as the image content is correct
+            if any(d in target_digests for d in (d.deployed_by or [])):
+                logger.debug('daemon %s.%s deployed by correct version' % (
+                    d.daemon_type, d.daemon_id))
+                done += 1
+                continue
+
+            if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
+                logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
+                            self.mgr.get_mgr_id())
+                need_upgrade_self = True
+                continue
+
+            if correct_image:
+                logger.debug('daemon %s.%s not deployed by correct version' % (
+                    d.daemon_type, d.daemon_id))
+                need_upgrade_deployer.append((d, True))
+            else:
+                logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
+                    d.daemon_type, d.daemon_id,
+                    d.container_image_name, d.container_image_digests, d.version))
+                need_upgrade.append((d, False))
+
+        return (need_upgrade_self, need_upgrade, need_upgrade_deployer, done)
+
+    def _to_upgrade(self, need_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str) -> Tuple[bool, List[Tuple[DaemonDescription, bool]]]:
+        to_upgrade: List[Tuple[DaemonDescription, bool]] = []
+        known_ok_to_stop: List[str] = []
+        for d_entry in need_upgrade:
+            d = d_entry[0]
+            assert d.daemon_type is not None
+            assert d.daemon_id is not None
+            assert d.hostname is not None
+
+            if not d.container_image_id:
+                if d.container_image_name == target_image:
+                    logger.debug(
+                        'daemon %s has unknown container_image_id but has correct image name' % (d.name()))
+                    continue
+
+            if known_ok_to_stop:
+                if d.name() in known_ok_to_stop:
+                    logger.info(f'Upgrade: {d.name()} is also safe to restart')
+                    to_upgrade.append(d_entry)
+                continue
+
+            if d.daemon_type == 'osd':
+                # NOTE: known_ok_to_stop is an output argument for
+                # _wait_for_ok_to_stop
+                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                    return False, to_upgrade
+
+            if d.daemon_type == 'mon' and self._enough_mons_for_ok_to_stop():
+                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                    return False, to_upgrade
+
+            if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
+                # when fail_fs is set to true, all MDS daemons will be moved to
+                # up:standby state, so Cephadm won't be able to upgrade due to
+                # this check and will warn with "It is NOT safe to stop
+                # mds.<daemon_name> at this time: one or more filesystems is
+                # currently degraded", therefore we bypass this check for that
+                # case.
+                assert self.upgrade_state is not None
+                if not self.upgrade_state.fail_fs \
+                        and not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                    return False, to_upgrade
+
+            to_upgrade.append(d_entry)
+
+            # if we don't have a list of others to consider, stop now
+            if d.daemon_type in ['osd', 'mds', 'mon'] and not known_ok_to_stop:
+                break
+        return True, to_upgrade
+
+    def _upgrade_daemons(self, to_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str, target_digests: Optional[List[str]] = None) -> None:
+        assert self.upgrade_state is not None
+        num = 1
+        if target_digests is None:
+            target_digests = []
+        for d_entry in to_upgrade:
+            if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0 and not d_entry[1]:
+                self.mgr.log.info(
+                    f'Hit upgrade limit of {self.upgrade_state.total_count}. 
Stopping upgrade') + return + d = d_entry[0] + assert d.daemon_type is not None + assert d.daemon_id is not None + assert d.hostname is not None + + # make sure host has latest container image + with self.mgr.async_timeout_handler(d.hostname, 'cephadm inspect-image'): + out, errs, code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + d.hostname, '', 'inspect-image', [], + image=target_image, no_fsid=True, error_ok=True)) + if code or not any(d in target_digests for d in json.loads(''.join(out)).get('repo_digests', [])): + logger.info('Upgrade: Pulling %s on %s' % (target_image, + d.hostname)) + self.upgrade_info_str = 'Pulling %s image on host %s' % ( + target_image, d.hostname) + with self.mgr.async_timeout_handler(d.hostname, 'cephadm pull'): + out, errs, code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + d.hostname, '', 'pull', [], + image=target_image, no_fsid=True, error_ok=True)) + if code: + self._fail_upgrade('UPGRADE_FAILED_PULL', { + 'severity': 'warning', + 'summary': 'Upgrade: failed to pull target image', + 'count': 1, + 'detail': [ + 'failed to pull %s on host %s' % (target_image, + d.hostname)], + }) + return + r = json.loads(''.join(out)) + if not any(d in target_digests for d in r.get('repo_digests', [])): + logger.info('Upgrade: image %s pull on %s got new digests %s (not %s), restarting' % ( + target_image, d.hostname, r['repo_digests'], target_digests)) + self.upgrade_info_str = 'Image %s pull on %s got new digests %s (not %s), restarting' % ( + target_image, d.hostname, r['repo_digests'], target_digests) + self.upgrade_state.target_digests = r['repo_digests'] + self._save_upgrade_state() + return + + self.upgrade_info_str = 'Currently upgrading %s daemons' % (d.daemon_type) + + if len(to_upgrade) > 1: + logger.info('Upgrade: Updating %s.%s (%d/%d)' % (d.daemon_type, d.daemon_id, num, min(len(to_upgrade), + self.upgrade_state.remaining_count if self.upgrade_state.remaining_count is not None else 9999999))) + else: + logger.info('Upgrade: Updating %s.%s' % + (d.daemon_type, d.daemon_id)) + action = 'Upgrading' if not d_entry[1] else 'Redeploying' + try: + daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d) + self.mgr._daemon_action( + daemon_spec, + 'redeploy', + image=target_image if not d_entry[1] else None + ) + self.mgr.cache.metadata_up_to_date[d.hostname] = False + except Exception as e: + self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', { + 'severity': 'warning', + 'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.', + 'count': 1, + 'detail': [ + f'Upgrade daemon: {d.name()}: {e}' + ], + }) + return + num += 1 + if self.upgrade_state.remaining_count is not None and not d_entry[1]: + self.upgrade_state.remaining_count -= 1 + self._save_upgrade_state() + + def _handle_need_upgrade_self(self, need_upgrade_self: bool, upgrading_mgrs: bool) -> None: + if need_upgrade_self: + try: + self.mgr.mgr_service.fail_over() + except OrchestratorError as e: + self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', { + 'severity': 'warning', + 'summary': f'Upgrade: {e}', + 'count': 1, + 'detail': [ + 'The upgrade process needs to upgrade the mgr, ' + 'but it needs at least one standby to proceed.', + ], + }) + return + + return # unreachable code, as fail_over never returns + elif upgrading_mgrs: + if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks: + del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR'] + self.mgr.set_health_checks(self.mgr.health_checks) + + def _set_container_images(self, daemon_type: str, target_image: str, 
image_settings: Dict[str, str]) -> None:
+        # push down configs
+        daemon_type_section = name_to_config_section(daemon_type)
+        if image_settings.get(daemon_type_section) != target_image:
+            logger.info('Upgrade: Setting container_image for all %s' %
+                        daemon_type)
+            self.mgr.set_container_image(daemon_type_section, target_image)
+        to_clean = []
+        for section in image_settings.keys():
+            if section.startswith(name_to_config_section(daemon_type) + '.'):
+                to_clean.append(section)
+        if to_clean:
+            logger.debug('Upgrade: Cleaning up container_image for %s' %
+                         to_clean)
+            for section in to_clean:
+                ret, image, err = self.mgr.check_mon_command({
+                    'prefix': 'config rm',
+                    'name': 'container_image',
+                    'who': section,
+                })
+
+    def _complete_osd_upgrade(self, target_major: str, target_major_name: str) -> None:
+        osdmap = self.mgr.get("osd_map")
+        osd_min_name = osdmap.get("require_osd_release", "argonaut")
+        osd_min = ceph_release_to_major(osd_min_name)
+        if osd_min < int(target_major):
+            logger.info(
+                f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
+            ret, _, err = self.mgr.check_mon_command({
+                'prefix': 'osd require-osd-release',
+                'release': target_major_name,
+            })
+
+    def _complete_mds_upgrade(self) -> None:
+        assert self.upgrade_state is not None
+        if self.upgrade_state.fail_fs:
+            for fs in self.mgr.get("fs_map")['filesystems']:
+                fs_name = fs['mdsmap']['fs_name']
+                self.mgr.log.info('Upgrade: Setting filesystem '
+                                  f'{fs_name} joinable')
+                try:
+                    ret, _, err = self.mgr.check_mon_command({
+                        'prefix': 'fs set',
+                        'fs_name': fs_name,
+                        'var': 'joinable',
+                        'val': 'true',
+                    })
+                except Exception as e:
+                    logger.error("Failed to set fs joinable "
+                                 f"true due to {e}")
+                    raise OrchestratorError("Failed to set "
+                                            "fs joinable true "
+                                            f"due to {e}")
+        elif self.upgrade_state.fs_original_max_mds:
+            for fs in self.mgr.get("fs_map")['filesystems']:
+                fscid = fs["id"]
+                fs_name = fs['mdsmap']['fs_name']
+                new_max = self.upgrade_state.fs_original_max_mds.get(fscid, 1)
+                if new_max > 1:
+                    self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % (
+                        fs_name, new_max
+                    ))
+                    ret, _, err = self.mgr.check_mon_command({
+                        'prefix': 'fs set',
+                        'fs_name': fs_name,
+                        'var': 'max_mds',
+                        'val': str(new_max),
+                    })
+
+            self.upgrade_state.fs_original_max_mds = {}
+            self._save_upgrade_state()
+        if self.upgrade_state.fs_original_allow_standby_replay:
+            for fs in self.mgr.get("fs_map")['filesystems']:
+                fscid = fs["id"]
+                fs_name = fs['mdsmap']['fs_name']
+                asr = self.upgrade_state.fs_original_allow_standby_replay.get(fscid, False)
+                if asr:
+                    self.mgr.log.info('Upgrade: Enabling allow_standby_replay on filesystem %s' % (
+                        fs_name
+                    ))
+                    ret, _, err = self.mgr.check_mon_command({
+                        'prefix': 'fs set',
+                        'fs_name': fs_name,
+                        'var': 'allow_standby_replay',
+                        'val': '1'
+                    })
+
+            self.upgrade_state.fs_original_allow_standby_replay = {}
+            self._save_upgrade_state()
+
+    def _mark_upgrade_complete(self) -> None:
+        if not self.upgrade_state:
+            logger.debug('_mark_upgrade_complete upgrade already marked complete, exiting')
+            return
+        logger.info('Upgrade: Complete!')
+        if self.upgrade_state.progress_id:
+            self.mgr.remote('progress', 'complete',
+                            self.upgrade_state.progress_id)
+        self.upgrade_state = None
+        self._save_upgrade_state()
+
+    def _do_upgrade(self):
+        # type: () -> None
+        if not self.upgrade_state:
+            logger.debug('_do_upgrade no state, exiting')
+            return
+
+        if self.mgr.offline_hosts:
+            # offline host(s), on top of potential connection errors when trying to upgrade a daemon
+            
# or pull an image, can cause issues where daemons are never ok to stop. Since evaluating + # whether or not that risk is present for any given offline hosts is a difficult problem, + # it's best to just fail upgrade cleanly so user can address the offline host(s) + + # the HostConnectionError expects a hostname and addr, so let's just take + # one at random. It doesn't really matter which host we say we couldn't reach here. + hostname: str = list(self.mgr.offline_hosts)[0] + addr: str = self.mgr.inventory.get_addr(hostname) + raise HostConnectionError(f'Host(s) were marked offline: {self.mgr.offline_hosts}', hostname, addr) + + target_image = self.target_image + target_id = self.upgrade_state.target_id + target_digests = self.upgrade_state.target_digests + target_version = self.upgrade_state.target_version + + first = False + if not target_id or not target_version or not target_digests: + # need to learn the container hash + logger.info('Upgrade: First pull of %s' % target_image) + self.upgrade_info_str = 'Doing first pull of %s image' % (target_image) + try: + with self.mgr.async_timeout_handler(f'cephadm inspect-image (image {target_image})'): + target_id, target_version, target_digests = self.mgr.wait_async( + CephadmServe(self.mgr)._get_container_image_info(target_image)) + except OrchestratorError as e: + self._fail_upgrade('UPGRADE_FAILED_PULL', { + 'severity': 'warning', + 'summary': 'Upgrade: failed to pull target image', + 'count': 1, + 'detail': [str(e)], + }) + return + if not target_version: + self._fail_upgrade('UPGRADE_FAILED_PULL', { + 'severity': 'warning', + 'summary': 'Upgrade: failed to pull target image', + 'count': 1, + 'detail': ['unable to extract ceph version from container'], + }) + return + self.upgrade_state.target_id = target_id + # extract the version portion of 'ceph version {version} ({sha1})' + self.upgrade_state.target_version = target_version.split(' ')[2] + self.upgrade_state.target_digests = target_digests + self._save_upgrade_state() + target_image = self.target_image + first = True + + if target_digests is None: + target_digests = [] + if target_version.startswith('ceph version '): + # tolerate/fix upgrade state from older version + self.upgrade_state.target_version = target_version.split(' ')[2] + target_version = self.upgrade_state.target_version + (target_major, _) = target_version.split('.', 1) + target_major_name = self.mgr.lookup_release_name(int(target_major)) + + if first: + logger.info('Upgrade: Target is version %s (%s)' % ( + target_version, target_major_name)) + logger.info('Upgrade: Target container is %s, digests %s' % ( + target_image, target_digests)) + + version_error = self._check_target_version(target_version) + if version_error: + self._fail_upgrade('UPGRADE_BAD_TARGET_VERSION', { + 'severity': 'error', + 'summary': f'Upgrade: cannot upgrade/downgrade to {target_version}', + 'count': 1, + 'detail': [version_error], + }) + return + + image_settings = self.get_distinct_container_image_settings() + + # Older monitors (pre-v16.2.5) asserted that FSMap::compat == + # MDSMap::compat for all fs. This is no longer the case beginning in + # v16.2.5. We must disable the sanity checks during upgrade. + # N.B.: we don't bother confirming the operator has not already + # disabled this or saving the config value. 
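+        # the call below is the programmatic form of, e.g.:
+        #
+        #   ceph config set mon mon_mds_skip_sanity 1
+        #
+        # the option is removed again near the end of _do_upgrade, once all
+        # daemon types have been upgraded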
+        self.mgr.check_mon_command({
+            'prefix': 'config set',
+            'name': 'mon_mds_skip_sanity',
+            'value': '1',
+            'who': 'mon',
+        })
+
+        if self.upgrade_state.daemon_types is not None:
+            logger.debug(
+                f'Filtering daemons to upgrade by daemon types: {self.upgrade_state.daemon_types}')
+            daemons = [d for d in self.mgr.cache.get_daemons(
+            ) if d.daemon_type in self.upgrade_state.daemon_types]
+        elif self.upgrade_state.services is not None:
+            logger.debug(
+                f'Filtering daemons to upgrade by services: {self.upgrade_state.services}')
+            daemons = []
+            for service in self.upgrade_state.services:
+                daemons += self.mgr.cache.get_daemons_by_service(service)
+        else:
+            daemons = [d for d in self.mgr.cache.get_daemons(
+            ) if d.daemon_type in CEPH_UPGRADE_ORDER]
+        if self.upgrade_state.hosts is not None:
+            logger.debug(f'Filtering daemons to upgrade by hosts: {self.upgrade_state.hosts}')
+            daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts]
+        upgraded_daemon_count: int = 0
+        for daemon_type in CEPH_UPGRADE_ORDER:
+            if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0:
+                # we hit our limit and should end the upgrade,
+                # except for cases where we only need to redeploy, but not actually upgrade
+                # the image (which we don't count towards our limit). This case only occurs with mgr
+                # and monitoring stack daemons. Additionally, this case is only valid if
+                # the active mgr is already upgraded.
+                if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+                    if daemon_type not in NON_CEPH_IMAGE_TYPES and daemon_type != 'mgr':
+                        continue
+                else:
+                    self._mark_upgrade_complete()
+                    return
+            logger.debug('Upgrade: Checking %s daemons' % daemon_type)
+            daemons_of_type = [d for d in daemons if d.daemon_type == daemon_type]
+
+            need_upgrade_self, need_upgrade, need_upgrade_deployer, done = self._detect_need_upgrade(
+                daemons_of_type, target_digests, target_image)
+            upgraded_daemon_count += done
+            self._update_upgrade_progress(upgraded_daemon_count / len(daemons))
+
+            # make sure mgr and non-ceph-image daemons are properly redeployed in staggered upgrade scenarios
+            if daemon_type == 'mgr' or daemon_type in NON_CEPH_IMAGE_TYPES:
+                if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+                    need_upgrade_names = [d[0].name() for d in need_upgrade] + \
+                        [d[0].name() for d in need_upgrade_deployer]
+                    dds = [d for d in self.mgr.cache.get_daemons_by_type(
+                        daemon_type) if d.name() not in need_upgrade_names]
+                    need_upgrade_active, n1, n2, __ = self._detect_need_upgrade(dds, target_digests, target_image)
+                    if not n1:
+                        if not need_upgrade_self and need_upgrade_active:
+                            need_upgrade_self = True
+                        need_upgrade_deployer += n2
+                else:
+                    # no point in trying to redeploy with new version if active mgr is not on the new version
+                    need_upgrade_deployer = []
+
+            if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+                # only after the mgr itself is upgraded can we expect daemons to have
+                # deployed_by == target_digests
+                need_upgrade += need_upgrade_deployer
+
+            # prepare filesystems for daemon upgrades?
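+            # (the condition below hands any mds daemons that still need an
+            # upgrade to _prepare_for_mds_upgrade, which disables
+            # allow_standby_replay and scales each filesystem down to a single
+            # active mds before any mds daemon is restarted; see that helper
+            # above)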
+ if ( + daemon_type == 'mds' + and need_upgrade + and not self._prepare_for_mds_upgrade(target_major, [d_entry[0] for d_entry in need_upgrade]) + ): + return + + if need_upgrade: + self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type) + + _continue, to_upgrade = self._to_upgrade(need_upgrade, target_image) + if not _continue: + return + self._upgrade_daemons(to_upgrade, target_image, target_digests) + if to_upgrade: + return + + self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr') + + # following bits of _do_upgrade are for completing upgrade for given + # types. If we haven't actually finished upgrading all the daemons + # of this type, we should exit the loop here + _, n1, n2, _ = self._detect_need_upgrade( + self.mgr.cache.get_daemons_by_type(daemon_type), target_digests, target_image) + if n1 or n2: + continue + + # complete mon upgrade? + if daemon_type == 'mon': + if not self.mgr.get("have_local_config_map"): + logger.info('Upgrade: Restarting mgr now that mons are running pacific') + need_upgrade_self = True + + self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr') + + # make sure 'ceph versions' agrees + ret, out_ver, err = self.mgr.check_mon_command({ + 'prefix': 'versions', + }) + j = json.loads(out_ver) + for version, count in j.get(daemon_type, {}).items(): + short_version = version.split(' ')[2] + if short_version != target_version: + logger.warning( + 'Upgrade: %d %s daemon(s) are %s != target %s' % + (count, daemon_type, short_version, target_version)) + + self._set_container_images(daemon_type, target_image, image_settings) + + # complete osd upgrade? + if daemon_type == 'osd': + self._complete_osd_upgrade(target_major, target_major_name) + + # complete mds upgrade? + if daemon_type == 'mds': + self._complete_mds_upgrade() + + # Make sure all metadata is up to date before saying we are done upgrading this daemon type + if self.mgr.use_agent and not self.mgr.cache.all_host_metadata_up_to_date(): + self.mgr.agent_helpers._request_ack_all_not_up_to_date() + return + + logger.debug('Upgrade: Upgraded %s daemon(s).' % daemon_type) + + # clean up + logger.info('Upgrade: Finalizing container_image settings') + self.mgr.set_container_image('global', target_image) + + for daemon_type in CEPH_UPGRADE_ORDER: + ret, image, err = self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'name': 'container_image', + 'who': name_to_config_section(daemon_type), + }) + + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'name': 'mon_mds_skip_sanity', + 'who': 'mon', + }) + + self._mark_upgrade_complete() + return diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py new file mode 100644 index 000000000..63672936c --- /dev/null +++ b/src/pybind/mgr/cephadm/utils.py @@ -0,0 +1,153 @@ +import logging +import json +import socket +from enum import Enum +from functools import wraps +from typing import Optional, Callable, TypeVar, List, NewType, TYPE_CHECKING, Any, NamedTuple +from orchestrator import OrchestratorError + +if TYPE_CHECKING: + from cephadm import CephadmOrchestrator + +T = TypeVar('T') +logger = logging.getLogger(__name__) + +ConfEntity = NewType('ConfEntity', str) + + +class CephadmNoImage(Enum): + token = 1 + + +# ceph daemon types that use the ceph container image. 
+# NOTE: order important here as these are used for upgrade order +CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', + 'rbd-mirror', 'cephfs-mirror', 'ceph-exporter'] +GATEWAY_TYPES = ['iscsi', 'nfs', 'nvmeof'] +MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', + 'alertmanager', 'grafana', 'loki', 'promtail'] +RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['haproxy', 'nfs'] + +CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES + +# these daemon types use the ceph container image +CEPH_IMAGE_TYPES = CEPH_TYPES + ['iscsi', 'nfs'] + +# these daemons do not use the ceph image. There are other daemons +# that also don't use the ceph image, but we only care about those +# that are part of the upgrade order here +NON_CEPH_IMAGE_TYPES = MONITORING_STACK_TYPES + ['nvmeof'] + +# Used for _run_cephadm used for check-host etc that don't require an --image parameter +cephadmNoImage = CephadmNoImage.token + + +class ContainerInspectInfo(NamedTuple): + image_id: str + ceph_version: Optional[str] + repo_digests: Optional[List[str]] + + +class SpecialHostLabels(str, Enum): + ADMIN: str = '_admin' + NO_MEMORY_AUTOTUNE: str = '_no_autotune_memory' + DRAIN_DAEMONS: str = '_no_schedule' + DRAIN_CONF_KEYRING: str = '_no_conf_keyring' + + def to_json(self) -> str: + return self.value + + +def name_to_config_section(name: str) -> ConfEntity: + """ + Map from daemon names to ceph entity names (as seen in config) + """ + daemon_type = name.split('.', 1)[0] + if daemon_type in ['rgw', 'rbd-mirror', 'nfs', 'crash', 'iscsi', 'ceph-exporter', 'nvmeof']: + return ConfEntity('client.' + name) + elif daemon_type in ['mon', 'osd', 'mds', 'mgr', 'client']: + return ConfEntity(name) + else: + return ConfEntity('mon') + + +def forall_hosts(f: Callable[..., T]) -> Callable[..., List[T]]: + @wraps(f) + def forall_hosts_wrapper(*args: Any) -> List[T]: + from cephadm.module import CephadmOrchestrator + + # Some weird logic to make calling functions with multiple arguments work. 
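+        # a small sketch of the two call shapes this handles (the names below
+        # are illustrative only):
+        #
+        #   @forall_hosts
+        #   def check(host): ...            # called as check(hosts): len(args) == 1
+        #
+        #   class Mgr:
+        #       @forall_hosts
+        #       def check(self, host): ...  # called as self.check(hosts): len(args) == 2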
+        if len(args) == 1:
+            vals = args[0]
+            self = None
+        elif len(args) == 2:
+            self, vals = args
+        else:
+            assert False, 'either f([...]) or self.f([...])'
+
+        def do_work(arg: Any) -> T:
+            if not isinstance(arg, tuple):
+                arg = (arg, )
+            try:
+                if self:
+                    return f(self, *arg)
+                return f(*arg)
+            except Exception:
+                logger.exception(f'executing {f.__name__}({args}) failed.')
+                raise
+
+        assert CephadmOrchestrator.instance is not None
+        return CephadmOrchestrator.instance._worker_pool.map(do_work, vals)
+
+    return forall_hosts_wrapper
+
+
+def get_cluster_health(mgr: 'CephadmOrchestrator') -> str:
+    # check cluster health
+    ret, out, err = mgr.check_mon_command({
+        'prefix': 'health',
+        'format': 'json',
+    })
+    try:
+        j = json.loads(out)
+    except ValueError:
+        msg = 'Failed to parse health status: Cannot decode JSON'
+        logger.exception('%s: \'%s\'' % (msg, out))
+        raise OrchestratorError('failed to parse health status')
+
+    return j['status']
+
+
+def is_repo_digest(image_name: str) -> bool:
+    """
+    repo digests look something like "ceph/ceph@sha256:blablabla"
+    """
+    return '@' in image_name
+
+
+def resolve_ip(hostname: str) -> str:
+    try:
+        r = socket.getaddrinfo(hostname, None, flags=socket.AI_CANONNAME,
+                               type=socket.SOCK_STREAM)
+        # pick first v4 IP, if present
+        for a in r:
+            if a[0] == socket.AF_INET:
+                return a[4][0]
+        return r[0][4][0]
+    except socket.gaierror as e:
+        raise OrchestratorError(f"Cannot resolve ip for host {hostname}: {e}")
+
+
+def ceph_release_to_major(release: str) -> int:
+    return ord(release[0]) - ord('a') + 1
+
+
+def file_mode_to_str(mode: int) -> str:
+    r = ''
+    for shift in range(0, 9, 3):
+        r = (
+            f'{"r" if (mode >> shift) & 4 else "-"}'
+            f'{"w" if (mode >> shift) & 2 else "-"}'
+            f'{"x" if (mode >> shift) & 1 else "-"}'
+        ) + r
+    return r
diff --git a/src/pybind/mgr/cephadm/vagrant.config.example.json b/src/pybind/mgr/cephadm/vagrant.config.example.json
new file mode 100644
index 000000000..9419af630
--- /dev/null
+++ b/src/pybind/mgr/cephadm/vagrant.config.example.json
@@ -0,0 +1,13 @@
+/**
+ * To use a permanent config copy this file to "vagrant.config.json",
+ * edit it and remove this comment because comments are not allowed
+ * in a valid JSON file.
+ */
+
+{
+  "mgrs": 1,
+  "mons": 1,
+  "osds": 1,
+  "disks": 2
+}
+