diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-23 16:45:17 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-23 16:45:44 +0000 |
commit | 17d6a993fc17d533460c5f40f3908c708e057c18 (patch) | |
tree | 1a3bd93e0ecd74fa02f93a528fe2f87e5314c4b5 /src/pybind/mgr/cephadm/agent.py | |
parent | Releasing progress-linux version 18.2.2-0progress7.99u1. (diff) | |
download | ceph-17d6a993fc17d533460c5f40f3908c708e057c18.tar.xz ceph-17d6a993fc17d533460c5f40f3908c708e057c18.zip |
Merging upstream version 18.2.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/pybind/mgr/cephadm/agent.py | 565 |
1 files changed, 551 insertions, 14 deletions
diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index 93a08cb34..9e71477d4 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -16,14 +16,15 @@ import time from orchestrator import DaemonDescriptionStatus from orchestrator._interface import daemon_type_to_service -from ceph.utils import datetime_now +from ceph.utils import datetime_now, http_req from ceph.deployment.inventory import Devices from ceph.deployment.service_spec import ServiceSpec, PlacementSpec from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.ssl_cert_utils import SSLCerts from mgr_util import test_port_allocation, PortAlreadyInUse -from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional +from urllib.error import HTTPError, URLError +from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -53,11 +54,10 @@ class AgentEndpoint: self.server_addr = self.mgr.get_mgr_ip() def configure_routes(self) -> None: - d = cherrypy.dispatch.RoutesDispatcher() - d.connect(name='host-data', route='/data/', - controller=self.host_data.POST, - conditions=dict(method=['POST'])) - cherrypy.tree.mount(None, '/', config={'/': {'request.dispatch': d}}) + conf = {'/': {'tools.trailing_slash.on': False}} + + cherrypy.tree.mount(self.host_data, '/data', config=conf) + cherrypy.tree.mount(self.node_proxy_endpoint, '/node-proxy', config=conf) def configure_tls(self, server: Server) -> None: old_cert = self.mgr.get_store(self.KV_STORE_AGENT_ROOT_CERT) @@ -88,10 +88,540 @@ class AgentEndpoint: def configure(self) -> None: self.host_data = HostData(self.mgr, self.server_port, self.server_addr) self.configure_tls(self.host_data) + self.node_proxy_endpoint = NodeProxyEndpoint(self.mgr) self.configure_routes() self.find_free_port() +class NodeProxyEndpoint: + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr = mgr + self.ssl_root_crt = self.mgr.http_server.agent.ssl_certs.get_root_cert() + self.ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + self.ssl_ctx.check_hostname = False + self.ssl_ctx.verify_mode = ssl.CERT_NONE + # self.ssl_ctx = ssl.create_default_context() + # self.ssl_ctx.check_hostname = True + # self.ssl_ctx.verify_mode = ssl.CERT_REQUIRED + # self.ssl_ctx.load_verify_locations(cadata=self.ssl_root_crt) + self.redfish_token: str = '' + self.redfish_session_location: str = '' + + def _cp_dispatch(self, vpath: List[str]) -> "NodeProxyEndpoint": + if len(vpath) > 1: # /{hostname}/<endpoint> + hostname = vpath.pop(0) # /<endpoint> + cherrypy.request.params['hostname'] = hostname + # /{hostname}/led/{type}/{drive} eg: /{hostname}/led/chassis or /{hostname}/led/drive/{id} + if vpath[0] == 'led' and len(vpath) > 1: # /led/{type}/{id} + _type = vpath[1] + cherrypy.request.params['type'] = _type + vpath.pop(1) # /led/{id} or # /led + if _type == 'drive' and len(vpath) > 1: # /led/{id} + _id = vpath[1] + vpath.pop(1) # /led + cherrypy.request.params['id'] = _id + # /<endpoint> + return self + + @cherrypy.expose + @cherrypy.tools.allow(methods=['POST']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + def oob(self) -> Dict[str, Any]: + """ + Get the out-of-band management tool details for a given host. + + :return: oob details. + :rtype: dict + """ + data: Dict[str, Any] = cherrypy.request.json + results: Dict[str, Any] = {} + + self.validate_node_proxy_data(data) + + # expecting name to be "node-proxy.<hostname>" + hostname = data['cephx']['name'][11:] + results['result'] = self.mgr.node_proxy_cache.oob.get(hostname, '') + if not results['result']: + raise cherrypy.HTTPError(400, 'The provided host has no iDrac details.') + return results + + def validate_node_proxy_data(self, data: Dict[str, Any]) -> None: + """ + Validate received data. + + :param data: data to validate. + :type data: dict + + :raises cherrypy.HTTPError 400: If the data is not valid (missing fields) + :raises cherrypy.HTTPError 403: If the secret provided is wrong. + """ + cherrypy.response.status = 200 + try: + if 'cephx' not in data.keys(): + raise cherrypy.HTTPError(400, 'The field \'cephx\' must be provided.') + elif 'name' not in data['cephx'].keys(): + cherrypy.response.status = 400 + raise cherrypy.HTTPError(400, 'The field \'name\' must be provided.') + # expecting name to be "node-proxy.<hostname>" + hostname = data['cephx']['name'][11:] + if 'secret' not in data['cephx'].keys(): + raise cherrypy.HTTPError(400, 'The node-proxy keyring must be provided.') + elif not self.mgr.node_proxy_cache.keyrings.get(hostname, ''): + raise cherrypy.HTTPError(502, f'Make sure the node-proxy is running on {hostname}') + elif data['cephx']['secret'] != self.mgr.node_proxy_cache.keyrings[hostname]: + raise cherrypy.HTTPError(403, f'Got wrong keyring from agent on host {hostname}.') + except AttributeError: + raise cherrypy.HTTPError(400, 'Malformed data received.') + + # TODO(guits): refactor this + # TODO(guits): use self.node_proxy.get_critical_from_host() ? + def get_nok_members(self, + data: Dict[str, Any]) -> List[Dict[str, str]]: + """ + Retrieves members whose status is not 'ok'. + + :param data: Data containing information about members. + :type data: dict + + :return: A list containing dictionaries of members whose status is not 'ok'. + :rtype: List[Dict[str, str]] + + :return: None + :rtype: None + """ + nok_members: List[Dict[str, str]] = [] + + for member in data.keys(): + _status = data[member]['status']['health'].lower() + if _status.lower() != 'ok': + state = data[member]['status']['state'] + _member = dict( + member=member, + status=_status, + state=state + ) + nok_members.append(_member) + + return nok_members + + def raise_alert(self, data: Dict[str, Any]) -> None: + """ + Raises hardware alerts based on the provided patch status. + + :param data: Data containing patch status information. + :type data: dict + + This function iterates through the provided status + information to raise hardware alerts. + For each component in the provided data, it removes any + existing health warnings associated with it and checks + for non-okay members using the `get_nok_members` method. + If non-okay members are found, it sets a new health + warning for that component and generates a report detailing + the non-okay members' statuses. + + Note: This function relies on the `get_nok_members` method to + identify non-okay members. + + :return: None + :rtype: None + """ + + for component in data['patch']['status'].keys(): + alert_name = f"HARDWARE_{component.upper()}" + self.mgr.remove_health_warning(alert_name) + nok_members = self.get_nok_members(data['patch']['status'][component]) + + if nok_members: + count = len(nok_members) + self.mgr.set_health_warning( + alert_name, + summary=f'{count} {component} member{"s" if count > 1 else ""} {"are" if count > 1 else "is"} not ok', + count=count, + detail=[f"{member['member']} is {member['status']}: {member['state']}" for member in nok_members], + ) + + @cherrypy.expose + @cherrypy.tools.allow(methods=['POST']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + def data(self) -> None: + """ + Handles incoming data via a POST request. + + This function is exposed to handle POST requests and expects incoming + JSON data. It processes the incoming data by first validating it + through the `validate_node_proxy_data` method. Subsequently, it + extracts the hostname from the data and saves the information + using `mgr.node_proxy.save`. Finally, it raises alerts based on the + provided status through the `raise_alert` method. + + :return: None + :rtype: None + """ + data: Dict[str, Any] = cherrypy.request.json + self.validate_node_proxy_data(data) + if 'patch' not in data.keys(): + raise cherrypy.HTTPError(400, 'Malformed data received.') + host = data['cephx']['name'][11:] + self.mgr.node_proxy_cache.save(host, data['patch']) + self.raise_alert(data) + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET', 'PATCH']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + def led(self, **kw: Any) -> Dict[str, Any]: + """ + Handles enclosure LED operations for the specified hostname. + + This function handles GET and PATCH requests related to LED status for a + specific hostname. It identifies the request method and provided hostname. + If the hostname is missing, it logs an error and returns an error message. + + For PATCH requests, it prepares authorization headers based on the + provided ID and password, encodes them, and constructs the authorization + header. + + After processing, it queries the endpoint and returns the result. + + :param kw: Keyword arguments including 'hostname'. + :type kw: dict + + :return: Result of the LED-related operation. + :rtype: dict[str, Any] + """ + method: str = cherrypy.request.method + header: MutableMapping[str, str] = {} + hostname: Optional[str] = kw.get('hostname') + led_type: Optional[str] = kw.get('type') + id_drive: Optional[str] = kw.get('id') + payload: Optional[Dict[str, str]] = None + endpoint: List[Any] = ['led', led_type] + device: str = id_drive if id_drive else '' + + ssl_root_crt = self.mgr.http_server.agent.ssl_certs.get_root_cert() + ssl_ctx = ssl.create_default_context() + ssl_ctx.check_hostname = True + ssl_ctx.verify_mode = ssl.CERT_REQUIRED + ssl_ctx.load_verify_locations(cadata=ssl_root_crt) + + if not hostname: + msg: str = "listing enclosure LED status for all nodes is not implemented." + self.mgr.log.debug(msg) + raise cherrypy.HTTPError(501, msg) + + if not led_type: + msg = "the led type must be provided (either 'chassis' or 'drive')." + self.mgr.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + if led_type == 'drive' and not id_drive: + msg = "the id of the drive must be provided when type is 'drive'." + self.mgr.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + if led_type == 'drive': + endpoint.append(device) + + if hostname not in self.mgr.node_proxy_cache.data.keys(): + # TODO(guits): update unit test for this + msg = f"'{hostname}' not found." + self.mgr.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + addr: str = self.mgr.inventory.get_addr(hostname) + + if method == 'PATCH': + # TODO(guits): need to check the request is authorized + # allowing a specific keyring only ? (client.admin or client.agent.. ?) + data: Dict[str, Any] = cherrypy.request.json + if 'state' not in data.keys(): + msg = "'state' key not provided." + raise cherrypy.HTTPError(400, msg) + if 'keyring' not in data.keys(): + msg = "'keyring' key must be provided." + raise cherrypy.HTTPError(400, msg) + if data['keyring'] != self.mgr.node_proxy_cache.keyrings.get(hostname): + msg = 'wrong keyring provided.' + raise cherrypy.HTTPError(401, msg) + payload = {} + payload['state'] = data['state'] + + if led_type == 'drive': + if id_drive not in self.mgr.node_proxy_cache.data[hostname]['status']['storage'].keys(): + # TODO(guits): update unit test for this + msg = f"'{id_drive}' not found." + self.mgr.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + endpoint = f'/{"/".join(endpoint)}' + header = self.mgr.node_proxy.generate_auth_header(hostname) + + try: + headers, result, status = http_req(hostname=addr, + port='9456', + headers=header, + method=method, + data=json.dumps(payload), + endpoint=endpoint, + ssl_ctx=ssl_ctx) + response_json = json.loads(result) + except HTTPError as e: + self.mgr.log.debug(e) + except URLError: + raise cherrypy.HTTPError(502, f'Make sure the node-proxy agent is deployed and running on {hostname}') + + return response_json + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def fullreport(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve a full report. + + This function is exposed to handle GET requests and retrieves a comprehensive + report using the 'fullreport' method from the NodeProxyCache class. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: The full report data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.fullreport(**kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def criticals(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve critical information. + + This function is exposed to handle GET requests and fetches critical data + using the 'criticals' method from the NodeProxyCache class. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Critical information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.criticals(**kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def summary(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve summary information. + + This function is exposed to handle GET requests and fetches summary + data using the 'summary' method from the NodeProxyCache class. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Summary information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.summary(**kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def memory(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('memory', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def network(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('network', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def processors(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('processors', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def storage(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('storage', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def power(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('power', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def fans(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve specific information. + + This function is exposed to handle GET requests + and fetch specific data using the 'common' method + from the NodeProxyCache class with. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Specific information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.common('fans', **kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def firmwares(self, **kw: Any) -> Dict[str, Any]: + """ + Handles GET request to retrieve firmware information. + + This function is exposed to handle GET requests and fetches firmware data using + the 'firmwares' method from the NodeProxyCache class. + + :param kw: Keyword arguments for the request. + :type kw: dict + + :return: Firmware information data. + :rtype: dict[str, Any] + + :raises cherrypy.HTTPError 404: If the passed hostname is not found. + """ + try: + results = self.mgr.node_proxy_cache.firmwares(**kw) + except KeyError: + raise cherrypy.HTTPError(404, f"{kw.get('hostname')} not found.") + return results + + class HostData(Server): exposed = True @@ -109,9 +639,11 @@ class HostData(Server): self.unsubscribe() super().stop() + @cherrypy.tools.allow(methods=['POST']) @cherrypy.tools.json_in() @cherrypy.tools.json_out() - def POST(self) -> Dict[str, Any]: + @cherrypy.expose + def index(self) -> Dict[str, Any]: data: Dict[str, Any] = cherrypy.request.json results: Dict[str, Any] = {} try: @@ -234,6 +766,7 @@ class AgentMessageThread(threading.Thread): self.port = port self.data: str = json.dumps(data) self.daemon_spec: Optional[CephadmDaemonDeploySpec] = daemon_spec + self.agent_response: str = '' super().__init__(target=self.run) def run(self) -> None: @@ -286,8 +819,8 @@ class AgentMessageThread(threading.Thread): secure_agent_socket.connect((self.addr, self.port)) msg = (bytes_len + self.data) secure_agent_socket.sendall(msg.encode('utf-8')) - agent_response = secure_agent_socket.recv(1024).decode() - self.mgr.log.debug(f'Received "{agent_response}" from agent on host {self.host}') + self.agent_response = secure_agent_socket.recv(1024).decode() + self.mgr.log.debug(f'Received "{self.agent_response}" from agent on host {self.host}') if self.daemon_spec: self.mgr.agent_cache.agent_config_successfully_delivered(self.daemon_spec) self.mgr.agent_cache.sending_agent_message[self.host] = False @@ -307,6 +840,9 @@ class AgentMessageThread(threading.Thread): self.mgr.agent_cache.sending_agent_message[self.host] = False return + def get_agent_response(self) -> str: + return self.agent_response + class CephadmAgentHelpers: def __init__(self, mgr: "CephadmOrchestrator"): @@ -403,10 +939,11 @@ class CephadmAgentHelpers: if 'agent' in self.mgr.spec_store: self.mgr.spec_store.rm('agent') need_apply = True - self.mgr.agent_cache.agent_counter = {} - self.mgr.agent_cache.agent_timestamp = {} - self.mgr.agent_cache.agent_keys = {} - self.mgr.agent_cache.agent_ports = {} + if not self.mgr.cache.get_daemons_by_service('agent'): + self.mgr.agent_cache.agent_counter = {} + self.mgr.agent_cache.agent_timestamp = {} + self.mgr.agent_cache.agent_keys = {} + self.mgr.agent_cache.agent_ports = {} return need_apply def _check_agent(self, host: str) -> bool: |