Diffstat (limited to 'src/pybind/mgr/cephadm/services')
-rw-r--r--  src/pybind/mgr/cephadm/services/cephadmservice.py      2
-rw-r--r--  src/pybind/mgr/cephadm/services/ingress.py             1
-rw-r--r--  src/pybind/mgr/cephadm/services/jaeger.py              3
-rw-r--r--  src/pybind/mgr/cephadm/services/monitoring.py         30
-rw-r--r--  src/pybind/mgr/cephadm/services/nfs.py                 20
-rw-r--r--  src/pybind/mgr/cephadm/services/node_proxy.py         180
-rw-r--r--  src/pybind/mgr/cephadm/services/nvmeof.py               1
-rw-r--r--  src/pybind/mgr/cephadm/services/osd.py                 22
8 files changed, 251 insertions, 8 deletions
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 7d7a04dad..f3c45b164 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -41,7 +41,7 @@ def get_auth_entity(daemon_type: str, daemon_id: str, host: str = "") -> AuthEnt
# the CephService class refers to service types, not daemon types
if daemon_type in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'nvmeof', 'ingress', 'ceph-exporter']:
return AuthEntity(f'client.{daemon_type}.{daemon_id}')
- elif daemon_type in ['crash', 'agent']:
+ elif daemon_type in ['crash', 'agent', 'node-proxy']:
if host == "":
raise OrchestratorError(
f'Host not provided to generate <{daemon_type}> auth entity name')
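The hunk above only changes the membership test. As a minimal standalone sketch of the naming scheme it extends (illustrative only; the exact entity string returned for the host-scoped branch lies outside the hunk, so treat it as an assumption):

    def auth_entity_for(daemon_type: str, daemon_id: str, host: str = '') -> str:
        # service-scoped types only need the daemon id; host-scoped types
        # ('crash', 'agent', and now 'node-proxy') also need the host name
        service_scoped = {'rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', 'iscsi',
                          'nvmeof', 'ingress', 'ceph-exporter'}
        host_scoped = {'crash', 'agent', 'node-proxy'}
        if daemon_type in service_scoped:
            return f'client.{daemon_type}.{daemon_id}'
        if daemon_type in host_scoped:
            if not host:
                raise ValueError(f'Host not provided to generate <{daemon_type}> auth entity name')
            return f'client.{daemon_type}.{host}'
        raise ValueError(f'unknown daemon type {daemon_type}')

    print(auth_entity_for('node-proxy', 'abc', host='host-a'))  # client.node-proxy.host-a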
diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py
index 55be30454..5edd2517d 100644
--- a/src/pybind/mgr/cephadm/services/ingress.py
+++ b/src/pybind/mgr/cephadm/services/ingress.py
@@ -187,6 +187,7 @@ class IngressService(CephService):
'monitor_port': daemon_spec.ports[1] if daemon_spec.ports else spec.monitor_port,
'local_host_ip': host_ip,
'default_server_opts': server_opts,
+ 'health_check_interval': spec.health_check_interval or '2s',
}
)
config_files = {
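The new key follows the usual "spec value or built-in default" pattern used for the haproxy template context. A tiny self-contained illustration (the spec object below is a stand-in, not the real IngressSpec):

    class FakeIngressSpec:
        health_check_interval = None   # operator did not set it

    spec = FakeIngressSpec()
    context = {'health_check_interval': spec.health_check_interval or '2s'}  # falls back to 2s
    print(context)  # {'health_check_interval': '2s'}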
diff --git a/src/pybind/mgr/cephadm/services/jaeger.py b/src/pybind/mgr/cephadm/services/jaeger.py
index c136d20e6..c83c765d0 100644
--- a/src/pybind/mgr/cephadm/services/jaeger.py
+++ b/src/pybind/mgr/cephadm/services/jaeger.py
@@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService):
def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
assert self.TYPE == daemon_spec.daemon_type
collectors = []
+ deps: List[str] = []
for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
# scrape jaeger-collector nodes
assert dd.hostname is not None
port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
url = build_url(host=dd.hostname, port=port).lstrip('/')
collectors.append(url)
+ deps.append(url)
daemon_spec.final_config = {'collector_nodes': ",".join(collectors)}
+ daemon_spec.deps = sorted(deps)
return daemon_spec
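With this change the agent's dependency list mirrors the collector endpoints it scrapes, so a collector moving to another host or port changes the deps and triggers a reconfig of the agents. A toy sketch with made-up data (the real loop iterates mgr.cache.get_daemons_by_type, and the default port here is an assumption):

    DEFAULT_COLLECTOR_PORT = 14250  # assumed default; the real constant lives on JaegerCollectorService

    collector_daemons = [('host-a', 14250), ('host-b', None)]  # (hostname, explicit port or None)
    collectors, deps = [], []
    for hostname, port in collector_daemons:
        url = f'{hostname}:{port or DEFAULT_COLLECTOR_PORT}'
        collectors.append(url)
        deps.append(url)

    final_config = {'collector_nodes': ','.join(collectors)}
    deps = sorted(deps)
    print(final_config, deps)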
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 114c84860..10ddcbbd0 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -67,13 +67,24 @@ class GrafanaService(CephadmService):
spec: GrafanaSpec = cast(
GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
+
+ grafana_port = daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT
+ grafana_ip = daemon_spec.ip if daemon_spec.ip else ''
+
+ if spec.only_bind_port_on_networks and spec.networks:
+ assert daemon_spec.host is not None
+ ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec)
+ if ip_to_bind_to:
+ daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
+ grafana_ip = ip_to_bind_to
+
grafana_ini = self.mgr.template.render(
'services/grafana/grafana.ini.j2', {
'anonymous_access': spec.anonymous_access,
'initial_admin_password': spec.initial_admin_password,
- 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
+ 'http_port': grafana_port,
'protocol': spec.protocol,
- 'http_addr': daemon_spec.ip if daemon_spec.ip else ''
+ 'http_addr': grafana_ip
})
if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password:
@@ -402,6 +413,7 @@ class PrometheusService(CephadmService):
haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included
ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included
+ nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -417,9 +429,17 @@ class PrometheusService(CephadmService):
'node_exporter_sd_url': node_exporter_sd_url,
'alertmanager_sd_url': alertmanager_sd_url,
'haproxy_sd_url': haproxy_sd_url,
- 'ceph_exporter_sd_url': ceph_exporter_sd_url
+ 'ceph_exporter_sd_url': ceph_exporter_sd_url,
+ 'nvmeof_sd_url': nvmeof_sd_url,
}
+ ip_to_bind_to = ''
+ if spec.only_bind_port_on_networks and spec.networks:
+ assert daemon_spec.host is not None
+ ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or ''
+ if ip_to_bind_to:
+ daemon_spec.port_ips = {str(port): ip_to_bind_to}
+
web_context = {
'prometheus_web_user': prometheus_user,
'prometheus_web_password': password_hash(prometheus_password),
@@ -446,6 +466,7 @@ class PrometheusService(CephadmService):
},
'retention_time': retention_time,
'retention_size': retention_size,
+ 'ip_to_bind_to': ip_to_bind_to,
'web_config': '/etc/prometheus/web.yml'
}
else:
@@ -454,7 +475,8 @@ class PrometheusService(CephadmService):
'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context)
},
'retention_time': retention_time,
- 'retention_size': retention_size
+ 'retention_size': retention_size,
+ 'ip_to_bind_to': ip_to_bind_to
}
# include alerts, if present in the container
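Both the Grafana and Prometheus hunks rely on mgr.get_first_matching_network_ip() to honour only_bind_port_on_networks. A self-contained sketch of that idea, using a hypothetical host-to-IP map rather than the real inventory lookup:

    import ipaddress

    # Hypothetical inventory: host -> IPs configured on that host.
    host_ips = {'host-a': ['10.0.1.5', '192.168.0.12']}

    def first_matching_network_ip(host: str, networks: list) -> str:
        """Return the first host IP that falls inside one of the spec's networks."""
        for net in networks:
            subnet = ipaddress.ip_network(net)
            for ip in host_ips.get(host, []):
                if ipaddress.ip_address(ip) in subnet:
                    return ip
        return ''

    ip_to_bind_to = first_matching_network_ip('host-a', ['192.168.0.0/24'])
    port = 9095  # example value only; the real port comes from the daemon spec
    port_ips = {str(port): ip_to_bind_to} if ip_to_bind_to else {}
    print(ip_to_bind_to, port_ips)  # 192.168.0.12 {'9095': '192.168.0.12'}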
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py
index f94a00f5b..e0c61b117 100644
--- a/src/pybind/mgr/cephadm/services/nfs.py
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -5,6 +5,8 @@ import os
import subprocess
import tempfile
from typing import Dict, Tuple, Any, List, cast, Optional
+from configparser import ConfigParser
+from io import StringIO
from mgr_module import HandleCommandResult
from mgr_module import NFS_POOL_NAME as POOL_NAME
@@ -79,6 +81,8 @@ class NFSService(CephService):
nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'
+ nfs_idmap_conf = '/etc/ganesha/idmap.conf'
+
# create the RADOS recovery pool keyring
rados_user = f'{daemon_type}.{daemon_id}'
rados_keyring = self.create_keyring(daemon_spec)
@@ -115,12 +119,27 @@ class NFSService(CephService):
"port": daemon_spec.ports[0] if daemon_spec.ports else 2049,
"bind_addr": bind_addr,
"haproxy_hosts": [],
+ "nfs_idmap_conf": nfs_idmap_conf,
}
if spec.enable_haproxy_protocol:
context["haproxy_hosts"] = self._haproxy_hosts()
logger.debug("selected haproxy_hosts: %r", context["haproxy_hosts"])
return self.mgr.template.render('services/nfs/ganesha.conf.j2', context)
+ # generate the idmap config
+ def get_idmap_conf() -> str:
+ idmap_conf = spec.idmap_conf
+ output = ''
+ if idmap_conf is not None:
+ cp = ConfigParser()
+ out = StringIO()
+ cp.read_dict(idmap_conf)
+ cp.write(out)
+ out.seek(0)
+ output = out.read()
+ out.close()
+ return output
+
# generate the cephadm config json
def get_cephadm_config() -> Dict[str, Any]:
config: Dict[str, Any] = {}
@@ -130,6 +149,7 @@ class NFSService(CephService):
config['extra_args'] = ['-N', 'NIV_EVENT']
config['files'] = {
'ganesha.conf': get_ganesha_conf(),
+ 'idmap.conf': get_idmap_conf()
}
config.update(
self.get_config_and_keyring(
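get_idmap_conf() above serialises spec.idmap_conf (a section -> options mapping) into INI text with ConfigParser. A standalone example of the same conversion; the section and option names are illustrative only:

    from configparser import ConfigParser
    from io import StringIO

    # Illustrative idmap settings as they might appear in spec.idmap_conf.
    idmap_conf = {'General': {'Domain': 'example.com'}}

    cp = ConfigParser()
    cp.read_dict(idmap_conf)
    out = StringIO()
    cp.write(out)
    print(out.getvalue())
    # [General]
    # domain = example.com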
diff --git a/src/pybind/mgr/cephadm/services/node_proxy.py b/src/pybind/mgr/cephadm/services/node_proxy.py
new file mode 100644
index 000000000..e5608ca42
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/node_proxy.py
@@ -0,0 +1,180 @@
+import json
+import ssl
+import base64
+
+from urllib.error import HTTPError, URLError
+from typing import List, Any, Dict, Tuple, Optional, MutableMapping
+
+from .cephadmservice import CephadmDaemonDeploySpec, CephService
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
+from ceph.utils import http_req
+from orchestrator import OrchestratorError
+
+
+class NodeProxy(CephService):
+ TYPE = 'node-proxy'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_id, host = daemon_spec.daemon_id, daemon_spec.host
+
+ if not self.mgr.http_server.agent:
+ raise OrchestratorError('Cannot deploy node-proxy before creating cephadm endpoint')
+
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id, host=host), [])
+ daemon_spec.keyring = keyring
+ self.mgr.node_proxy_cache.update_keyring(host, keyring)
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ # node-proxy is re-using the agent endpoint and therefore
+ # needs similar checks to see if the endpoint is ready.
+ self.agent_endpoint = self.mgr.http_server.agent
+ try:
+ assert self.agent_endpoint
+ assert self.agent_endpoint.ssl_certs.get_root_cert()
+ assert self.agent_endpoint.server_port
+ except Exception:
+ raise OrchestratorError(
+ 'Cannot deploy node-proxy daemons until cephadm endpoint has finished generating certs')
+
+ listener_cert, listener_key = self.agent_endpoint.ssl_certs.generate_cert(daemon_spec.host, self.mgr.inventory.get_addr(daemon_spec.host))
+ cfg = {
+ 'target_ip': self.mgr.get_mgr_ip(),
+ 'target_port': self.agent_endpoint.server_port,
+ 'name': f'node-proxy.{daemon_spec.host}',
+ 'keyring': daemon_spec.keyring,
+ 'root_cert.pem': self.agent_endpoint.ssl_certs.get_root_cert(),
+ 'listener.crt': listener_cert,
+ 'listener.key': listener_key,
+ }
+ config = {'node-proxy.json': json.dumps(cfg)}
+
+ return config, sorted([str(self.mgr.get_mgr_ip()), str(self.agent_endpoint.server_port),
+ self.agent_endpoint.ssl_certs.get_root_cert()])
+
+ def handle_hw_monitoring_setting(self) -> bool:
+ # function to apply or remove node-proxy service spec depending
+ # on whether the hw_monitoring config option is set or not.
+ # It should return True when it either creates or deletes a spec
+ # and False otherwise.
+ if self.mgr.hw_monitoring:
+ if 'node-proxy' not in self.mgr.spec_store:
+ spec = ServiceSpec(
+ service_type='node-proxy',
+ placement=PlacementSpec(host_pattern='*')
+ )
+ self.mgr.spec_store.save(spec)
+ return True
+ return False
+ else:
+ if 'node-proxy' in self.mgr.spec_store:
+ self.mgr.spec_store.rm('node-proxy')
+ return True
+ return False
+
+ def get_ssl_ctx(self) -> ssl.SSLContext:
+ ssl_root_crt = self.mgr.http_server.agent.ssl_certs.get_root_cert()
+ ssl_ctx = ssl.create_default_context()
+ ssl_ctx.check_hostname = True
+ ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+ ssl_ctx.load_verify_locations(cadata=ssl_root_crt)
+ return ssl_ctx
+
+ def led(self, light_type: str, action: str, hostname: str, device: Optional[str] = None) -> Dict[str, Any]:
+ ssl_ctx: ssl.SSLContext = self.get_ssl_ctx()
+ header: MutableMapping[str, str] = {}
+ method: str = 'PATCH' if action in ['on', 'off'] else 'GET'
+ payload: Optional[Dict[str, str]] = None
+ addr: str = self.mgr.inventory.get_addr(hostname)
+ endpoint: List[str] = ['led', light_type]
+ _device: str = device if device else ''
+
+ if light_type == 'drive':
+ endpoint.append(_device)
+
+ if method == 'PATCH':
+ payload = dict(state=action)
+
+ header = self.generate_auth_header(hostname)
+
+ endpoint = f'/{"/".join(endpoint)}'
+
+ try:
+ headers, result, status = http_req(hostname=addr,
+ port='9456',
+ headers=header,
+ method=method,
+ data=json.dumps(payload),
+ endpoint=endpoint,
+ ssl_ctx=ssl_ctx)
+ result_json = json.loads(result)
+ except HTTPError as e:
+ self.mgr.log.error(e)
+ raise
+ except URLError as e:
+ raise RuntimeError(e)
+
+ return result_json
+
+ def generate_auth_header(self, hostname: str) -> Dict[str, str]:
+ try:
+ username = self.mgr.node_proxy_cache.oob[hostname]['username']
+ password = self.mgr.node_proxy_cache.oob[hostname]['password']
+ auth: bytes = f'{username}:{password}'.encode('utf-8')
+ auth_str: str = base64.b64encode(auth).decode('utf-8')
+ header = {'Authorization': f'Basic {auth_str}'}
+ except KeyError as e:
+ self.mgr.log.error(f'Check oob information is provided for {hostname}.')
+ raise RuntimeError(e)
+ return header
+
+ def shutdown(self, hostname: str, force: Optional[bool] = False) -> Dict[str, Any]:
+ ssl_ctx: ssl.SSLContext = self.get_ssl_ctx()
+ header: Dict[str, str] = self.generate_auth_header(hostname)
+ addr: str = self.mgr.inventory.get_addr(hostname)
+
+ endpoint = '/shutdown'
+ payload: Dict[str, Optional[bool]] = dict(force=force)
+
+ try:
+ headers, result, status = http_req(hostname=addr,
+ port='9456',
+ headers=header,
+ data=json.dumps(payload),
+ endpoint=endpoint,
+ ssl_ctx=ssl_ctx)
+ result_json = json.loads(result)
+ except HTTPError as e:
+ self.mgr.log.error(e)
+ raise
+ except URLError as e:
+ raise RuntimeError(e)
+
+ return result_json
+
+ def powercycle(self, hostname: str) -> Dict[str, Any]:
+ ssl_ctx: ssl.SSLContext = self.get_ssl_ctx()
+ header: Dict[str, str] = self.generate_auth_header(hostname)
+ addr: str = self.mgr.inventory.get_addr(hostname)
+
+ endpoint = '/powercycle'
+
+ try:
+ headers, result, status = http_req(hostname=addr,
+ port='9456',
+ headers=header,
+ data="{}",
+ endpoint=endpoint,
+ ssl_ctx=ssl_ctx)
+ result_json = json.loads(result)
+ except HTTPError as e:
+ self.mgr.log.error(e)
+ raise
+ except URLError as e:
+ raise RuntimeError(e)
+
+ return result_json
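The helpers in this new file share one pattern: build a Basic-auth header from the cached out-of-band credentials, shape a method/endpoint/payload, and hand everything to ceph.utils.http_req against port 9456. A standalone sketch of that shaping (placeholder credentials, no network I/O; the port and paths come from the code above):

    import base64

    # Placeholder out-of-band credentials standing in for node_proxy_cache.oob[host].
    username, password = 'oob-user', 'oob-secret'
    auth = base64.b64encode(f'{username}:{password}'.encode('utf-8')).decode('utf-8')
    header = {'Authorization': f'Basic {auth}'}

    def build_led_request(light_type: str, action: str, device: str = '') -> tuple:
        # Mirrors the shaping in led(): PATCH for 'on'/'off', GET otherwise,
        # plus an extra path component for drive LEDs.
        method = 'PATCH' if action in ('on', 'off') else 'GET'
        parts = ['led', light_type]
        if light_type == 'drive':
            parts.append(device)
        payload = {'state': action} if method == 'PATCH' else None
        return method, '/' + '/'.join(parts), payload

    print(build_led_request('drive', 'on', 'sda'))   # ('PATCH', '/led/drive/sda', {'state': 'on'})
    print(build_led_request('chassis', 'status'))    # ('GET', '/led/chassis', None)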
diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py
index 7d2dd16cf..5f28273d4 100644
--- a/src/pybind/mgr/cephadm/services/nvmeof.py
+++ b/src/pybind/mgr/cephadm/services/nvmeof.py
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
class NvmeofService(CephService):
TYPE = 'nvmeof'
+ PROMETHEUS_PORT = 10008
def config(self, spec: NvmeofServiceSpec) -> None: # type: ignore
assert self.TYPE == spec.service_type
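The new PROMETHEUS_PORT constant pairs with the nvmeof_sd_url added in the Prometheus hunk above: the service-discovery endpoint can advertise nvmeof gateways as scrape targets on port 10008. Roughly (the base URL shape below is an assumption for illustration, not the literal mgr code):

    class NvmeofServiceSketch:
        TYPE = 'nvmeof'
        PROMETHEUS_PORT = 10008

    # Assumed shape of the cephadm service-discovery base URL, for illustration only.
    srv_end_point = 'https://mgr-host:8765/sd/prometheus/sd-config?'
    nvmeof_sd_url = f'{srv_end_point}service={NvmeofServiceSketch.TYPE}'
    scrape_target = f'gateway-host:{NvmeofServiceSketch.PROMETHEUS_PORT}'
    print(nvmeof_sd_url, scrape_target)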
diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py
index bfecc5723..75b3fc58c 100644
--- a/src/pybind/mgr/cephadm/services/osd.py
+++ b/src/pybind/mgr/cephadm/services/osd.py
@@ -319,11 +319,16 @@ class OSDService(CephService):
logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
concat_out = {}
notes = []
- if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
+ if (
+ osdspec.data_devices is not None
+ and osdspec.data_devices.limit
+ and (len(concat_out) + ds.existing_daemons) < osdspec.data_devices.limit
+ ):
found = len(concat_out)
limit = osdspec.data_devices.limit
notes.append(
- f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
+ f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit\n'
+ f'(New Devices: {found} | Existing Matching Daemons: {ds.existing_daemons} | Limit: {limit})')
ret_all.append({'data': concat_out,
'osdspec': osdspec.service_id,
'host': host,
@@ -664,6 +669,7 @@ class OSD:
return None
self.started = True
self.stopped = False
+ self.original_weight = self.rm_util.get_weight(self)
def start_draining(self) -> bool:
if self.stopped:
@@ -672,7 +678,6 @@ class OSD:
if self.replace:
self.rm_util.set_osd_flag([self], 'out')
else:
- self.original_weight = self.rm_util.get_weight(self)
self.rm_util.reweight_osd(self, 0.0)
self.drain_started_at = datetime.utcnow()
self.draining = True
@@ -761,6 +766,7 @@ class OSD:
out['force'] = self.force
out['zap'] = self.zap
out['hostname'] = self.hostname # type: ignore
+ out['original_weight'] = self.original_weight
for k in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
if getattr(self, k):
@@ -953,6 +959,16 @@ class OSDRemovalQueue(object):
self.osds.add(osd)
osd.start()
+ def rm_by_osd_id(self, osd_id: int) -> None:
+ osd: Optional["OSD"] = None
+ for o in self.osds:
+ if o.osd_id == osd_id:
+ osd = o
+ if not osd:
+ logger.debug(f"Could not find osd with id {osd_id} in queue.")
+ raise KeyError(f'No osd with id {osd_id} in removal queue')
+ self.rm(osd)
+
def rm(self, osd: "OSD") -> None:
if not osd.exists:
raise NotFoundError()
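Two of the osd.py changes are easiest to see with numbers: the limit check now counts devices that already carry matching daemons, and original_weight is captured in start() and serialized in to_json() instead of only being set when draining begins. A toy illustration of the revised limit arithmetic (made-up values; in the real code they come from concat_out, ds.existing_daemons and osdspec.data_devices.limit):

    new_devices_found = 2
    existing_daemons = 3
    limit = 6

    if (new_devices_found + existing_daemons) < limit:
        print('NOTE: Did not find enough disks matching filter to reach data device limit\n'
              f'(New Devices: {new_devices_found} | '
              f'Existing Matching Daemons: {existing_daemons} | Limit: {limit})')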