Diffstat (limited to '')
-rw-r--r-- | src/pybind/mgr/cephadm/services/nfs.py | 290
1 files changed, 290 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py
new file mode 100644
index 000000000..ee53283bd
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -0,0 +1,290 @@
+import errno
+import logging
+import os
+import subprocess
+import tempfile
+from typing import Dict, Tuple, Any, List, cast, Optional
+
+from mgr_module import HandleCommandResult
+from mgr_module import NFS_POOL_NAME as POOL_NAME
+
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec
+
+from orchestrator import DaemonDescription
+
+from cephadm.services.cephadmservice import AuthEntity, CephadmDaemonDeploySpec, CephService
+
+logger = logging.getLogger(__name__)
+
+
+class NFSService(CephService):
+    TYPE = 'nfs'
+
+    def ranked(self) -> bool:
+        return True
+
+    def fence(self, daemon_id: str) -> None:
+        logger.info(f'Fencing old nfs.{daemon_id}')
+        ret, out, err = self.mgr.mon_command({
+            'prefix': 'auth rm',
+            'entity': f'client.nfs.{daemon_id}',
+        })
+
+        # TODO: block/fence this entity (in case it is still running somewhere)
+
+    def fence_old_ranks(self,
+                        spec: ServiceSpec,
+                        rank_map: Dict[int, Dict[int, Optional[str]]],
+                        num_ranks: int) -> None:
+        for rank, m in list(rank_map.items()):
+            if rank >= num_ranks:
+                for daemon_id in m.values():
+                    if daemon_id is not None:
+                        self.fence(daemon_id)
+                del rank_map[rank]
+                nodeid = f'{spec.service_name()}.{rank}'
+                self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
+                self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
+                self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+            else:
+                max_gen = max(m.keys())
+                for gen, daemon_id in list(m.items()):
+                    if gen < max_gen:
+                        if daemon_id is not None:
+                            self.fence(daemon_id)
+                        del rank_map[rank][gen]
+                        self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+
+    def config(self, spec: NFSServiceSpec) -> None:  # type: ignore
+        from nfs.cluster import create_ganesha_pool
+
+        assert self.TYPE == spec.service_type
+        create_ganesha_pool(self.mgr)
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+
+        daemon_type = daemon_spec.daemon_type
+        daemon_id = daemon_spec.daemon_id
+        host = daemon_spec.host
+        spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+        deps: List[str] = []
+
+        nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'
+
+        # create the RADOS recovery pool keyring
+        rados_user = f'{daemon_type}.{daemon_id}'
+        rados_keyring = self.create_keyring(daemon_spec)
+
+        # ensure rank is known to ganesha
+        self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table')
+        self.run_grace_tool(spec, 'add', nodeid)
+
+        # create the rados config object
+        self.create_rados_config_obj(spec)
+
+        # create the RGW keyring
+        rgw_user = f'{rados_user}-rgw'
+        rgw_keyring = self.create_rgw_keyring(daemon_spec)
+
+        # generate the ganesha config
+        def get_ganesha_conf() -> str:
+            context = {
+                "user": rados_user,
+                "nodeid": nodeid,
+                "pool": POOL_NAME,
+                "namespace": spec.service_id,
+                "rgw_user": rgw_user,
+                "url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}',
+                # fall back to default NFS port if not present in daemon_spec
+                "port": daemon_spec.ports[0] if daemon_spec.ports else 2049,
+                "bind_addr": daemon_spec.ip if daemon_spec.ip else '',
+            }
+            return self.mgr.template.render('services/nfs/ganesha.conf.j2', context)
+
+        # generate the cephadm config json
+        def get_cephadm_config() -> Dict[str, Any]:
+            config: Dict[str, Any] = {}
+            config['pool'] = POOL_NAME
+            config['namespace'] = spec.service_id
+            config['userid'] = rados_user
+            config['extra_args'] = ['-N', 'NIV_EVENT']
+            config['files'] = {
+                'ganesha.conf': get_ganesha_conf(),
+            }
+            config.update(
+                self.get_config_and_keyring(
+                    daemon_type, daemon_id,
+                    keyring=rados_keyring,
+                    host=host
+                )
+            )
+            config['rgw'] = {
+                'cluster': 'ceph',
+                'user': rgw_user,
+                'keyring': rgw_keyring,
+            }
+            logger.debug('Generated cephadm config-json: %s' % config)
+            return config
+
+        return get_cephadm_config(), deps
+
+    def create_rados_config_obj(self,
+                                spec: NFSServiceSpec,
+                                clobber: bool = False) -> None:
+        objname = spec.rados_config_name()
+        cmd = [
+            'rados',
+            '-n', f"mgr.{self.mgr.get_mgr_id()}",
+            '-k', str(self.mgr.get_ceph_option('keyring')),
+            '-p', POOL_NAME,
+            '--namespace', cast(str, spec.service_id),
+        ]
+        result = subprocess.run(
+            cmd + ['get', objname, '-'],
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            timeout=10)
+        if not result.returncode and not clobber:
+            logger.info('Rados config object exists: %s' % objname)
+        else:
+            logger.info('Creating rados config object: %s' % objname)
+            result = subprocess.run(
+                cmd + ['put', objname, '-'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                timeout=10)
+            if result.returncode:
+                self.mgr.log.warning(
+                    f'Unable to create rados config object {objname}: {result.stderr.decode("utf-8")}'
+                )
+                raise RuntimeError(result.stderr.decode("utf-8"))
+
+    def create_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
+        daemon_id = daemon_spec.daemon_id
+        spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        entity: AuthEntity = self.get_auth_entity(daemon_id)
+
+        osd_caps = 'allow rw pool=%s namespace=%s' % (POOL_NAME, spec.service_id)
+
+        logger.info('Creating key for %s' % entity)
+        keyring = self.get_keyring_with_caps(entity,
+                                             ['mon', 'allow r',
+                                              'osd', osd_caps])
+
+        return keyring
+
+    def create_rgw_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
+        daemon_id = daemon_spec.daemon_id
+        entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')
+
+        logger.info('Creating key for %s' % entity)
+        keyring = self.get_keyring_with_caps(entity,
+                                             ['mon', 'allow r',
+                                              'osd', 'allow rwx tag rgw *=*'])
+
+        return keyring
+
+    def run_grace_tool(self,
+                       spec: NFSServiceSpec,
+                       action: str,
+                       nodeid: str) -> None:
+        # write a temp keyring and referencing config file. this is a kludge
+        # because the ganesha-grace-tool can only authenticate as a client (and
+        # not a mgr). Also, it doesn't allow you to pass a keyring location via
+        # the command line, nor does it parse the CEPH_ARGS env var.
+        tmp_id = f'mgr.nfs.grace.{spec.service_name()}'
+        entity = AuthEntity(f'client.{tmp_id}')
+        keyring = self.get_keyring_with_caps(
+            entity,
+            ['mon', 'allow r', 'osd', f'allow rwx pool {POOL_NAME}']
+        )
+        tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring')
+        os.fchmod(tmp_keyring.fileno(), 0o600)
+        tmp_keyring.write(keyring)
+        tmp_keyring.flush()
+        tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf')
+        tmp_conf.write(self.mgr.get_minimal_ceph_conf())
+        tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n')
+        tmp_conf.flush()
+        try:
+            cmd: List[str] = [
+                'ganesha-rados-grace',
+                '--cephconf', tmp_conf.name,
+                '--userid', tmp_id,
+                '--pool', POOL_NAME,
+                '--ns', cast(str, spec.service_id),
+                action, nodeid,
+            ]
+            self.mgr.log.debug(cmd)
+            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                    timeout=10)
+            if result.returncode:
+                self.mgr.log.warning(
+                    f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}'
+                )
+                raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}')
+
+        finally:
+            self.mgr.check_mon_command({
+                'prefix': 'auth rm',
+                'entity': entity,
+            })
+
+    def remove_rgw_keyring(self, daemon: DaemonDescription) -> None:
+        assert daemon.daemon_id is not None
+        daemon_id: str = daemon.daemon_id
+        entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')
+
+        logger.info(f'Removing key for {entity}')
+        self.mgr.check_mon_command({
+            'prefix': 'auth rm',
+            'entity': entity,
+        })
+
+    def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+        super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
+        self.remove_rgw_keyring(daemon)
+
+    def ok_to_stop(self,
+                   daemon_ids: List[str],
+                   force: bool = False,
+                   known: Optional[List[str]] = None) -> HandleCommandResult:
+        # if only 1 nfs, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 nfs daemon.
+        if force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here, > 1 nfs daemon and no force flag.
+        # Provide warning
+        warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+    def purge(self, service_name: str) -> None:
+        if service_name not in self.mgr.spec_store:
+            return
+        spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec)
+
+        logger.info(f'Removing grace file for {service_name}')
+        cmd = [
+            'rados',
+            '-n', f"mgr.{self.mgr.get_mgr_id()}",
+            '-k', str(self.mgr.get_ceph_option('keyring')),
+            '-p', POOL_NAME,
+            '--namespace', cast(str, spec.service_id),
+            'rm', 'grace',
+        ]
+        subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=10
+        )
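For orientation, the sketch below shows the rough shape of the config-json that generate_config() above assembles for cephadm. It is illustrative only, not output of this module: the daemon id 'foo.0.0.host1', the service id 'foo', and the placeholder strings are hypothetical, and the 'config'/'keyring' entries are assumed to be what get_config_and_keyring() merges in.

# Illustrative only: approximate shape of the config-json produced by
# NFSService.generate_config() for a hypothetical daemon nfs.foo.0.0.host1
# of an 'nfs.foo' service. Keyrings and the rendered template are placeholders.
example_config = {
    'pool': '.nfs',                      # POOL_NAME (NFS_POOL_NAME from mgr_module)
    'namespace': 'foo',                  # spec.service_id
    'userid': 'nfs.foo.0.0.host1',       # rados_user = f'{daemon_type}.{daemon_id}'
    'extra_args': ['-N', 'NIV_EVENT'],   # ganesha log level
    'files': {
        'ganesha.conf': '<rendered services/nfs/ganesha.conf.j2>',
    },
    # assumed to come from get_config_and_keyring():
    'config': '<minimal ceph.conf>',
    'keyring': '<keyring for client.nfs.foo.0.0.host1>',
    'rgw': {
        'cluster': 'ceph',
        'user': 'nfs.foo.0.0.host1-rgw',
        'keyring': '<keyring for client.nfs.foo.0.0.host1-rgw>',
    },
}

Roughly speaking, cephadm consumes this payload when deploying the daemon: the 'files' entry becomes the daemon's ganesha.conf, and extra_args is passed through on the ganesha command line.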