summaryrefslogtreecommitdiffstats
path: root/staslib
diff options
context:
space:
mode:
Diffstat (limited to 'staslib')
-rw-r--r--staslib/avahi.py8
-rw-r--r--staslib/conf.py106
-rw-r--r--staslib/ctrl.py360
-rw-r--r--staslib/defs.py5
-rw-r--r--staslib/gutil.py2
-rw-r--r--staslib/iputil.py305
-rw-r--r--staslib/meson.build1
-rw-r--r--staslib/nbft.py28
-rw-r--r--staslib/service.py12
-rw-r--r--staslib/singleton.py25
-rw-r--r--staslib/stas.py44
-rw-r--r--staslib/trid.py17
-rw-r--r--staslib/udev.py199
13 files changed, 806 insertions, 306 deletions
diff --git a/staslib/avahi.py b/staslib/avahi.py
index c8a3a0b..26543d9 100644
--- a/staslib/avahi.py
+++ b/staslib/avahi.py
@@ -29,9 +29,11 @@ def _txt2dict(txt: list):
for list_of_chars in txt:
try:
string = functools.reduce(lambda accumulator, c: accumulator + chr(c), list_of_chars, '')
- key, val = string.split("=")
- the_dict[key.lower()] = val
- except Exception: # pylint: disable=broad-except
+ if string.isprintable():
+ key, val = string.split('=')
+ if key: # Make sure the key is not an empty string
+ the_dict[key.lower()] = val
+ except ValueError:
pass
return the_dict
diff --git a/staslib/conf.py b/staslib/conf.py
index a54da98..e5c038b 100644
--- a/staslib/conf.py
+++ b/staslib/conf.py
@@ -14,7 +14,8 @@ import sys
import logging
import functools
import configparser
-from staslib import defs, singleton, timeparse
+from urllib.parse import urlparse
+from staslib import defs, iputil, nbft, singleton, timeparse
__TOKEN_RE = re.compile(r'\s*;\s*')
__OPTION_RE = re.compile(r'\s*=\s*')
@@ -83,7 +84,8 @@ def _to_ip_family(text):
class OrderedMultisetDict(dict):
'''This class is used to change the behavior of configparser.ConfigParser
and allow multiple configuration parameters with the same key. The
- result is a list of values.
+ result is a list of values, where values are sorted by the order they
+ appear in the file.
'''
def __setitem__(self, key, value):
@@ -317,7 +319,7 @@ class SvcConf(metaclass=singleton.Singleton): # pylint: disable=too-many-public
option = 'persistent-connections'
value = self.get_option(section, option, ignore_default=True)
- legacy = self.get_option('Global', 'persistent-connections', ignore_default=True)
+ legacy = self.get_option('Global', option, ignore_default=True)
if value is None and legacy is None:
return self._defaults.get((section, option), True)
@@ -381,7 +383,7 @@ class SvcConf(metaclass=singleton.Singleton): # pylint: disable=too-many-public
controller_list = self.get_option('Controllers', 'exclude')
# 2022-09-20: Look for "blacklist". This is for backwards compatibility
- # with releases 1.0 to 1.1.6. This is to be phased out (i.e. remove by 2024)
+ # with releases 1.0 to 1.1.x. This is to be phased out (i.e. remove by 2024)
controller_list += self.get_option('Controllers', 'blacklist')
excluded = [_parse_controller(controller) for controller in controller_list]
@@ -572,7 +574,7 @@ class SysConf(metaclass=singleton.Singleton):
try:
value = self.__get_value('Host', 'key', defs.NVME_HOSTKEY)
except FileNotFoundError as ex:
- logging.info('Host key undefined: %s', ex)
+ logging.debug('Host key undefined: %s', ex)
value = None
return value
@@ -701,3 +703,97 @@ class NvmeOptions(metaclass=singleton.Singleton):
def dhchap_ctrlkey_supp(self):
'''This option allows specifying the controller DHCHAP key used for authentication.'''
return self._supported_options['dhchap_ctrl_secret']
+
+
+# ******************************************************************************
+class NbftConf(metaclass=singleton.Singleton):
+ '''Read and cache configuration file.'''
+
+ def __init__(self, root_dir=defs.NBFT_SYSFS_PATH):
+ self._disc_ctrls = []
+ self._subs_ctrls = []
+
+ nbft_files = nbft.get_nbft_files(root_dir)
+ if len(nbft_files):
+ logging.info('NBFT location(s): %s', list(nbft_files.keys()))
+
+ for data in nbft_files.values():
+ hfis = data.get('hfi', [])
+ discovery = data.get('discovery', [])
+ subsystem = data.get('subsystem', [])
+
+ self._disc_ctrls.extend(NbftConf.__nbft_disc_to_cids(discovery, hfis))
+ self._subs_ctrls.extend(NbftConf.__nbft_subs_to_cids(subsystem, hfis))
+
+ dcs = property(lambda self: self._disc_ctrls)
+ iocs = property(lambda self: self._subs_ctrls)
+
+ def get_controllers(self):
+ '''Retrieve the list of controllers. Stafd only cares about
+ discovery controllers. Stacd only cares about I/O controllers.'''
+
+ # For now, only return DCs. There are still unanswered questions
+ # regarding I/O controllers, e.g. what if multipathing has been
+ # configured.
+ return self.dcs if defs.PROG_NAME == 'stafd' else []
+
+ @staticmethod
+ def __nbft_disc_to_cids(discovery, hfis):
+ cids = []
+
+ for ctrl in discovery:
+ cid = NbftConf.__uri2cid(ctrl['uri'])
+ cid['subsysnqn'] = ctrl['nqn']
+
+ host_iface = NbftConf.__get_host_iface(ctrl.get('hfi_index'), hfis)
+ if host_iface:
+ cid['host-iface'] = host_iface
+
+ cids.append(cid)
+
+ return cids
+
+ @staticmethod
+ def __nbft_subs_to_cids(subsystem, hfis):
+ cids = []
+
+ for ctrl in subsystem:
+ cid = {
+ 'transport': ctrl['trtype'],
+ 'traddr': ctrl['traddr'],
+ 'trsvcid': ctrl['trsvcid'],
+ 'subsysnqn': ctrl['subsys_nqn'],
+ 'hdr-digest': ctrl['pdu_header_digest_required'],
+ 'data-digest': ctrl['data_digest_required'],
+ }
+
+ indexes = ctrl.get('hfi_indexes')
+ if isinstance(indexes, list) and len(indexes) > 0:
+ host_iface = NbftConf.__get_host_iface(indexes[0], hfis)
+ if host_iface:
+ cid['host-iface'] = host_iface
+
+ cids.append(cid)
+
+ return cids
+
+ @staticmethod
+ def __get_host_iface(indx, hfis):
+ if indx is None or indx >= len(hfis):
+ return None
+
+ mac = hfis[indx].get('mac_addr')
+ if mac is None:
+ return None
+
+ return iputil.mac2iface(mac)
+
+ @staticmethod
+ def __uri2cid(uri: str):
+ '''Convert a URI of the form "nvme+tcp://100.71.103.50:8009/" to a Controller ID'''
+ obj = urlparse(uri)
+ return {
+ 'transport': obj.scheme.split('+')[1],
+ 'traddr': obj.hostname,
+ 'trsvcid': str(obj.port),
+ }
diff --git a/staslib/ctrl.py b/staslib/ctrl.py
index 97a1c7b..ad221e0 100644
--- a/staslib/ctrl.py
+++ b/staslib/ctrl.py
@@ -135,26 +135,7 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
self._root.log_level("debug" if tron else "err")
def _on_udev_notification(self, udev_obj):
- if self._alive():
- if udev_obj.action == 'change':
- nvme_aen = udev_obj.get('NVME_AEN')
- nvme_event = udev_obj.get('NVME_EVENT')
- if isinstance(nvme_aen, str):
- logging.info('%s | %s - Received AEN: %s', self.id, udev_obj.sys_name, nvme_aen)
- self._on_aen(int(nvme_aen, 16))
- if isinstance(nvme_event, str):
- self._on_nvme_event(nvme_event)
- elif udev_obj.action == 'remove':
- logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
- self._on_ctrl_removed(udev_obj)
- else:
- logging.debug(
- 'Controller._on_udev_notification() - %s | %s: Received "%s" event',
- self.id,
- udev_obj.sys_name,
- udev_obj.action,
- )
- else:
+ if not self._alive():
logging.debug(
'Controller._on_udev_notification() - %s | %s: Received event on dead object. udev_obj %s: %s',
self.id,
@@ -162,6 +143,26 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
udev_obj.action,
udev_obj.sys_name,
)
+ return
+
+ if udev_obj.action == 'change':
+ nvme_aen = udev_obj.get('NVME_AEN')
+ nvme_event = udev_obj.get('NVME_EVENT')
+ if isinstance(nvme_aen, str):
+ logging.info('%s | %s - Received AEN: %s', self.id, udev_obj.sys_name, nvme_aen)
+ self._on_aen(int(nvme_aen, 16))
+ if isinstance(nvme_event, str):
+ self._on_nvme_event(nvme_event)
+ elif udev_obj.action == 'remove':
+ logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
+ self._on_ctrl_removed(udev_obj)
+ else:
+ logging.debug(
+ 'Controller._on_udev_notification() - %s | %s: Received "%s" event',
+ self.id,
+ udev_obj.sys_name,
+ udev_obj.action,
+ )
def _on_ctrl_removed(self, udev_obj): # pylint: disable=unused-argument
if self._udev:
@@ -269,48 +270,51 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
op_obj.kill()
self._connect_op = None
- if self._alive():
- self._device = self._ctrl.name
- logging.info('%s | %s - Connection established!', self.id, self.device)
- self._connect_attempts = 0
- self._udev.register_for_device_events(self._device, self._on_udev_notification)
- else:
+ if not self._alive():
logging.debug(
'Controller._on_connect_success() - %s | %s: Received event on dead object. data=%s',
self.id,
self.device,
data,
)
+ return
+
+ self._device = self._ctrl.name
+ logging.info('%s | %s - Connection established!', self.id, self.device)
+ self._connect_attempts = 0
+ self._udev.register_for_device_events(self._device, self._on_udev_notification)
def _on_connect_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt): # pylint: disable=unused-argument
'''@brief Function called when we fail to connect to the Controller.'''
op_obj.kill()
self._connect_op = None
- if self._alive():
- if self._connect_attempts == 1:
- # Do a fast re-try on the first failure.
- self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
- elif self._connect_attempts == 2:
- # If the fast connect re-try fails, then we can print a message to
- # indicate the failure, and start a slow re-try period.
- self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
- logging.error('%s Failed to connect to controller. %s %s', self.id, err.domain, err.message)
-
- if self._should_try_to_reconnect():
- logging.debug(
- 'Controller._on_connect_fail() - %s %s. Retry in %s sec.',
- self.id,
- err,
- self._retry_connect_tmr.get_timeout(),
- )
- self._retry_connect_tmr.start()
- else:
+
+ if not self._alive():
logging.debug(
'Controller._on_connect_fail() - %s Received event on dead object. %s %s',
self.id,
err.domain,
err.message,
)
+ return
+
+ if self._connect_attempts == 1:
+ # Do a fast re-try on the first failure.
+ self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
+ elif self._connect_attempts == 2:
+ # If the fast connect re-try fails, then we can print a message to
+ # indicate the failure, and start a slow re-try period.
+ self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
+ logging.error('%s Failed to connect to controller. %s %s', self.id, err.domain, err.message)
+
+ if self._should_try_to_reconnect():
+ logging.debug(
+ 'Controller._on_connect_fail() - %s %s. Retry in %s sec.',
+ self.id,
+ err,
+ self._retry_connect_tmr.get_timeout(),
+ )
+ self._retry_connect_tmr.start()
def disconnect(self, disconnected_cb, keep_connection):
'''@brief Issue an asynchronous disconnect command to a Controller.
@@ -507,11 +511,7 @@ class Dc(Controller):
return
- logging.info(
- '%s | %s - Controller not responding. Retrying...',
- self.id,
- self.device,
- )
+ logging.info('%s | %s - Controller not responding. Retrying...', self.id, self.device)
self._ctrl_unresponsive_time = None
self._ctrl_unresponsive_tmr.stop()
@@ -555,8 +555,8 @@ class Dc(Controller):
def _post_registration_actions(self):
# Need to check that supported_log_pages() is available (introduced in libnvme 1.2)
- has_supported_log_pages = hasattr(self._ctrl, 'supported_log_pages')
- if not has_supported_log_pages:
+ get_slp = getattr(self._ctrl, 'supported_log_pages', None)
+ if get_slp is None:
logging.warning(
'%s | %s - libnvme-%s does not support "Get supported log pages". Please upgrade libnvme.',
self.id,
@@ -564,9 +564,9 @@ class Dc(Controller):
defs.LIBNVME_VERSION,
)
- if conf.SvcConf().pleo_enabled and self._is_ddc() and has_supported_log_pages:
+ if conf.SvcConf().pleo_enabled and self._is_ddc() and get_slp is not None:
self._get_supported_op = gutil.AsyncTask(
- self._on_get_supported_success, self._on_get_supported_fail, self._ctrl.supported_log_pages
+ self._on_get_supported_success, self._on_get_supported_fail, get_slp
)
self._get_supported_op.run_async()
else:
@@ -580,21 +580,23 @@ class Dc(Controller):
'''
super()._on_connect_success(op_obj, data)
- if self._alive():
- self._ctrl_unresponsive_time = None
- self._ctrl_unresponsive_tmr.stop()
- self._ctrl_unresponsive_tmr.set_timeout(0)
-
- if self._ctrl.is_registration_supported():
- self._register_op = gutil.AsyncTask(
- self._on_registration_success,
- self._on_registration_fail,
- self._ctrl.registration_ctlr,
- nvme.NVMF_DIM_TAS_REGISTER,
- )
- self._register_op.run_async()
- else:
- self._post_registration_actions()
+ if not self._alive():
+ return
+
+ self._ctrl_unresponsive_time = None
+ self._ctrl_unresponsive_tmr.stop()
+ self._ctrl_unresponsive_tmr.set_timeout(0)
+
+ if self._ctrl.is_registration_supported():
+ self._register_op = gutil.AsyncTask(
+ self._on_registration_success,
+ self._on_registration_fail,
+ self._ctrl.registration_ctlr,
+ nvme.NVMF_DIM_TAS_REGISTER,
+ )
+ self._register_op.run_async()
+ else:
+ self._post_registration_actions()
def _on_connect_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to connect to the Controller.'''
@@ -613,35 +615,25 @@ class Dc(Controller):
refers to the fact that a successful exchange was made with the DC.
It doesn't mean that the registration itself succeeded.
'''
- if self._alive():
- if data is not None:
- logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
- else:
- logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
-
- self._post_registration_actions()
- else:
+ if not self._alive():
logging.debug(
'Dc._on_registration_success() - %s | %s: Received event on dead object.', self.id, self.device
)
+ return
+
+ if data is not None:
+ logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
+ else:
+ logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
+
+ self._post_registration_actions()
def _on_registration_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to register with the
Discovery Controller. See self._register_op object
for details.
'''
- if self._alive():
- logging.debug(
- 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
- self.id,
- self.device,
- err,
- Dc.REGISTRATION_RETRY_RERIOD_SEC,
- )
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
- op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
- else:
+ if not self._alive():
logging.debug(
'Dc._on_registration_fail() - %s | %s: Received event on dead object. %s',
self.id,
@@ -649,6 +641,18 @@ class Dc(Controller):
err,
)
op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.REGISTRATION_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
+ op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
# --------------------------------------------------------------------------
def _on_get_supported_success(self, op_obj: gutil.AsyncTask, data): # pylint: disable=unused-argument
@@ -660,68 +664,70 @@ class Dc(Controller):
refers to the fact that a successful exchange was made with the DC.
It doesn't mean that the Get Supported Log Page itself succeeded.
'''
- if self._alive():
- try:
- dlp_supp_opts = data[nvme.NVME_LOG_LID_DISCOVER] >> 16
- except (TypeError, IndexError):
- dlp_supp_opts = 0
-
+ if not self._alive():
logging.debug(
- 'Dc._on_get_supported_success() - %s | %s: supported options = 0x%04X = %s',
- self.id,
- self.device,
- dlp_supp_opts,
- dlp_supp_opts_as_string(dlp_supp_opts),
+ 'Dc._on_get_supported_success() - %s | %s: Received event on dead object.', self.id, self.device
)
+ return
- if 'lsp' in inspect.signature(self._ctrl.discover).parameters:
- lsp = nvme.NVMF_LOG_DISC_LSP_PLEO if dlp_supp_opts & nvme.NVMF_LOG_DISC_LID_PLEOS else 0
- self._get_log_op = gutil.AsyncTask(
- self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover, lsp
- )
- else:
- logging.warning(
- '%s | %s - libnvme-%s does not support setting PLEO bit. Please upgrade.',
- self.id,
- self.device,
- defs.LIBNVME_VERSION,
- )
- self._get_log_op = gutil.AsyncTask(self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover)
- self._get_log_op.run_async()
+ try:
+ dlp_supp_opts = data[nvme.NVME_LOG_LID_DISCOVER] >> 16
+ except (TypeError, IndexError):
+ dlp_supp_opts = 0
+
+ logging.debug(
+ 'Dc._on_get_supported_success() - %s | %s: supported options = 0x%04X = %s',
+ self.id,
+ self.device,
+ dlp_supp_opts,
+ dlp_supp_opts_as_string(dlp_supp_opts),
+ )
+
+ if 'lsp' in inspect.signature(self._ctrl.discover).parameters:
+ lsp = nvme.NVMF_LOG_DISC_LSP_PLEO if dlp_supp_opts & nvme.NVMF_LOG_DISC_LID_PLEOS else 0
+ self._get_log_op = gutil.AsyncTask(
+ self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover, lsp
+ )
else:
- logging.debug(
- 'Dc._on_get_supported_success() - %s | %s: Received event on dead object.', self.id, self.device
+ logging.warning(
+ '%s | %s - libnvme-%s does not support setting PLEO bit. Please upgrade.',
+ self.id,
+ self.device,
+ defs.LIBNVME_VERSION,
)
+ self._get_log_op = gutil.AsyncTask(self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover)
+ self._get_log_op.run_async()
def _on_get_supported_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to retrieve the supported log
page from the Discovery Controller. See self._get_supported_op object
for details.
'''
- if self._alive():
+ if not self._alive():
logging.debug(
- 'Dc._on_get_supported_fail() - %s | %s: %s. Retry in %s sec',
+ 'Dc._on_get_supported_fail() - %s | %s: Received event on dead object. %s',
self.id,
self.device,
err,
- Dc.GET_SUPPORTED_RETRY_RERIOD_SEC,
)
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error(
- '%s | %s - Failed to Get supported log pages from Discovery Controller. %s',
- self.id,
- self.device,
- err,
- )
- op_obj.retry(Dc.GET_SUPPORTED_RETRY_RERIOD_SEC)
- else:
- logging.debug(
- 'Dc._on_get_supported_fail() - %s | %s: Received event on dead object. %s',
+ op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_get_supported_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.GET_SUPPORTED_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error(
+ '%s | %s - Failed to Get supported log pages from Discovery Controller. %s',
self.id,
self.device,
err,
)
- op_obj.kill()
+ op_obj.retry(Dc.GET_SUPPORTED_RETRY_RERIOD_SEC)
# --------------------------------------------------------------------------
def _on_get_log_success(self, op_obj: gutil.AsyncTask, data): # pylint: disable=unused-argument
@@ -729,61 +735,51 @@ class Dc(Controller):
from the Discovery Controller. See self._get_log_op object
for details.
'''
- if self._alive():
- # Note that for historical reasons too long to explain, the CDC may
- # return invalid addresses ('0.0.0.0', '::', or ''). Those need to
- # be filtered out.
- referrals_before = self.referrals()
- self._log_pages = (
- [
- {k.strip(): str(v).strip() for k, v in dictionary.items()}
- for dictionary in data
- if dictionary.get('traddr', '').strip() not in ('0.0.0.0', '::', '')
- ]
- if data
- else list()
+ if not self._alive():
+ logging.debug(
+ 'Dc._on_get_log_success() - %s | %s: Received event on dead object.', self.id, self.device
)
- logging.info(
- '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
+ return
+
+ # Note that for historical reasons too long to explain, the CDC may
+ # return invalid addresses ('0.0.0.0', '::', or ''). Those need to
+ # be filtered out.
+ referrals_before = self.referrals()
+ self._log_pages = (
+ [
+ {k.strip(): str(v).strip() for k, v in dictionary.items()}
+ for dictionary in data
+ if dictionary.get('traddr', '').strip() not in ('0.0.0.0', '::', '')
+ ]
+ if data
+ else list()
+ )
+ logging.info(
+ '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
+ )
+ referrals_after = self.referrals()
+ self._serv.log_pages_changed(self, self.device)
+ if referrals_after != referrals_before:
+ logging.debug(
+ 'Dc._on_get_log_success() - %s | %s: Referrals before = %s',
+ self.id,
+ self.device,
+ referrals_before,
)
- referrals_after = self.referrals()
- self._serv.log_pages_changed(self, self.device)
- if referrals_after != referrals_before:
- logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Referrals before = %s',
- self.id,
- self.device,
- referrals_before,
- )
- logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Referrals after = %s',
- self.id,
- self.device,
- referrals_after,
- )
- self._serv.referrals_changed()
- else:
logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Received event on dead object.', self.id, self.device
+ 'Dc._on_get_log_success() - %s | %s: Referrals after = %s',
+ self.id,
+ self.device,
+ referrals_after,
)
+ self._serv.referrals_changed()
def _on_get_log_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to retrieve the log pages
from the Discovery Controller. See self._get_log_op object
for details.
'''
- if self._alive():
- logging.debug(
- 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
- self.id,
- self.device,
- err,
- Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
- )
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
- op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
- else:
+ if not self._alive():
logging.debug(
'Dc._on_get_log_fail() - %s | %s: Received event on dead object. %s',
self.id,
@@ -791,6 +787,18 @@ class Dc(Controller):
err,
)
op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
+ op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
# ******************************************************************************
diff --git a/staslib/defs.py b/staslib/defs.py
index 5a50371..877f6e6 100644
--- a/staslib/defs.py
+++ b/staslib/defs.py
@@ -12,6 +12,7 @@ import os
import sys
import shutil
import platform
+from libnvme import nvme
from staslib.version import KernelVersion
try:
@@ -48,4 +49,8 @@ SYS_CONF_FILE = '/etc/stas/sys.conf'
STAFD_CONF_FILE = '/etc/stas/stafd.conf'
STACD_CONF_FILE = '/etc/stas/stacd.conf'
+HAS_NBFT_SUPPORT = hasattr(nvme, 'nbft_get')
+NBFT_SYSFS_PATH = "/sys/firmware/acpi/tables"
+NBFT_SYSFS_FILENAME = "NBFT*"
+
SYSTEMCTL = shutil.which('systemctl')
diff --git a/staslib/gutil.py b/staslib/gutil.py
index c40b80e..836674a 100644
--- a/staslib/gutil.py
+++ b/staslib/gutil.py
@@ -309,7 +309,7 @@ class AsyncTask: # pylint: disable=too-many-instance-attributes
'''Return object members as a dictionary'''
info = {
'fail count': self._fail_cnt,
- 'completed': self._task.get_completed(),
+ 'completed': self._task.get_completed() if self._task else None,
'alive': self._alive(),
}
diff --git a/staslib/iputil.py b/staslib/iputil.py
index 9199a49..96c5d56 100644
--- a/staslib/iputil.py
+++ b/staslib/iputil.py
@@ -8,45 +8,126 @@
'''A collection of IP address and network interface utilities'''
+import struct
import socket
-import logging
import ipaddress
-from staslib import conf
+RTM_BASE = 16
+RTM_GETLINK = 18
RTM_NEWADDR = 20
RTM_GETADDR = 22
NLM_F_REQUEST = 0x01
NLM_F_ROOT = 0x100
NLMSG_DONE = 3
-IFLA_ADDRESS = 1
-NLMSGHDR_SZ = 16
+NLMSG_HDRLEN = 16
IFADDRMSG_SZ = 8
+IFINFOMSG_SZ = 16
+ARPHRD_ETHER = 1
+ARPHRD_LOOPBACK = 772
+NLMSG_LENGTH = lambda msg_len: msg_len + NLMSG_HDRLEN # pylint: disable=unnecessary-lambda-assignment
+
RTATTR_SZ = 4
+RTA_ALIGN = lambda length: ((length + 3) & ~3) # pylint: disable=unnecessary-lambda-assignment
+IFLA_ADDRESS = 1
+IFLA_IFNAME = 3
+
+
+def _nlmsghdr(nlmsg_type, nlmsg_flags, nlmsg_seq, nlmsg_pid, msg_len: int):
+ '''Implement this C struct:
+ struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+ };
+ '''
+ return struct.pack('<LHHLL', NLMSG_LENGTH(msg_len), nlmsg_type, nlmsg_flags, nlmsg_seq, nlmsg_pid)
+
+
+def _ifaddrmsg(family=0, prefixlen=0, flags=0, scope=0, index=0):
+ '''Implement this C struct:
+ struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+ };
+ '''
+ return struct.pack('<BBBBL', family, prefixlen, flags, scope, index)
+
+
+def _ifinfomsg(family=0, dev_type=0, index=0, flags=0, change=0):
+ '''Implement this C struct:
+ struct ifinfomsg {
+ unsigned char ifi_family; /* AF_UNSPEC */
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* Device type: ARPHRD_* */
+ int ifi_index; /* Interface index */
+ unsigned int ifi_flags; /* Device flags: IFF_* */
+ unsigned int ifi_change; /* change mask: IFF_* */
+ };
+ '''
+ return struct.pack('<BBHiII', family, 0, dev_type, index, flags, change)
+
+
+def _nlmsg(nlmsg_type, nlmsg_flags, msg: bytes):
+ '''Build a Netlink message'''
+ return _nlmsghdr(nlmsg_type, nlmsg_flags, 0, 0, len(msg)) + msg
+
+
+# Netlink request (Get address command)
+GETADDRCMD = _nlmsg(RTM_GETADDR, NLM_F_REQUEST | NLM_F_ROOT, _ifaddrmsg())
# Netlink request (Get address command)
-GETADDRCMD = (
- # BEGIN: struct nlmsghdr
- b'\0' * 4 # nlmsg_len (placeholder - actual length calculated below)
- + (RTM_GETADDR).to_bytes(2, byteorder='little', signed=False) # nlmsg_type
- + (NLM_F_REQUEST | NLM_F_ROOT).to_bytes(2, byteorder='little', signed=False) # nlmsg_flags
- + b'\0' * 2 # nlmsg_seq
- + b'\0' * 2 # nlmsg_pid
- # END: struct nlmsghdr
- + b'\0' * 8 # struct ifaddrmsg
-)
-GETADDRCMD = len(GETADDRCMD).to_bytes(4, byteorder='little') + GETADDRCMD[4:] # nlmsg_len
+GETLINKCMD = _nlmsg(RTM_GETLINK, NLM_F_REQUEST | NLM_F_ROOT, _ifinfomsg(family=socket.AF_UNSPEC, change=0xFFFFFFFF))
# ******************************************************************************
-def get_ipaddress_obj(ipaddr):
- '''@brief Return a IPv4Address or IPv6Address depending on whether @ipaddr
- is a valid IPv4 or IPv6 address. Return None otherwise.'''
- try:
- ip = ipaddress.ip_address(ipaddr)
- except ValueError:
- return None
+def _data_matches_mac(data, mac):
+ return mac.lower() == ':'.join([f'{x:02x}' for x in data[0:6]])
- return ip
+
+def mac2iface(mac: str): # pylint: disable=too-many-locals
+ '''@brief Find the interface that has @mac as its assigned MAC address.
+ @param mac: The MAC address to match
+ '''
+ with socket.socket(family=socket.AF_NETLINK, type=socket.SOCK_RAW, proto=socket.NETLINK_ROUTE) as sock:
+ sock.sendall(GETLINKCMD)
+ nlmsg = sock.recv(8192)
+ nlmsg_idx = 0
+ while True: # pylint: disable=too-many-nested-blocks
+ if nlmsg_idx >= len(nlmsg):
+ nlmsg += sock.recv(8192)
+
+ nlmsghdr = nlmsg[nlmsg_idx : nlmsg_idx + NLMSG_HDRLEN]
+ nlmsg_len, nlmsg_type, _, _, _ = struct.unpack('<LHHLL', nlmsghdr)
+
+ if nlmsg_type == NLMSG_DONE:
+ break
+
+ if nlmsg_type == RTM_BASE:
+ msg_indx = nlmsg_idx + NLMSG_HDRLEN
+ msg = nlmsg[msg_indx : msg_indx + IFINFOMSG_SZ] # ifinfomsg
+ _, _, ifi_type, ifi_index, _, _ = struct.unpack('<BBHiII', msg)
+
+ if ifi_type in (ARPHRD_LOOPBACK, ARPHRD_ETHER):
+ rtattr_indx = msg_indx + IFINFOMSG_SZ
+ while rtattr_indx < (nlmsg_idx + nlmsg_len):
+ rtattr = nlmsg[rtattr_indx : rtattr_indx + RTATTR_SZ]
+ rta_len, rta_type = struct.unpack('<HH', rtattr)
+ if rta_type == IFLA_ADDRESS:
+ data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
+ if _data_matches_mac(data, mac):
+ return socket.if_indextoname(ifi_index)
+
+ rta_len = RTA_ALIGN(rta_len) # Round up to multiple of 4
+ rtattr_indx += rta_len # Move to next rtattr
+
+ nlmsg_idx += nlmsg_len # Move to next Netlink message
+
+ return ''
# ******************************************************************************
@@ -71,8 +152,7 @@ def _data_matches_ip(data_family, data, ip):
return other_ip == ip
-# ******************************************************************************
-def iface_of(src_addr):
+def _iface_of(src_addr): # pylint: disable=too-many-locals
'''@brief Find the interface that has src_addr as one of its assigned IP addresses.
@param src_addr: The IP address to match
@type src_addr: Instance of ipaddress.IPv4Address or ipaddress.IPv6Address
@@ -85,36 +165,133 @@ def iface_of(src_addr):
if nlmsg_idx >= len(nlmsg):
nlmsg += sock.recv(8192)
- nlmsg_type = int.from_bytes(nlmsg[nlmsg_idx + 4 : nlmsg_idx + 6], byteorder='little', signed=False)
+ nlmsghdr = nlmsg[nlmsg_idx : nlmsg_idx + NLMSG_HDRLEN]
+ nlmsg_len, nlmsg_type, _, _, _ = struct.unpack('<LHHLL', nlmsghdr)
+
if nlmsg_type == NLMSG_DONE:
break
- if nlmsg_type != RTM_NEWADDR:
- break
+ if nlmsg_type == RTM_NEWADDR:
+ msg_indx = nlmsg_idx + NLMSG_HDRLEN
+ msg = nlmsg[msg_indx : msg_indx + IFADDRMSG_SZ] # ifaddrmsg
+ ifa_family, _, _, _, ifa_index = struct.unpack('<BBBBL', msg)
+
+ rtattr_indx = msg_indx + IFADDRMSG_SZ
+ while rtattr_indx < (nlmsg_idx + nlmsg_len):
+ rtattr = nlmsg[rtattr_indx : rtattr_indx + RTATTR_SZ]
+ rta_len, rta_type = struct.unpack('<HH', rtattr)
+ if rta_type == IFLA_ADDRESS:
+ data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
+ if _data_matches_ip(ifa_family, data, src_addr):
+ return socket.if_indextoname(ifa_index)
+
+ rta_len = RTA_ALIGN(rta_len) # Round up to multiple of 4
+ rtattr_indx += rta_len # Move to next rtattr
+
+ nlmsg_idx += nlmsg_len # Move to next Netlink message
+
+ return ''
+
+
+# ******************************************************************************
+def get_ipaddress_obj(ipaddr, ipv4_mapped_convert=False):
+ '''@brief Return a IPv4Address or IPv6Address depending on whether @ipaddr
+ is a valid IPv4 or IPv6 address. Return None otherwise.
+
+ If ipv4_mapped_resolve is set to True, IPv6 addresses that are IPv4-Mapped,
+ will be converted to their IPv4 equivalent.
+ '''
+ try:
+ ip = ipaddress.ip_address(ipaddr)
+ except ValueError:
+ return None
+
+ if ipv4_mapped_convert:
+ ipv4_mapped = getattr(ip, 'ipv4_mapped', None)
+ if ipv4_mapped is not None:
+ ip = ipv4_mapped
+
+ return ip
+
- nlmsg_len = int.from_bytes(nlmsg[nlmsg_idx : nlmsg_idx + 4], byteorder='little', signed=False)
- if nlmsg_len % 4: # Is msg length not a multiple of 4?
+# ******************************************************************************
+def net_if_addrs(): # pylint: disable=too-many-locals
+ '''@brief Return a dictionary listing every IP addresses for each interface.
+ The first IP address of a list is the primary address used as the default
+ source address.
+ @example: {
+ 'wlp0s20f3': {
+ 4: ['10.0.0.28'],
+ 6: [
+ 'fd5e:9a9e:c5bd:0:5509:890c:1848:3843',
+ 'fd5e:9a9e:c5bd:0:1fd5:e527:8df7:7912',
+ '2605:59c8:6128:fb00:c083:1b8:c467:81d2',
+ '2605:59c8:6128:fb00:e99d:1a02:38e0:ad52',
+ 'fe80::d71b:e807:d5ee:7614'
+ ],
+ },
+ 'lo': {
+ 4: ['127.0.0.1'],
+ 6: ['::1'],
+ },
+ 'docker0': {
+ 4: ['172.17.0.1'],
+ 6: []
+ },
+ }
+ '''
+ interfaces = {}
+ with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW) as sock:
+ sock.sendall(GETADDRCMD)
+ nlmsg = sock.recv(8192)
+ nlmsg_idx = 0
+ while True: # pylint: disable=too-many-nested-blocks
+ if nlmsg_idx >= len(nlmsg):
+ nlmsg += sock.recv(8192)
+
+ nlmsghdr = nlmsg[nlmsg_idx : nlmsg_idx + NLMSG_HDRLEN]
+ nlmsg_len, nlmsg_type, _, _, _ = struct.unpack('<LHHLL', nlmsghdr)
+
+ if nlmsg_type == NLMSG_DONE:
break
- ifaddrmsg_indx = nlmsg_idx + NLMSGHDR_SZ
- ifa_family = nlmsg[ifaddrmsg_indx]
- ifa_index = int.from_bytes(nlmsg[ifaddrmsg_indx + 4 : ifaddrmsg_indx + 8], byteorder='little', signed=False)
+ if nlmsg_type == RTM_NEWADDR:
+ msg_indx = nlmsg_idx + NLMSG_HDRLEN
+ msg = nlmsg[msg_indx : msg_indx + IFADDRMSG_SZ] # ifaddrmsg
+ ifa_family, _, _, _, ifa_index = struct.unpack('<BBBBL', msg)
+
+ if ifa_family in (socket.AF_INET, socket.AF_INET6):
+ interfaces.setdefault(ifa_index, {4: [], 6: []})
+
+ rtattr_indx = msg_indx + IFADDRMSG_SZ
+ while rtattr_indx < (nlmsg_idx + nlmsg_len):
+ rtattr = nlmsg[rtattr_indx : rtattr_indx + RTATTR_SZ]
+ rta_len, rta_type = struct.unpack('<HH', rtattr)
- rtattr_indx = ifaddrmsg_indx + IFADDRMSG_SZ
- while rtattr_indx < (nlmsg_idx + nlmsg_len):
- rta_len = int.from_bytes(nlmsg[rtattr_indx : rtattr_indx + 2], byteorder='little', signed=False)
- rta_type = int.from_bytes(nlmsg[rtattr_indx + 2 : rtattr_indx + 4], byteorder='little', signed=False)
- if rta_type == IFLA_ADDRESS:
- data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
- if _data_matches_ip(ifa_family, data, src_addr):
- return socket.if_indextoname(ifa_index)
+ if rta_type == IFLA_IFNAME:
+ data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
+ ifname = data.rstrip(b'\0').decode()
+ interfaces[ifa_index]['name'] = ifname
- rta_len = (rta_len + 3) & ~3 # Round up to multiple of 4
- rtattr_indx += rta_len # Move to next rtattr
+ elif rta_type == IFLA_ADDRESS:
+ data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
+ ip = get_ipaddress_obj(data)
+ if ip:
+ family = 4 if ifa_family == socket.AF_INET else 6
+ interfaces[ifa_index][family].append(ip)
+
+ rta_len = RTA_ALIGN(rta_len) # Round up to multiple of 4
+ rtattr_indx += rta_len # Move to next rtattr
nlmsg_idx += nlmsg_len # Move to next Netlink message
- return ''
+ if_addrs = {}
+ for value in interfaces.values():
+ name = value.pop('name', None)
+ if name is not None:
+ if_addrs[name] = value
+
+ return if_addrs
# ******************************************************************************
@@ -128,42 +305,4 @@ def get_interface(src_addr):
src_addr = src_addr.split('%')[0] # remove scope-id (if any)
src_addr = get_ipaddress_obj(src_addr)
- return '' if src_addr is None else iface_of(src_addr)
-
-
-# ******************************************************************************
-def remove_invalid_addresses(controllers: list):
- '''@brief Remove controllers with invalid addresses from the list of controllers.
- @param controllers: List of TIDs
- '''
- service_conf = conf.SvcConf()
- valid_controllers = list()
- for controller in controllers:
- if controller.transport in ('tcp', 'rdma'):
- # Let's make sure that traddr is
- # syntactically a valid IPv4 or IPv6 address.
- ip = get_ipaddress_obj(controller.traddr)
- if ip is None:
- logging.warning('%s IP address is not valid', controller)
- continue
-
- # Let's make sure the address family is enabled.
- if ip.version not in service_conf.ip_family:
- logging.debug(
- '%s ignored because IPv%s is disabled in %s',
- controller,
- ip.version,
- service_conf.conf_file,
- )
- continue
-
- valid_controllers.append(controller)
-
- elif controller.transport in ('fc', 'loop'):
- # At some point, need to validate FC addresses as well...
- valid_controllers.append(controller)
-
- else:
- logging.warning('Invalid transport %s', controller.transport)
-
- return valid_controllers
+ return '' if src_addr is None else _iface_of(src_addr)
diff --git a/staslib/meson.build b/staslib/meson.build
index eb006f0..1658d3c 100644
--- a/staslib/meson.build
+++ b/staslib/meson.build
@@ -24,6 +24,7 @@ files_to_copy = [
'gutil.py',
'iputil.py',
'log.py',
+ 'nbft.py',
'service.py',
'singleton.py',
'stas.py',
diff --git a/staslib/nbft.py b/staslib/nbft.py
new file mode 100644
index 0000000..c643c9c
--- /dev/null
+++ b/staslib/nbft.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2023, Dell Inc. or its subsidiaries. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# See the LICENSE file for details.
+#
+# This file is part of NVMe STorage Appliance Services (nvme-stas).
+#
+# Authors: Martin Belanger <Martin.Belanger@dell.com>
+#
+'''Code used to access the NVMe Boot Firmware Tables'''
+
+import os
+import glob
+import logging
+from libnvme import nvme
+from staslib import defs
+
+
+def get_nbft_files(root_dir=defs.NBFT_SYSFS_PATH):
+ """Return a dictionary containing the NBFT data for all the NBFT binary files located in @root_dir"""
+ if not defs.HAS_NBFT_SUPPORT:
+ logging.warning(
+ "libnvme-%s does not have NBFT support. Please upgrade libnvme.",
+ defs.LIBNVME_VERSION,
+ )
+ return {}
+
+ pathname = os.path.join(root_dir, defs.NBFT_SYSFS_FILENAME)
+ return {fname: nvme.nbft_get(fname) or {} for fname in glob.iglob(pathname=pathname)} # pylint: disable=no-member
diff --git a/staslib/service.py b/staslib/service.py
index ce4769d..e681f3a 100644
--- a/staslib/service.py
+++ b/staslib/service.py
@@ -20,7 +20,7 @@ import dasbus.client.proxy
from gi.repository import GLib
from systemd.daemon import notify as sd_notify
-from staslib import avahi, conf, ctrl, defs, gutil, iputil, stas, timeparse, trid, udev
+from staslib import avahi, conf, ctrl, defs, gutil, stas, timeparse, trid, udev
# ******************************************************************************
@@ -402,7 +402,7 @@ class Stac(Service):
logging.debug('Stac._config_ctrls_finish() - discovered_ctrl_list = %s', discovered_ctrl_list)
controllers = stas.remove_excluded(configured_ctrl_list + discovered_ctrl_list)
- controllers = iputil.remove_invalid_addresses(controllers)
+ controllers = stas.remove_invalid_addresses(controllers)
new_controller_tids = set(controllers)
cur_controller_tids = set(self._controllers.keys())
@@ -752,7 +752,7 @@ class Staf(Service):
all_ctrls = configured_ctrl_list + discovered_ctrl_list + referral_ctrl_list
controllers = stas.remove_excluded(all_ctrls)
- controllers = iputil.remove_invalid_addresses(controllers)
+ controllers = stas.remove_invalid_addresses(controllers)
new_controller_tids = set(controllers)
cur_controller_tids = set(self._controllers.keys())
@@ -856,7 +856,7 @@ class Staf(Service):
return
# Did we receive a Change of DLP AEN or an NVME Event indicating 'connect' or 'rediscover'?
- if not _is_dlp_changed_aen(udev_obj) and not _event_matches(udev_obj, ('connected', 'rediscover')):
+ if not _is_dlp_changed_aen(udev_obj) and not _event_matches(udev_obj, ('rediscover',)):
return
# We need to invoke "nvme connect-all" using nvme-cli's nvmf-connect@.service
@@ -873,6 +873,6 @@ class Staf(Service):
options = r'\x09'.join(
[fr'{option}\x3d{value}' for option, value in cnf if value not in (None, 'none', 'None', '')]
)
- logging.info('Invoking: systemctl start nvmf-connect@%s.service', options)
- cmd = [defs.SYSTEMCTL, '--quiet', '--no-block', 'start', fr'nvmf-connect@{options}.service']
+ logging.debug('Invoking: systemctl restart nvmf-connect@%s.service', options)
+ cmd = [defs.SYSTEMCTL, '--quiet', '--no-block', 'restart', fr'nvmf-connect@{options}.service']
subprocess.run(cmd, check=False)
diff --git a/staslib/singleton.py b/staslib/singleton.py
index 2171186..f80fc0b 100644
--- a/staslib/singleton.py
+++ b/staslib/singleton.py
@@ -21,3 +21,28 @@ class Singleton(type):
instance = super(Singleton, cls).__call__(*args, **kwargs)
cls._instances[cls] = instance
return cls._instances[cls]
+
+ def destroy(cls):
+ '''Delete a singleton instance.
+
+ This is to be invoked using the derived class Name.
+
+ For example:
+
+ class Child(Singleton):
+ pass
+
+ child1 = Child() # Instantiate singleton
+ child2 = Child() # Get a reference to the singleton
+
+ print(f'{child1 is child2}') # True
+
+ Child.destroy() # Delete the singleton
+
+ print(f'{child1 is child2}') # Still True because child1 and child2 still hold reference to the singleton
+
+ child1 = Child() # Instantiate a new singleton and assign to child1
+
+ print(f'{child1 is child2}') # False
+ '''
+ cls._instances.pop(cls, None)
diff --git a/staslib/stas.py b/staslib/stas.py
index 95afb94..e333b90 100644
--- a/staslib/stas.py
+++ b/staslib/stas.py
@@ -18,7 +18,7 @@ import logging
import dasbus.connection
from gi.repository import Gio, GLib
from systemd.daemon import notify as sd_notify
-from staslib import conf, defs, gutil, log, trid
+from staslib import conf, defs, gutil, iputil, log, trid
try:
# Python 3.9 or later
@@ -87,6 +87,44 @@ def check_if_allowed_to_continue():
# ******************************************************************************
+def remove_invalid_addresses(controllers: list):
+ '''@brief Remove controllers with invalid addresses from the list of controllers.
+ @param controllers: List of TIDs
+ '''
+ service_conf = conf.SvcConf()
+ valid_controllers = list()
+ for controller in controllers:
+ if controller.transport in ('tcp', 'rdma'):
+ # Let's make sure that traddr is
+ # syntactically a valid IPv4 or IPv6 address.
+ ip = iputil.get_ipaddress_obj(controller.traddr)
+ if ip is None:
+ logging.warning('%s IP address is not valid', controller)
+ continue
+
+ # Let's make sure the address family is enabled.
+ if ip.version not in service_conf.ip_family:
+ logging.debug(
+ '%s ignored because IPv%s is disabled in %s',
+ controller,
+ ip.version,
+ service_conf.conf_file,
+ )
+ continue
+
+ valid_controllers.append(controller)
+
+ elif controller.transport in ('fc', 'loop'):
+ # At some point, need to validate FC addresses as well...
+ valid_controllers.append(controller)
+
+ else:
+ logging.warning('Invalid transport %s', controller.transport)
+
+ return valid_controllers
+
+
+# ******************************************************************************
def tid_from_dlpe(dlpe, host_traddr, host_iface):
'''@brief Take a Discovery Log Page Entry and return a Controller ID as a dict.'''
cid = {
@@ -492,7 +530,9 @@ class ServiceABC(abc.ABC): # pylint: disable=too-many-instance-attributes
# elements after name resolution is complete (i.e. in the calback
# function _config_ctrls_finish)
logging.debug('ServiceABC._config_ctrls()')
- configured_controllers = [trid.TID(cid) for cid in conf.SvcConf().get_controllers()]
+ configured_controllers = [
+ trid.TID(cid) for cid in conf.SvcConf().get_controllers() + conf.NbftConf().get_controllers()
+ ]
configured_controllers = remove_excluded(configured_controllers)
self._resolver.resolve_ctrl_async(self._cancellable, configured_controllers, self._config_ctrls_finish)
diff --git a/staslib/trid.py b/staslib/trid.py
index d9a05ba..11065ea 100644
--- a/staslib/trid.py
+++ b/staslib/trid.py
@@ -61,7 +61,6 @@ class TID: # pylint: disable=too-many-instance-attributes
self._host_traddr = cid.get('host-traddr', '')
self._host_iface = '' if conf.SvcConf().ignore_iface else cid.get('host-iface', '')
self._subsysnqn = cid.get('subsysnqn', '')
- self._shortkey = (self._transport, self._traddr, self._trsvcid, self._subsysnqn, self._host_traddr)
self._key = (self._transport, self._traddr, self._trsvcid, self._subsysnqn, self._host_traddr, self._host_iface)
self._hash = int.from_bytes(
hashlib.md5(''.join(self._key).encode('utf-8')).digest(), 'big'
@@ -121,22 +120,10 @@ class TID: # pylint: disable=too-many-instance-attributes
return self._id
def __eq__(self, other):
- if not isinstance(other, self.__class__):
- return False
-
- if self._host_iface and other._host_iface:
- return self._key == other._key
-
- return self._shortkey == other._shortkey
+ return isinstance(other, self.__class__) and self._key == other._key
def __ne__(self, other):
- if not isinstance(other, self.__class__):
- return True
-
- if self._host_iface and other._host_iface:
- return self._key != other._key
-
- return self._shortkey != other._shortkey
+ return not isinstance(other, self.__class__) or self._key != other._key
def __hash__(self):
return self._hash
diff --git a/staslib/udev.py b/staslib/udev.py
index 12ef61b..80555dd 100644
--- a/staslib/udev.py
+++ b/staslib/udev.py
@@ -153,6 +153,169 @@ class Udev:
return False
+ @staticmethod
+ def _cid_matches_tcp_tid_legacy(tid, cid): # pylint: disable=too-many-return-statements,too-many-branches
+ '''On kernels older than 6.1, the src_addr parameter is not available
+ from the sysfs. Therefore, we need to infer a match based on other
+ parameters. And there are a few cases where we're simply not sure
+ whether an existing connection (cid) matches the candidate
+ connection (tid).
+ '''
+ cid_host_iface = cid['host-iface']
+ cid_host_traddr = iputil.get_ipaddress_obj(cid['host-traddr'], ipv4_mapped_convert=True)
+
+ if not cid_host_iface: # cid.host_iface is undefined
+ if not cid_host_traddr: # cid.host_traddr is undefined
+ # When the existing cid.src_addr, cid.host_traddr, and cid.host_iface
+ # are all undefined (which can only happen on kernels prior to 6.1),
+ # we can't know for sure on which interface an existing connection
+ # was made. In this case, we can only declare a match if both
+ # tid.host_iface and tid.host_traddr are undefined as well.
+ logging.debug(
+ 'Udev._cid_matches_tcp_tid_legacy() - cid=%s, tid=%s - Not enough info. Assume "match" but this could be wrong.',
+ cid,
+ tid,
+ )
+ return True
+
+ # cid.host_traddr is defined. If tid.host_traddr is also
+ # defined, then it must match the existing cid.host_traddr.
+ if tid.host_traddr:
+ tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
+ if tid_host_traddr != cid_host_traddr:
+ return False
+
+ # If tid.host_iface is defined, then the interface where
+ # the connection is located must match. If tid.host_iface
+ # is not defined, then we don't really care on which
+ # interface the connection was made and we can skip this test.
+ if tid.host_iface:
+ # With the existing cid.host_traddr, we can find the
+ # interface of the exisiting connection.
+ connection_iface = iputil.get_interface(str(cid_host_traddr))
+ if tid.host_iface != connection_iface:
+ return False
+
+ return True
+
+ # cid.host_iface is defined
+ if not cid_host_traddr: # cid.host_traddr is undefined
+ if tid.host_iface and tid.host_iface != cid_host_iface:
+ return False
+
+ if tid.host_traddr:
+ # It's impossible to tell the existing connection source
+ # address. So, we can't tell if it matches tid.host_traddr.
+ # However, if the existing host_iface has only one source
+ # address assigned to it, we can assume that the source
+ # address used for the existing connection is that address.
+ if_addrs = iputil.net_if_addrs().get(cid_host_iface, {4: [], 6: []})
+ tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
+ source_addrs = if_addrs[tid_host_traddr.version]
+ if len(source_addrs) != 1:
+ return False
+
+ src_addr0 = iputil.get_ipaddress_obj(source_addrs[0], ipv4_mapped_convert=True)
+ if src_addr0 != tid_host_traddr:
+ return False
+
+ return True
+
+ # cid.host_traddr is defined
+ if tid.host_iface and tid.host_iface != cid_host_iface:
+ return False
+
+ if tid.host_traddr:
+ tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
+ if tid_host_traddr != cid_host_traddr:
+ return False
+
+ return True
+
+ @staticmethod
+ def _cid_matches_tid(tid, cid): # pylint: disable=too-many-return-statements,too-many-branches
+ '''Check if existing controller's cid matches candidate controller's tid.
+ @param cid: The Connection ID of an existing controller (from the sysfs).
+ @param tid: The Transport ID of a candidate controller.
+
+ We're trying to find if an existing connection (specified by cid) can
+ be re-used for the candidate controller (specified by tid).
+
+ We do not have a match if the candidate's tid.transport, tid.traddr,
+ tid.trsvcid, and tid.subsysnqn are not identical to those of the cid.
+ These 4 parameters are mandatory for a match.
+
+ The tid.host_traddr and tid.host_iface depend on the transport type.
+ These parameters may not apply or have a different syntax/meaning
+ depending on the transport type.
+
+ For TCP only:
+ With regards to the candidate's tid.host_traddr and tid.host_iface,
+ if those are defined but do not match the existing cid.host_traddr
+ and cid.host_iface, we may still be able to find a match by taking
+ the existing cid.src_addr into consideration since that parameter
+ identifies the actual source address of the connection and therefore
+ can be used to infer the interface of the connection. However, the
+ cid.src_addr can only be read from the sysfs starting with kernel
+ 6.1.
+ '''
+ # 'transport', 'traddr', 'trsvcid', and 'subsysnqn' must exactly match.
+ if cid['transport'] != tid.transport or cid['trsvcid'] != tid.trsvcid or cid['subsysnqn'] != tid.subsysnqn:
+ return False
+
+ if tid.transport in ('tcp', 'rdma'):
+ # Need to convert to ipaddress objects to properly
+ # handle all variations of IPv6 addresses.
+ tid_traddr = iputil.get_ipaddress_obj(tid.traddr, ipv4_mapped_convert=True)
+ cid_traddr = iputil.get_ipaddress_obj(cid['traddr'], ipv4_mapped_convert=True)
+ else:
+ cid_traddr = cid['traddr']
+ tid_traddr = tid.traddr
+
+ if cid_traddr != tid_traddr:
+ return False
+
+ # We need to know the type of transport to compare 'host-traddr' and
+ # 'host-iface'. These parameters don't apply to all transport types
+ # and may have a different meaning/syntax.
+ if tid.transport == 'tcp':
+ if tid.host_traddr or tid.host_iface:
+ src_addr = iputil.get_ipaddress_obj(cid['src-addr'], ipv4_mapped_convert=True)
+ if not src_addr:
+ # For legacy kernels (i.e. older than 6.1), the existing cid.src_addr
+ # is always undefined. We need to use advanced logic to determine
+ # whether cid and tid match.
+ return Udev._cid_matches_tcp_tid_legacy(tid, cid)
+
+ # The existing controller's cid.src_addr is always defined for kernel
+ # 6.1 and later. We can use the existing controller's cid.src_addr to
+ # find the interface on which the connection was made and therefore
+ # match it to the candidate's tid.host_iface. And the cid.src_addr
+ # can also be used to match the candidate's tid.host_traddr.
+ if tid.host_traddr:
+ tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
+ if tid_host_traddr != src_addr:
+ return False
+
+ # host-iface is an optional tcp-only parameter.
+ if tid.host_iface and tid.host_iface != iputil.get_interface(str(src_addr)):
+ return False
+
+ elif tid.transport == 'fc':
+ # host-traddr is mandatory for FC.
+ if tid.host_traddr != cid['host-traddr']:
+ return False
+
+ elif tid.transport == 'rdma':
+ # host-traddr is optional for RDMA and is expressed as an IP address.
+ if tid.host_traddr:
+ tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
+ cid_host_traddr = iputil.get_ipaddress_obj(cid['host-traddr'], ipv4_mapped_convert=True)
+ if tid_host_traddr != cid_host_traddr:
+ return False
+
+ return True
+
def find_nvme_dc_device(self, tid):
'''@brief Find the nvme device associated with the specified
Discovery Controller.
@@ -164,7 +327,8 @@ class Udev:
if not self.is_dc_device(device):
continue
- if self.get_tid(device) != tid:
+ cid = self.get_cid(device)
+ if not self._cid_matches_tid(tid, cid):
continue
return device
@@ -182,7 +346,8 @@ class Udev:
if not self.is_ioc_device(device):
continue
- if self.get_tid(device) != tid:
+ cid = self.get_cid(device)
+ if not self._cid_matches_tid(tid, cid):
continue
return device
@@ -300,28 +465,32 @@ class Udev:
return attr_str[start:end]
@staticmethod
- def _get_host_iface(device):
- host_iface = Udev._get_property(device, 'NVME_HOST_IFACE')
- if not host_iface:
- # We'll try to find the interface from the source address on
- # the connection. Only available if kernel exposes the source
- # address (src_addr) in the "address" attribute.
- src_addr = Udev.get_key_from_attr(device, 'address', 'src_addr=')
- host_iface = iputil.get_interface(src_addr)
- return host_iface
-
- @staticmethod
def get_tid(device):
'''@brief return the Transport ID associated with a udev device'''
+ cid = Udev.get_cid(device)
+ if cid['transport'] == 'tcp':
+ src_addr = cid['src-addr']
+ if not cid['host-iface'] and src_addr:
+ # We'll try to find the interface from the source address on
+ # the connection. Only available if kernel exposes the source
+ # address (src_addr) in the "address" attribute.
+ cid['host-iface'] = iputil.get_interface(src_addr)
+
+ return trid.TID(cid)
+
+ @staticmethod
+ def get_cid(device):
+ '''@brief return the Connection ID associated with a udev device'''
cid = {
'transport': Udev._get_property(device, 'NVME_TRTYPE'),
'traddr': Udev._get_property(device, 'NVME_TRADDR'),
'trsvcid': Udev._get_property(device, 'NVME_TRSVCID'),
'host-traddr': Udev._get_property(device, 'NVME_HOST_TRADDR'),
- 'host-iface': Udev._get_host_iface(device),
+ 'host-iface': Udev._get_property(device, 'NVME_HOST_IFACE'),
'subsysnqn': Udev._get_attribute(device, 'subsysnqn'),
+ 'src-addr': Udev.get_key_from_attr(device, 'address', 'src_addr='),
}
- return trid.TID(cid)
+ return cid
UDEV = Udev() # Singleton