summaryrefslogtreecommitdiffstats
path: root/staslib/ctrl.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--staslib/ctrl.py360
1 files changed, 184 insertions, 176 deletions
diff --git a/staslib/ctrl.py b/staslib/ctrl.py
index 97a1c7b..ad221e0 100644
--- a/staslib/ctrl.py
+++ b/staslib/ctrl.py
@@ -135,26 +135,7 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
self._root.log_level("debug" if tron else "err")
def _on_udev_notification(self, udev_obj):
- if self._alive():
- if udev_obj.action == 'change':
- nvme_aen = udev_obj.get('NVME_AEN')
- nvme_event = udev_obj.get('NVME_EVENT')
- if isinstance(nvme_aen, str):
- logging.info('%s | %s - Received AEN: %s', self.id, udev_obj.sys_name, nvme_aen)
- self._on_aen(int(nvme_aen, 16))
- if isinstance(nvme_event, str):
- self._on_nvme_event(nvme_event)
- elif udev_obj.action == 'remove':
- logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
- self._on_ctrl_removed(udev_obj)
- else:
- logging.debug(
- 'Controller._on_udev_notification() - %s | %s: Received "%s" event',
- self.id,
- udev_obj.sys_name,
- udev_obj.action,
- )
- else:
+ if not self._alive():
logging.debug(
'Controller._on_udev_notification() - %s | %s: Received event on dead object. udev_obj %s: %s',
self.id,
@@ -162,6 +143,26 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
udev_obj.action,
udev_obj.sys_name,
)
+ return
+
+ if udev_obj.action == 'change':
+ nvme_aen = udev_obj.get('NVME_AEN')
+ nvme_event = udev_obj.get('NVME_EVENT')
+ if isinstance(nvme_aen, str):
+ logging.info('%s | %s - Received AEN: %s', self.id, udev_obj.sys_name, nvme_aen)
+ self._on_aen(int(nvme_aen, 16))
+ if isinstance(nvme_event, str):
+ self._on_nvme_event(nvme_event)
+ elif udev_obj.action == 'remove':
+ logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
+ self._on_ctrl_removed(udev_obj)
+ else:
+ logging.debug(
+ 'Controller._on_udev_notification() - %s | %s: Received "%s" event',
+ self.id,
+ udev_obj.sys_name,
+ udev_obj.action,
+ )
def _on_ctrl_removed(self, udev_obj): # pylint: disable=unused-argument
if self._udev:
@@ -269,48 +270,51 @@ class Controller(stas.ControllerABC): # pylint: disable=too-many-instance-attri
op_obj.kill()
self._connect_op = None
- if self._alive():
- self._device = self._ctrl.name
- logging.info('%s | %s - Connection established!', self.id, self.device)
- self._connect_attempts = 0
- self._udev.register_for_device_events(self._device, self._on_udev_notification)
- else:
+ if not self._alive():
logging.debug(
'Controller._on_connect_success() - %s | %s: Received event on dead object. data=%s',
self.id,
self.device,
data,
)
+ return
+
+ self._device = self._ctrl.name
+ logging.info('%s | %s - Connection established!', self.id, self.device)
+ self._connect_attempts = 0
+ self._udev.register_for_device_events(self._device, self._on_udev_notification)
def _on_connect_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt): # pylint: disable=unused-argument
'''@brief Function called when we fail to connect to the Controller.'''
op_obj.kill()
self._connect_op = None
- if self._alive():
- if self._connect_attempts == 1:
- # Do a fast re-try on the first failure.
- self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
- elif self._connect_attempts == 2:
- # If the fast connect re-try fails, then we can print a message to
- # indicate the failure, and start a slow re-try period.
- self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
- logging.error('%s Failed to connect to controller. %s %s', self.id, err.domain, err.message)
-
- if self._should_try_to_reconnect():
- logging.debug(
- 'Controller._on_connect_fail() - %s %s. Retry in %s sec.',
- self.id,
- err,
- self._retry_connect_tmr.get_timeout(),
- )
- self._retry_connect_tmr.start()
- else:
+
+ if not self._alive():
logging.debug(
'Controller._on_connect_fail() - %s Received event on dead object. %s %s',
self.id,
err.domain,
err.message,
)
+ return
+
+ if self._connect_attempts == 1:
+ # Do a fast re-try on the first failure.
+ self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
+ elif self._connect_attempts == 2:
+ # If the fast connect re-try fails, then we can print a message to
+ # indicate the failure, and start a slow re-try period.
+ self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
+ logging.error('%s Failed to connect to controller. %s %s', self.id, err.domain, err.message)
+
+ if self._should_try_to_reconnect():
+ logging.debug(
+ 'Controller._on_connect_fail() - %s %s. Retry in %s sec.',
+ self.id,
+ err,
+ self._retry_connect_tmr.get_timeout(),
+ )
+ self._retry_connect_tmr.start()
def disconnect(self, disconnected_cb, keep_connection):
'''@brief Issue an asynchronous disconnect command to a Controller.
@@ -507,11 +511,7 @@ class Dc(Controller):
return
- logging.info(
- '%s | %s - Controller not responding. Retrying...',
- self.id,
- self.device,
- )
+ logging.info('%s | %s - Controller not responding. Retrying...', self.id, self.device)
self._ctrl_unresponsive_time = None
self._ctrl_unresponsive_tmr.stop()
@@ -555,8 +555,8 @@ class Dc(Controller):
def _post_registration_actions(self):
# Need to check that supported_log_pages() is available (introduced in libnvme 1.2)
- has_supported_log_pages = hasattr(self._ctrl, 'supported_log_pages')
- if not has_supported_log_pages:
+ get_slp = getattr(self._ctrl, 'supported_log_pages', None)
+ if get_slp is None:
logging.warning(
'%s | %s - libnvme-%s does not support "Get supported log pages". Please upgrade libnvme.',
self.id,
@@ -564,9 +564,9 @@ class Dc(Controller):
defs.LIBNVME_VERSION,
)
- if conf.SvcConf().pleo_enabled and self._is_ddc() and has_supported_log_pages:
+ if conf.SvcConf().pleo_enabled and self._is_ddc() and get_slp is not None:
self._get_supported_op = gutil.AsyncTask(
- self._on_get_supported_success, self._on_get_supported_fail, self._ctrl.supported_log_pages
+ self._on_get_supported_success, self._on_get_supported_fail, get_slp
)
self._get_supported_op.run_async()
else:
@@ -580,21 +580,23 @@ class Dc(Controller):
'''
super()._on_connect_success(op_obj, data)
- if self._alive():
- self._ctrl_unresponsive_time = None
- self._ctrl_unresponsive_tmr.stop()
- self._ctrl_unresponsive_tmr.set_timeout(0)
-
- if self._ctrl.is_registration_supported():
- self._register_op = gutil.AsyncTask(
- self._on_registration_success,
- self._on_registration_fail,
- self._ctrl.registration_ctlr,
- nvme.NVMF_DIM_TAS_REGISTER,
- )
- self._register_op.run_async()
- else:
- self._post_registration_actions()
+ if not self._alive():
+ return
+
+ self._ctrl_unresponsive_time = None
+ self._ctrl_unresponsive_tmr.stop()
+ self._ctrl_unresponsive_tmr.set_timeout(0)
+
+ if self._ctrl.is_registration_supported():
+ self._register_op = gutil.AsyncTask(
+ self._on_registration_success,
+ self._on_registration_fail,
+ self._ctrl.registration_ctlr,
+ nvme.NVMF_DIM_TAS_REGISTER,
+ )
+ self._register_op.run_async()
+ else:
+ self._post_registration_actions()
def _on_connect_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to connect to the Controller.'''
@@ -613,35 +615,25 @@ class Dc(Controller):
refers to the fact that a successful exchange was made with the DC.
It doesn't mean that the registration itself succeeded.
'''
- if self._alive():
- if data is not None:
- logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
- else:
- logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
-
- self._post_registration_actions()
- else:
+ if not self._alive():
logging.debug(
'Dc._on_registration_success() - %s | %s: Received event on dead object.', self.id, self.device
)
+ return
+
+ if data is not None:
+ logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
+ else:
+ logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
+
+ self._post_registration_actions()
def _on_registration_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to register with the
Discovery Controller. See self._register_op object
for details.
'''
- if self._alive():
- logging.debug(
- 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
- self.id,
- self.device,
- err,
- Dc.REGISTRATION_RETRY_RERIOD_SEC,
- )
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
- op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
- else:
+ if not self._alive():
logging.debug(
'Dc._on_registration_fail() - %s | %s: Received event on dead object. %s',
self.id,
@@ -649,6 +641,18 @@ class Dc(Controller):
err,
)
op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.REGISTRATION_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
+ op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
# --------------------------------------------------------------------------
def _on_get_supported_success(self, op_obj: gutil.AsyncTask, data): # pylint: disable=unused-argument
@@ -660,68 +664,70 @@ class Dc(Controller):
refers to the fact that a successful exchange was made with the DC.
It doesn't mean that the Get Supported Log Page itself succeeded.
'''
- if self._alive():
- try:
- dlp_supp_opts = data[nvme.NVME_LOG_LID_DISCOVER] >> 16
- except (TypeError, IndexError):
- dlp_supp_opts = 0
-
+ if not self._alive():
logging.debug(
- 'Dc._on_get_supported_success() - %s | %s: supported options = 0x%04X = %s',
- self.id,
- self.device,
- dlp_supp_opts,
- dlp_supp_opts_as_string(dlp_supp_opts),
+ 'Dc._on_get_supported_success() - %s | %s: Received event on dead object.', self.id, self.device
)
+ return
- if 'lsp' in inspect.signature(self._ctrl.discover).parameters:
- lsp = nvme.NVMF_LOG_DISC_LSP_PLEO if dlp_supp_opts & nvme.NVMF_LOG_DISC_LID_PLEOS else 0
- self._get_log_op = gutil.AsyncTask(
- self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover, lsp
- )
- else:
- logging.warning(
- '%s | %s - libnvme-%s does not support setting PLEO bit. Please upgrade.',
- self.id,
- self.device,
- defs.LIBNVME_VERSION,
- )
- self._get_log_op = gutil.AsyncTask(self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover)
- self._get_log_op.run_async()
+ try:
+ dlp_supp_opts = data[nvme.NVME_LOG_LID_DISCOVER] >> 16
+ except (TypeError, IndexError):
+ dlp_supp_opts = 0
+
+ logging.debug(
+ 'Dc._on_get_supported_success() - %s | %s: supported options = 0x%04X = %s',
+ self.id,
+ self.device,
+ dlp_supp_opts,
+ dlp_supp_opts_as_string(dlp_supp_opts),
+ )
+
+ if 'lsp' in inspect.signature(self._ctrl.discover).parameters:
+ lsp = nvme.NVMF_LOG_DISC_LSP_PLEO if dlp_supp_opts & nvme.NVMF_LOG_DISC_LID_PLEOS else 0
+ self._get_log_op = gutil.AsyncTask(
+ self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover, lsp
+ )
else:
- logging.debug(
- 'Dc._on_get_supported_success() - %s | %s: Received event on dead object.', self.id, self.device
+ logging.warning(
+ '%s | %s - libnvme-%s does not support setting PLEO bit. Please upgrade.',
+ self.id,
+ self.device,
+ defs.LIBNVME_VERSION,
)
+ self._get_log_op = gutil.AsyncTask(self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover)
+ self._get_log_op.run_async()
def _on_get_supported_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to retrieve the supported log
page from the Discovery Controller. See self._get_supported_op object
for details.
'''
- if self._alive():
+ if not self._alive():
logging.debug(
- 'Dc._on_get_supported_fail() - %s | %s: %s. Retry in %s sec',
+ 'Dc._on_get_supported_fail() - %s | %s: Received event on dead object. %s',
self.id,
self.device,
err,
- Dc.GET_SUPPORTED_RETRY_RERIOD_SEC,
)
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error(
- '%s | %s - Failed to Get supported log pages from Discovery Controller. %s',
- self.id,
- self.device,
- err,
- )
- op_obj.retry(Dc.GET_SUPPORTED_RETRY_RERIOD_SEC)
- else:
- logging.debug(
- 'Dc._on_get_supported_fail() - %s | %s: Received event on dead object. %s',
+ op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_get_supported_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.GET_SUPPORTED_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error(
+ '%s | %s - Failed to Get supported log pages from Discovery Controller. %s',
self.id,
self.device,
err,
)
- op_obj.kill()
+ op_obj.retry(Dc.GET_SUPPORTED_RETRY_RERIOD_SEC)
# --------------------------------------------------------------------------
def _on_get_log_success(self, op_obj: gutil.AsyncTask, data): # pylint: disable=unused-argument
@@ -729,61 +735,51 @@ class Dc(Controller):
from the Discovery Controller. See self._get_log_op object
for details.
'''
- if self._alive():
- # Note that for historical reasons too long to explain, the CDC may
- # return invalid addresses ('0.0.0.0', '::', or ''). Those need to
- # be filtered out.
- referrals_before = self.referrals()
- self._log_pages = (
- [
- {k.strip(): str(v).strip() for k, v in dictionary.items()}
- for dictionary in data
- if dictionary.get('traddr', '').strip() not in ('0.0.0.0', '::', '')
- ]
- if data
- else list()
+ if not self._alive():
+ logging.debug(
+ 'Dc._on_get_log_success() - %s | %s: Received event on dead object.', self.id, self.device
)
- logging.info(
- '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
+ return
+
+ # Note that for historical reasons too long to explain, the CDC may
+ # return invalid addresses ('0.0.0.0', '::', or ''). Those need to
+ # be filtered out.
+ referrals_before = self.referrals()
+ self._log_pages = (
+ [
+ {k.strip(): str(v).strip() for k, v in dictionary.items()}
+ for dictionary in data
+ if dictionary.get('traddr', '').strip() not in ('0.0.0.0', '::', '')
+ ]
+ if data
+ else list()
+ )
+ logging.info(
+ '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
+ )
+ referrals_after = self.referrals()
+ self._serv.log_pages_changed(self, self.device)
+ if referrals_after != referrals_before:
+ logging.debug(
+ 'Dc._on_get_log_success() - %s | %s: Referrals before = %s',
+ self.id,
+ self.device,
+ referrals_before,
)
- referrals_after = self.referrals()
- self._serv.log_pages_changed(self, self.device)
- if referrals_after != referrals_before:
- logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Referrals before = %s',
- self.id,
- self.device,
- referrals_before,
- )
- logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Referrals after = %s',
- self.id,
- self.device,
- referrals_after,
- )
- self._serv.referrals_changed()
- else:
logging.debug(
- 'Dc._on_get_log_success() - %s | %s: Received event on dead object.', self.id, self.device
+ 'Dc._on_get_log_success() - %s | %s: Referrals after = %s',
+ self.id,
+ self.device,
+ referrals_after,
)
+ self._serv.referrals_changed()
def _on_get_log_fail(self, op_obj: gutil.AsyncTask, err, fail_cnt):
'''@brief Function called when we fail to retrieve the log pages
from the Discovery Controller. See self._get_log_op object
for details.
'''
- if self._alive():
- logging.debug(
- 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
- self.id,
- self.device,
- err,
- Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
- )
- if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
- logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
- op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
- else:
+ if not self._alive():
logging.debug(
'Dc._on_get_log_fail() - %s | %s: Received event on dead object. %s',
self.id,
@@ -791,6 +787,18 @@ class Dc(Controller):
err,
)
op_obj.kill()
+ return
+
+ logging.debug(
+ 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
+ self.id,
+ self.device,
+ err,
+ Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
+ )
+ if fail_cnt == 1: # Throttle the logs. Only print the first time the command fails
+ logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
+ op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
# ******************************************************************************