author     Daniel Baumann <daniel.baumann@progress-linux.org>    2023-09-25 08:22:07 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2023-09-25 08:22:07 +0000
commit     73334e48e395cd0ac4fbef0611609a3bd70abfe2 (patch)
tree       8881d2482e8a19e3c40e148571851d75a7bb2a9b
parent     Adding upstream version 2.3~rc5. (diff)
download   nvme-stas-73334e48e395cd0ac4fbef0611609a3bd70abfe2.tar.xz
           nvme-stas-73334e48e395cd0ac4fbef0611609a3bd70abfe2.zip

Adding upstream version 2.3. (upstream/2.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  .github/workflows/docker-publish.yml    8
-rw-r--r--  .github/workflows/docker-test.yml       6
-rw-r--r--  .github/workflows/meson-test.yml        3
-rw-r--r--  .github/workflows/pylint.yml            7
-rw-r--r--  .readthedocs.yaml                       3
-rw-r--r--  NEWS.md                                 3
-rw-r--r--  meson.build                             2
-rw-r--r--  staslib/avahi.py                       50
-rw-r--r--  staslib/gutil.py                       20
9 files changed, 77 insertions, 25 deletions
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index a94e7b3..3e183c0 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -32,13 +32,13 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
- uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc
+ uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -48,14 +48,14 @@ jobs:
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
- uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
+ uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
- uses: docker/build-push-action@2eb1c1961a95fc15694676618e422e8ba1d63825
+ uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml
index 92284c0..42a871c 100644
--- a/.github/workflows/docker-test.yml
+++ b/.github/workflows/docker-test.yml
@@ -13,11 +13,13 @@ jobs:
if: ${{ !github.event.act }} # skip during local actions testing
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Install requirements
# make sure nvme-cli installed (we need it for /etc/nvme/hostnqn and /etc/nvme/hostid)
- run: sudo apt-get install --yes --quiet nvme-cli
+ run: |
+ sudo apt update
+ sudo apt-get install --yes --quiet nvme-cli
- name: Load Kernel drivers
run: sudo modprobe -v nvme-fabrics
diff --git a/.github/workflows/meson-test.yml b/.github/workflows/meson-test.yml
index eff6df1..4b9662d 100644
--- a/.github/workflows/meson-test.yml
+++ b/.github/workflows/meson-test.yml
@@ -13,10 +13,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "CHECKOUT: nvme-stas"
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: "INSTALL: Overall dependencies"
run: |
+ sudo apt update
sudo apt-get install --yes --quiet python3-pip cmake iproute2
sudo python3 -m pip install --upgrade pip
sudo python3 -m pip install --upgrade wheel meson ninja
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index cd200f9..9af1819 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -14,7 +14,7 @@ jobs:
if: ${{ !github.event.act }} # skip during local actions testing
runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: hadolint/hadolint-action@v3.1.0
with:
recursive: true
@@ -30,7 +30,7 @@ jobs:
steps:
- name: "CHECKOUT: nvme-stas"
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
@@ -39,6 +39,7 @@ jobs:
- name: "INSTALL: additional packages"
run: |
+ sudo apt update
sudo apt-get install --yes --quiet python3-pip cmake libgirepository1.0-dev libsystemd-dev python3-systemd swig libjson-c-dev || true
sudo python3 -m pip install --upgrade pip wheel meson ninja
python3 -m pip install --upgrade dasbus pylint pyflakes PyGObject lxml pyudev
@@ -77,7 +78,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "CHECKOUT: nvme-stas"
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: "BLACK"
uses: psf/black@stable
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 8744afb..e3e10f1 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,9 +9,6 @@
version: 2
-python:
- system_packages: true
-
build:
os: ubuntu-22.04
tools:
diff --git a/NEWS.md b/NEWS.md
index b7abb7c..079a276 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,9 +5,10 @@
New features:
- Support for nBFT (NVMe-oF Boot Table).
-- The Avahi driver will now verify reachability of services discovered through mDNS to make sure all discovered IP addresses can be connected to. This avoids invoking the NVMe kernel driver with invalid IP addresses and getting error messages in the syslog.
+- The Avahi driver will now verify the reachability of services discovered through mDNS, to make sure that every discovered IP address can actually be connected to. This avoids invoking the NVMe kernel driver with invalid IP addresses and getting error messages in the syslog. While testing this feature, we found that the CDC may advertise itself (using mDNS) before it is actually ready to accept connections from the host. If a host reacting to mDNS advertisements tries to connect to the CDC before the CDC is listening for connections, the attempt fails with "Connection refused" and the host may conclude that the CDC is not reachable. For that reason, the host keeps retrying the connection in the background: retries initially happen at a fast pace and gradually slow down.
- The Avahi driver will now print an error message if the same IP address is found on multiple interfaces. This indicates a misconfiguration of the network.
- Simplify the algorithm that determines whether an existing connection (in sysfs) can be reused by stafd/stacd instead of creating a duplicate connection.
+- Improve scalability. First, the algorithm that handles kernel events was reworked to process events faster. Second, reduce the number of times the netlink kernel interface is invoked: instead of querying netlink repeatedly, invoke it once and cache and reuse the data for the whole duration of the scanning loop.
Bug fixes:
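Editor's note: the retry pacing described in the NEWS entry above is implemented by the ValueRange helper that this patch adds to staslib/avahi.py (see the hunk further down). A minimal sketch of the intended behaviour, reusing the timeout ladder visible in the diff (2, 5, 10, 30, 60, 300, 600 seconds); the import path follows from where the class is added, everything else is illustrative:

    from staslib.avahi import ValueRange  # class added by this patch

    pacing = ValueRange([2, 5, 10, 30, 60, 300, 600])

    # Early retries come quickly, later ones slow down, and the last value
    # acts as a ceiling that keeps being returned until reset() is called.
    delays = [pacing.get_next() for _ in range(9)]
    # delays == [2, 5, 10, 30, 60, 300, 600, 600, 600]

    pacing.reset()  # start over at 2 seconds for the next discovery cycle

In other words, a host that cannot reach a freshly advertised CDC retries after 2 s, 5 s, 10 s, and so on, and eventually settles on one attempt every 10 minutes.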
diff --git a/meson.build b/meson.build
index 4e52c6e..b648a86 100644
--- a/meson.build
+++ b/meson.build
@@ -9,7 +9,7 @@
project(
'nvme-stas',
meson_version: '>= 0.53.0',
- version: '2.3-rc5',
+ version: '2.3',
license: 'Apache-2.0',
default_options: [
'buildtype=release',
diff --git a/staslib/avahi.py b/staslib/avahi.py
index 84a0b2a..f91e489 100644
--- a/staslib/avahi.py
+++ b/staslib/avahi.py
@@ -71,6 +71,29 @@ def fmt_service_str(interface, protocol, name, stype, domain, flags): # pylint:
)
+class ValueRange:
+ '''Implement a range of values with a ceiling. Once the ceiling has been
+ reached, any further request for a new value will return the
+ ceiling (i.e., the last value).'''
+
+ def __init__(self, values: list):
+ self._values = values
+ self._index = 0
+
+ def get_next(self):
+ '''Get the next value (or last value if ceiling was reached)'''
+ value = self._values[self._index]
+ if self._index >= 0:
+ self._index += 1
+ if self._index >= len(self._values):
+ self._index = -1
+ return value
+
+ def reset(self):
+ '''Reset the range to start from the beginning'''
+ self._index = 0
+
+
# ******************************************************************************
class Service: # pylint: disable=too-many-instance-attributes
'''Object used to keep track of the services discovered from the avahi-daemon'''
@@ -109,6 +132,11 @@ class Service: # pylint: disable=too-many-instance-attributes
self._interface_id, self._protocol_id, self._name, self._stype, self._domain, self._flags
)
+ self._connect_check_retry_tmo = ValueRange([2, 5, 10, 30, 60, 300, 600])
+ self._connect_check_retry_tmr = gutil.GTimer(
+ self._connect_check_retry_tmo.get_next(), self._on_connect_check_retry
+ )
+
self._ip = None
self._resolver = None
self._data = {}
@@ -151,8 +179,17 @@ class Service: # pylint: disable=too-many-instance-attributes
self._identified_cback()
return
+ self._connect_check(verbose=True) # Enable verbosity on first attempt
+
+ def _connect_check(self, verbose=False):
self._reachable = False
- connect_checker = gutil.TcpChecker(traddr, trsvcid, host_iface, self._tcp_connect_check_cback)
+ connect_checker = gutil.TcpChecker(
+ self._data['traddr'],
+ self._data['trsvcid'],
+ self._data['host-iface'],
+ verbose,
+ self._tcp_connect_check_cback,
+ )
try:
connect_checker.connect()
@@ -168,7 +205,16 @@ class Service: # pylint: disable=too-many-instance-attributes
self._connect_checker.close()
self._connect_checker = None
self._reachable = connected
- self._identified_cback()
+
+ if self._reachable:
+ self._identified_cback()
+ else:
+ # Restart the timer but with incremented timeout
+ self._connect_check_retry_tmr.start(self._connect_check_retry_tmo.get_next())
+
+ def _on_connect_check_retry(self):
+ self._connect_check()
+ return GLib.SOURCE_REMOVE
def set_resolver(self, resolver):
'''Set the resolver object'''
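Editor's note: _on_connect_check_retry() above returns GLib.SOURCE_REMOVE so that each timeout source fires exactly once; the next retry is then re-armed explicitly with a longer timeout taken from the ValueRange. For readers unfamiliar with that GLib convention, a small self-contained sketch (plain PyGObject, not nvme-stas code):

    from gi.repository import GLib

    loop = GLib.MainLoop()

    def on_timeout():
        print('timer fired once')
        loop.quit()
        # GLib.SOURCE_REMOVE (False) makes this a one-shot timer;
        # GLib.SOURCE_CONTINUE (True) would keep it firing periodically.
        return GLib.SOURCE_REMOVE

    GLib.timeout_add_seconds(2, on_timeout)
    loop.run()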
diff --git a/staslib/gutil.py b/staslib/gutil.py
index 9aef347..7bdc117 100644
--- a/staslib/gutil.py
+++ b/staslib/gutil.py
@@ -423,7 +423,9 @@ class Deferred:
class TcpChecker: # pylint: disable=too-many-instance-attributes
'''@brief Verify that a TCP connection can be established with an endpoint'''
- def __init__(self, traddr, trsvcid, host_iface, user_cback, *user_data):
+ def __init__(
+ self, traddr, trsvcid, host_iface, verbose, user_cback, *user_data
+ ): # pylint: disable=too-many-arguments
self._user_cback = user_cback
self._host_iface = host_iface
self._user_data = user_data
@@ -432,6 +434,7 @@ class TcpChecker: # pylint: disable=too-many-instance-attributes
self._cancellable = None
self._gio_sock = None
self._native_sock = None
+ self._verbose = verbose
def connect(self):
'''Attempt to connect'''
@@ -502,13 +505,14 @@ class TcpChecker: # pylint: disable=too-many-instance-attributes
if err.matches(Gio.io_error_quark(), Gio.IOErrorEnum.CANCELLED):
logging.debug('TcpChecker._connect_async_cback() - %s', err.message) # pylint: disable=no-member
else:
- logging.info(
- 'Unable to verify TCP connectivity - (%-10s %-14s %s): %s',
- self._host_iface + ',',
- self._traddr.compressed + ',',
- self._trsvcid,
- err.message, # pylint: disable=no-member
- )
+ if self._verbose:
+ logging.info(
+ 'Unable to verify TCP connectivity - (%-10s %-14s %s): %s',
+ self._host_iface + ',',
+ self._traddr.compressed + ',',
+ self._trsvcid,
+ err.message, # pylint: disable=no-member
+ )
self.close()
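Editor's note: with this change, TcpChecker takes a new verbose positional argument between host_iface and user_cback, and the "Unable to verify TCP connectivity" message is only logged when verbose is set. The Avahi driver passes verbose=True on the first connectivity probe and leaves it False on background retries, so the syslog is not flooded while waiting for a slow CDC. A hedged usage sketch (the callback arity and the address/interface values are assumptions, not taken from the patch):

    from staslib import gutil

    def on_checked(connected):
        # Assumed callback shape: TcpChecker reports a single boolean.
        print('endpoint reachable' if connected else 'endpoint not reachable')

    # verbose=True for the first attempt; pass False on later retries so
    # repeated failures do not spam the log.
    checker = gutil.TcpChecker('192.168.1.100', '8009', 'eth0', True, on_checked)
    checker.connect()  # asynchronous: on_checked() runs from the GLib main loop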