From 73334e48e395cd0ac4fbef0611609a3bd70abfe2 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 25 Sep 2023 10:22:07 +0200
Subject: Adding upstream version 2.3.

Signed-off-by: Daniel Baumann
---
 .github/workflows/docker-publish.yml |  8 +++---
 .github/workflows/docker-test.yml    |  6 +++--
 .github/workflows/meson-test.yml     |  3 ++-
 .github/workflows/pylint.yml         |  7 ++---
 .readthedocs.yaml                    |  3 ---
 NEWS.md                              |  3 ++-
 meson.build                          |  2 +-
 staslib/avahi.py                     | 50 ++++++++++++++++++++++++++++++++++--
 staslib/gutil.py                     | 20 +++++++++------
 9 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index a94e7b3..3e183c0 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -32,13 +32,13 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       # Login against a Docker registry except on PR
       # https://github.com/docker/login-action
       - name: Log into registry ${{ env.REGISTRY }}
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -48,14 +48,14 @@ jobs:
       # https://github.com/docker/metadata-action
       - name: Extract Docker metadata
         id: meta
-        uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
+        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
 
       # Build and push Docker image with Buildx (don't push on PR)
       # https://github.com/docker/build-push-action
       - name: Build and push Docker image
-        uses: docker/build-push-action@2eb1c1961a95fc15694676618e422e8ba1d63825
+        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml
index 92284c0..42a871c 100644
--- a/.github/workflows/docker-test.yml
+++ b/.github/workflows/docker-test.yml
@@ -13,11 +13,13 @@ jobs:
     if: ${{ !github.event.act }} # skip during local actions testing
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Install requirements
         # make sure nvme-cli installed (we need it for /etc/nvme/hostnqn and /etc/nvme/hostid)
-        run: sudo apt-get install --yes --quiet nvme-cli
+        run: |
+          sudo apt update
+          sudo apt-get install --yes --quiet nvme-cli
 
       - name: Load Kernel drivers
         run: sudo modprobe -v nvme-fabrics
diff --git a/.github/workflows/meson-test.yml b/.github/workflows/meson-test.yml
index eff6df1..4b9662d 100644
--- a/.github/workflows/meson-test.yml
+++ b/.github/workflows/meson-test.yml
@@ -13,10 +13,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: "CHECKOUT: nvme-stas"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: "INSTALL: Overall dependencies"
         run: |
+          sudo apt update
           sudo apt-get install --yes --quiet python3-pip cmake iproute2
           sudo python3 -m pip install --upgrade pip
           sudo python3 -m pip install --upgrade wheel meson ninja
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index cd200f9..9af1819 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -14,7 +14,7 @@ jobs:
     if: ${{ !github.event.act }} # skip during local actions testing
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: hadolint/hadolint-action@v3.1.0
         with:
           recursive: true
@@ -30,7 +30,7 @@ jobs:
 
     steps:
       - name: "CHECKOUT: nvme-stas"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -39,6 +39,7 @@ jobs:
 
       - name: "INSTALL: additional packages"
         run: |
+          sudo apt update
           sudo apt-get install --yes --quiet python3-pip cmake libgirepository1.0-dev libsystemd-dev python3-systemd swig libjson-c-dev || true
           sudo python3 -m pip install --upgrade pip wheel meson ninja
           python3 -m pip install --upgrade dasbus pylint pyflakes PyGObject lxml pyudev
@@ -77,7 +78,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: "CHECKOUT: nvme-stas"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: "BLACK"
         uses: psf/black@stable
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 8744afb..e3e10f1 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,9 +9,6 @@
 
 version: 2
 
-python:
-  system_packages: true
-
 build:
   os: ubuntu-22.04
   tools:
diff --git a/NEWS.md b/NEWS.md
index b7abb7c..079a276 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,9 +5,10 @@ New features:
 
 - Support for nBFT (NVMe-oF Boot Table).
-- The Avahi driver will now verify reachability of services discovered through mDNS to make sure all discovered IP addresses can be connected to. This avoids invoking the NVMe kernel driver with invalid IP addresses and getting error messages in the syslog.
+- The Avahi driver will now verify reachability of services discovered through mDNS to make sure all discovered IP addresses can be connected to. This avoids invoking the NVMe kernel driver with invalid IP addresses and getting error messages in the syslog. While testing this feature, we found that the CDC may advertise itself (using mDNS) before it is actually ready to receive connections from the host. If a host reacting to mDNS advertisements tries to connect to the CDC before the CDC is listening for connections, a "Connection refused" will happen and the host may conclude that the CDC is not reachable. For that reason the host will keep trying to connect in the background. Retries will initially happen at a fast pace and gradually be done at a slower pace.
 - The Avahi driver will now print an error message if the same IP address is found on multiple interfaces. This indicates a misconfiguration of the network.
 - Simplify algorithm that determines if an existing connection (in sysfs) can be reused by stafd/stacd instead of creating a duplicate connection.
+- Improve scalability. First, the algorithm that handles kernel events was reworked to handle events faster. Second, limit the number of times the netlink kernel interface is invoked. Instead, invoke netlink once and cache & reuse the data for the whole duration of the scanning loop.
 
 Bug fixes:
diff --git a/meson.build b/meson.build
index 4e52c6e..b648a86 100644
--- a/meson.build
+++ b/meson.build
@@ -9,7 +9,7 @@
 project(
     'nvme-stas',
     meson_version: '>= 0.53.0',
-    version: '2.3-rc5',
+    version: '2.3',
     license: 'Apache-2.0',
     default_options: [
         'buildtype=release',
diff --git a/staslib/avahi.py b/staslib/avahi.py
index 84a0b2a..f91e489 100644
--- a/staslib/avahi.py
+++ b/staslib/avahi.py
@@ -71,6 +71,29 @@ def fmt_service_str(interface, protocol, name, stype, domain, flags):  # pylint:
     )
 
 
+class ValueRange:
+    '''Implement a range of values with ceiling. Once the ceiling has been
+    reached, then any further request for a new value will return the
+    ceiling (i.e last value).'''
+
+    def __init__(self, values: list):
+        self._values = values
+        self._index = 0
+
+    def get_next(self):
+        '''Get the next value (or last value if ceiling was reached)'''
+        value = self._values[self._index]
+        if self._index >= 0:
+            self._index += 1
+            if self._index >= len(self._values):
+                self._index = -1
+        return value
+
+    def reset(self):
+        '''Reset the range to start from the beginning'''
+        self._index = 0
+
+
 # ******************************************************************************
 class Service:  # pylint: disable=too-many-instance-attributes
     '''Object used to keep track of the services discovered from the avahi-daemon'''
@@ -109,6 +132,11 @@ class Service:  # pylint: disable=too-many-instance-attributes
             self._interface_id, self._protocol_id, self._name, self._stype, self._domain, self._flags
         )
 
+        self._connect_check_retry_tmo = ValueRange([2, 5, 10, 30, 60, 300, 600])
+        self._connect_check_retry_tmr = gutil.GTimer(
+            self._connect_check_retry_tmo.get_next(), self._on_connect_check_retry
+        )
+
         self._ip = None
         self._resolver = None
         self._data = {}
@@ -151,8 +179,17 @@ class Service:  # pylint: disable=too-many-instance-attributes
             self._identified_cback()
             return
 
+        self._connect_check(verbose=True)  # Enable verbosity on first attempt
+
+    def _connect_check(self, verbose=False):
         self._reachable = False
-        connect_checker = gutil.TcpChecker(traddr, trsvcid, host_iface, self._tcp_connect_check_cback)
+        connect_checker = gutil.TcpChecker(
+            self._data['traddr'],
+            self._data['trsvcid'],
+            self._data['host-iface'],
+            verbose,
+            self._tcp_connect_check_cback,
+        )
 
         try:
             connect_checker.connect()
@@ -168,7 +205,16 @@ class Service:  # pylint: disable=too-many-instance-attributes
         self._connect_checker.close()
         self._connect_checker = None
         self._reachable = connected
-        self._identified_cback()
+
+        if self._reachable:
+            self._identified_cback()
+        else:
+            # Restart the timer but with incremented timeout
+            self._connect_check_retry_tmr.start(self._connect_check_retry_tmo.get_next())
+
+    def _on_connect_check_retry(self):
+        self._connect_check()
+        return GLib.SOURCE_REMOVE
 
     def set_resolver(self, resolver):
         '''Set the resolver object'''
diff --git a/staslib/gutil.py b/staslib/gutil.py
index 9aef347..7bdc117 100644
--- a/staslib/gutil.py
+++ b/staslib/gutil.py
@@ -423,7 +423,9 @@ class Deferred:
 class TcpChecker:  # pylint: disable=too-many-instance-attributes
     '''@brief Verify that a TCP connection can be established with an enpoint'''
 
-    def __init__(self, traddr, trsvcid, host_iface, user_cback, *user_data):
+    def __init__(
+        self, traddr, trsvcid, host_iface, verbose, user_cback, *user_data
+    ):  # pylint: disable=too-many-arguments
         self._user_cback = user_cback
         self._host_iface = host_iface
         self._user_data = user_data
@@ -432,6 +434,7 @@ class TcpChecker:  # pylint: disable=too-many-instance-attributes
         self._cancellable = None
         self._gio_sock = None
         self._native_sock = None
+        self._verbose = verbose
 
     def connect(self):
         '''Attempt to connect'''
@@ -502,13 +505,14 @@ class TcpChecker:  # pylint: disable=too-many-instance-attributes
             if err.matches(Gio.io_error_quark(), Gio.IOErrorEnum.CANCELLED):
                 logging.debug('TcpChecker._connect_async_cback() - %s', err.message)  # pylint: disable=no-member
             else:
-                logging.info(
-                    'Unable to verify TCP connectivity - (%-10s %-14s %s): %s',
-                    self._host_iface + ',',
-                    self._traddr.compressed + ',',
-                    self._trsvcid,
-                    err.message,  # pylint: disable=no-member
-                )
+                if self._verbose:
+                    logging.info(
+                        'Unable to verify TCP connectivity - (%-10s %-14s %s): %s',
+                        self._host_iface + ',',
+                        self._traddr.compressed + ',',
+                        self._trsvcid,
+                        err.message,  # pylint: disable=no-member
+                    )
 
         self.close()
--
cgit v1.2.3
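
The retry pacing described in the NEWS.md entry above is driven by the ValueRange helper added to staslib/avahi.py: every failed connectivity check restarts the retry timer with the next value from the range, so retries start fast and then settle on the last value in the list (600) until reset() is called. Below is a minimal, standalone sketch (plain Python, not part of the patch) that copies ValueRange from the hunk above and prints the timeout sequence a Service object would feed to its retry timer; the backoff name and the __main__ driver are illustrative only.

# Illustrative sketch only: ValueRange is copied from the staslib/avahi.py hunk
# above; the driver code at the bottom is hypothetical and not part of nvme-stas.

class ValueRange:
    '''Implement a range of values with ceiling. Once the ceiling has been
    reached, then any further request for a new value will return the
    ceiling (i.e last value).'''

    def __init__(self, values: list):
        self._values = values
        self._index = 0

    def get_next(self):
        '''Get the next value (or last value if ceiling was reached)'''
        value = self._values[self._index]
        if self._index >= 0:
            self._index += 1
            if self._index >= len(self._values):
                self._index = -1  # park on the ceiling: values[-1] is returned from now on
        return value

    def reset(self):
        '''Reset the range to start from the beginning'''
        self._index = 0


if __name__ == '__main__':
    # Same timeout list the Service object passes to ValueRange in avahi.py.
    backoff = ValueRange([2, 5, 10, 30, 60, 300, 600])

    # First retries come quickly, later ones settle on the ceiling (600):
    print([backoff.get_next() for _ in range(10)])  # [2, 5, 10, 30, 60, 300, 600, 600, 600, 600]

    backoff.reset()             # start over from the fast end of the range
    print(backoff.get_next())   # 2

Running the sketch prints the sequence 2, 5, 10, 30, 60, 300, 600, 600, ..., which matches the "fast pace, then slower pace" retry behaviour described in NEWS.md.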