summaryrefslogtreecommitdiffstats
path: root/src/pybind/mgr/cephadm/offline_watcher.py
blob: 2b7751dfc34d8118833ef1c597f1cc5cb370a3c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import logging
from typing import List, Optional, TYPE_CHECKING

import multiprocessing as mp
import threading

if TYPE_CHECKING:
    from cephadm.module import CephadmOrchestrator

logger = logging.getLogger(__name__)


class OfflineHostWatcher(threading.Thread):
    """Background thread that periodically probes hosts for reachability.

    Every 20 seconds (or sooner when woken through :meth:`wakeup`) the thread
    runs the ``true`` command over ``mgr.ssh`` on every watched host that is
    not already listed in ``mgr.offline_hosts``.  When such a probe raises,
    the mgr serve loop is kicked so it can react to the host going away.

    The watched host list is replaced through :meth:`set_hosts`; the thread
    is stopped through :meth:`shutdown`.
    """

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        """Create the watcher; the thread is not started here.

        :param mgr: orchestrator module providing ``ssh``, ``offline_hosts``,
            ``log_refresh_metadata`` and ``_kick_serve_loop``.
        """
        # ``run`` is overridden below, so no ``target`` needs to be passed.
        super().__init__()
        self.mgr = mgr
        # hosts currently being probed by the loop in run()
        self.hosts: Optional[List[str]] = None
        # replacement host list handed over by set_hosts(), picked up by run()
        self.new_hosts: Optional[List[str]] = None
        # set to True by shutdown() to make run() leave its loop
        self.stop = False
        # used to cut the 20 second sleep in run() short
        self.event = threading.Event()

    def run(self) -> None:
        """Probe the watched hosts in a loop until :meth:`shutdown` is called."""
        # ``import multiprocessing`` alone does not load the ``pool``
        # submodule, so import it explicitly instead of relying on some other
        # module having done so before this thread starts.
        from multiprocessing.pool import ThreadPool

        self.thread_pool = ThreadPool(10)
        try:
            while not self.stop:
                # adopt a pending replacement list, if there is one
                if self.new_hosts:
                    self.hosts = self.new_hosts
                    self.new_hosts = None
                # only need to take action if we have hosts to check
                if self.hosts:
                    logger.debug(f'OfflineHostDetector: Checking if hosts: {self.hosts} are offline.')
                    self.thread_pool.map(self.check_host, self.hosts)
                # sleep until the next round, or until wakeup() is called
                self.event.wait(20)
                self.event.clear()
        finally:
            # release the worker threads even if the loop exits on an error
            self.thread_pool.close()
            self.thread_pool.join()

    def check_host(self, host: str) -> None:
        """Probe a single host and kick the serve loop if the probe fails.

        Hosts already listed in ``mgr.offline_hosts`` are skipped.

        :param host: name of the host to probe.
        """
        if host in self.mgr.offline_hosts:
            return
        try:
            self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata)
        except Exception:
            # any failure of the probe is treated as "host is offline"
            logger.debug(f'OfflineHostDetector: detected {host} to be offline')
            # kick serve loop in case corrective action must be taken for offline host
            self.mgr._kick_serve_loop()

    def set_hosts(self, hosts: List[str]) -> None:
        """Replace the set of hosts to watch.

        Nothing happens when ``hosts`` is empty or when, once sorted, it
        equals the list currently being watched.  Otherwise the sorted list
        is queued for the watcher thread and the thread is woken up.

        :param hosts: host names to watch; the list itself is not modified.
        """
        # sort a copy so that the caller's list is neither reordered nor
        # shared with the watcher thread
        ordered = sorted(hosts)
        if ordered and ordered != self.hosts:
            self.new_hosts = ordered
            logger.debug(
                f'OfflineHostDetector: Hosts to check if offline swapped to: {self.new_hosts}.')
            self.wakeup()

    def wakeup(self) -> None:
        """Interrupt the wait in :meth:`run` so the next round starts now."""
        self.event.set()

    def shutdown(self) -> None:
        """Ask :meth:`run` to leave its loop and wake it up to notice."""
        self.stop = True
        self.wakeup()