# blob: 2b7751dfc34d8118833ef1c597f1cc5cb370a3c2 (plain)
import logging
from typing import List, Optional, TYPE_CHECKING

import multiprocessing as mp
from multiprocessing.pool import ThreadPool
import threading

if TYPE_CHECKING:
    from cephadm.module import CephadmOrchestrator
logger = logging.getLogger(__name__)
class OfflineHostWatcher(threading.Thread):
    """Background thread that periodically probes hosts to detect offline ones.

    The list of hosts is handed over with :meth:`set_hosts`. On each pass the
    thread adopts any pending list, runs ``true`` on every host through the
    manager's SSH helper, and kicks the manager's serve loop when a probe
    fails. :meth:`shutdown` ends the loop.
    """

    # number of worker threads used to probe hosts in parallel
    POOL_SIZE = 10
    # seconds to wait between probe rounds when nobody calls wakeup()
    CHECK_INTERVAL = 20

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        """Create the watcher; the thread is not started here.

        :param mgr: object whose ``offline_hosts``, ``ssh``,
            ``log_refresh_metadata`` and ``_kick_serve_loop`` are used by
            :meth:`check_host`.
        """
        # run() is overridden below, so the override is what start() executes;
        # the ``target`` argument is kept exactly as in the original call.
        super().__init__(target=self.run)
        self.mgr = mgr
        # hosts currently being probed
        self.hosts: Optional[List[str]] = None
        # replacement list waiting to be adopted by run()
        self.new_hosts: Optional[List[str]] = None
        self.stop = False
        self.event = threading.Event()

    def run(self) -> None:
        """Probe the current host list in rounds until ``stop`` is set."""
        pool = ThreadPool(self.POOL_SIZE)
        self.thread_pool = pool
        try:
            while not self.stop:
                # only need to take action if we have hosts to check
                if self.hosts or self.new_hosts:
                    if self.new_hosts:
                        self.hosts = self.new_hosts
                        self.new_hosts = None
                    logger.debug(f'OfflineHostDetector: Checking if hosts: {self.hosts} are offline.')
                    assert self.hosts is not None
                    pool.map(self.check_host, self.hosts)
                # sleep until the next round, or until wakeup() sets the event
                self.event.wait(self.CHECK_INTERVAL)
                self.event.clear()
        finally:
            # always release the worker threads, even if a round raised
            pool.close()
            pool.join()

    def check_host(self, host: str) -> None:
        """Probe a single host unless it is already listed as offline.

        Any exception raised by the probe command is treated as the host
        being offline; it is logged at debug level and not re-raised.

        :param host: name of the host to probe.
        """
        if host in self.mgr.offline_hosts:
            return
        try:
            self.mgr.ssh.check_execute_command(
                host, ['true'], log_command=self.mgr.log_refresh_metadata)
        except Exception:
            logger.debug(f'OfflineHostDetector: detected {host} to be offline')
            # kick serve loop in case corrective action must be taken for offline host
            self.mgr._kick_serve_loop()

    def set_hosts(self, hosts: List[str]) -> None:
        """Queue a new list of hosts to probe and wake the thread.

        Nothing happens when ``hosts`` is empty or, once sorted, equals the
        list currently being probed. The argument itself is left untouched:
        a sorted copy is what gets stored.

        :param hosts: host names to probe, in any order.
        """
        ordered = sorted(hosts)
        if ordered and self.hosts != ordered:
            self.new_hosts = ordered
            logger.debug(
                f'OfflineHostDetector: Hosts to check if offline swapped to: {self.new_hosts}.')
            self.wakeup()

    def wakeup(self) -> None:
        """Interrupt the wait between rounds so run() proceeds immediately."""
        self.event.set()

    def shutdown(self) -> None:
        """Ask run() to exit and wake it so the request is seen promptly."""
        self.stop = True
        self.wakeup()
# (end of scraped blob view)