summaryrefslogtreecommitdiffstats
path: root/crmsh/watchdog.py
blob: 6d0d2cff44fa2020becbc2fcc2caf3f07371bd93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import re
from . import utils
from .constants import SSH_OPTION
from .bootstrap import invoke, invokerc, WATCHDOG_CFG, SYSCONFIG_SBD
from .sh import ShellUtils


class Watchdog(object):
    """
    Find (and, if needed, load the driver for) a usable watchdog device.

    The candidate devices are discovered by parsing the output of
    ``sbd query-watchdog``; each candidate is then checked with ``wdctl``.
    The result of init_watchdog() is exposed through the
    ``watchdog_device_name`` property.
    """
    QUERY_CMD = "sudo sbd query-watchdog"
    # Raw string: "\[" is an invalid escape sequence in a normal string literal.
    # Captures (device path, driver name) from output shaped like:
    #   [1] /dev/watchdog\nIdentity: Software Watchdog\nDriver: softdog\n
    DEVICE_FIND_REGREX = r"\[[0-9]+\] (/dev/.*)\n.*\nDriver: (.*)"

    def __init__(self, _input=None, remote_user=None, peer_host=None):
        """
        Init function

        _input:      watchdog device path or driver name given by the user
                     (may be None, see _set_input for the fallbacks)
        remote_user: user name used for the ssh query in the join process
        peer_host:   host queried over ssh in the join process
        """
        self._input = _input
        self._remote_user = remote_user
        self._peer_host = peer_host
        # {device_name: driver_name}, filled by _set_watchdog_info
        self._watchdog_info_dict = {}
        self._watchdog_device_name = None

    @property
    def watchdog_device_name(self):
        """
        Device name selected by init_watchdog, or None if none was selected
        """
        return self._watchdog_device_name

    @staticmethod
    def _verify_watchdog_device(dev, ignore_error=False):
        """
        Use wdctl to verify watchdog device

        Return True when "wdctl <dev>" exits with 0.
        On failure return False if ignore_error is set, otherwise report
        the error through utils.fatal.
        """
        rc, _, err = ShellUtils().get_stdout_stderr("wdctl {}".format(dev))
        if rc == 0:
            return True
        if ignore_error:
            return False
        utils.fatal("Invalid watchdog device {}: {}".format(dev, err))
        # Only reached if utils.fatal returns; same fall-through result as before
        return True

    @staticmethod
    def _load_watchdog_driver(driver):
        """
        Load specific watchdog driver

        Writes the driver name into WATCHDOG_CFG (overwriting the file) and
        restarts systemd-modules-load.
        """
        invoke("echo {} > {}".format(driver, WATCHDOG_CFG))
        invoke("systemctl restart systemd-modules-load")

    @staticmethod
    def _get_watchdog_device_from_sbd_config():
        """
        Try to get watchdog device name from sbd config file

        Return the SBD_WATCHDOG_DEV value from SYSCONFIG_SBD, or None if unset.
        """
        conf = utils.parse_sysconfig(SYSCONFIG_SBD)
        return conf.get("SBD_WATCHDOG_DEV")

    @staticmethod
    def _driver_is_loaded(driver):
        """
        Check if driver was already loaded

        Return a match object (truthy) when a line of the lsmod output starts
        with the driver name followed by whitespace, otherwise None.
        """
        _, out, _ = ShellUtils().get_stdout_stderr("lsmod")
        # Escape the name so characters such as "." or "+" are matched
        # literally instead of being interpreted as regex syntax.
        return re.search(r"\n{}\s+".format(re.escape(driver)), out)

    def _set_watchdog_info(self):
        """
        Set watchdog info through sbd query-watchdog command
        Content in self._watchdog_info_dict: {device_name: driver_name}

        Reports through utils.fatal when the command fails or prints nothing.
        """
        rc, out, err = ShellUtils().get_stdout_stderr(self.QUERY_CMD)
        if rc == 0 and out:
            # output format might like:
            #   [1] /dev/watchdog\nIdentity: Software Watchdog\nDriver: softdog\n
            self._watchdog_info_dict = dict(re.findall(self.DEVICE_FIND_REGREX, out))
        else:
            utils.fatal("Failed to run {}: {}".format(self.QUERY_CMD, err))

    def _get_device_through_driver(self, driver_name):
        """
        Get watchdog device name which has driver_name

        Return the first known device using driver_name that also passes the
        wdctl check, or None when no device uses that driver.
        """
        for device, driver in self._watchdog_info_dict.items():
            if driver == driver_name and self._verify_watchdog_device(device):
                return device
        return None

    def _get_driver_through_device_remotely(self, dev_name):
        """
        Given watchdog device name, get driver name on remote node

        Runs QUERY_CMD on the peer host over ssh and returns the driver
        reported for dev_name, or None if dev_name is not listed there.
        Reports through utils.fatal when the remote command fails or prints
        nothing.
        """
        # FIXME
        cmd = "ssh {} {}@{} {}".format(SSH_OPTION, self._remote_user, self._peer_host, self.QUERY_CMD)
        rc, out, err = ShellUtils().get_stdout_stderr(cmd)
        if rc == 0 and out:
            # output format might like:
            #   [1] /dev/watchdog\nIdentity: Software Watchdog\nDriver: softdog\n
            device_driver_dict = dict(re.findall(self.DEVICE_FIND_REGREX, out))
            return device_driver_dict.get(dev_name)
        utils.fatal("Failed to run {} remotely: {}".format(self.QUERY_CMD, err))

    def _get_first_unused_device(self):
        """
        Get first unused watchdog device name

        Return the first known device that passes the wdctl check (check
        failures are ignored), or None if there is none.
        """
        for dev in self._watchdog_info_dict:
            if self._verify_watchdog_device(dev, ignore_error=True):
                return dev
        return None

    def _set_input(self):
        """
        If self._input was not provided by option:
          1. Try to get it from sbd config file
          2. Try to get the first valid device from result of sbd query-watchdog
          3. Set the self._input as softdog
        """
        if self._input:
            return
        dev = self._get_watchdog_device_from_sbd_config()
        if dev and self._verify_watchdog_device(dev, ignore_error=True):
            self._input = dev
            return
        self._input = self._get_first_unused_device() or "softdog"

    def _valid_device(self, dev):
        """
        Is an unused watchdog device

        True only when dev is listed in self._watchdog_info_dict and passes
        the wdctl check. A listed device that fails the check is reported
        through utils.fatal by _verify_watchdog_device.
        """
        return dev in self._watchdog_info_dict and self._verify_watchdog_device(dev)

    def join_watchdog(self):
        """
        In join process, get watchdog device from config
        If that device does not exist, get driver name from init node, and load that driver
        """
        self._set_watchdog_info()

        res = self._get_watchdog_device_from_sbd_config()
        if not res:
            utils.fatal("Failed to get watchdog device from {}".format(SYSCONFIG_SBD))
        self._input = res

        if not self._valid_device(self._input):
            driver = self._get_driver_through_device_remotely(self._input)
            if not driver:
                # Without this guard the literal text "None" would be written
                # into WATCHDOG_CFG as the module name to load.
                utils.fatal("Failed to get watchdog driver of {} from {}".format(self._input, self._peer_host))
            self._load_watchdog_driver(driver)

    def init_watchdog(self):
        """
        In init process, find valid watchdog device

        On success the chosen device is stored and available through the
        watchdog_device_name property. If self._input turns out to be a
        driver name for which no device is found, the property is left
        unchanged (None on a fresh instance).
        """
        self._set_watchdog_info()
        self._set_input()

        # self._input is a device name
        if self._valid_device(self._input):
            self._watchdog_device_name = self._input
            return

        # self._input is invalid, exit
        if not invokerc("modinfo {}".format(self._input)):
            utils.fatal("Should provide valid watchdog device or driver name by -w option")

        # self._input is a driver name, load it if it was unloaded
        if not self._driver_is_loaded(self._input):
            self._load_watchdog_driver(self._input)
            # Refresh the device list: loading the driver may add a device
            self._set_watchdog_info()

        # self._input is a loaded driver name, find corresponding device name
        res = self._get_device_through_driver(self._input)
        if res:
            self._watchdog_device_name = res