diff options
Diffstat (limited to 'qa/tasks/mgr/test_module_selftest.py')
-rw-r--r-- | qa/tasks/mgr/test_module_selftest.py | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py new file mode 100644 index 00000000..3c36a6eb --- /dev/null +++ b/qa/tasks/mgr/test_module_selftest.py @@ -0,0 +1,335 @@ + +import time +import requests +import errno +import logging +from teuthology.exceptions import CommandFailedError + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestModuleSelftest(MgrTestCase): + """ + That modules with a self-test command can be loaded and execute it + without errors. + + This is not a substitute for really testing the modules, but it + is quick and is designed to catch regressions that could occur + if data structures change in a way that breaks how the modules + touch them. + """ + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestModuleSelftest, self).setUp() + self.setup_mgrs() + + def _selftest_plugin(self, module_name): + self._load_module("selftest") + self._load_module(module_name) + + # Execute the module's self_test() method + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "module", module_name) + + def test_zabbix(self): + # Set these mandatory config fields so that the zabbix module + # won't trigger health/log errors on load/serve. + self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost") + self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo") + self._selftest_plugin("zabbix") + + def test_prometheus(self): + self._assign_ports("prometheus", "server_port", min_port=8100) + self._selftest_plugin("prometheus") + + def test_influx(self): + self._selftest_plugin("influx") + + def test_diskprediction_local(self): + self._selftest_plugin("diskprediction_local") + + # Not included in qa/packages/packages.yaml + #def test_diskprediction_cloud(self): + # self._selftest_plugin("diskprediction_cloud") + + def test_telegraf(self): + self._selftest_plugin("telegraf") + + def test_iostat(self): + self._selftest_plugin("iostat") + + def test_devicehealth(self): + self._selftest_plugin("devicehealth") + # Clean up the pool that the module creates, because otherwise + # it's low PG count causes test failures. + pool_name = "device_health_metrics" + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "osd", "pool", "delete", pool_name, pool_name, + "--yes-i-really-really-mean-it") + + def test_selftest_run(self): + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run") + + def test_telemetry(self): + self._selftest_plugin("telemetry") + + def test_crash(self): + self._selftest_plugin("crash") + + def test_orchestrator_cli(self): + self._selftest_plugin("orchestrator_cli") + + + def test_selftest_config_update(self): + """ + That configuration updates are seen by running mgr modules + """ + self._load_module("selftest") + + def get_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey").strip() + + self.assertEqual(get_value(), "None") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/testkey", "foo") + self.wait_until_equal(get_value, "foo", timeout=10) + + def get_localized_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get_localized", "testkey").strip() + + self.assertEqual(get_localized_value(), "foo") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/{}/testkey".format( + self.mgr_cluster.get_active_id()), + "bar") + self.wait_until_equal(get_localized_value, "bar", timeout=10) + + def test_selftest_config_upgrade(self): + """ + That pre-mimic config-key config settings are migrated into + mimic-style config settings and visible from mgr modules. + """ + self._load_module("selftest") + + def get_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey").strip() + + def get_config(): + lines = self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "dump")\ + .strip().split("\n") + result = [] + for line in lines[1:]: + tokens = line.strip().split() + log.info("tokens: {0}".format(tokens)) + subsys, key, value = tokens[0], tokens[2], tokens[3] + result.append((subsys, key, value)) + + return result + + # Stop ceph-mgr while we synthetically create a pre-mimic + # configuration scenario + for mgr_id in self.mgr_cluster.mgr_daemons.keys(): + self.mgr_cluster.mgr_stop(mgr_id) + self.mgr_cluster.mgr_fail(mgr_id) + + # Blow away any modern-style mgr module config options + # (the ceph-mgr implementation may only do the upgrade if + # it doesn't see new style options) + stash = [] + for subsys, key, value in get_config(): + if subsys == "mgr" and key.startswith("mgr/"): + log.info("Removing config key {0} ahead of upgrade".format( + key)) + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "rm", subsys, key) + stash.append((subsys, key, value)) + + # Inject an old-style configuration setting in config-key + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config-key", "set", "mgr/selftest/testkey", "testvalue") + + # Inject configuration settings that looks data-ish and should + # not be migrated to a config key + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config-key", "set", "mgr/selftest/testnewline", "foo\nbar") + + # Inject configuration setting that does not appear in the + # module's config schema + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config-key", "set", "mgr/selftest/kvitem", "foo\nbar") + + # Bring mgr daemons back online, the one that goes active + # should be doing the upgrade. + for mgr_id in self.mgr_cluster.mgr_daemons.keys(): + self.mgr_cluster.mgr_restart(mgr_id) + + # Wait for a new active + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() != "", timeout=30) + + # Check that the selftest module sees the upgraded value + self.assertEqual(get_value(), "testvalue") + + # Check that the upgraded value is visible in the configuration + seen_keys = [k for s,k,v in get_config()] + self.assertIn("mgr/selftest/testkey", seen_keys) + + # ...and that the non-config-looking one isn't + self.assertNotIn("mgr/selftest/testnewline", seen_keys) + + # ...and that the not-in-schema one isn't + self.assertNotIn("mgr/selftest/kvitem", seen_keys) + + # Restore previous configuration + for subsys, key, value in stash: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", subsys, key, value) + + def test_selftest_command_spam(self): + # Use the selftest module to stress the mgr daemon + self._load_module("selftest") + + # Use the dashboard to test that the mgr is still able to do its job + self._assign_ports("dashboard", "ssl_server_port") + self._load_module("dashboard") + self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", + "create-self-signed-cert") + + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "start", + "command_spam") + + dashboard_uri = self._get_uri("dashboard") + + delay = 10 + periods = 10 + for i in range(0, periods): + t1 = time.time() + # Check that an HTTP module remains responsive + r = requests.get(dashboard_uri, verify=False) + self.assertEqual(r.status_code, 200) + + # Check that a native non-module command remains responsive + self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df") + + time.sleep(delay - (time.time() - t1)) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "stop") + + # Check that all mgr daemons are still running + self.assertEqual(original_active, self.mgr_cluster.get_active_id()) + self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids()) + + def test_module_commands(self): + """ + That module-handled commands have appropriate behavior on + disabled/failed/recently-enabled modules. + """ + + # Calling a command on a disabled module should return the proper + # error code. + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) + + # Calling a command that really doesn't exist should give me EINVAL. + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "osd", "albatross") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL) + + # Enabling a module and then immediately using ones of its commands + # should work (#21683) + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey") + + # Calling a command for a failed module should return the proper + # error code. + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "background", "start", "throw_exception") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run" + ) + self.assertEqual(exc_raised.exception.exitstatus, errno.EIO) + + # A health alert should be raised for a module that has thrown + # an exception from its serve() method + self.wait_for_health( + "Module 'selftest' has failed: Synthetic exception in serve", + timeout=30) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + + self.wait_for_health_clear(timeout=30) + + def test_module_remote(self): + """ + Use the selftest module to exercise inter-module communication + """ + self._load_module("selftest") + # The "self-test remote" operation just happens to call into + # influx. + self._load_module("influx") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "remote") + + def test_selftest_cluster_log(self): + """ + Use the selftest module to test the cluster/audit log interface. + """ + priority_map = { + "info": "INF", + "security": "SEC", + "warning": "WRN", + "error": "ERR" + } + self._load_module("selftest") + for priority in priority_map.keys(): + message = "foo bar {}".format(priority) + log_message = "[{}] {}".format(priority_map[priority], message) + # Check for cluster/audit logs: + # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info + # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security + # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning + # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error + with self.assert_cluster_log(log_message): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "cluster", + priority, message) + with self.assert_cluster_log(log_message, watch_channel="audit"): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "audit", + priority, message) + + def test_selftest_cluster_log_unknown_channel(self): + """ + Use the selftest module to test the cluster/audit log interface. + """ + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "xyz", + "ERR", "The channel does not exist") + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) |