From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:45:59 +0200
Subject: Adding upstream version 16.2.11+ds.

Signed-off-by: Daniel Baumann
---
 src/pybind/mgr/telemetry/__init__.py |   1 +
 src/pybind/mgr/telemetry/module.py   | 877 +++++++++++++++++++++++++++++++++++
 2 files changed, 878 insertions(+)
 create mode 100644 src/pybind/mgr/telemetry/__init__.py
 create mode 100644 src/pybind/mgr/telemetry/module.py

diff --git a/src/pybind/mgr/telemetry/__init__.py b/src/pybind/mgr/telemetry/__init__.py
new file mode 100644
index 000000000..8f210ac92
--- /dev/null
+++ b/src/pybind/mgr/telemetry/__init__.py
@@ -0,0 +1 @@
+from .module import Module
diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py
new file mode 100644
index 000000000..953e5c5f7
--- /dev/null
+++ b/src/pybind/mgr/telemetry/module.py
@@ -0,0 +1,877 @@
+"""
+Telemetry module for ceph-mgr
+
+Collect statistics from the Ceph cluster and send them back to the Ceph
+project when the user has opted in
+"""
+import errno
+import hashlib
+import json
+import rbd
+import re
+import requests
+import uuid
+import time
+from datetime import datetime, timedelta
+from threading import Event
+from collections import defaultdict
+
+from mgr_module import MgrModule
+
+
+ALL_CHANNELS = ['basic', 'ident', 'crash', 'device']
+
+LICENSE='sharing-1-0'
+LICENSE_NAME='Community Data License Agreement - Sharing - Version 1.0'
+LICENSE_URL='https://cdla.io/sharing-1-0/'
+
+# If the telemetry revision has changed since this point, re-require
+# an opt-in. This should happen each time we add new information to
+# the telemetry report.
+LAST_REVISION_RE_OPT_IN = 2
+
+# Latest revision of the telemetry report. Bump this each time we make
+# *any* change.
+REVISION = 3
+
+# History of revisions
+# --------------------
+#
+# Version 1:
+#   Mimic and/or nautilus are lumped together here, since
+#   we didn't track revisions yet.
+#
+# Version 2:
+#   - added revision tracking, nagging, etc.
+# - added config option changes +# - added channels +# - added explicit license acknowledgement to the opt-in process +# +# Version 3: +# - added device health metrics (i.e., SMART data, minus serial number) +# - remove crush_rule +# - added CephFS metadata (how many MDSs, fs features, how many data pools, +# how much metadata is cached, rfiles, rbytes, rsnapshots) +# - added more pool metadata (rep vs ec, cache tiering mode, ec profile) +# - added host count, and counts for hosts with each of (mon, osd, mds, mgr) +# - whether an OSD cluster network is in use +# - rbd pool and image count, and rbd mirror mode (pool-level) +# - rgw daemons, zones, zonegroups; which rgw frontends +# - crush map stats + +class Module(MgrModule): + config = dict() + + metadata_keys = [ + "arch", + "ceph_version", + "os", + "cpu", + "kernel_description", + "kernel_version", + "distro_description", + "distro" + ] + + MODULE_OPTIONS = [ + { + 'name': 'url', + 'type': 'str', + 'default': 'https://telemetry.ceph.com/report' + }, + { + 'name': 'device_url', + 'type': 'str', + 'default': 'https://telemetry.ceph.com/device' + }, + { + 'name': 'enabled', + 'type': 'bool', + 'default': False + }, + { + 'name': 'last_opt_revision', + 'type': 'int', + 'default': 1, + }, + { + 'name': 'leaderboard', + 'type': 'bool', + 'default': False + }, + { + 'name': 'description', + 'type': 'str', + 'default': None + }, + { + 'name': 'contact', + 'type': 'str', + 'default': None + }, + { + 'name': 'organization', + 'type': 'str', + 'default': None + }, + { + 'name': 'proxy', + 'type': 'str', + 'default': None + }, + { + 'name': 'interval', + 'type': 'int', + 'default': 24, + 'min': 8 + }, + { + 'name': 'channel_basic', + 'type': 'bool', + 'default': True, + 'desc': 'Share basic cluster information (size, version)', + }, + { + 'name': 'channel_ident', + 'type': 'bool', + 'default': False, + 'description': 'Share a user-provided description and/or contact email for the cluster', + }, + { + 'name': 'channel_crash', + 'type': 'bool', + 'default': True, + 'description': 'Share metadata about Ceph daemon crashes (version, stack straces, etc)', + }, + { + 'name': 'channel_device', + 'type': 'bool', + 'default': True, + 'description': 'Share device health metrics (e.g., SMART data, minus potentially identifying info like serial numbers)', + }, + ] + + COMMANDS = [ + { + "cmd": "telemetry status", + "desc": "Show current configuration", + "perm": "r" + }, + { + "cmd": "telemetry send " + "name=endpoint,type=CephChoices,strings=ceph|device,n=N,req=false " + "name=license,type=CephString,req=false", + "desc": "Force sending data to Ceph telemetry", + "perm": "rw" + }, + { + "cmd": "telemetry show " + "name=channels,type=CephString,n=N,req=False", + "desc": "Show last report or report to be sent", + "perm": "r" + }, + { + "cmd": "telemetry show-device", + "desc": "Show last device report or device report to be sent", + "perm": "r" + }, + { + "cmd": "telemetry show-all", + "desc": "Show report of all channels", + "perm": "r" + }, + { + "cmd": "telemetry on name=license,type=CephString,req=false", + "desc": "Enable telemetry reports from this cluster", + "perm": "rw", + }, + { + "cmd": "telemetry off", + "desc": "Disable telemetry reports from this cluster", + "perm": "rw", + }, + ] + + @property + def config_keys(self): + return dict((o['name'], o.get('default', None)) for o in self.MODULE_OPTIONS) + + def __init__(self, *args, **kwargs): + super(Module, self).__init__(*args, **kwargs) + self.event = Event() + self.run = False + self.last_upload 
= None + self.last_report = dict() + self.report_id = None + self.salt = None + self.config_update_module_option() + + def config_update_module_option(self): + for opt in self.MODULE_OPTIONS: + setattr(self, + opt['name'], + self.get_module_option(opt['name'])) + self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name'])) + + def config_notify(self): + self.config_update_module_option() + # wake up serve() thread + self.event.set() + + def load(self): + self.last_upload = self.get_store('last_upload', None) + if self.last_upload is not None: + self.last_upload = int(self.last_upload) + + self.report_id = self.get_store('report_id', None) + if self.report_id is None: + self.report_id = str(uuid.uuid4()) + self.set_store('report_id', self.report_id) + + self.salt = self.get_store('salt', None) + if not self.salt: + self.salt = str(uuid.uuid4()) + self.set_store('salt', self.salt) + + def gather_osd_metadata(self, osd_map): + keys = ["osd_objectstore", "rotational"] + keys += self.metadata_keys + + metadata = dict() + for key in keys: + metadata[key] = defaultdict(int) + + for osd in osd_map['osds']: + res = self.get_metadata('osd', str(osd['osd'])).items() + if res is None: + self.log.debug('Could not get metadata for osd.%s' % str(osd['osd'])) + continue + for k, v in res: + if k not in keys: + continue + + metadata[k][v] += 1 + + return metadata + + def gather_mon_metadata(self, mon_map): + keys = list() + keys += self.metadata_keys + + metadata = dict() + for key in keys: + metadata[key] = defaultdict(int) + + for mon in mon_map['mons']: + res = self.get_metadata('mon', mon['name']).items() + if res is None: + self.log.debug('Could not get metadata for mon.%s' % (mon['name'])) + continue + for k, v in res: + if k not in keys: + continue + + metadata[k][v] += 1 + + return metadata + + def gather_crush_info(self): + osdmap = self.get_osdmap() + crush_raw = osdmap.get_crush() + crush = crush_raw.dump() + + def inc(d, k): + if k in d: + d[k] += 1 + else: + d[k] = 1 + + device_classes = {} + for dev in crush['devices']: + inc(device_classes, dev.get('class', '')) + + bucket_algs = {} + bucket_types = {} + bucket_sizes = {} + for bucket in crush['buckets']: + if '~' in bucket['name']: # ignore shadow buckets + continue + inc(bucket_algs, bucket['alg']) + inc(bucket_types, bucket['type_id']) + inc(bucket_sizes, len(bucket['items'])) + + return { + 'num_devices': len(crush['devices']), + 'num_types': len(crush['types']), + 'num_buckets': len(crush['buckets']), + 'num_rules': len(crush['rules']), + 'device_classes': list(device_classes.values()), + 'tunables': crush['tunables'], + 'compat_weight_set': '-1' in crush['choose_args'], + 'num_weight_sets': len(crush['choose_args']), + 'bucket_algs': bucket_algs, + 'bucket_sizes': bucket_sizes, + 'bucket_types': bucket_types, + } + + def gather_configs(self): + # cluster config options + cluster = set() + r, outb, outs = self.mon_command({ + 'prefix': 'config dump', + 'format': 'json' + }); + if r != 0: + return {} + try: + dump = json.loads(outb) + except json.decoder.JSONDecodeError: + return {} + for opt in dump: + name = opt.get('name') + if name: + cluster.add(name) + # daemon-reported options (which may include ceph.conf) + active = set() + ls = self.get("modified_config_options"); + for opt in ls.get('options', {}): + active.add(opt) + return { + 'cluster_changed': sorted(list(cluster)), + 'active_changed': sorted(list(active)), + } + + def gather_crashinfo(self): + crashlist = list() + errno, crashids, err = self.remote('crash', 'ls') + 
if errno: + return '' + for crashid in crashids.split(): + cmd = {'id': crashid} + errno, crashinfo, err = self.remote('crash', 'do_info', cmd, '') + if errno: + continue + c = json.loads(crashinfo) + del c['utsname_hostname'] + # entity_name might have more than one '.', beware + (etype, eid) = c.get('entity_name', '').split('.', 1) + m = hashlib.sha1() + m.update(self.salt.encode('utf-8')) + m.update(eid.encode('utf-8')) + m.update(self.salt.encode('utf-8')) + c['entity_name'] = etype + '.' + m.hexdigest() + crashlist.append(c) + return crashlist + + def get_active_channels(self): + r = [] + if self.channel_basic: + r.append('basic') + if self.channel_crash: + r.append('crash') + if self.channel_device: + r.append('device') + if self.channel_ident: + r.append('ident') + return r + + def gather_device_report(self): + try: + time_format = self.remote('devicehealth', 'get_time_format') + except: + return None + cutoff = datetime.utcnow() - timedelta(hours=self.interval * 2) + min_sample = cutoff.strftime(time_format) + + devices = self.get('devices')['devices'] + + res = {} # anon-host-id -> anon-devid -> { timestamp -> record } + for d in devices: + devid = d['devid'] + try: + # this is a map of stamp -> {device info} + m = self.remote('devicehealth', 'get_recent_device_metrics', + devid, min_sample) + except: + continue + + # anonymize host id + try: + host = d['location'][0]['host'] + except: + continue + anon_host = self.get_store('host-id/%s' % host) + if not anon_host: + anon_host = str(uuid.uuid1()) + self.set_store('host-id/%s' % host, anon_host) + serial = None + for dev, rep in m.items(): + rep['host_id'] = anon_host + if serial is None and 'serial_number' in rep: + serial = rep['serial_number'] + + # anonymize device id + anon_devid = self.get_store('devid-id/%s' % devid) + if not anon_devid: + # ideally devid is 'vendor_model_serial', + # but can also be 'model_serial', 'serial' + if '_' in devid: + anon_devid = f"{devid.rsplit('_', 1)[0]}_{uuid.uuid1()}" + else: + anon_devid = str(uuid.uuid1()) + self.set_store('devid-id/%s' % devid, anon_devid) + self.log.info('devid %s / %s, host %s / %s' % (devid, anon_devid, + host, anon_host)) + + # anonymize the smartctl report itself + if serial: + m_str = json.dumps(m) + m = json.loads(m_str.replace(serial, 'deleted')) + + if anon_host not in res: + res[anon_host] = {} + res[anon_host][anon_devid] = m + return res + + def get_latest(self, daemon_type, daemon_name, stat): + data = self.get_counter(daemon_type, daemon_name, stat)[stat] + #self.log.error("get_latest {0} data={1}".format(stat, data)) + if data: + return data[-1][1] + else: + return 0 + + def compile_report(self, channels=[]): + if not channels: + channels = self.get_active_channels() + report = { + 'leaderboard': self.leaderboard, + 'report_version': 1, + 'report_timestamp': datetime.utcnow().isoformat(), + 'report_id': self.report_id, + 'channels': channels, + 'channels_available': ALL_CHANNELS, + 'license': LICENSE, + } + + if 'ident' in channels: + for option in ['description', 'contact', 'organization']: + report[option] = getattr(self, option) + + if 'basic' in channels: + mon_map = self.get('mon_map') + osd_map = self.get('osd_map') + service_map = self.get('service_map') + fs_map = self.get('fs_map') + df = self.get('df') + + report['created'] = mon_map['created'] + + # mons + v1_mons = 0 + v2_mons = 0 + ipv4_mons = 0 + ipv6_mons = 0 + for mon in mon_map['mons']: + for a in mon['public_addrs']['addrvec']: + if a['type'] == 'v2': + v2_mons += 1 + elif a['type'] == 
'v1': + v1_mons += 1 + if a['addr'].startswith('['): + ipv6_mons += 1 + else: + ipv4_mons += 1 + report['mon'] = { + 'count': len(mon_map['mons']), + 'features': mon_map['features'], + 'min_mon_release': mon_map['min_mon_release'], + 'v1_addr_mons': v1_mons, + 'v2_addr_mons': v2_mons, + 'ipv4_addr_mons': ipv4_mons, + 'ipv6_addr_mons': ipv6_mons, + } + + report['config'] = self.gather_configs() + + # pools + report['rbd'] = { + 'num_pools': 0, + 'num_images_by_pool': [], + 'mirroring_by_pool': [], + } + num_pg = 0 + report['pools'] = list() + for pool in osd_map['pools']: + num_pg += pool['pg_num'] + ec_profile = {} + if pool['erasure_code_profile']: + orig = osd_map['erasure_code_profiles'].get( + pool['erasure_code_profile'], {}) + ec_profile = { + k: orig[k] for k in orig.keys() + if k in ['k', 'm', 'plugin', 'technique', + 'crush-failure-domain', 'l'] + } + report['pools'].append( + { + 'pool': pool['pool'], + 'type': pool['type'], + 'pg_num': pool['pg_num'], + 'pgp_num': pool['pg_placement_num'], + 'size': pool['size'], + 'min_size': pool['min_size'], + 'pg_autoscale_mode': pool['pg_autoscale_mode'], + 'target_max_bytes': pool['target_max_bytes'], + 'target_max_objects': pool['target_max_objects'], + 'type': ['', 'replicated', '', 'erasure'][pool['type']], + 'erasure_code_profile': ec_profile, + 'cache_mode': pool['cache_mode'], + } + ) + if 'rbd' in pool['application_metadata']: + report['rbd']['num_pools'] += 1 + ioctx = self.rados.open_ioctx(pool['pool_name']) + report['rbd']['num_images_by_pool'].append( + sum(1 for _ in rbd.RBD().list2(ioctx))) + report['rbd']['mirroring_by_pool'].append( + rbd.RBD().mirror_mode_get(ioctx) != rbd.RBD_MIRROR_MODE_DISABLED) + + # osds + cluster_network = False + for osd in osd_map['osds']: + if osd['up'] and not cluster_network: + front_ip = osd['public_addrs']['addrvec'][0]['addr'].split(':')[0] + back_ip = osd['cluster_addrs']['addrvec'][0]['addr'].split(':')[0] + if front_ip != back_ip: + cluster_network = True + report['osd'] = { + 'count': len(osd_map['osds']), + 'require_osd_release': osd_map['require_osd_release'], + 'require_min_compat_client': osd_map['require_min_compat_client'], + 'cluster_network': cluster_network, + } + + # crush + report['crush'] = self.gather_crush_info() + + # cephfs + report['fs'] = { + 'count': len(fs_map['filesystems']), + 'feature_flags': fs_map['feature_flags'], + 'num_standby_mds': len(fs_map['standbys']), + 'filesystems': [], + } + num_mds = len(fs_map['standbys']) + for fsm in fs_map['filesystems']: + fs = fsm['mdsmap'] + num_sessions = 0 + cached_ino = 0 + cached_dn = 0 + cached_cap = 0 + subtrees = 0 + rfiles = 0 + rbytes = 0 + rsnaps = 0 + for gid, mds in fs['info'].items(): + num_sessions += self.get_latest('mds', mds['name'], + 'mds_sessions.session_count') + cached_ino += self.get_latest('mds', mds['name'], + 'mds_mem.ino') + cached_dn += self.get_latest('mds', mds['name'], + 'mds_mem.dn') + cached_cap += self.get_latest('mds', mds['name'], + 'mds_mem.cap') + subtrees += self.get_latest('mds', mds['name'], + 'mds.subtrees') + if mds['rank'] == 0: + rfiles = self.get_latest('mds', mds['name'], + 'mds.root_rfiles') + rbytes = self.get_latest('mds', mds['name'], + 'mds.root_rbytes') + rsnaps = self.get_latest('mds', mds['name'], + 'mds.root_rsnaps') + report['fs']['filesystems'].append({ + 'max_mds': fs['max_mds'], + 'ever_allowed_features': fs['ever_allowed_features'], + 'explicitly_allowed_features': fs['explicitly_allowed_features'], + 'num_in': len(fs['in']), + 'num_up': len(fs['up']), + 
'num_standby_replay': len( + [mds for gid, mds in fs['info'].items() + if mds['state'] == 'up:standby-replay']), + 'num_mds': len(fs['info']), + 'num_sessions': num_sessions, + 'cached_inos': cached_ino, + 'cached_dns': cached_dn, + 'cached_caps': cached_cap, + 'cached_subtrees': subtrees, + 'balancer_enabled': len(fs['balancer']) > 0, + 'num_data_pools': len(fs['data_pools']), + 'standby_count_wanted': fs['standby_count_wanted'], + 'approx_ctime': fs['created'][0:7], + 'files': rfiles, + 'bytes': rbytes, + 'snaps': rsnaps, + }) + num_mds += len(fs['info']) + report['fs']['total_num_mds'] = num_mds + + # daemons + report['metadata'] = dict() + report['metadata']['osd'] = self.gather_osd_metadata(osd_map) + report['metadata']['mon'] = self.gather_mon_metadata(mon_map) + + # host counts + servers = self.list_servers() + self.log.debug('servers %s' % servers) + report['hosts'] = { + 'num': len([h for h in servers if h['hostname']]), + } + for t in ['mon', 'mds', 'osd', 'mgr']: + report['hosts']['num_with_' + t] = len( + [h for h in servers + if len([s for s in h['services'] if s['type'] == t])] + ) + + report['usage'] = { + 'pools': len(df['pools']), + 'pg_num': num_pg, + 'total_used_bytes': df['stats']['total_used_bytes'], + 'total_bytes': df['stats']['total_bytes'], + 'total_avail_bytes': df['stats']['total_avail_bytes'] + } + + report['services'] = defaultdict(int) + for key, value in service_map['services'].items(): + report['services'][key] += 1 + if key == 'rgw': + report['rgw'] = { + 'count': 0, + } + zones = set() + realms = set() + zonegroups = set() + frontends = set() + d = value.get('daemons', dict()) + + for k,v in d.items(): + if k == 'summary' and v: + report['rgw'][k] = v + elif isinstance(v, dict) and 'metadata' in v: + report['rgw']['count'] += 1 + zones.add(v['metadata']['zone_id']) + zonegroups.add(v['metadata']['zonegroup_id']) + frontends.add(v['metadata']['frontend_type#0']) + + # we could actually iterate over all the keys of + # the dict and check for how many frontends there + # are, but it is unlikely that one would be running + # more than 2 supported ones + f2 = v['metadata'].get('frontend_type#1', None) + if f2: + frontends.add(f2) + + report['rgw']['zones'] = len(zones) + report['rgw']['zonegroups'] = len(zonegroups) + report['rgw']['frontends'] = list(frontends) # sets aren't json-serializable + + try: + report['balancer'] = self.remote('balancer', 'gather_telemetry') + except ImportError: + report['balancer'] = { + 'active': False + } + + if 'crash' in channels: + report['crashes'] = self.gather_crashinfo() + + # NOTE: We do not include the 'device' channel in this report; it is + # sent to a different endpoint. 
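+
+        # Illustrative sketch only: with just the 'basic' channel enabled,
+        # the dict returned below has roughly this shape. All values shown
+        # here are hypothetical; the real keys are filled in above from the
+        # mon/osd/fs/service maps.
+        #
+        #   {
+        #       "report_version": 1,
+        #       "report_id": "d6dcd487-...",   # stable random UUID per cluster
+        #       "channels": ["basic"],
+        #       "channels_available": ["basic", "ident", "crash", "device"],
+        #       "license": "sharing-1-0",
+        #       "created": "...",              # from mon_map['created']
+        #       "mon": {"count": 3, ...},
+        #       "osd": {"count": 12, "cluster_network": False, ...},
+        #       "pools": [...], "usage": {...}, "crush": {...},
+        #       "fs": {...}, "metadata": {...}, "hosts": {...},
+        #       "services": {...}, "balancer": {...},
+        #   }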
+ + return report + + def _try_post(self, what, url, report): + self.log.info('Sending %s to: %s' % (what, url)) + proxies = dict() + if self.proxy: + self.log.info('Send using HTTP(S) proxy: %s', self.proxy) + proxies['http'] = self.proxy + proxies['https'] = self.proxy + try: + resp = requests.put(url=url, json=report, proxies=proxies) + resp.raise_for_status() + except Exception as e: + fail_reason = 'Failed to send %s to %s: %s' % (what, url, str(e)) + self.log.error(fail_reason) + return fail_reason + return None + + def send(self, report, endpoint=None): + if not endpoint: + endpoint = ['ceph', 'device'] + failed = [] + success = [] + self.log.debug('Send endpoints %s' % endpoint) + for e in endpoint: + if e == 'ceph': + fail_reason = self._try_post('ceph report', self.url, report) + if fail_reason: + failed.append(fail_reason) + else: + now = int(time.time()) + self.last_upload = now + self.set_store('last_upload', str(now)) + success.append('Ceph report sent to {0}'.format(self.url)) + self.log.info('Sent report to {0}'.format(self.url)) + elif e == 'device': + if 'device' in self.get_active_channels(): + devices = self.gather_device_report() + num_devs = 0 + num_hosts = 0 + for host, ls in devices.items(): + self.log.debug('host %s devices %s' % (host, ls)) + if not len(ls): + continue + fail_reason = self._try_post('devices', self.device_url, + ls) + if fail_reason: + failed.append(fail_reason) + else: + num_devs += len(ls) + num_hosts += 1 + if num_devs: + success.append('Reported %d devices across %d hosts' % ( + num_devs, len(devices))) + if failed: + return 1, '', '\n'.join(success + failed) + return 0, '', '\n'.join(success) + + def handle_command(self, inbuf, command): + if command['prefix'] == 'telemetry status': + r = {} + for opt in self.MODULE_OPTIONS: + r[opt['name']] = getattr(self, opt['name']) + r['last_upload'] = time.ctime(self.last_upload) if self.last_upload else self.last_upload + return 0, json.dumps(r, indent=4, sort_keys=True), '' + elif command['prefix'] == 'telemetry on': + if command.get('license') != LICENSE: + return -errno.EPERM, '', "Telemetry data is licensed under the " + LICENSE_NAME + " (" + LICENSE_URL + ").\nTo enable, add '--license " + LICENSE + "' to the 'ceph telemetry on' command." + self.on() + return 0, '', '' + elif command['prefix'] == 'telemetry off': + self.off() + return 0, '', '' + elif command['prefix'] == 'telemetry send': + if self.last_opt_revision < LAST_REVISION_RE_OPT_IN and command.get('license') != LICENSE: + self.log.debug('A telemetry send attempt while opted-out. Asking for license agreement') + return -errno.EPERM, '', "Telemetry data is licensed under the " + LICENSE_NAME + " (" + LICENSE_URL + ").\nTo manually send telemetry data, add '--license " + LICENSE + "' to the 'ceph telemetry send' command.\nPlease consider enabling the telemetry module with 'ceph telemetry on'." + self.last_report = self.compile_report() + return self.send(self.last_report, command.get('endpoint')) + + elif command['prefix'] == 'telemetry show': + report = self.get_report(channels=command.get('channels', None)) + report = json.dumps(report, indent=4, sort_keys=True) + if self.channel_device: + report += '\n \nDevice report is generated separately. To see it run \'ceph telemetry show-device\'.' 
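+            # Example CLI invocations handled by this method (a sketch; the
+            # exact output depends on the cluster, and optional-argument
+            # syntax may vary slightly by release):
+            #
+            #   ceph telemetry on --license sharing-1-0
+            #   ceph telemetry status
+            #   ceph telemetry show basic crash      # optional channel list
+            #   ceph telemetry show-device
+            #   ceph telemetry send --license sharing-1-0
+            #   ceph telemetry off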
+ return 0, report, '' + elif command['prefix'] == 'telemetry show-device': + return 0, json.dumps(self.get_report('device'), indent=4, sort_keys=True), '' + elif command['prefix'] == 'telemetry show-all': + return 0, json.dumps(self.get_report('all'), indent=4, sort_keys=True), '' + else: + return (-errno.EINVAL, '', + "Command not found '{0}'".format(command['prefix'])) + + def on(self): + self.set_module_option('enabled', True) + self.set_module_option('last_opt_revision', REVISION) + + # wake up serve() to reset health warning + self.event.set() + + def off(self): + self.set_module_option('enabled', False) + self.set_module_option('last_opt_revision', 1) + + def get_report(self, report_type='default', channels=None): + if report_type == 'default': + return self.compile_report(channels=channels) + elif report_type == 'device': + return self.gather_device_report() + elif report_type == 'all': + return {'report': self.compile_report(channels=channels), + 'device_report': self.gather_device_report()} + return {} + + def self_test(self): + report = self.compile_report() + if len(report) == 0: + raise RuntimeError('Report is empty') + + if 'report_id' not in report: + raise RuntimeError('report_id not found in report') + + def shutdown(self): + self.run = False + self.event.set() + + def refresh_health_checks(self): + health_checks = {} + if self.enabled and self.last_opt_revision < LAST_REVISION_RE_OPT_IN: + health_checks['TELEMETRY_CHANGED'] = { + 'severity': 'warning', + 'summary': 'Telemetry requires re-opt-in', + 'detail': [ + 'telemetry report includes new information; must re-opt-in (or out)' + ] + } + self.set_health_checks(health_checks) + + def serve(self): + self.load() + self.run = True + + self.log.debug('Waiting for mgr to warm up') + time.sleep(10) + + while self.run: + self.event.clear() + + self.refresh_health_checks() + + if self.last_opt_revision < LAST_REVISION_RE_OPT_IN: + self.log.debug('Not sending report until user re-opts-in') + self.event.wait(1800) + continue + if not self.enabled: + self.log.debug('Not sending report until configured to do so') + self.event.wait(1800) + continue + + now = int(time.time()) + if not self.last_upload or (now - self.last_upload) > \ + self.interval * 3600: + self.log.info('Compiling and sending report to %s', + self.url) + + try: + self.last_report = self.compile_report() + except: + self.log.exception('Exception while compiling report:') + + self.send(self.last_report) + else: + self.log.debug('Interval for sending new report has not expired') + + sleep = 3600 + self.log.debug('Sleeping for %d seconds', sleep) + self.event.wait(sleep) + + def self_test(self): + self.compile_report() + return True + + @staticmethod + def can_run(): + return True, '' -- cgit v1.2.3