author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000
commit    | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree      | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/glean-core/src
parent    | Initial commit. (diff)
download  | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
          | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/glean-core/src')
76 files changed, 20203 insertions, 0 deletions
diff --git a/third_party/rust/glean-core/src/common_metric_data.rs b/third_party/rust/glean-core/src/common_metric_data.rs
new file mode 100644
index 0000000000..033cbe1472
--- /dev/null
+++ b/third_party/rust/glean-core/src/common_metric_data.rs
@@ -0,0 +1,149 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::convert::TryFrom;
+use std::sync::atomic::{AtomicU8, Ordering};
+
+use crate::error::{Error, ErrorKind};
+use crate::metrics::labeled::validate_dynamic_label;
+use crate::Glean;
+use serde::{Deserialize, Serialize};
+
+/// The supported metrics' lifetimes.
+///
+/// A metric's lifetime determines when its stored data gets reset.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)]
+#[repr(i32)] // Use i32 to be compatible with our JNA definition
+#[serde(rename_all = "lowercase")]
+pub enum Lifetime {
+    /// The metric is reset with each sent ping
+    #[default]
+    Ping,
+    /// The metric is reset on application restart
+    Application,
+    /// The metric is reset with each user profile
+    User,
+}
+
+impl Lifetime {
+    /// String representation of the lifetime.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Lifetime::Ping => "ping",
+            Lifetime::Application => "app",
+            Lifetime::User => "user",
+        }
+    }
+}
+
+impl TryFrom<i32> for Lifetime {
+    type Error = Error;
+
+    fn try_from(value: i32) -> Result<Lifetime, Self::Error> {
+        match value {
+            0 => Ok(Lifetime::Ping),
+            1 => Ok(Lifetime::Application),
+            2 => Ok(Lifetime::User),
+            e => Err(ErrorKind::Lifetime(e).into()),
+        }
+    }
+}
+
+/// The common set of data shared across all different metric types.
+#[derive(Default, Debug, Clone, Deserialize, Serialize)]
+pub struct CommonMetricData {
+    /// The metric's name.
+    pub name: String,
+    /// The metric's category.
+    pub category: String,
+    /// List of ping names to include this metric in.
+    pub send_in_pings: Vec<String>,
+    /// The metric's lifetime.
+    pub lifetime: Lifetime,
+    /// Whether or not the metric is disabled.
+    ///
+    /// Disabled metrics are never recorded.
+    pub disabled: bool,
+    /// Dynamic label.
+    ///
+    /// When a [`LabeledMetric<T>`](crate::metrics::LabeledMetric) factory creates the specific
+    /// metric to be recorded to, dynamic labels are stored in the specific
+    /// label so that we can validate them when the Glean singleton is
+    /// available.
+    pub dynamic_label: Option<String>,
+}
+
+#[derive(Default, Debug)]
+pub struct CommonMetricDataInternal {
+    pub inner: CommonMetricData,
+    pub disabled: AtomicU8,
+}
+
+impl Clone for CommonMetricDataInternal {
+    fn clone(&self) -> Self {
+        Self {
+            inner: self.inner.clone(),
+            disabled: AtomicU8::new(self.disabled.load(Ordering::Relaxed)),
+        }
+    }
+}
+
+impl From<CommonMetricData> for CommonMetricDataInternal {
+    fn from(input_data: CommonMetricData) -> Self {
+        Self {
+            inner: input_data.clone(),
+            disabled: AtomicU8::new(u8::from(input_data.disabled)),
+        }
+    }
+}
+
+impl CommonMetricDataInternal {
+    /// Creates a new metadata object.
+    pub fn new<A: Into<String>, B: Into<String>, C: Into<String>>(
+        category: A,
+        name: B,
+        ping_name: C,
+    ) -> CommonMetricDataInternal {
+        CommonMetricDataInternal {
+            inner: CommonMetricData {
+                name: name.into(),
+                category: category.into(),
+                send_in_pings: vec![ping_name.into()],
+                ..Default::default()
+            },
+            disabled: AtomicU8::new(0),
+        }
+    }
+
+    /// The metric's base identifier, including the category and name, but not the label.
+    ///
+    /// If `category` is empty, it's omitted.
+    /// Otherwise, it's the combination of the metric's `category` and `name`.
+    pub(crate) fn base_identifier(&self) -> String {
+        if self.inner.category.is_empty() {
+            self.inner.name.clone()
+        } else {
+            format!("{}.{}", self.inner.category, self.inner.name)
+        }
+    }
+
+    /// The metric's unique identifier, including the category, name and label.
+    ///
+    /// If `category` is empty, it's omitted.
+    /// Otherwise, it's the combination of the metric's `category`, `name` and `label`.
+    pub(crate) fn identifier(&self, glean: &Glean) -> String {
+        let base_identifier = self.base_identifier();
+
+        if let Some(label) = &self.inner.dynamic_label {
+            validate_dynamic_label(glean, self, &base_identifier, label)
+        } else {
+            base_identifier
+        }
+    }
+
+    /// The list of storages this metric should be recorded into.
+    pub fn storage_names(&self) -> &[String] {
+        &self.inner.send_in_pings
+    }
+}
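
For orientation, this is how metric types are parameterized — a minimal, illustrative sketch using only the public items above (both types are re-exported at the crate root):

    use std::convert::TryFrom;
    use glean_core::{CommonMetricData, Lifetime};

    // Unspecified fields fall back to their defaults:
    // Lifetime::Ping, not disabled, no dynamic label.
    let data = CommonMetricData {
        name: "calls".into(),
        category: "local".into(),
        send_in_pings: vec!["sample".into()],
        ..Default::default()
    };
    assert_eq!("ping", data.lifetime.as_str());

    // FFI callers pass lifetimes as i32; out-of-range values become errors.
    assert!(Lifetime::try_from(1).is_ok()); // Lifetime::Application
    assert!(Lifetime::try_from(7).is_err());
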
diff --git a/third_party/rust/glean-core/src/core/mod.rs b/third_party/rust/glean-core/src/core/mod.rs
new file mode 100644
index 0000000000..29ee1e52c7
--- /dev/null
+++ b/third_party/rust/glean-core/src/core/mod.rs
@@ -0,0 +1,942 @@
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicU8, Ordering};
+use std::sync::{Arc, Mutex};
+
+use chrono::{DateTime, FixedOffset};
+use once_cell::sync::OnceCell;
+
+use crate::database::Database;
+use crate::debug::DebugOptions;
+use crate::event_database::EventDatabase;
+use crate::internal_metrics::{AdditionalMetrics, CoreMetrics, DatabaseMetrics};
+use crate::internal_pings::InternalPings;
+use crate::metrics::{
+    self, ExperimentMetric, Metric, MetricType, MetricsEnabledConfig, PingType, RecordedExperiment,
+};
+use crate::ping::PingMaker;
+use crate::storage::{StorageManager, INTERNAL_STORAGE};
+use crate::upload::{PingUploadManager, PingUploadTask, UploadResult, UploadTaskAction};
+use crate::util::{local_now_with_offset, sanitize_application_id};
+use crate::{
+    scheduler, system, CommonMetricData, ErrorKind, InternalConfiguration, Lifetime, Result,
+    DEFAULT_MAX_EVENTS, GLEAN_SCHEMA_VERSION, GLEAN_VERSION, KNOWN_CLIENT_ID,
+};
+
+static GLEAN: OnceCell<Mutex<Glean>> = OnceCell::new();
+
+pub fn global_glean() -> Option<&'static Mutex<Glean>> {
+    GLEAN.get()
+}
+
+/// Sets or replaces the global Glean object.
+pub fn setup_glean(glean: Glean) -> Result<()> {
+    // The `OnceCell` type wrapping our Glean is thread-safe and can only be set once.
+    // Therefore even if our check for it being empty succeeds, setting it could fail if a
+    // concurrent thread is quicker in setting it.
+    // However this will not cause a bigger problem, as the second `set` operation will just fail.
+    // We can log it and move on.
+    //
+    // For all wrappers this is not a problem, as the Glean object is initialized exactly once on
+    // calling `initialize` on the global singleton and further operations check that it has been
+    // initialized.
+    if GLEAN.get().is_none() {
+        if GLEAN.set(Mutex::new(glean)).is_err() {
+            log::warn!(
+                "Global Glean object is initialized already. This probably happened concurrently."
+            )
+        }
+    } else {
+        // We allow overriding the global Glean object to support test mode.
+        // In test mode the Glean object is fully destroyed and recreated.
+        // This all happens behind a mutex and is therefore also thread-safe.
+        let mut lock = GLEAN.get().unwrap().lock().unwrap();
+        *lock = glean;
+    }
+    Ok(())
+}
+
+/// Execute `f` passing the global Glean object.
+///
+/// Panics if the global Glean object has not been set.
+pub fn with_glean<F, R>(f: F) -> R
+where
+    F: FnOnce(&Glean) -> R,
+{
+    let glean = global_glean().expect("Global Glean object not initialized");
+    let lock = glean.lock().unwrap();
+    f(&lock)
+}
+
+/// Execute `f` passing the global Glean object mutably.
+///
+/// Panics if the global Glean object has not been set.
+pub fn with_glean_mut<F, R>(f: F) -> R
+where
+    F: FnOnce(&mut Glean) -> R,
+{
+    let glean = global_glean().expect("Global Glean object not initialized");
+    let mut lock = glean.lock().unwrap();
+    f(&mut lock)
+}
+
+/// Execute `f` passing the global Glean object if it has been set.
+///
+/// Returns `None` if the global Glean object has not been set.
+/// Returns `Some(T)` otherwise.
+pub fn with_opt_glean<F, R>(f: F) -> Option<R>
+where
+    F: FnOnce(&Glean) -> R,
+{
+    let glean = global_glean()?;
+    let lock = glean.lock().unwrap();
+    Some(f(&lock))
+}
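
These helpers are how the rest of the crate reaches the singleton; a sketch of crate-internal usage (the paths are as used within `glean-core`, not necessarily part of the public API):

    // Read-only access; panics if Glean was never set up.
    let enabled = crate::core::with_glean(|glean| glean.is_upload_enabled());

    // Fallible access for code that may run before initialization.
    let app_id: Option<String> =
        crate::core::with_opt_glean(|glean| glean.get_application_id().to_string());
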
+
+/// The object holding meta information about a Glean instance.
+///
+/// ## Example
+///
+/// Create a new Glean instance, register a ping, record a simple counter and then send the final
+/// ping.
+///
+/// ```rust,no_run
+/// # use glean_core::{Glean, InternalConfiguration, CommonMetricData, metrics::*};
+/// let cfg = InternalConfiguration {
+///     data_path: "/tmp/glean".into(),
+///     application_id: "glean.sample.app".into(),
+///     language_binding_name: "Rust".into(),
+///     upload_enabled: true,
+///     max_events: None,
+///     delay_ping_lifetime_io: false,
+///     app_build: "".into(),
+///     use_core_mps: false,
+///     trim_data_to_registered_pings: false,
+///     log_level: None,
+/// };
+/// let mut glean = Glean::new(cfg).unwrap();
+/// let ping = PingType::new("sample", true, false, vec![]);
+/// glean.register_ping_type(&ping);
+///
+/// let call_counter: CounterMetric = CounterMetric::new(CommonMetricData {
+///     name: "calls".into(),
+///     category: "local".into(),
+///     send_in_pings: vec!["sample".into()],
+///     ..Default::default()
+/// });
+///
+/// call_counter.add_sync(&glean, 1);
+///
+/// ping.submit_sync(&glean, None);
+/// ```
+///
+/// ## Note
+///
+/// In specific language bindings, this is usually wrapped in a singleton and all metric recording goes to a single instance of this object.
+/// In the Rust core, it is possible to create multiple instances, which is used in testing.
+#[derive(Debug)]
+pub struct Glean {
+    upload_enabled: bool,
+    pub(crate) data_store: Option<Database>,
+    event_data_store: EventDatabase,
+    pub(crate) core_metrics: CoreMetrics,
+    pub(crate) additional_metrics: AdditionalMetrics,
+    pub(crate) database_metrics: DatabaseMetrics,
+    pub(crate) internal_pings: InternalPings,
+    data_path: PathBuf,
+    application_id: String,
+    ping_registry: HashMap<String, PingType>,
+    start_time: DateTime<FixedOffset>,
+    max_events: u32,
+    is_first_run: bool,
+    pub(crate) upload_manager: PingUploadManager,
+    debug: DebugOptions,
+    pub(crate) app_build: String,
+    pub(crate) schedule_metrics_pings: bool,
+    pub(crate) remote_settings_epoch: AtomicU8,
+    pub(crate) remote_settings_metrics_config: Arc<Mutex<MetricsEnabledConfig>>,
+}
+
+impl Glean {
+    /// Creates and initializes a new Glean object for use in a subprocess.
+    ///
+    /// Importantly, this will not send any pings at startup, since that
+    /// sort of management should only happen in the main process.
+    pub fn new_for_subprocess(cfg: &InternalConfiguration, scan_directories: bool) -> Result<Self> {
+        log::info!("Creating new Glean v{}", GLEAN_VERSION);
+
+        let application_id = sanitize_application_id(&cfg.application_id);
+        if application_id.is_empty() {
+            return Err(ErrorKind::InvalidConfig.into());
+        }
+
+        let data_path = Path::new(&cfg.data_path);
+        let event_data_store = EventDatabase::new(data_path)?;
+
+        // Create an upload manager with rate limiting of 15 pings every 60 seconds.
+        let mut upload_manager = PingUploadManager::new(&cfg.data_path, &cfg.language_binding_name);
+        upload_manager.set_rate_limiter(
+            /* seconds per interval */ 60, /* max pings per interval */ 15,
+        );
+
+        // We only scan the pending ping directories when calling this from a subprocess;
+        // when calling this from ::new we need to scan the directories after dealing with the upload state.
+        if scan_directories {
+            let _scanning_thread = upload_manager.scan_pending_pings_directories();
+        }
+
+        let start_time = local_now_with_offset();
+        let mut this = Self {
+            upload_enabled: cfg.upload_enabled,
+            // In the subprocess, we want to avoid accessing the database entirely.
+            // The easiest way to ensure that is to just not initialize it.
+            data_store: None,
+            event_data_store,
+            core_metrics: CoreMetrics::new(),
+            additional_metrics: AdditionalMetrics::new(),
+            database_metrics: DatabaseMetrics::new(),
+            internal_pings: InternalPings::new(),
+            upload_manager,
+            data_path: PathBuf::from(&cfg.data_path),
+            application_id,
+            ping_registry: HashMap::new(),
+            start_time,
+            max_events: cfg.max_events.unwrap_or(DEFAULT_MAX_EVENTS),
+            is_first_run: false,
+            debug: DebugOptions::new(),
+            app_build: cfg.app_build.to_string(),
+            // Subprocess doesn't use "metrics" pings so has no need for a scheduler.
+            schedule_metrics_pings: false,
+            remote_settings_epoch: AtomicU8::new(0),
+            remote_settings_metrics_config: Arc::new(Mutex::new(MetricsEnabledConfig::new())),
+        };
+
+        // Ensure these pings are registered.
+        let pings = this.internal_pings.clone();
+        this.register_ping_type(&pings.baseline);
+        this.register_ping_type(&pings.metrics);
+        this.register_ping_type(&pings.events);
+        this.register_ping_type(&pings.deletion_request);
+
+        Ok(this)
+    }
+
+    /// Creates and initializes a new Glean object.
+    ///
+    /// This will create the necessary directories and files in
+    /// [`cfg.data_path`](InternalConfiguration::data_path). This will also initialize
+    /// the core metrics.
+    pub fn new(cfg: InternalConfiguration) -> Result<Self> {
+        let mut glean = Self::new_for_subprocess(&cfg, false)?;
+
+        // Creating the data store creates the necessary path as well.
+        // If that fails we bail out and don't initialize further.
+        let data_path = Path::new(&cfg.data_path);
+        glean.data_store = Some(Database::new(data_path, cfg.delay_ping_lifetime_io)?);
+
+        // The upload enabled flag may have changed since the last run, for
+        // example by the changing of a config file.
+        if cfg.upload_enabled {
+            // If upload is enabled, just follow the normal code path to
+            // instantiate the core metrics.
+            glean.on_upload_enabled();
+        } else {
+            // If upload is disabled, and we've never run before, only set the
+            // client_id to KNOWN_CLIENT_ID, but do not send a deletion request
+            // ping.
+            // If we have run before, and if the client_id is not equal to
+            // the KNOWN_CLIENT_ID, do the full upload disabled operations to
+            // clear metrics, set the client_id to KNOWN_CLIENT_ID, and send a
+            // deletion request ping.
+            match glean
+                .core_metrics
+                .client_id
+                .get_value(&glean, Some("glean_client_info"))
+            {
+                None => glean.clear_metrics(),
+                Some(uuid) => {
+                    if uuid != *KNOWN_CLIENT_ID {
+                        // Temporarily enable uploading so we can submit a
+                        // deletion request ping.
+                        glean.upload_enabled = true;
+                        glean.on_upload_disabled(true);
+                    }
+                }
+            }
+        }
+
+        // We set this only for non-subprocess situations.
+        glean.schedule_metrics_pings = cfg.use_core_mps;
+
+        // We only scan the pending pings directories **after** dealing with the upload state.
+        // If upload is disabled, we delete all pending pings files
+        // and we need to do that **before** scanning the pending pings folder
+        // to ensure we don't enqueue pings before their files are deleted.
+        let _scanning_thread = glean.upload_manager.scan_pending_pings_directories();
+
+        Ok(glean)
+    }
+
+    /// For tests, make it easy to create a Glean object using only the required configuration.
+    #[cfg(test)]
+    pub(crate) fn with_options(
+        data_path: &str,
+        application_id: &str,
+        upload_enabled: bool,
+    ) -> Self {
+        let cfg = InternalConfiguration {
+            data_path: data_path.into(),
+            application_id: application_id.into(),
+            language_binding_name: "Rust".into(),
+            upload_enabled,
+            max_events: None,
+            delay_ping_lifetime_io: false,
+            app_build: "Unknown".into(),
+            use_core_mps: false,
+            trim_data_to_registered_pings: false,
+            log_level: None,
+        };
+
+        let mut glean = Self::new(cfg).unwrap();
+
+        // Disable all upload manager policies for testing
+        glean.upload_manager = PingUploadManager::no_policy(data_path);
+
+        glean
+    }
+
+    /// Destroys the database.
+    ///
+    /// After this Glean needs to be reinitialized.
+    pub fn destroy_db(&mut self) {
+        self.data_store = None;
+    }
+
+    /// Initializes the core metrics managed by Glean's Rust core.
+    fn initialize_core_metrics(&mut self) {
+        let need_new_client_id = match self
+            .core_metrics
+            .client_id
+            .get_value(self, Some("glean_client_info"))
+        {
+            None => true,
+            Some(uuid) => uuid == *KNOWN_CLIENT_ID,
+        };
+        if need_new_client_id {
+            self.core_metrics.client_id.generate_and_set_sync(self);
+        }
+
+        if self
+            .core_metrics
+            .first_run_date
+            .get_value(self, "glean_client_info")
+            .is_none()
+        {
+            self.core_metrics.first_run_date.set_sync(self, None);
+            // The `first_run_date` field is generated on the very first run
+            // and persisted across upload toggling. We can assume that the only
+            // time it is set is indeed our "first run".
+            self.is_first_run = true;
+        }
+
+        self.set_application_lifetime_core_metrics();
+    }
+
+    /// Initializes the database metrics managed by Glean's Rust core.
+    fn initialize_database_metrics(&mut self) {
+        log::trace!("Initializing database metrics");
+
+        if let Some(size) = self
+            .data_store
+            .as_ref()
+            .and_then(|database| database.file_size())
+        {
+            log::trace!("Database file size: {}", size.get());
+            self.database_metrics
+                .size
+                .accumulate_sync(self, size.get() as i64)
+        }
+    }
+
+    /// Signals that the environment is ready to submit pings.
+    ///
+    /// Should be called when Glean is initialized to the point where it can correctly assemble pings.
+    /// Usually called from the language binding after all of the core metrics have been set
+    /// and the ping types have been registered.
+    ///
+    /// # Arguments
+    ///
+    /// * `trim_data_to_registered_pings` - Whether we should limit to storing data only for
+    ///   data belonging to pings previously registered via `register_ping_type`.
+    ///
+    /// # Returns
+    ///
+    /// Whether the "events" ping was submitted.
+    pub fn on_ready_to_submit_pings(&self, trim_data_to_registered_pings: bool) -> bool {
+        self.event_data_store
+            .flush_pending_events_on_startup(self, trim_data_to_registered_pings)
+    }
+
+    /// Sets whether upload is enabled or not.
+    ///
+    /// When uploading is disabled, metrics aren't recorded at all and no
+    /// data is uploaded.
+    ///
+    /// When disabling, all pending metrics, events and queued pings are cleared.
+    ///
+    /// When enabling, the core Glean metrics are recreated.
+    ///
+    /// If the value of this flag is not actually changed, this is a no-op.
+    ///
+    /// # Arguments
+    ///
+    /// * `flag` - When true, enable metric collection.
+    ///
+    /// # Returns
+    ///
+    /// Whether the flag was different from the current value,
+    /// and actual work was done to clear or reinstate metrics.
+    pub fn set_upload_enabled(&mut self, flag: bool) -> bool {
+        log::info!("Upload enabled: {:?}", flag);
+
+        if self.upload_enabled != flag {
+            if flag {
+                self.on_upload_enabled();
+            } else {
+                self.on_upload_disabled(false);
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Determines whether upload is enabled.
+    ///
+    /// When upload is disabled, no data will be recorded.
+    pub fn is_upload_enabled(&self) -> bool {
+        self.upload_enabled
+    }
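
The toggle semantics in practice — a short sketch against an initialized `glean` instance:

    // Disabling clears pending data and queues a deletion-request ping;
    // the boolean reports whether any state actually changed.
    assert!(glean.set_upload_enabled(false));
    assert!(!glean.is_upload_enabled());

    // Repeating the same value is a no-op.
    assert!(!glean.set_upload_enabled(false));
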
+
+    /// Handles the changing of state from upload disabled to enabled.
+    ///
+    /// Should only be called when the state actually changes.
+    ///
+    /// The `upload_enabled` flag is set to true and the core Glean metrics are
+    /// recreated.
+    fn on_upload_enabled(&mut self) {
+        self.upload_enabled = true;
+        self.initialize_core_metrics();
+        self.initialize_database_metrics();
+    }
+
+    /// Handles the changing of state from upload enabled to disabled.
+    ///
+    /// Should only be called when the state actually changes.
+    ///
+    /// A deletion_request ping is sent, all pending metrics, events and queued
+    /// pings are cleared, and the client_id is set to KNOWN_CLIENT_ID.
+    /// Afterward, the upload_enabled flag is set to false.
+    fn on_upload_disabled(&mut self, during_init: bool) {
+        // The upload_enabled flag should be true here, or the deletion ping
+        // won't be submitted.
+        let reason = if during_init {
+            Some("at_init")
+        } else {
+            Some("set_upload_enabled")
+        };
+        if !self
+            .internal_pings
+            .deletion_request
+            .submit_sync(self, reason)
+        {
+            log::error!("Failed to submit deletion-request ping on opt-out.");
+        }
+        self.clear_metrics();
+        self.upload_enabled = false;
+    }
+
+    /// Clear any pending metrics when telemetry is disabled.
+    fn clear_metrics(&mut self) {
+        // Clear the pending pings queue and acquire the lock
+        // so that it can't be accessed until this function is done.
+        let _lock = self.upload_manager.clear_ping_queue();
+
+        // There is only one metric that we want to survive after clearing all
+        // metrics: first_run_date. Here, we store its value so we can restore
+        // it after clearing the metrics.
+        let existing_first_run_date = self
+            .core_metrics
+            .first_run_date
+            .get_value(self, "glean_client_info");
+
+        // Clear any pending pings.
+        let ping_maker = PingMaker::new();
+        if let Err(err) = ping_maker.clear_pending_pings(self.get_data_path()) {
+            log::warn!("Error clearing pending pings: {}", err);
+        }
+
+        // Delete all stored metrics.
+        // Note that this also includes the ping sequence numbers, so it has
+        // the effect of resetting those to their initial values.
+        if let Some(data) = self.data_store.as_ref() {
+            data.clear_all()
+        }
+        if let Err(err) = self.event_data_store.clear_all() {
+            log::warn!("Error clearing pending events: {}", err);
+        }
+
+        // This does not clear the experiments store (which isn't managed by the
+        // StorageEngineManager), since doing so would mean we would have to have the
+        // application tell us again which experiments are active if telemetry is
+        // re-enabled.
+
+        {
+            // We need to briefly set upload_enabled to true here so that `set`
+            // is not a no-op. This is safe, since nothing on the Rust side can
+            // run concurrently to this since we hold a mutable reference to the
+            // Glean object. Additionally, the pending pings have been cleared
+            // from disk, so the PingUploader can't wake up and start sending
+            // pings.
+            self.upload_enabled = true;
+
+            // Store a "dummy" KNOWN_CLIENT_ID in the client_id metric. This will
+            // make it easier to detect if pings were unintentionally sent after
+            // uploading is disabled.
+            self.core_metrics
+                .client_id
+                .set_from_uuid_sync(self, *KNOWN_CLIENT_ID);
+
+            // Restore the first_run_date.
+            if let Some(existing_first_run_date) = existing_first_run_date {
+                self.core_metrics
+                    .first_run_date
+                    .set_sync_chrono(self, existing_first_run_date);
+            }
+
+            self.upload_enabled = false;
+        }
+    }
+
+    /// Gets the application ID as specified on instantiation.
+    pub fn get_application_id(&self) -> &str {
+        &self.application_id
+    }
+
+    /// Gets the data path of this instance.
+    pub fn get_data_path(&self) -> &Path {
+        &self.data_path
+    }
+
+    /// Gets a handle to the database.
+    #[track_caller] // If this fails we're interested in the caller.
+    pub fn storage(&self) -> &Database {
+        self.data_store.as_ref().expect("No database found")
+    }
+
+    /// Gets an optional handle to the database.
+    pub fn storage_opt(&self) -> Option<&Database> {
+        self.data_store.as_ref()
+    }
+
+    /// Gets a handle to the event database.
+    pub fn event_storage(&self) -> &EventDatabase {
+        &self.event_data_store
+    }
+
+    /// Gets the maximum number of events to store before sending a ping.
+    pub fn get_max_events(&self) -> usize {
+        self.max_events as usize
+    }
+
+    /// Gets the next task for an uploader.
+    ///
+    /// This can be one of:
+    ///
+    /// * [`Wait`](PingUploadTask::Wait) - which means the requester should ask
+    ///   again later;
+    /// * [`Upload(PingRequest)`](PingUploadTask::Upload) - which means there is
+    ///   a ping to upload. This wraps the actual request object;
+    /// * [`Done`](PingUploadTask::Done) - which means the requester should stop
+    ///   asking for now.
+    ///
+    /// # Returns
+    ///
+    /// A [`PingUploadTask`] representing the next task.
+    pub fn get_upload_task(&self) -> PingUploadTask {
+        self.upload_manager.get_upload_task(self, self.log_pings())
+    }
+
+    /// Processes the response from an attempt to upload a ping.
+    ///
+    /// # Arguments
+    ///
+    /// * `uuid` - The UUID of the ping in question.
+    /// * `status` - The upload result.
+    pub fn process_ping_upload_response(
+        &self,
+        uuid: &str,
+        status: UploadResult,
+    ) -> UploadTaskAction {
+        self.upload_manager
+            .process_ping_upload_response(self, uuid, status)
+    }
+
+    /// Takes a snapshot for the given store and optionally clears it.
+    ///
+    /// # Arguments
+    ///
+    /// * `store_name` - The store to snapshot.
+    /// * `clear_store` - Whether to clear the store after snapshotting.
+    ///
+    /// # Returns
+    ///
+    /// The snapshot in a string encoded as JSON. If the snapshot is empty, returns an empty string.
+    pub fn snapshot(&mut self, store_name: &str, clear_store: bool) -> String {
+        StorageManager
+            .snapshot(self.storage(), store_name, clear_store)
+            .unwrap_or_else(|| String::from(""))
+    }
+
+    pub(crate) fn make_path(&self, ping_name: &str, doc_id: &str) -> String {
+        format!(
+            "/submit/{}/{}/{}/{}",
+            self.get_application_id(),
+            ping_name,
+            GLEAN_SCHEMA_VERSION,
+            doc_id
+        )
+    }
+
+    /// Collects and submits a ping by name for eventual uploading.
+    ///
+    /// The ping content is assembled as soon as possible, but upload is not
+    /// guaranteed to happen immediately, as that depends on the upload policies.
+    ///
+    /// If the ping currently contains no content, it will not be sent,
+    /// unless it is configured to be sent if empty.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - The name of the ping to submit
+    /// * `reason` - A reason code to include in the ping
+    ///
+    /// # Returns
+    ///
+    /// Whether the ping was successfully assembled and queued.
+    ///
+    /// # Errors
+    ///
+    /// If collecting or writing the ping to disk failed.
+    pub fn submit_ping_by_name(&self, ping_name: &str, reason: Option<&str>) -> bool {
+        match self.get_ping_by_name(ping_name) {
+            None => {
+                log::error!("Attempted to submit unknown ping '{}'", ping_name);
+                false
+            }
+            Some(ping) => ping.submit_sync(self, reason),
+        }
+    }
+
+    /// Gets a [`PingType`] by name.
+    ///
+    /// # Returns
+    ///
+    /// The [`PingType`] of a ping if the given name was registered before, [`None`]
+    /// otherwise.
+    pub fn get_ping_by_name(&self, ping_name: &str) -> Option<&PingType> {
+        self.ping_registry.get(ping_name)
+    }
+
+    /// Registers a new [`PingType`](metrics/struct.PingType.html).
+    pub fn register_ping_type(&mut self, ping: &PingType) {
+        if self.ping_registry.contains_key(ping.name()) {
+            log::debug!("Duplicate ping named '{}'", ping.name())
+        }
+
+        self.ping_registry
+            .insert(ping.name().to_string(), ping.clone());
+    }
+
+    /// Gets the creation time of the Glean object.
+    pub(crate) fn start_time(&self) -> DateTime<FixedOffset> {
+        self.start_time
+    }
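
An uploader drives this API as a loop; a sketch with the HTTP call elided. The struct-variant field names (`request`, `time`, `document_id`, the `HttpStatus` result) follow this crate version's `upload` module, but treat them as assumptions rather than a definitive client:

    use glean_core::upload::{PingUploadTask, UploadResult};

    loop {
        match glean.get_upload_task() {
            PingUploadTask::Upload { request } => {
                // POST `request.body` to `request.path` here, then report back.
                let result = UploadResult::HttpStatus { code: 200 }; // assume success
                glean.process_ping_upload_response(&request.document_id, result);
            }
            PingUploadTask::Wait { time } => {
                std::thread::sleep(std::time::Duration::from_millis(time));
            }
            PingUploadTask::Done { .. } => break,
        }
    }
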
+
+    /// Indicates that an experiment is running.
+    ///
+    /// Glean will then add an experiment annotation to the environment
+    /// which is sent with pings. This information is not persisted between runs.
+    ///
+    /// # Arguments
+    ///
+    /// * `experiment_id` - The id of the active experiment (maximum 30 bytes).
+    /// * `branch` - The experiment branch (maximum 30 bytes).
+    /// * `extra` - Optional metadata to output with the ping.
+    pub fn set_experiment_active(
+        &self,
+        experiment_id: String,
+        branch: String,
+        extra: HashMap<String, String>,
+    ) {
+        let metric = ExperimentMetric::new(self, experiment_id);
+        metric.set_active_sync(self, branch, extra);
+    }
+
+    /// Indicates that an experiment is no longer running.
+    ///
+    /// # Arguments
+    ///
+    /// * `experiment_id` - The id of the active experiment to deactivate (maximum 30 bytes).
+    pub fn set_experiment_inactive(&self, experiment_id: String) {
+        let metric = ExperimentMetric::new(self, experiment_id);
+        metric.set_inactive_sync(self);
+    }
+
+    /// **Test-only API (exported for FFI purposes).**
+    ///
+    /// Gets stored data for the requested experiment.
+    ///
+    /// # Arguments
+    ///
+    /// * `experiment_id` - The id of the active experiment (maximum 30 bytes).
+    pub fn test_get_experiment_data(&self, experiment_id: String) -> Option<RecordedExperiment> {
+        let metric = ExperimentMetric::new(self, experiment_id);
+        metric.test_get_value(self)
+    }
+
+    /// Sets configuration to override the default metric enabled/disabled state, typically from a
+    /// remote_settings experiment or rollout.
+    ///
+    /// # Arguments
+    ///
+    /// * `cfg` - The `MetricsEnabledConfig` object (deserialized from its stringified JSON representation)
+    pub fn set_metrics_enabled_config(&self, cfg: MetricsEnabledConfig) {
+        // Set the current MetricsEnabledConfig, keeping the lock until the epoch is
+        // updated to prevent against reading a "new" config but an "old" epoch
+        let mut lock = self.remote_settings_metrics_config.lock().unwrap();
+        *lock = cfg;
+
+        // Update remote_settings epoch
+        self.remote_settings_epoch.fetch_add(1, Ordering::SeqCst);
+    }
+
+    /// Persists [`Lifetime::Ping`] data that might be in memory in case
+    /// [`delay_ping_lifetime_io`](InternalConfiguration::delay_ping_lifetime_io) is set
+    /// or was set at a previous time.
+    ///
+    /// If there is no data to persist, this function does nothing.
+    pub fn persist_ping_lifetime_data(&self) -> Result<()> {
+        if let Some(data) = self.data_store.as_ref() {
+            return data.persist_ping_lifetime_data();
+        }
+
+        Ok(())
+    }
+
+    /// Sets internally-handled application lifetime metrics.
+    fn set_application_lifetime_core_metrics(&self) {
+        self.core_metrics.os.set_sync(self, system::OS);
+    }
+
+    /// **This is not meant to be used directly.**
+    ///
+    /// Clears all the metrics that have [`Lifetime::Application`].
+    pub fn clear_application_lifetime_metrics(&self) {
+        log::trace!("Clearing Lifetime::Application metrics");
+        if let Some(data) = self.data_store.as_ref() {
+            data.clear_lifetime(Lifetime::Application);
+        }
+
+        // Set internally handled app lifetime metrics again.
+        self.set_application_lifetime_core_metrics();
+    }
+
+    /// Whether or not this is the first run on this profile.
+    pub fn is_first_run(&self) -> bool {
+        self.is_first_run
+    }
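
For example, annotating pings with an experiment for the current run (identifiers here are illustrative):

    use std::collections::HashMap;

    let mut extra = HashMap::new();
    extra.insert("buttonColor".to_string(), "blue".to_string());

    // Adds the annotation to the ping environment; not persisted across runs.
    glean.set_experiment_active("button-color-rollout".to_string(), "blue".to_string(), extra);

    // Later, when the experiment ends:
    glean.set_experiment_inactive("button-color-rollout".to_string());
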
+
+    /// Sets a debug view tag.
+    ///
+    /// This will return `false` in case `value` is not a valid tag.
+    ///
+    /// When the debug view tag is set, pings are sent with a `X-Debug-ID` header with the value of the tag
+    /// and are sent to the ["Ping Debug Viewer"](https://mozilla.github.io/glean/book/dev/core/internal/debug-pings.html).
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - A valid HTTP header value. Must match the regex: "[a-zA-Z0-9-]{1,20}".
+    pub fn set_debug_view_tag(&mut self, value: &str) -> bool {
+        self.debug.debug_view_tag.set(value.into())
+    }
+
+    /// Returns the value for the debug view tag or [`None`] if it hasn't been set.
+    ///
+    /// The `debug_view_tag` may be set from an environment variable
+    /// (`GLEAN_DEBUG_VIEW_TAG`) or through the [`set_debug_view_tag`] function.
+    pub(crate) fn debug_view_tag(&self) -> Option<&String> {
+        self.debug.debug_view_tag.get()
+    }
+
+    /// Sets source tags.
+    ///
+    /// This will return `false` in case `value` contains invalid tags.
+    ///
+    /// Ping tags will show in the destination datasets, after ingestion.
+    ///
+    /// **Note** If one or more tags are invalid, all tags are ignored.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - A vector of at most 5 valid HTTP header values. Individual tags must match the regex: "[a-zA-Z0-9-]{1,20}".
+    pub fn set_source_tags(&mut self, value: Vec<String>) -> bool {
+        self.debug.source_tags.set(value)
+    }
+
+    /// Returns the value for the source tags or [`None`] if it hasn't been set.
+    ///
+    /// The `source_tags` may be set from an environment variable (`GLEAN_SOURCE_TAGS`)
+    /// or through the [`set_source_tags`] function.
+    pub(crate) fn source_tags(&self) -> Option<&Vec<String>> {
+        self.debug.source_tags.get()
+    }
+
+    /// Sets the log pings debug option.
+    ///
+    /// This will return `false` in case we are unable to set the option.
+    ///
+    /// When the log pings debug option is `true`,
+    /// we log the payload of all successfully assembled pings.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The value of the log pings option
+    pub fn set_log_pings(&mut self, value: bool) -> bool {
+        self.debug.log_pings.set(value)
+    }
+
+    /// Returns the value for the log pings debug option or [`None`] if it hasn't been set.
+    ///
+    /// The `log_pings` option may be set from an environment variable (`GLEAN_LOG_PINGS`)
+    /// or through the [`set_log_pings`] function.
+    pub(crate) fn log_pings(&self) -> bool {
+        self.debug.log_pings.get().copied().unwrap_or(false)
+    }
+
+    fn get_dirty_bit_metric(&self) -> metrics::BooleanMetric {
+        metrics::BooleanMetric::new(CommonMetricData {
+            name: "dirtybit".into(),
+            // We don't need a category, the name is already unique
+            category: "".into(),
+            send_in_pings: vec![INTERNAL_STORAGE.into()],
+            lifetime: Lifetime::User,
+            ..Default::default()
+        })
+    }
+
+    /// **This is not meant to be used directly.**
+    ///
+    /// Sets the value of a "dirty flag" in the permanent storage.
+    ///
+    /// The "dirty flag" is meant to have the following behaviour, implemented
+    /// by the consumers of the FFI layer:
+    ///
+    /// - on mobile: set to `false` when going to background or shutting down,
+    ///   set to `true` at startup and when going to foreground.
+    /// - on non-mobile platforms: set to `true` at startup and `false` at
+    ///   shutdown.
+    ///
+    /// At startup, before setting its new value, if the "dirty flag" value is
+    /// `true`, then Glean knows it did not exit cleanly and can implement
+    /// coping mechanisms (e.g. sending a `baseline` ping).
+    pub fn set_dirty_flag(&self, new_value: bool) {
+        self.get_dirty_bit_metric().set_sync(self, new_value);
+    }
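
The debug setters validate their input; a quick sketch:

    // Valid tag: pings now carry an X-Debug-ID header and land in the Debug Ping Viewer.
    assert!(glean.set_debug_view_tag("my-test-run"));
    // Rejected: spaces and '!' fall outside [a-zA-Z0-9-]{1,20}.
    assert!(!glean.set_debug_view_tag("not a valid tag!"));

    // Log the payload of every successfully assembled ping.
    glean.set_log_pings(true);
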
+
+    /// **This is not meant to be used directly.**
+    ///
+    /// Checks the stored value of the "dirty flag".
+    pub fn is_dirty_flag_set(&self) -> bool {
+        let dirty_bit_metric = self.get_dirty_bit_metric();
+        match StorageManager.snapshot_metric(
+            self.storage(),
+            INTERNAL_STORAGE,
+            &dirty_bit_metric.meta().identifier(self),
+            dirty_bit_metric.meta().inner.lifetime,
+        ) {
+            Some(Metric::Boolean(b)) => b,
+            _ => false,
+        }
+    }
+
+    /// Performs the collection/cleanup operations required by becoming active.
+    ///
+    /// This function generates a baseline ping with reason `active`
+    /// and then sets the dirty bit.
+    pub fn handle_client_active(&mut self) {
+        if !self
+            .internal_pings
+            .baseline
+            .submit_sync(self, Some("active"))
+        {
+            log::info!("baseline ping not submitted on active");
+        }
+
+        self.set_dirty_flag(true);
+    }
+
+    /// Performs the collection/cleanup operations required by becoming inactive.
+    ///
+    /// This function generates a baseline and an events ping with reason
+    /// `inactive` and then clears the dirty bit.
+    pub fn handle_client_inactive(&mut self) {
+        if !self
+            .internal_pings
+            .baseline
+            .submit_sync(self, Some("inactive"))
+        {
+            log::info!("baseline ping not submitted on inactive");
+        }
+
+        if !self
+            .internal_pings
+            .events
+            .submit_sync(self, Some("inactive"))
+        {
+            log::info!("events ping not submitted on inactive");
+        }
+
+        self.set_dirty_flag(false);
+    }
+
+    /// **Test-only API (exported for FFI purposes).**
+    ///
+    /// Deletes all stored metrics.
+    ///
+    /// Note that this also includes the ping sequence numbers, so it has
+    /// the effect of resetting those to their initial values.
+    pub fn test_clear_all_stores(&self) {
+        if let Some(data) = self.data_store.as_ref() {
+            data.clear_all()
+        }
+        // We don't care about this failing, maybe the data does just not exist.
+        let _ = self.event_data_store.clear_all();
+    }
+
+    /// Instructs the Metrics Ping Scheduler's thread to exit cleanly.
+    /// If Glean was configured with `use_core_mps: false`, this has no effect.
+    pub fn cancel_metrics_ping_scheduler(&self) {
+        if self.schedule_metrics_pings {
+            scheduler::cancel();
+        }
+    }
+
+    /// Instructs the Metrics Ping Scheduler to begin scheduling metrics pings.
+    /// If Glean was configured with `use_core_mps: false`, this has no effect.
+    pub fn start_metrics_ping_scheduler(&self) {
+        if self.schedule_metrics_pings {
+            scheduler::schedule(self);
+        }
+    }
+}
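
A language binding drives the dirty flag through the two handlers above; roughly:

    // Foreground: submits a baseline ping (reason "active") and sets the dirty bit.
    glean.handle_client_active();
    assert!(glean.is_dirty_flag_set());

    // Background: submits baseline + events pings (reason "inactive") and clears it,
    // so the next startup can tell a clean exit from a crash.
    glean.handle_client_inactive();
    assert!(!glean.is_dirty_flag_set());
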
diff --git a/third_party/rust/glean-core/src/core_metrics.rs b/third_party/rust/glean-core/src/core_metrics.rs
new file mode 100644
index 0000000000..baa2b8515b
--- /dev/null
+++ b/third_party/rust/glean-core/src/core_metrics.rs
@@ -0,0 +1,206 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::metrics::{
+    Datetime, DatetimeMetric, QuantityMetric, StringMetric, TimeUnit, TimespanMetric,
+};
+use crate::{CommonMetricData, Lifetime};
+
+use once_cell::sync::Lazy;
+
+/// Metrics included in every ping as `client_info`.
+#[derive(Debug, Default)]
+pub struct ClientInfoMetrics {
+    /// The build identifier generated by the CI system (e.g. "1234/A").
+    pub app_build: String,
+    /// The user visible version string (e.g. "1.0.3").
+    pub app_display_version: String,
+    /// The app's build date
+    pub app_build_date: Datetime,
+
+    /// The architecture of the device (e.g. "arm", "x86").
+    pub architecture: String,
+    /// The version of the operating system (e.g. "14.0").
+    pub os_version: String,
+
+    /// The product-provided release channel (e.g. "beta").
+    pub channel: Option<String>,
+    /// The Android specific SDK version of the software running on this hardware device (e.g. "23").
+    pub android_sdk_version: Option<String>,
+    /// The Windows specific OS build version (e.g. 19043)
+    pub windows_build_number: Option<i64>,
+    /// The manufacturer of the device the application is running on.
+    /// Not set if the device manufacturer can't be determined (e.g. on Desktop).
+    pub device_manufacturer: Option<String>,
+    /// The model of the device the application is running on.
+    /// On Android, this is Build.MODEL, the user-visible marketing name, like "Pixel 2 XL".
+    /// Not set if the device model can't be determined (e.g. on Desktop).
+    pub device_model: Option<String>,
+    /// The locale of the application during initialization (e.g. "es-ES").
+    /// If the locale can't be determined on the system, the value is "und", to indicate "undetermined".
+    pub locale: Option<String>,
+}
+
+/// Metrics included in every ping as `client_info`.
+impl ClientInfoMetrics {
+    /// Creates the client info with dummy values for all.
+    pub fn unknown() -> Self {
+        ClientInfoMetrics {
+            app_build: "Unknown".to_string(),
+            app_display_version: "Unknown".to_string(),
+            app_build_date: Datetime::default(),
+            architecture: "Unknown".to_string(),
+            os_version: "Unknown".to_string(),
+            channel: Some("Unknown".to_string()),
+            android_sdk_version: None,
+            windows_build_number: None,
+            device_manufacturer: None,
+            device_model: None,
+            locale: None,
+        }
+    }
+}
+
+#[allow(non_upper_case_globals)]
+pub mod internal_metrics {
+    use super::*;
+
+    pub static app_build: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "app_build".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static app_display_version: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "app_display_version".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static app_build_date: Lazy<DatetimeMetric> = Lazy::new(|| {
+        DatetimeMetric::new(
+            CommonMetricData {
+                name: "build_date".into(),
+                category: "".into(),
+                send_in_pings: vec!["glean_client_info".into()],
+                lifetime: Lifetime::Application,
+                disabled: false,
+                ..Default::default()
+            },
+            TimeUnit::Second,
+        )
+    });
+
+    pub static app_channel: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "app_channel".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static os_version: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "os_version".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static architecture: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "architecture".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static android_sdk_version: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "android_sdk_version".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static windows_build_number: Lazy<QuantityMetric> = Lazy::new(|| {
+        QuantityMetric::new(CommonMetricData {
+            name: "windows_build_number".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static device_manufacturer: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "device_manufacturer".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static device_model: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "device_model".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static locale: Lazy<StringMetric> = Lazy::new(|| {
+        StringMetric::new(CommonMetricData {
+            name: "locale".into(),
+            category: "".into(),
+            send_in_pings: vec!["glean_client_info".into()],
+            lifetime: Lifetime::Application,
+            disabled: false,
+            ..Default::default()
+        })
+    });
+
+    pub static baseline_duration: Lazy<TimespanMetric> = Lazy::new(|| {
+        TimespanMetric::new(
+            CommonMetricData {
+                name: "duration".into(),
+                category: "glean.baseline".into(),
+                send_in_pings: vec!["baseline".into()],
+                lifetime: Lifetime::Ping,
+                disabled: false,
+                ..Default::default()
+            },
+            TimeUnit::Second,
+        )
+    });
+}
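
A binding fills these in once at initialization; a sketch assuming the struct is reachable as `glean_core::ClientInfoMetrics`, with `unknown()` supplying placeholders for anything not known up front:

    let client_info = glean_core::ClientInfoMetrics {
        app_build: "20240407".to_string(),
        app_display_version: "115.7.0".to_string(),
        channel: Some("esr".to_string()),
        // Everything else keeps its "Unknown"/None placeholder.
        ..glean_core::ClientInfoMetrics::unknown()
    };
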
diff --git a/third_party/rust/glean-core/src/coverage.rs b/third_party/rust/glean-core/src/coverage.rs
new file mode 100644
index 0000000000..426e6295c8
--- /dev/null
+++ b/third_party/rust/glean-core/src/coverage.rs
@@ -0,0 +1,47 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Utilities for recording when testing APIs have been called on specific
+//! metrics.
+//!
+//! Testing coverage is enabled by setting the GLEAN_TEST_COVERAGE environment
+//! variable to the name of an output file. This output file must run through a
+//! post-processor (in glean_parser's `coverage` command) to convert to a format
+//! understood by third-party coverage reporting tools.
+//!
+//! While running a unit test suite, Glean records which database keys were
+//! accessed by the testing APIs, with one entry per line. Database keys are
+//! usually, but not always, the same as metric identifiers, but it is the
+//! responsibility of the post-processor to resolve that difference.
+//!
+//! This functionality has no runtime overhead unless the testing API is used.
+
+use std::env;
+use std::fs::{File, OpenOptions};
+use std::io::Write;
+use std::sync::Mutex;
+
+use once_cell::sync::Lazy;
+
+static COVERAGE_FILE: Lazy<Option<Mutex<File>>> = Lazy::new(|| {
+    if let Some(filename) = env::var_os("GLEAN_TEST_COVERAGE") {
+        match OpenOptions::new().append(true).create(true).open(filename) {
+            Ok(file) => {
+                return Some(Mutex::new(file));
+            }
+            Err(err) => {
+                log::error!("Couldn't open file for coverage results: {:?}", err);
+            }
+        }
+    }
+    None
+});
+
+pub(crate) fn record_coverage(metric_id: &str) {
+    if let Some(file_mutex) = &*COVERAGE_FILE {
+        let mut file = file_mutex.lock().unwrap();
+        writeln!(&mut file, "{}", metric_id).ok();
+        file.flush().ok();
+    }
+}
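
Usage is driven entirely by the environment; for instance, in a test harness (the file name is arbitrary):

    // Before the suite runs:
    std::env::set_var("GLEAN_TEST_COVERAGE", "glean_coverage.txt");

    // Any testing API call now appends the accessed database key to that file,
    // one per line, ready for glean_parser's `coverage` post-processor.
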
diff --git a/third_party/rust/glean-core/src/database/mod.rs b/third_party/rust/glean-core/src/database/mod.rs
new file mode 100644
index 0000000000..32a20b017a
--- /dev/null
+++ b/third_party/rust/glean-core/src/database/mod.rs
@@ -0,0 +1,1766 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap;
+use std::fs;
+use std::io;
+use std::num::NonZeroU64;
+use std::path::Path;
+use std::str;
+use std::sync::RwLock;
+
+use crate::ErrorKind;
+
+use rkv::migrator::Migrator;
+use rkv::{MigrateError, StoreError, StoreOptions};
+
+/// Unwrap a `Result`s `Ok` value or do the specified action.
+///
+/// This is an alternative to the question-mark operator (`?`),
+/// when the other action should not be to return the error.
+macro_rules! unwrap_or {
+    ($expr:expr, $or:expr) => {
+        match $expr {
+            Ok(x) => x,
+            Err(_) => {
+                $or;
+            }
+        }
+    };
+}
+
+/// cbindgen:ignore
+pub type Rkv = rkv::Rkv<rkv::backend::SafeModeEnvironment>;
+/// cbindgen:ignore
+pub type SingleStore = rkv::SingleStore<rkv::backend::SafeModeDatabase>;
+/// cbindgen:ignore
+pub type Writer<'t> = rkv::Writer<rkv::backend::SafeModeRwTransaction<'t>>;
+
+pub fn rkv_new(path: &Path) -> std::result::Result<Rkv, rkv::StoreError> {
+    match Rkv::new::<rkv::backend::SafeMode>(path) {
+        // An invalid file can mean:
+        // 1. An empty file.
+        // 2. A corrupted file.
+        //
+        // In both instances there's not much we can do.
+        // Drop the data by removing the file, and start over.
+        Err(rkv::StoreError::FileInvalid) => {
+            let safebin = path.join("data.safe.bin");
+            fs::remove_file(safebin).map_err(|_| rkv::StoreError::FileInvalid)?;
+            // Now try again, we only handle that error once.
+            Rkv::new::<rkv::backend::SafeMode>(path)
+        }
+        Err(rkv::StoreError::DatabaseCorrupted) => {
+            let safebin = path.join("data.safe.bin");
+            fs::remove_file(safebin).map_err(|_| rkv::StoreError::DatabaseCorrupted)?;
+            // Try again, only allowing the error once.
+            Rkv::new::<rkv::backend::SafeMode>(path)
+        }
+        other => other,
+    }
+}
+
+fn delete_and_log(path: &Path, msg: &str) {
+    if let Err(err) = fs::remove_file(path) {
+        match err.kind() {
+            std::io::ErrorKind::NotFound => {
+                // Silently drop this error, the file was already non-existing.
+            }
+            _ => log::warn!("{}", msg),
+        }
+    }
+}
+
+fn delete_lmdb_database(path: &Path) {
+    let datamdb = path.join("data.mdb");
+    delete_and_log(&datamdb, "Failed to delete old data.");
+
+    let lockmdb = path.join("lock.mdb");
+    delete_and_log(&lockmdb, "Failed to delete old lock.");
+}
+
+/// Migrate from LMDB storage to safe-mode storage.
+///
+/// This migrates the data once, then deletes the LMDB storage.
+/// The safe-mode storage must be empty for it to work.
+/// Existing data will not be overwritten.
+/// If the destination database is not empty the LMDB database is deleted
+/// without migrating data.
+/// This is a no-op if no LMDB database file exists.
+pub fn migrate(path: &Path, dst_env: &Rkv) {
+    log::debug!("Migrating files in {}", path.display());
+
+    // Shortcut if no data to migrate is around.
+    let datamdb = path.join("data.mdb");
+    if !datamdb.exists() {
+        log::debug!("No data to migrate.");
+        return;
+    }
+
+    // We're handling the same error cases as `easy_migrate_lmdb_to_safe_mode`,
+    // but annotate each why they don't cause problems for Glean.
+    // Additionally for known cases we delete the LMDB database regardless.
+    let should_delete =
+        match Migrator::open_and_migrate_lmdb_to_safe_mode(path, |builder| builder, dst_env) {
+            // Source environment is corrupted.
+            // We start fresh with the new database.
+            Err(MigrateError::StoreError(StoreError::FileInvalid)) => true,
+            Err(MigrateError::StoreError(StoreError::DatabaseCorrupted)) => true,
+            // Path not accessible.
+            // Somehow our directory vanished between us creating it and reading from it.
+            // Nothing we can do really.
+            Err(MigrateError::StoreError(StoreError::IoError(_))) => true,
+            // Path accessible but incompatible for configuration.
+            // This should not happen, we never used storages that safe-mode doesn't understand.
+            // If it does happen, let's start fresh and use the safe-mode from now on.
+            Err(MigrateError::StoreError(StoreError::UnsuitableEnvironmentPath(_))) => true,
+            // Nothing to migrate.
+            // Source database was empty. We just start fresh anyway.
+            Err(MigrateError::SourceEmpty) => true,
+            // Migrating would overwrite.
+            // Either a previous migration failed and we still started writing data,
+            // or someone placed back an old data file.
+            // In any case we better stay on the new data and delete the old one.
+            Err(MigrateError::DestinationNotEmpty) => {
+                log::warn!("Failed to migrate old data. Destination was not empty");
+                true
+            }
+            // An internal lock was poisoned.
+            // This would only happen if multiple things run concurrently and one crashes.
+            Err(MigrateError::ManagerPoisonError) => false,
+            // Couldn't close source environment and delete files on disk (e.g. other stores still open).
+            // This could only happen if multiple instances are running,
+            // we leave files in place.
+            Err(MigrateError::CloseError(_)) => false,
+            // Other store errors are never returned from the migrator.
+            // We need to handle them to please rustc.
+            Err(MigrateError::StoreError(_)) => false,
+            // Other errors can't happen, so this leaves us with the Ok case.
+            // This already deleted the LMDB files.
+            Ok(()) => false,
+        };
+
+    if should_delete {
+        log::debug!("Need to delete remaining LMDB files.");
+        delete_lmdb_database(path);
+    }
+
+    log::debug!("Migration ended. Safe-mode database in {}", path.display());
+}
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::metrics::Metric;
+use crate::Glean;
+use crate::Lifetime;
+use crate::Result;
+
+pub struct Database {
+    /// Handle to the database environment.
+    rkv: Rkv,
+
+    /// Handles to the "lifetime" stores.
+    ///
+    /// A "store" is a handle to the underlying database.
+    /// We keep them open for fast and frequent access.
+    user_store: SingleStore,
+    ping_store: SingleStore,
+    application_store: SingleStore,
+
+    /// If the `delay_ping_lifetime_io` Glean config option is `true`,
+    /// we will save metrics with 'ping' lifetime data in a map temporarily
+    /// so as to persist them to disk using rkv in bulk on demand.
+    ping_lifetime_data: Option<RwLock<BTreeMap<String, Metric>>>,
+
+    // Initial file size when opening the database.
+    file_size: Option<NonZeroU64>,
+}
+
+impl std::fmt::Debug for Database {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        fmt.debug_struct("Database")
+            .field("rkv", &self.rkv)
+            .field("user_store", &"SingleStore")
+            .field("ping_store", &"SingleStore")
+            .field("application_store", &"SingleStore")
+            .field("ping_lifetime_data", &self.ping_lifetime_data)
+            .finish()
+    }
+}
+
+/// Calculate the database size from all the files in the directory.
+///
+/// # Arguments
+///
+/// * `dir` - The path to the directory
+///
+/// # Returns
+///
+/// Returns the non-zero combined size of all files in a directory,
+/// or `None` on error or if the size is `0`.
+fn database_size(dir: &Path) -> Option<NonZeroU64> {
+    let mut total_size = 0;
+    if let Ok(entries) = fs::read_dir(dir) {
+        for entry in entries.flatten() {
+            if let Ok(file_type) = entry.file_type() {
+                if file_type.is_file() {
+                    let path = entry.path();
+                    if let Ok(metadata) = fs::metadata(path) {
+                        total_size += metadata.len();
+                    } else {
+                        continue;
+                    }
+                }
+            }
+        }
+    }
+
+    NonZeroU64::new(total_size)
+}
+
+impl Database {
+    /// Initializes the data store.
+    ///
+    /// This opens the underlying rkv store and creates
+    /// the underlying directory structure.
+    ///
+    /// It also loads any Lifetime::Ping data that might be
+    /// persisted, in case `delay_ping_lifetime_io` is set.
+    pub fn new(data_path: &Path, delay_ping_lifetime_io: bool) -> Result<Self> {
+        let path = data_path.join("db");
+        log::debug!("Database path: {:?}", path.display());
+        let file_size = database_size(&path);
+
+        let rkv = Self::open_rkv(&path)?;
+        let user_store = rkv.open_single(Lifetime::User.as_str(), StoreOptions::create())?;
+        let ping_store = rkv.open_single(Lifetime::Ping.as_str(), StoreOptions::create())?;
+        let application_store =
+            rkv.open_single(Lifetime::Application.as_str(), StoreOptions::create())?;
+        let ping_lifetime_data = if delay_ping_lifetime_io {
+            Some(RwLock::new(BTreeMap::new()))
+        } else {
+            None
+        };
+
+        let db = Self {
+            rkv,
+            user_store,
+            ping_store,
+            application_store,
+            ping_lifetime_data,
+            file_size,
+        };
+
+        db.load_ping_lifetime_data();
+
+        Ok(db)
+    }
+
+    /// Get the initial database file size.
+    pub fn file_size(&self) -> Option<NonZeroU64> {
+        self.file_size
+    }
+
+    fn get_store(&self, lifetime: Lifetime) -> &SingleStore {
+        match lifetime {
+            Lifetime::User => &self.user_store,
+            Lifetime::Ping => &self.ping_store,
+            Lifetime::Application => &self.application_store,
+        }
+    }
+
+    /// Creates the storage directories and inits rkv.
+    fn open_rkv(path: &Path) -> Result<Rkv> {
+        fs::create_dir_all(path)?;
+
+        let rkv = rkv_new(path)?;
+        migrate(path, &rkv);
+
+        log::info!("Database initialized");
+        Ok(rkv)
+    }
+
+    /// Build the key of the final location of the data in the database.
+    /// Such location is built using the storage name and the metric
+    /// key/name (if available).
+    ///
+    /// # Arguments
+    ///
+    /// * `storage_name` - the name of the storage to store/fetch data from.
+    /// * `metric_key` - the optional metric key/name.
+    ///
+    /// # Returns
+    ///
+    /// A string representing the location in the database.
+    fn get_storage_key(storage_name: &str, metric_key: Option<&str>) -> String {
+        match metric_key {
+            Some(k) => format!("{}#{}", storage_name, k),
+            None => format!("{}#", storage_name),
+        }
+    }
+
+    /// Loads Lifetime::Ping data from rkv to memory,
+    /// if `delay_ping_lifetime_io` is set to true.
+    ///
+    /// Does nothing if it isn't or if there is no data to load.
+    fn load_ping_lifetime_data(&self) {
+        if let Some(ping_lifetime_data) = &self.ping_lifetime_data {
+            let mut data = ping_lifetime_data
+                .write()
+                .expect("Can't read ping lifetime data");
+
+            let reader = unwrap_or!(self.rkv.read(), return);
+            let store = self.get_store(Lifetime::Ping);
+            let mut iter = unwrap_or!(store.iter_start(&reader), return);
+
+            while let Some(Ok((metric_id, value))) = iter.next() {
+                let metric_id = match str::from_utf8(metric_id) {
+                    Ok(metric_id) => metric_id.to_string(),
+                    _ => continue,
+                };
+                let metric: Metric = match value {
+                    rkv::Value::Blob(blob) => unwrap_or!(bincode::deserialize(blob), continue),
+                    _ => continue,
+                };
+
+                data.insert(metric_id, metric);
+            }
+        }
+    }
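
The resulting key layout, for orientation (`get_storage_key` is private to this module, so this only runs within it):

    // A concrete metric keyed under its storage:
    assert_eq!("metrics#local.calls", Database::get_storage_key("metrics", Some("local.calls")));
    // Without a metric key the result is a prefix matching a whole storage:
    assert_eq!("metrics#", Database::get_storage_key("metrics", None));
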
+ pub fn iter_store_from<F>( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_key: Option<&str>, + mut transaction_fn: F, + ) where + F: FnMut(&[u8], &Metric), + { + let iter_start = Self::get_storage_key(storage_name, metric_key); + let len = iter_start.len(); + + // Lifetime::Ping data is not immediately persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let data = ping_lifetime_data + .read() + .expect("Can't read ping lifetime data"); + for (key, value) in data.iter() { + if key.starts_with(&iter_start) { + let key = &key[len..]; + transaction_fn(key.as_bytes(), value); + } + } + return; + } + } + + let reader = unwrap_or!(self.rkv.read(), return); + let mut iter = unwrap_or!( + self.get_store(lifetime).iter_from(&reader, &iter_start), + return + ); + + while let Some(Ok((metric_id, value))) = iter.next() { + if !metric_id.starts_with(iter_start.as_bytes()) { + break; + } + + let metric_id = &metric_id[len..]; + let metric: Metric = match value { + rkv::Value::Blob(blob) => unwrap_or!(bincode::deserialize(blob), continue), + _ => continue, + }; + transaction_fn(metric_id, &metric); + } + } + + /// Determines if the storage has the given metric. + /// + /// If data cannot be read it is assumed that the storage does not have the metric. + /// + /// # Arguments + /// + /// * `lifetime` - The lifetime of the metric. + /// * `storage_name` - The storage name to look in. + /// * `metric_identifier` - The metric identifier. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. + pub fn has_metric( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_identifier: &str, + ) -> bool { + let key = Self::get_storage_key(storage_name, Some(metric_identifier)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + return ping_lifetime_data + .read() + .map(|data| data.contains_key(&key)) + .unwrap_or(false); + } + } + + let reader = unwrap_or!(self.rkv.read(), return false); + self.get_store(lifetime) + .get(&reader, &key) + .unwrap_or(None) + .is_some() + } + + /// Writes to the specified storage with the provided transaction function. + /// + /// If the storage is unavailable, it will return an error. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + fn write_with_store<F>(&self, store_name: Lifetime, mut transaction_fn: F) -> Result<()> + where + F: FnMut(Writer, &SingleStore) -> Result<()>, + { + let writer = self.rkv.write().unwrap(); + let store = self.get_store(store_name); + transaction_fn(writer, store) + } + + /// Records a metric in the underlying storage system. + pub fn record(&self, glean: &Glean, data: &CommonMetricDataInternal, value: &Metric) { + // If upload is disabled we don't want to record. + if !glean.is_upload_enabled() { + return; + } + + let name = data.identifier(glean); + + for ping_name in data.storage_names() { + if let Err(e) = self.record_per_lifetime(data.inner.lifetime, ping_name, &name, value) { + log::error!( + "Failed to record metric '{}' into {}: {:?}", + data.base_identifier(), + ping_name, + e + ); + } + } + } + + /// Records a metric in the underlying storage system, for a single lifetime. 
+ /// + /// # Returns + /// + /// If the storage is unavailable or the write fails, no data will be stored and an error will be returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. + fn record_per_lifetime( + &self, + lifetime: Lifetime, + storage_name: &str, + key: &str, + metric: &Metric, + ) -> Result<()> { + let final_key = Self::get_storage_key(storage_name, Some(key)); + + // Lifetime::Ping data is not immediately persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't read ping lifetime data"); + data.insert(final_key, metric.clone()); + return Ok(()); + } + } + + let encoded = bincode::serialize(&metric).expect("IMPOSSIBLE: Serializing metric failed"); + let value = rkv::Value::Blob(&encoded); + + let mut writer = self.rkv.write()?; + self.get_store(lifetime) + .put(&mut writer, final_key, &value)?; + writer.commit()?; + Ok(()) + } + + /// Records the provided value, with the given lifetime, + /// after applying a transformation function. + pub fn record_with<F>(&self, glean: &Glean, data: &CommonMetricDataInternal, mut transform: F) + where + F: FnMut(Option<Metric>) -> Metric, + { + // If upload is disabled we don't want to record. + if !glean.is_upload_enabled() { + return; + } + + let name = data.identifier(glean); + for ping_name in data.storage_names() { + if let Err(e) = + self.record_per_lifetime_with(data.inner.lifetime, ping_name, &name, &mut transform) + { + log::error!( + "Failed to record metric '{}' into {}: {:?}", + data.base_identifier(), + ping_name, + e + ); + } + } + } + + /// Records a metric in the underlying storage system, + /// after applying the given transformation function, for a single lifetime. + /// + /// # Returns + /// + /// If the storage is unavailable or the write fails, no data will be stored and an error will be returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
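+    ///
+    /// # Example
+    ///
+    /// A sketch of a transformation function (an editorial addition, with
+    /// illustrative storage and metric names only): increment a stored
+    /// counter, treating a missing or mismatched value as zero.
+    ///
+    /// ```rust,ignore
+    /// db.record_per_lifetime_with(Lifetime::Ping, "metrics", "category.counter", |old| {
+    ///     match old {
+    ///         Some(Metric::Counter(count)) => Metric::Counter(count + 1),
+    ///         // Anything else (including `None`) starts the counter at 1.
+    ///         _ => Metric::Counter(1),
+    ///     }
+    /// })?;
+    /// ```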
+ fn record_per_lifetime_with<F>( + &self, + lifetime: Lifetime, + storage_name: &str, + key: &str, + mut transform: F, + ) -> Result<()> + where + F: FnMut(Option<Metric>) -> Metric, + { + let final_key = Self::get_storage_key(storage_name, Some(key)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable"); + let entry = data.entry(final_key); + match entry { + Entry::Vacant(entry) => { + entry.insert(transform(None)); + } + Entry::Occupied(mut entry) => { + let old_value = entry.get().clone(); + entry.insert(transform(Some(old_value))); + } + } + return Ok(()); + } + } + + let mut writer = self.rkv.write()?; + let store = self.get_store(lifetime); + let new_value: Metric = { + let old_value = store.get(&writer, &final_key)?; + + match old_value { + Some(rkv::Value::Blob(blob)) => { + let old_value = bincode::deserialize(blob).ok(); + transform(old_value) + } + _ => transform(None), + } + }; + + let encoded = + bincode::serialize(&new_value).expect("IMPOSSIBLE: Serializing metric failed"); + let value = rkv::Value::Blob(&encoded); + store.put(&mut writer, final_key, &value)?; + writer.commit()?; + Ok(()) + } + + /// Clears a storage (only Ping Lifetime). + /// + /// # Returns + /// + /// * If the storage is unavailable an error is returned. + /// * If any individual delete fails, an error is returned, but other deletions might have + /// happened. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. + pub fn clear_ping_lifetime_storage(&self, storage_name: &str) -> Result<()> { + // Lifetime::Ping data will be saved to `ping_lifetime_data` + // in case `delay_ping_lifetime_io` is set to true + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable") + .retain(|metric_id, _| !metric_id.starts_with(storage_name)); + } + + self.write_with_store(Lifetime::Ping, |mut writer, store| { + let mut metrics = Vec::new(); + { + let mut iter = store.iter_from(&writer, storage_name)?; + while let Some(Ok((metric_id, _))) = iter.next() { + if let Ok(metric_id) = std::str::from_utf8(metric_id) { + if !metric_id.starts_with(storage_name) { + break; + } + metrics.push(metric_id.to_owned()); + } + } + } + + let mut res = Ok(()); + for to_delete in metrics { + if let Err(e) = store.delete(&mut writer, to_delete) { + log::warn!("Can't delete from store: {:?}", e); + res = Err(e); + } + } + + writer.commit()?; + Ok(res?) + }) + } + + /// Removes a single metric from the storage. + /// + /// # Arguments + /// + /// * `lifetime` - the lifetime of the storage in which to look for the metric. + /// * `storage_name` - the name of the storage to store/fetch data from. + /// * `metric_id` - the metric category + name. + /// + /// # Returns + /// + /// * If the storage is unavailable an error is returned. + /// * If the metric could not be deleted, an error is returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
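+    ///
+    /// # Example
+    ///
+    /// Illustrative only (an editorial addition, with hypothetical names):
+    /// remove `category.name` from the "metrics" storage for the ping lifetime.
+    ///
+    /// ```rust,ignore
+    /// db.remove_single_metric(Lifetime::Ping, "metrics", "category.name")?;
+    /// ```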
+ pub fn remove_single_metric( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_id: &str, + ) -> Result<()> { + let final_key = Self::get_storage_key(storage_name, Some(metric_id)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't access app lifetime data as writable"); + data.remove(&final_key); + } + } + + self.write_with_store(lifetime, |mut writer, store| { + if let Err(e) = store.delete(&mut writer, final_key.clone()) { + if self.ping_lifetime_data.is_some() { + // If ping_lifetime_data exists, it might be + // that data is in memory, but not yet in rkv. + return Ok(()); + } + return Err(e.into()); + } + writer.commit()?; + Ok(()) + }) + } + + /// Clears all the metrics in the database, for the provided lifetime. + /// + /// Errors are logged. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn clear_lifetime(&self, lifetime: Lifetime) { + let res = self.write_with_store(lifetime, |mut writer, store| { + store.clear(&mut writer)?; + writer.commit()?; + Ok(()) + }); + + if let Err(e) = res { + // We try to clear everything. + // If there was no data to begin with we encounter a `NotFound` error. + // There's no point in logging that. + if let ErrorKind::Rkv(StoreError::IoError(ioerr)) = e.kind() { + if let io::ErrorKind::NotFound = ioerr.kind() { + log::debug!( + "Could not clear store for lifetime {:?}: {:?}", + lifetime, + ioerr + ); + return; + } + } + + log::warn!("Could not clear store for lifetime {:?}: {:?}", lifetime, e); + } + } + + /// Clears all metrics in the database. + /// + /// Errors are logged. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn clear_all(&self) { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable") + .clear(); + } + + for lifetime in [Lifetime::User, Lifetime::Ping, Lifetime::Application].iter() { + self.clear_lifetime(*lifetime); + } + } + + /// Persists ping_lifetime_data to disk. + /// + /// Does nothing in case there is nothing to persist. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn persist_ping_lifetime_data(&self) -> Result<()> { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let data = ping_lifetime_data + .read() + .expect("Can't read ping lifetime data"); + + self.write_with_store(Lifetime::Ping, |mut writer, store| { + for (key, value) in data.iter() { + let encoded = + bincode::serialize(&value).expect("IMPOSSIBLE: Serializing metric failed"); + // There is no need for `get_storage_key` here because + // the key is already formatted from when it was saved + // to ping_lifetime_data. 
+                    store.put(&mut writer, key, &rkv::Value::Blob(&encoded))?;
+                }
+                writer.commit()?;
+                Ok(())
+            })?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::tests::new_glean;
+    use std::collections::HashMap;
+    use std::path::Path;
+    use tempfile::tempdir;
+
+    #[test]
+    fn test_errors_if_dir_creation_fails() {
+        let path = Path::new("/!#\"'@#°ç");
+        assert!(Database::new(path, false).is_err());
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn windows_invalid_utf16_panicfree() {
+        use std::ffi::OsString;
+        use std::os::windows::prelude::*;
+
+        // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+        // respectively. The value 0xD800 is a lone surrogate half, invalid
+        // in a UTF-16 sequence.
+        let source = [0x0066, 0x006f, 0xD800, 0x006f];
+        let os_string = OsString::from_wide(&source[..]);
+        let os_str = os_string.as_os_str();
+        let dir = tempdir().unwrap();
+        let path = dir.path().join(os_str);
+
+        let res = Database::new(&path, false);
+
+        assert!(
+            res.is_ok(),
+            "Database should succeed at {}: {:?}",
+            path.display(),
+            res
+        );
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn linux_invalid_utf8_panicfree() {
+        use std::ffi::OsStr;
+        use std::os::unix::ffi::OsStrExt;
+
+        // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+        // respectively. The value 0x80 is a lone continuation byte, invalid
+        // in a UTF-8 sequence.
+        let source = [0x66, 0x6f, 0x80, 0x6f];
+        let os_str = OsStr::from_bytes(&source[..]);
+        let dir = tempdir().unwrap();
+        let path = dir.path().join(os_str);
+
+        let res = Database::new(&path, false);
+        assert!(
+            res.is_ok(),
+            "Database should not fail at {}: {:?}",
+            path.display(),
+            res
+        );
+    }
+
+    #[test]
+    #[cfg(target_os = "macos")]
+    fn macos_invalid_utf8_panicfree() {
+        use std::ffi::OsStr;
+        use std::os::unix::ffi::OsStrExt;
+
+        // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+        // respectively. The value 0x80 is a lone continuation byte, invalid
+        // in a UTF-8 sequence.
+        let source = [0x66, 0x6f, 0x80, 0x6f];
+        let os_str = OsStr::from_bytes(&source[..]);
+        let dir = tempdir().unwrap();
+        let path = dir.path().join(os_str);
+
+        let res = Database::new(&path, false);
+        assert!(
+            res.is_err(),
+            "Database should fail at {}: {:?}",
+            path.display(),
+            res
+        );
+    }
+
+    #[test]
+    fn test_data_dir_rkv_inits() {
+        let dir = tempdir().unwrap();
+        Database::new(dir.path(), false).unwrap();
+
+        assert!(dir.path().exists());
+    }
+
+    #[test]
+    fn test_ping_lifetime_metric_recorded() {
+        // Init the database in a temporary directory.
+        let dir = tempdir().unwrap();
+        let db = Database::new(dir.path(), false).unwrap();
+
+        assert!(db.ping_lifetime_data.is_none());
+
+        // Attempt to record a known value.
+        let test_value = "test-value";
+        let test_storage = "test-storage";
+        let test_metric_id = "telemetry_test.test_name";
+        db.record_per_lifetime(
+            Lifetime::Ping,
+            test_storage,
+            test_metric_id,
+            &Metric::String(test_value.to_string()),
+        )
+        .unwrap();
+
+        // Verify that the data is correctly recorded.
+ let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + assert_eq!(1, found_metrics, "We only expect 1 Lifetime.Ping metric."); + } + + #[test] + fn test_application_lifetime_metric_recorded() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let db = Database::new(dir.path(), false).unwrap(); + + // Attempt to record a known value. + let test_value = "test-value"; + let test_storage = "test-storage1"; + let test_metric_id = "telemetry_test.test_name"; + db.record_per_lifetime( + Lifetime::Application, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that the data is correctly recorded. + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + assert_eq!( + 1, found_metrics, + "We only expect 1 Lifetime.Application metric." + ); + } + + #[test] + fn test_user_lifetime_metric_recorded() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let db = Database::new(dir.path(), false).unwrap(); + + // Attempt to record a known value. + let test_value = "test-value"; + let test_storage = "test-storage2"; + let test_metric_id = "telemetry_test.test_name"; + db.record_per_lifetime( + Lifetime::User, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that the data is correctly recorded. + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + assert_eq!(1, found_metrics, "We only expect 1 Lifetime.User metric."); + } + + #[test] + fn test_clear_ping_storage() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let db = Database::new(dir.path(), false).unwrap(); + + // Attempt to record a known value for every single lifetime. + let test_storage = "test-storage"; + db.record_per_lifetime( + Lifetime::User, + test_storage, + "telemetry_test.test_name_user", + &Metric::String("test-value-user".to_string()), + ) + .unwrap(); + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + "telemetry_test.test_name_ping", + &Metric::String("test-value-ping".to_string()), + ) + .unwrap(); + db.record_per_lifetime( + Lifetime::Application, + test_storage, + "telemetry_test.test_name_application", + &Metric::String("test-value-application".to_string()), + ) + .unwrap(); + + // Take a snapshot for the data, all the lifetimes. 
+ { + let mut snapshot: HashMap<String, String> = HashMap::new(); + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + match metric { + Metric::String(s) => snapshot.insert(metric_id, s.to_string()), + _ => panic!("Unexpected data found"), + }; + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + + assert_eq!(3, snapshot.len(), "We expect all lifetimes to be present."); + assert!(snapshot.contains_key("telemetry_test.test_name_user")); + assert!(snapshot.contains_key("telemetry_test.test_name_ping")); + assert!(snapshot.contains_key("telemetry_test.test_name_application")); + } + + // Clear the Ping lifetime. + db.clear_ping_lifetime_storage(test_storage).unwrap(); + + // Take a snapshot again and check that we're only clearing the Ping lifetime. + { + let mut snapshot: HashMap<String, String> = HashMap::new(); + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + match metric { + Metric::String(s) => snapshot.insert(metric_id, s.to_string()), + _ => panic!("Unexpected data found"), + }; + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + + assert_eq!(2, snapshot.len(), "We only expect 2 metrics to be left."); + assert!(snapshot.contains_key("telemetry_test.test_name_user")); + assert!(snapshot.contains_key("telemetry_test.test_name_application")); + } + } + + #[test] + fn test_remove_single_metric() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let db = Database::new(dir.path(), false).unwrap(); + + let test_storage = "test-storage-single-lifetime"; + let metric_id_pattern = "telemetry_test.single_metric"; + + // Write sample metrics to the database. + let lifetimes = vec![Lifetime::User, Lifetime::Ping, Lifetime::Application]; + + for lifetime in lifetimes.iter() { + for value in &["retain", "delete"] { + db.record_per_lifetime( + *lifetime, + test_storage, + &format!("{}_{}", metric_id_pattern, value), + &Metric::String((*value).to_string()), + ) + .unwrap(); + } + } + + // Remove "telemetry_test.single_metric_delete" from each lifetime. + for lifetime in lifetimes.iter() { + db.remove_single_metric( + *lifetime, + test_storage, + &format!("{}_delete", metric_id_pattern), + ) + .unwrap(); + } + + // Verify that "telemetry_test.single_metric_retain" is still around for all lifetimes. + for lifetime in lifetimes.iter() { + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(format!("{}_retain", metric_id_pattern), metric_id); + match metric { + Metric::String(s) => assert_eq!("retain", s), + _ => panic!("Unexpected data found"), + } + }; + + // Check the User lifetime. + db.iter_store_from(*lifetime, test_storage, None, &mut snapshotter); + assert_eq!( + 1, found_metrics, + "We only expect 1 metric for this lifetime." + ); + } + } + + #[test] + fn test_delayed_ping_lifetime_persistence() { + // Init the database in a temporary directory. 
+        let dir = tempdir().unwrap();
+        let db = Database::new(dir.path(), true).unwrap();
+        let test_storage = "test-storage";
+
+        assert!(db.ping_lifetime_data.is_some());
+
+        // Attempt to record a known value.
+        let test_value1 = "test-value1";
+        let test_metric_id1 = "telemetry_test.test_name1";
+        db.record_per_lifetime(
+            Lifetime::Ping,
+            test_storage,
+            test_metric_id1,
+            &Metric::String(test_value1.to_string()),
+        )
+        .unwrap();
+
+        // Attempt to persist data.
+        db.persist_ping_lifetime_data().unwrap();
+
+        // Attempt to record another known value.
+        let test_value2 = "test-value2";
+        let test_metric_id2 = "telemetry_test.test_name2";
+        db.record_per_lifetime(
+            Lifetime::Ping,
+            test_storage,
+            test_metric_id2,
+            &Metric::String(test_value2.to_string()),
+        )
+        .unwrap();
+
+        {
+            // At this stage we expect `test_value1` to be persisted and in memory,
+            // since it was recorded before calling `persist_ping_lifetime_data`,
+            // and `test_value2` to be only in memory, since it was recorded after.
+            let store: SingleStore = db
+                .rkv
+                .open_single(Lifetime::Ping.as_str(), StoreOptions::create())
+                .unwrap();
+            let reader = db.rkv.read().unwrap();
+
+            // Verify that test_value1 is in rkv.
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id1))
+                .unwrap_or(None)
+                .is_some());
+            // Verify that test_value2 is **not** in rkv.
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id2))
+                .unwrap_or(None)
+                .is_none());
+
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            // Verify that test_value1 is also in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id1))
+                .is_some());
+            // Verify that test_value2 is in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id2))
+                .is_some());
+        }
+
+        // Attempt to persist data again.
+        db.persist_ping_lifetime_data().unwrap();
+
+        {
+            // At this stage we expect `test_value1` and `test_value2` to
+            // be persisted, since both were created before a call to
+            // `persist_ping_lifetime_data`.
+            let store: SingleStore = db
+                .rkv
+                .open_single(Lifetime::Ping.as_str(), StoreOptions::create())
+                .unwrap();
+            let reader = db.rkv.read().unwrap();
+
+            // Verify that test_value1 is in rkv.
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id1))
+                .unwrap_or(None)
+                .is_some());
+            // Verify that test_value2 is also in rkv.
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id2))
+                .unwrap_or(None)
+                .is_some());
+
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            // Verify that test_value1 is also in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id1))
+                .is_some());
+            // Verify that test_value2 is also in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id2))
+                .is_some());
+        }
+    }
+
+    #[test]
+    fn test_load_ping_lifetime_data_from_memory() {
+        // Init the database in a temporary directory.
+        let dir = tempdir().unwrap();
+
+        let test_storage = "test-storage";
+        let test_value = "test-value";
+        let test_metric_id = "telemetry_test.test_name";
+
+        {
+            let db = Database::new(dir.path(), true).unwrap();
+
+            // Attempt to record a known value.
+            db.record_per_lifetime(
+                Lifetime::Ping,
+                test_storage,
+                test_metric_id,
+                &Metric::String(test_value.to_string()),
+            )
+            .unwrap();
+
+            // Verify that test_value is in memory.
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id))
+                .is_some());
+
+            // Attempt to persist data.
+            db.persist_ping_lifetime_data().unwrap();
+
+            // Verify that test_value is now in rkv.
+            let store: SingleStore = db
+                .rkv
+                .open_single(Lifetime::Ping.as_str(), StoreOptions::create())
+                .unwrap();
+            let reader = db.rkv.read().unwrap();
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id))
+                .unwrap_or(None)
+                .is_some());
+        }
+
+        // Now create a new instance of the db and check if data was
+        // correctly loaded from rkv to memory.
+        {
+            let db = Database::new(dir.path(), true).unwrap();
+
+            // Verify that test_value is in memory.
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id))
+                .is_some());
+
+            // Verify that test_value is also in rkv.
+            let store: SingleStore = db
+                .rkv
+                .open_single(Lifetime::Ping.as_str(), StoreOptions::create())
+                .unwrap();
+            let reader = db.rkv.read().unwrap();
+            assert!(store
+                .get(&reader, format!("{}#{}", test_storage, test_metric_id))
+                .unwrap_or(None)
+                .is_some());
+        }
+    }
+
+    #[test]
+    fn test_delayed_ping_lifetime_clear() {
+        // Init the database in a temporary directory.
+        let dir = tempdir().unwrap();
+        let db = Database::new(dir.path(), true).unwrap();
+        let test_storage = "test-storage";
+
+        assert!(db.ping_lifetime_data.is_some());
+
+        // Attempt to record a known value.
+        let test_value1 = "test-value1";
+        let test_metric_id1 = "telemetry_test.test_name1";
+        db.record_per_lifetime(
+            Lifetime::Ping,
+            test_storage,
+            test_metric_id1,
+            &Metric::String(test_value1.to_string()),
+        )
+        .unwrap();
+
+        {
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            // Verify that test_value1 is in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id1))
+                .is_some());
+        }
+
+        // Clear ping lifetime storage for a storage that isn't test_storage.
+        // Doesn't matter what it's called, just that it isn't test_storage.
+        db.clear_ping_lifetime_storage(&(test_storage.to_owned() + "x"))
+            .unwrap();
+
+        {
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            // Verify that test_value1 is still in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id1))
+                .is_some());
+        }
+
+        // Clear test_storage's ping lifetime storage.
+        db.clear_ping_lifetime_storage(test_storage).unwrap();
+
+        {
+            let data = match &db.ping_lifetime_data {
+                Some(ping_lifetime_data) => ping_lifetime_data,
+                None => panic!("Expected `ping_lifetime_data` to exist here!"),
+            };
+            let data = data.read().unwrap();
+            // Verify that test_value1 is no longer in memory.
+            assert!(data
+                .get(&format!("{}#{}", test_storage, test_metric_id1))
+                .is_none());
+        }
+    }
+
+    #[test]
+    fn doesnt_record_when_upload_is_disabled() {
+        let (mut glean, dir) = new_glean(None);
+
+        let test_storage = "test-storage";
+        let test_data = CommonMetricDataInternal::new("category", "name", test_storage);
+        let test_metric_id = test_data.identifier(&glean);
+
+        // Init the database in a temporary directory.
+        let db = Database::new(dir.path(), true).unwrap();
+
+        // Attempt to record a metric with the `record` and `record_with` functions;
+        // this should succeed since upload is enabled.
+        db.record(&glean, &test_data, &Metric::String("record".to_owned()));
+        db.iter_store_from(
+            Lifetime::Ping,
+            test_storage,
+            None,
+            &mut |metric_id: &[u8], metric: &Metric| {
+                assert_eq!(
+                    String::from_utf8_lossy(metric_id).into_owned(),
+                    test_metric_id
+                );
+                match metric {
+                    Metric::String(v) => assert_eq!("record", *v),
+                    _ => panic!("Unexpected data found"),
+                }
+            },
+        );
+
+        db.record_with(&glean, &test_data, |_| {
+            Metric::String("record_with".to_owned())
+        });
+        db.iter_store_from(
+            Lifetime::Ping,
+            test_storage,
+            None,
+            &mut |metric_id: &[u8], metric: &Metric| {
+                assert_eq!(
+                    String::from_utf8_lossy(metric_id).into_owned(),
+                    test_metric_id
+                );
+                match metric {
+                    Metric::String(v) => assert_eq!("record_with", *v),
+                    _ => panic!("Unexpected data found"),
+                }
+            },
+        );
+
+        // Disable upload.
+        glean.set_upload_enabled(false);
+
+        // Attempt to record a metric with the `record` and `record_with` functions;
+        // these should be no-ops since upload is now **disabled**, so the
+        // previously recorded values must remain unchanged.
+        db.record(&glean, &test_data, &Metric::String("record_nop".to_owned()));
+        db.iter_store_from(
+            Lifetime::Ping,
+            test_storage,
+            None,
+            &mut |metric_id: &[u8], metric: &Metric| {
+                assert_eq!(
+                    String::from_utf8_lossy(metric_id).into_owned(),
+                    test_metric_id
+                );
+                match metric {
+                    Metric::String(v) => assert_eq!("record_with", *v),
+                    _ => panic!("Unexpected data found"),
+                }
+            },
+        );
+        db.record_with(&glean, &test_data, |_| {
+            Metric::String("record_with_nop".to_owned())
+        });
+        db.iter_store_from(
+            Lifetime::Ping,
+            test_storage,
+            None,
+            &mut |metric_id: &[u8], metric: &Metric| {
+                assert_eq!(
+                    String::from_utf8_lossy(metric_id).into_owned(),
+                    test_metric_id
+                );
+                match metric {
+                    Metric::String(v) => assert_eq!("record_with", *v),
+                    _ => panic!("Unexpected data found"),
+                }
+            },
+        );
+    }
+
+    mod safe_mode {
+        use std::fs::File;
+
+        use super::*;
+        use rkv::Value;
+
+        #[test]
+        fn empty_data_file() {
+            let dir = tempdir().unwrap();
+
+            // Create database directory structure.
+            let database_dir = dir.path().join("db");
+            fs::create_dir_all(&database_dir).expect("create database dir");
+
+            // Create an empty database file.
+            let safebin = database_dir.join("data.safe.bin");
+            let f = File::create(safebin).expect("create database file");
+            drop(f);
+
+            Database::new(dir.path(), false).unwrap();
+
+            assert!(dir.path().exists());
+        }
+
+        #[test]
+        fn corrupted_data_file() {
+            let dir = tempdir().unwrap();
+
+            // Create database directory structure.
+            let database_dir = dir.path().join("db");
+            fs::create_dir_all(&database_dir).expect("create database dir");
+
+            // Create a corrupted database file.
+ let safebin = database_dir.join("data.safe.bin"); + fs::write(safebin, "<broken>").expect("write to database file"); + + Database::new(dir.path(), false).unwrap(); + + assert!(dir.path().exists()); + } + + #[test] + fn migration_works_on_startup() { + let dir = tempdir().unwrap(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "bool"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Boolean(true); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + assert!(!safebin.exists()); + } + + // First open should migrate the data. + { + let db = Database::new(dir.path(), false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Boolean(true), &stored_metrics[0].1); + } + + // Next open should not re-create the LMDB files. + { + let db = Database::new(dir.path(), false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file exists"); + assert!(!datamdb.exists(), "LMDB data should not be recreated"); + assert!(!lockmdb.exists(), "LMDB lock should not be recreated"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Boolean(true), &stored_metrics[0].1); + } + } + + #[test] + fn migration_doesnt_overwrite() { + let dir = tempdir().unwrap(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "counter"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. 
+ { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(734); // this value will be ignored + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + } + + // Ensure some data exists in the new database. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = + rkv::Rkv::new::<rkv::backend::SafeMode>(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(2); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(safebin.exists()); + } + + // First open should try migration and ignore it, because destination is not empty. + // It also deletes the leftover LMDB database. + { + let db = Database::new(dir.path(), false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Counter(2), &stored_metrics[0].1); + } + } + + #[test] + fn migration_ignores_broken_database() { + let dir = tempdir().unwrap(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "counter"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + fs::write(&datamdb, "bogus").expect("dbfile created"); + + assert!(datamdb.exists()); + } + + // Ensure some data exists in the new database. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = + rkv::Rkv::new::<rkv::backend::SafeMode>(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(2); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + } + + // First open should try migration and ignore it, because destination is not empty. + // It also deletes the leftover LMDB database. 
+            {
+                let db = Database::new(dir.path(), false).unwrap();
+                let safebin = database_dir.join("data.safe.bin");
+                assert!(safebin.exists(), "safe-mode file should exist");
+                assert!(!datamdb.exists(), "LMDB data should be deleted");
+                assert!(!lockmdb.exists(), "LMDB lock should be deleted");
+
+                let mut stored_metrics = vec![];
+                let mut snapshotter = |name: &[u8], metric: &Metric| {
+                    let name = str::from_utf8(name).unwrap().to_string();
+                    stored_metrics.push((name, metric.clone()))
+                };
+                db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter);
+
+                assert_eq!(1, stored_metrics.len());
+                assert_eq!(metric_name, stored_metrics[0].0);
+                assert_eq!(&Metric::Counter(2), &stored_metrics[0].1);
+            }
+        }
+
+        #[test]
+        fn migration_ignores_empty_database() {
+            let dir = tempdir().unwrap();
+
+            let database_dir = dir.path().join("db");
+            let datamdb = database_dir.join("data.mdb");
+            let lockmdb = database_dir.join("lock.mdb");
+            let safebin = database_dir.join("data.safe.bin");
+
+            assert!(!safebin.exists());
+            assert!(!datamdb.exists());
+            assert!(!lockmdb.exists());
+
+            // Ensure the old LMDB database exists, but is empty.
+            {
+                fs::create_dir_all(&database_dir).expect("create dir");
+                let rkv_db = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env");
+                drop(rkv_db);
+                assert!(datamdb.exists());
+                assert!(lockmdb.exists());
+            }
+
+            // First open should try migration, but find no data.
+            // safe-mode does not write an empty database to disk.
+            // It also deletes the leftover LMDB database.
+            {
+                let _db = Database::new(dir.path(), false).unwrap();
+                let safebin = database_dir.join("data.safe.bin");
+                assert!(!safebin.exists(), "safe-mode file should not exist");
+                assert!(!datamdb.exists(), "LMDB data should be deleted");
+                assert!(!lockmdb.exists(), "LMDB lock should be deleted");
+            }
+        }
+    }
+}
diff --git a/third_party/rust/glean-core/src/debug.rs b/third_party/rust/glean-core/src/debug.rs
new file mode 100644
index 0000000000..a572a02b8f
--- /dev/null
+++ b/third_party/rust/glean-core/src/debug.rs
@@ -0,0 +1,319 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! # Debug options
+//!
+//! The debug options for Glean may be set by calling one of the `set_*` functions
+//! or by setting specific environment variables.
+//!
+//! The environment variables will be read only once when the options are initialized.
+//!
+//! The possible debugging features available out of the box are:
+//!
+//! * **Ping logging** - logging the contents of ping requests that are correctly assembled;
+//!   This may be set by calling glean.set_log_pings(value: bool)
+//!   or by setting the environment variable GLEAN_LOG_PINGS="true";
+//! * **Debug tagging** - Adding the X-Debug-ID header to every ping request,
+//!   allowing these tagged pings to be sent to the ["Ping Debug Viewer"](https://mozilla.github.io/glean/book/dev/core/internal/debug-pings.html).
+//!   This may be set by calling glean.set_debug_view_tag(value: &str)
+//!   or by setting the environment variable GLEAN_DEBUG_VIEW_TAG=<some tag>;
+//! * **Source tagging** - Adding the X-Source-Tags header to every ping request,
+//!   allowing pings to be tagged with custom labels.
+//!   This may be set by calling glean.set_source_tags(value: Vec<String>)
+//!   or by setting the environment variable GLEAN_SOURCE_TAGS=<some, tags>;
+//!
+//! Bindings may implement other debugging features, e.g. sending pings on demand.
+
+use std::env;
+
+const GLEAN_LOG_PINGS: &str = "GLEAN_LOG_PINGS";
+const GLEAN_DEBUG_VIEW_TAG: &str = "GLEAN_DEBUG_VIEW_TAG";
+const GLEAN_SOURCE_TAGS: &str = "GLEAN_SOURCE_TAGS";
+const GLEAN_MAX_SOURCE_TAGS: usize = 5;
+
+/// A representation of all of Glean's debug options.
+pub struct DebugOptions {
+    /// Option to log the payload of pings that are successfully assembled into a ping request.
+    pub log_pings: DebugOption<bool>,
+    /// Option to add the X-Debug-ID header to every ping request.
+    pub debug_view_tag: DebugOption<String>,
+    /// Option to add the X-Source-Tags header to ping requests. This will allow the data
+    /// consumers to classify data depending on the applied tags.
+    pub source_tags: DebugOption<Vec<String>>,
+}
+
+impl std::fmt::Debug for DebugOptions {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        fmt.debug_struct("DebugOptions")
+            .field("log_pings", &self.log_pings.get())
+            .field("debug_view_tag", &self.debug_view_tag.get())
+            .field("source_tags", &self.source_tags.get())
+            .finish()
+    }
+}
+
+impl DebugOptions {
+    pub fn new() -> Self {
+        Self {
+            log_pings: DebugOption::new(GLEAN_LOG_PINGS, get_bool_from_str, None),
+            debug_view_tag: DebugOption::new(GLEAN_DEBUG_VIEW_TAG, Some, Some(validate_tag)),
+            source_tags: DebugOption::new(
+                GLEAN_SOURCE_TAGS,
+                tokenize_string,
+                Some(validate_source_tags),
+            ),
+        }
+    }
+}
+
+/// A representation of a debug option,
+/// where the value can be set programmatically or come from an environment variable.
+#[derive(Debug)]
+pub struct DebugOption<T, E = fn(String) -> Option<T>, V = fn(&T) -> bool> {
+    /// The name of the environment variable related to this debug option.
+    env: String,
+    /// The actual value of this option.
+    value: Option<T>,
+    /// Function to extract the data of type `T` from a `String`, used when
+    /// extracting data from the environment.
+    extraction: E,
+    /// Optional function to validate the value parsed from the environment
+    /// or passed to the `set` function.
+    validation: Option<V>,
+}
+
+impl<T, E, V> DebugOption<T, E, V>
+where
+    T: Clone,
+    E: Fn(String) -> Option<T>,
+    V: Fn(&T) -> bool,
+{
+    /// Creates a new debug option.
+    ///
+    /// Tries to get the initial value of the option from the environment.
+    pub fn new(env: &str, extraction: E, validation: Option<V>) -> Self {
+        let mut option = Self {
+            env: env.into(),
+            value: None,
+            extraction,
+            validation,
+        };
+
+        option.set_from_env();
+        option
+    }
+
+    fn validate(&self, value: &T) -> bool {
+        if let Some(f) = self.validation.as_ref() {
+            f(value)
+        } else {
+            true
+        }
+    }
+
+    fn set_from_env(&mut self) {
+        let extract = &self.extraction;
+        match env::var(&self.env) {
+            Ok(env_value) => match extract(env_value.clone()) {
+                Some(v) => {
+                    self.set(v);
+                }
+                None => {
+                    log::error!(
+                        "Unable to parse debug option {}={} into {}. Ignoring.",
+                        self.env,
+                        env_value,
+                        std::any::type_name::<T>()
+                    );
+                }
+            },
+            Err(env::VarError::NotUnicode(_)) => {
+                log::error!("The value of {} is not valid unicode. Ignoring.", self.env)
+            }
+            // The other possible error is that the env var is not set,
+            // which is not an error for us and can safely be ignored.
+            Err(_) => {}
+        }
+    }
+
+    /// Tries to set a value for this debug option.
+    ///
+    /// Validates the value in case a validation function is available.
+    ///
+    /// # Returns
+    ///
+    /// Whether the option passed validation and was successfully set.
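+    ///
+    /// # Example
+    ///
+    /// A minimal sketch (an editorial addition, not upstream documentation):
+    /// programmatically enable ping logging through the owning `DebugOptions`.
+    ///
+    /// ```rust,ignore
+    /// let mut options = DebugOptions::new();
+    /// assert!(options.log_pings.set(true));
+    /// assert_eq!(Some(&true), options.log_pings.get());
+    /// ```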
+    pub fn set(&mut self, value: T) -> bool {
+        let validated = self.validate(&value);
+        if validated {
+            log::info!("Setting the debug option {}.", self.env);
+            self.value = Some(value);
+            return true;
+        }
+        log::error!("Invalid value for debug option {}.", self.env);
+        false
+    }
+
+    /// Gets the value of this debug option.
+    pub fn get(&self) -> Option<&T> {
+        self.value.as_ref()
+    }
+}
+
+fn get_bool_from_str(value: String) -> Option<bool> {
+    std::str::FromStr::from_str(&value).ok()
+}
+
+fn tokenize_string(value: String) -> Option<Vec<String>> {
+    let trimmed = value.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    Some(trimmed.split(',').map(|s| s.trim().to_string()).collect())
+}
+
+/// A tag is the value used in both the `X-Debug-ID` and `X-Source-Tags` headers
+/// of tagged ping requests, thus it must be a valid header value.
+///
+/// In other words, it must match the regex: "[a-zA-Z0-9-]{1,20}"
+///
+/// The regex crate isn't used here because it adds to the binary size,
+/// and the Glean SDK doesn't use regular expressions anywhere else.
+#[allow(clippy::ptr_arg)]
+fn validate_tag(value: &String) -> bool {
+    if value.is_empty() {
+        log::error!("A tag must have at least one character.");
+        return false;
+    }
+
+    let mut iter = value.chars();
+    let mut count = 0;
+
+    loop {
+        match iter.next() {
+            // We are done, so the whole expression is valid.
+            None => return true,
+            // Valid characters.
+            Some('-') | Some('a'..='z') | Some('A'..='Z') | Some('0'..='9') => (),
+            // An invalid character.
+            Some(c) => {
+                log::error!("Invalid character '{}' in the tag.", c);
+                return false;
+            }
+        }
+        count += 1;
+        // Reject only past the 20th character, so that tags of exactly
+        // 20 characters are accepted, matching the documented regex.
+        if count > 20 {
+            log::error!("A tag cannot exceed 20 characters.");
+            return false;
+        }
+    }
+}
+
+/// Validate the list of source tags.
+///
+/// This builds upon the existing `validate_tag` function, since all the
+/// tags should respect the same rules to make the pipeline happy.
+#[allow(clippy::ptr_arg)]
+fn validate_source_tags(tags: &Vec<String>) -> bool {
+    if tags.is_empty() {
+        return false;
+    }
+
+    if tags.len() > GLEAN_MAX_SOURCE_TAGS {
+        log::error!(
+            "A list of tags cannot contain more than {} elements.",
+            GLEAN_MAX_SOURCE_TAGS
+        );
+        return false;
+    }
+
+    if tags.iter().any(|s| s.starts_with("glean")) {
+        log::error!("Tags starting with `glean` are reserved and must not be used.");
+        return false;
+    }
+
+    tags.iter().all(validate_tag)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::env;
+
+    #[test]
+    fn debug_option_is_correctly_loaded_from_env() {
+        env::set_var("GLEAN_TEST_1", "test");
+        let option: DebugOption<String> = DebugOption::new("GLEAN_TEST_1", Some, None);
+        assert_eq!(option.get().unwrap(), "test");
+    }
+
+    #[test]
+    fn debug_option_is_correctly_validated_when_necessary() {
+        #[allow(clippy::ptr_arg)]
+        fn validate(value: &String) -> bool {
+            value == "test"
+        }
+
+        // Invalid values from the env are not set.
+        env::set_var("GLEAN_TEST_2", "invalid");
+        let mut option: DebugOption<String> =
+            DebugOption::new("GLEAN_TEST_2", Some, Some(validate));
+        assert!(option.get().is_none());
+
+        // Valid values are set using the `set` function.
+        assert!(option.set("test".into()));
+        assert_eq!(option.get().unwrap(), "test");
+
+        // Invalid values are not set using the `set` function.
+        assert!(!option.set("invalid".into()));
+        assert_eq!(option.get().unwrap(), "test");
+    }
+
+    #[test]
+    fn tokenize_string_splits_correctly() {
+        // Valid list is properly tokenized and spaces are trimmed.
+ assert_eq!( + Some(vec!["test1".to_string(), "test2".to_string()]), + tokenize_string(" test1, test2 ".to_string()) + ); + + // Empty strings return no item. + assert_eq!(None, tokenize_string("".to_string())); + } + + #[test] + fn validates_tag_correctly() { + assert!(validate_tag(&"valid-value".to_string())); + assert!(validate_tag(&"-also-valid-value".to_string())); + assert!(!validate_tag(&"invalid_value".to_string())); + assert!(!validate_tag(&"invalid value".to_string())); + assert!(!validate_tag(&"!nv@lid-val*e".to_string())); + assert!(!validate_tag( + &"invalid-value-because-way-too-long".to_string() + )); + assert!(!validate_tag(&"".to_string())); + } + + #[test] + fn validates_source_tags_correctly() { + // Empty tags. + assert!(!validate_source_tags(&vec!["".to_string()])); + // Too many tags. + assert!(!validate_source_tags(&vec![ + "1".to_string(), + "2".to_string(), + "3".to_string(), + "4".to_string(), + "5".to_string(), + "6".to_string() + ])); + // Invalid tags. + assert!(!validate_source_tags(&vec!["!nv@lid-val*e".to_string()])); + assert!(!validate_source_tags(&vec![ + "glean-test1".to_string(), + "test2".to_string() + ])); + } +} diff --git a/third_party/rust/glean-core/src/dispatcher/global.rs b/third_party/rust/glean-core/src/dispatcher/global.rs new file mode 100644 index 0000000000..f90a681a5e --- /dev/null +++ b/third_party/rust/glean-core/src/dispatcher/global.rs @@ -0,0 +1,232 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use once_cell::sync::Lazy; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::RwLock; +use std::thread; +use std::time::Duration; + +use super::{DispatchError, DispatchGuard, Dispatcher}; +use crossbeam_channel::RecvTimeoutError; + +#[cfg(feature = "preinit_million_queue")] +pub const GLOBAL_DISPATCHER_LIMIT: usize = 1000000; +#[cfg(not(feature = "preinit_million_queue"))] +pub const GLOBAL_DISPATCHER_LIMIT: usize = 1000; + +static GLOBAL_DISPATCHER: Lazy<RwLock<Option<Dispatcher>>> = + Lazy::new(|| RwLock::new(Some(Dispatcher::new(GLOBAL_DISPATCHER_LIMIT)))); +pub static TESTING_MODE: AtomicBool = AtomicBool::new(false); +pub static QUEUE_TASKS: AtomicBool = AtomicBool::new(true); + +pub fn is_test_mode() -> bool { + TESTING_MODE.load(Ordering::SeqCst) +} + +/// Get a dispatcher for the global queue. +/// +/// A dispatcher is cheap to create, so we create one on every access instead of caching it. +/// This avoids troubles for tests where the global dispatcher _can_ change. +fn guard() -> DispatchGuard { + GLOBAL_DISPATCHER + .read() + .unwrap() + .as_ref() + .map(|dispatcher| dispatcher.guard()) + .unwrap() +} + +/// Launches a new task on the global dispatch queue. +/// +/// The new task will be enqueued immediately. +/// If the pre-init queue was already flushed, +/// the background thread will process tasks in the queue (see [`flush_init`]). +/// +/// This will not block. +/// +/// [`flush_init`]: fn.flush_init.html +pub fn launch(task: impl FnOnce() + Send + 'static) { + let current_thread = thread::current(); + if let Some("glean.shutdown") = current_thread.name() { + log::error!("Tried to launch a task from the shutdown thread. That is forbidden."); + } + + let guard = guard(); + match guard.launch(task) { + Ok(_) => {} + Err(DispatchError::QueueFull) => { + log::info!("Exceeded maximum queue size, discarding task"); + // TODO: Record this as an error. 
+ } + Err(_) => { + log::info!("Failed to launch a task on the queue. Discarding task."); + } + } + + // In test mode wait for the execution, unless we're still queueing tasks. + let is_queueing = QUEUE_TASKS.load(Ordering::SeqCst); + let is_test = TESTING_MODE.load(Ordering::SeqCst); + if !is_queueing && is_test { + guard.block_on_queue(); + } +} + +/// Block until all tasks prior to this call are processed. +pub fn block_on_queue() { + guard().block_on_queue(); +} + +/// Block until all tasks prior to this call are processed, with a timeout. +pub fn block_on_queue_timeout(timeout: Duration) -> Result<(), RecvTimeoutError> { + guard().block_on_queue_timeout(timeout) +} + +/// Starts processing queued tasks in the global dispatch queue. +/// +/// This function blocks until queued tasks prior to this call are finished. +/// Once the initial queue is empty the dispatcher will wait for new tasks to be launched. +/// +/// # Returns +/// +/// Returns the total number of items that were added to the queue before being flushed, +/// or an error if the queue couldn't be flushed. +pub fn flush_init() -> Result<usize, DispatchError> { + guard().flush_init() +} + +fn join_dispatcher_thread() -> Result<(), DispatchError> { + // After we issue the shutdown command, make sure to wait for the + // worker thread to join. + let mut lock = GLOBAL_DISPATCHER.write().unwrap(); + let dispatcher = lock.as_mut().expect("Global dispatcher has gone missing"); + + if let Some(worker) = dispatcher.worker.take() { + return worker.join().map_err(|_| DispatchError::WorkerPanic); + } + + Ok(()) +} + +/// Kill the blocked dispatcher without processing the queue. +/// +/// This will immediately shutdown the worker thread +/// and no other tasks will be processed. +/// This only has an effect when the queue is still blocked. +pub fn kill() -> Result<(), DispatchError> { + guard().kill()?; + join_dispatcher_thread() +} + +/// Shuts down the dispatch queue. +/// +/// This will initiate a shutdown of the worker thread +/// and no new tasks will be processed after this. +pub fn shutdown() -> Result<(), DispatchError> { + guard().shutdown()?; + join_dispatcher_thread() +} + +/// TEST ONLY FUNCTION. +/// Resets the Glean state and triggers init again. +pub(crate) fn reset_dispatcher() { + // We don't care about shutdown errors, since they will + // definitely happen if this is run concurrently. + // We will still replace the global dispatcher. + let _ = shutdown(); + + // New dispatcher = we're queuing again. + QUEUE_TASKS.store(true, Ordering::SeqCst); + + // Now that the dispatcher is shut down, replace it. + // For that we + // 1. Create a new + // 2. Replace the global one + // 3. Only then return (and thus release the lock) + let mut lock = GLOBAL_DISPATCHER.write().unwrap(); + let new_dispatcher = Some(Dispatcher::new(GLOBAL_DISPATCHER_LIMIT)); + *lock = new_dispatcher; +} + +#[cfg(test)] +mod test { + use std::sync::{Arc, Mutex}; + + use super::*; + + #[test] + #[ignore] // We can't reset the queue at the moment, so filling it up breaks other tests. 
+ fn global_fills_up_in_order_and_works() { + let _ = env_logger::builder().is_test(true).try_init(); + + let result = Arc::new(Mutex::new(vec![])); + + for i in 1..=GLOBAL_DISPATCHER_LIMIT { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(i); + }); + } + + { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(150); + }); + } + + flush_init().unwrap(); + + { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(200); + }); + } + + block_on_queue(); + + let mut expected = (1..=GLOBAL_DISPATCHER_LIMIT).collect::<Vec<_>>(); + expected.push(200); + assert_eq!(&*result.lock().unwrap(), &expected); + } + + #[test] + #[ignore] // We can't reset the queue at the moment, so flushing it breaks other tests. + fn global_nested_calls() { + let _ = env_logger::builder().is_test(true).try_init(); + + let result = Arc::new(Mutex::new(vec![])); + + { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(1); + }); + } + + flush_init().unwrap(); + + { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(21); + + { + let result = Arc::clone(&result); + launch(move || { + result.lock().unwrap().push(3); + }); + } + + result.lock().unwrap().push(22); + }); + } + + block_on_queue(); + + let expected = vec![1, 21, 22, 3]; + assert_eq!(&*result.lock().unwrap(), &expected); + } +} diff --git a/third_party/rust/glean-core/src/dispatcher/mod.rs b/third_party/rust/glean-core/src/dispatcher/mod.rs new file mode 100644 index 0000000000..257695c34e --- /dev/null +++ b/third_party/rust/glean-core/src/dispatcher/mod.rs @@ -0,0 +1,589 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A global dispatcher queue. +//! +//! # Example - Global Dispatch queue +//! +//! The global dispatch queue is pre-configured with a maximum queue size of 100 tasks. +//! +//! ```rust,ignore +//! // Ensure the dispatcher queue is being worked on. +//! dispatcher::flush_init(); +//! +//! dispatcher::launch(|| { +//! println!("Executing expensive task"); +//! // Run your expensive task in a separate thread. +//! }); +//! +//! dispatcher::launch(|| { +//! println!("A second task that's executed sequentially, but off the main thread."); +//! }); +//! ``` + +use std::{ + mem, + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, + }, + thread::{self, JoinHandle}, + time::Duration, +}; + +use crossbeam_channel::{bounded, unbounded, RecvTimeoutError, SendError, Sender}; +use thiserror::Error; + +pub use global::*; + +pub(crate) mod global; + +/// Command received while blocked from further work. +enum Blocked { + /// Shutdown immediately without processing the queue. + Shutdown, + /// Unblock and continue with work as normal. + Continue, +} + +/// The command a worker should execute. +enum Command { + /// A task is a user-defined function to run. + Task(Box<dyn FnOnce() + Send>), + + /// Swap the channel + Swap(Sender<()>), + + /// Signal the worker to finish work and shut down. 
+ Shutdown,
+}
+
+/// The error returned from operations on the dispatcher
+#[derive(Error, Debug, PartialEq, Eq)]
+pub enum DispatchError {
+ /// The worker panicked while running a task
+ #[error("The worker panicked while running a task")]
+ WorkerPanic,
+
+ /// Maximum queue size reached
+ #[error("Maximum queue size reached")]
+ QueueFull,
+
+ /// Pre-init buffer was already flushed
+ #[error("Pre-init buffer was already flushed")]
+ AlreadyFlushed,
+
+ /// Failed to send command to worker thread
+ #[error("Failed to send command to worker thread")]
+ SendError,
+
+ /// Failed to receive from channel
+ #[error("Failed to receive from channel")]
+ RecvError(#[from] crossbeam_channel::RecvError),
+}
+
+impl<T> From<SendError<T>> for DispatchError {
+ fn from(_: SendError<T>) -> Self {
+ DispatchError::SendError
+ }
+}
+
+/// A clonable guard for a dispatch queue.
+#[derive(Clone)]
+struct DispatchGuard {
+ /// Whether to queue on the preinit buffer or on the unbounded queue
+ queue_preinit: Arc<AtomicBool>,
+
+ /// The number of items that were added to the queue after it filled up.
+ overflow_count: Arc<AtomicUsize>,
+
+ /// The maximum pre-init queue size
+ max_queue_size: usize,
+
+ /// Used to unblock the worker thread initially.
+ block_sender: Sender<Blocked>,
+
+ /// Sender for the preinit queue.
+ preinit_sender: Sender<Command>,
+
+ /// Sender for the unbounded queue.
+ sender: Sender<Command>,
+}
+
+impl DispatchGuard {
+ pub fn launch(&self, task: impl FnOnce() + Send + 'static) -> Result<(), DispatchError> {
+ let task = Command::Task(Box::new(task));
+ self.send(task)
+ }
+
+ pub fn shutdown(&mut self) -> Result<(), DispatchError> {
+ // Need to flush in order for the thread to actually process anything,
+ // including the shutdown command.
+ self.flush_init().ok();
+ self.send(Command::Shutdown)
+ }
+
+ fn send(&self, task: Command) -> Result<(), DispatchError> {
+ if self.queue_preinit.load(Ordering::SeqCst) {
+ if self.preinit_sender.len() < self.max_queue_size {
+ self.preinit_sender.send(task)?;
+ Ok(())
+ } else {
+ self.overflow_count.fetch_add(1, Ordering::SeqCst);
+ // Instead of using a bounded queue, we are handling the bounds
+ // checking ourselves. If a bounded queue were full, we would return
+ // a QueueFull DispatchError, so we do the same here.
+ Err(DispatchError::QueueFull)
+ }
+ } else {
+ self.sender.send(task)?;
+ Ok(())
+ }
+ }
+
+ fn block_on_queue(&self) {
+ let (tx, rx) = crossbeam_channel::bounded(0);
+
+ // We explicitly don't use `self.launch` here.
+ // We always put this task on the unbounded queue.
+ // The pre-init queue might be full before it's flushed, in which case this would panic.
+ // Blocking on the queue can only work if it is eventually flushed anyway.
+
+ let task = Command::Task(Box::new(move || {
+ tx.send(())
+ .expect("(worker) Can't send message on single-use channel");
+ }));
+ self.sender
+ .send(task)
+ .expect("Failed to launch the blocking task");
+
+ rx.recv()
+ .expect("Failed to receive message on single-use channel");
+ }
+
+ /// Block on the task queue emptying, with a timeout.
+ fn block_on_queue_timeout(&self, timeout: Duration) -> Result<(), RecvTimeoutError> {
+ let (tx, rx) = crossbeam_channel::bounded(0);
+
+ // We explicitly don't use `self.launch` here.
+ // We always put this task on the unbounded queue.
+ // The pre-init queue might be full before it's flushed, in which case this would panic.
+ // Blocking on the queue can only work if it is eventually flushed anyway.
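+ // A sketch of a typical call through the global wrapper above
+ // (the timeout value here is illustrative):
+ //
+ //     let _ = guard().block_on_queue_timeout(Duration::from_secs(5));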
+ + let task = Command::Task(Box::new(move || { + tx.send(()) + .expect("(worker) Can't send message on single-use channel"); + })); + self.sender + .send(task) + .expect("Failed to launch the blocking task"); + + rx.recv_timeout(timeout) + } + + fn kill(&mut self) -> Result<(), DispatchError> { + // We immediately stop queueing in the pre-init buffer. + let old_val = self.queue_preinit.swap(false, Ordering::SeqCst); + if !old_val { + return Err(DispatchError::AlreadyFlushed); + } + + // Unblock the worker thread exactly once. + self.block_sender.send(Blocked::Shutdown)?; + Ok(()) + } + + /// Flushes the pre-init buffer. + /// + /// This function blocks until tasks queued prior to this call are finished. + /// Once the initial queue is empty the dispatcher will wait for new tasks to be launched. + /// + /// Returns an error if called multiple times. + fn flush_init(&mut self) -> Result<usize, DispatchError> { + // We immediately stop queueing in the pre-init buffer. + let old_val = self.queue_preinit.swap(false, Ordering::SeqCst); + if !old_val { + return Err(DispatchError::AlreadyFlushed); + } + + // Unblock the worker thread exactly once. + self.block_sender.send(Blocked::Continue)?; + + // Single-use channel to communicate with the worker thread. + let (swap_sender, swap_receiver) = bounded(0); + + // Send final command and block until it is sent. + self.preinit_sender + .send(Command::Swap(swap_sender)) + .map_err(|_| DispatchError::SendError)?; + + // Now wait for the worker thread to do the swap and inform us. + // This blocks until all tasks in the preinit buffer have been processed. + swap_receiver.recv()?; + + // We're not queueing anymore. + global::QUEUE_TASKS.store(false, Ordering::SeqCst); + + let overflow_count = self.overflow_count.load(Ordering::SeqCst); + if overflow_count > 0 { + Ok(overflow_count) + } else { + Ok(0) + } + } +} + +/// A dispatcher. +/// +/// Run expensive processing tasks sequentially off the main thread. +/// Tasks are processed in a single separate thread in the order they are submitted. +/// The dispatch queue will enqueue tasks while not flushed, up to the maximum queue size. +/// Processing will start after flushing once, processing already enqueued tasks first, then +/// waiting for further tasks to be enqueued. +pub struct Dispatcher { + /// Guard used for communication with the worker thread. + guard: DispatchGuard, + + /// Handle to the worker thread, allows to wait for it to finish. + worker: Option<JoinHandle<()>>, +} + +impl Dispatcher { + /// Creates a new dispatcher with a maximum queue size. + /// + /// Launched tasks won't run until [`flush_init`] is called. + /// + /// [`flush_init`]: #method.flush_init + pub fn new(max_queue_size: usize) -> Self { + let (block_sender, block_receiver) = bounded(1); + let (preinit_sender, preinit_receiver) = unbounded(); + let (sender, mut unbounded_receiver) = unbounded(); + + let queue_preinit = Arc::new(AtomicBool::new(true)); + let overflow_count = Arc::new(AtomicUsize::new(0)); + + let worker = thread::Builder::new() + .name("glean.dispatcher".into()) + .spawn(move || { + match block_receiver.recv() { + Err(_) => { + // The other side was disconnected. + // There's nothing the worker thread can do. + log::error!("The task producer was disconnected. Worker thread will exit."); + return; + } + Ok(Blocked::Shutdown) => { + // The other side wants us to stop immediately + return; + } + Ok(Blocked::Continue) => { + // Queue is unblocked, processing continues as normal. 
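+ // (Control then falls through to the loop below: it first drains
+ // the pre-init queue and, once the `Swap` command is received,
+ // continues with the unbounded queue.)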
+ } + } + + let mut receiver = preinit_receiver; + loop { + use Command::*; + + match receiver.recv() { + Ok(Shutdown) => { + break; + } + + Ok(Task(f)) => { + (f)(); + } + + Ok(Swap(swap_done)) => { + // A swap should only occur exactly once. + // This is upheld by `flush_init`, which errors out if the preinit buffer + // was already flushed. + + // We swap the channels we listen on for new tasks. + // The next iteration will continue with the unbounded queue. + mem::swap(&mut receiver, &mut unbounded_receiver); + + // The swap command MUST be the last one received on the preinit buffer, + // so by the time we run this we know all preinit tasks were processed. + // We can notify the other side. + swap_done + .send(()) + .expect("The caller of `flush_init` has gone missing"); + } + + // Other side was disconnected. + Err(_) => { + log::error!( + "The task producer was disconnected. Worker thread will exit." + ); + return; + } + } + } + }) + .expect("Failed to spawn Glean's dispatcher thread"); + + let guard = DispatchGuard { + queue_preinit, + overflow_count, + max_queue_size, + block_sender, + preinit_sender, + sender, + }; + + Dispatcher { + guard, + worker: Some(worker), + } + } + + fn guard(&self) -> DispatchGuard { + self.guard.clone() + } + + /// Waits for the worker thread to finish and finishes the dispatch queue. + /// + /// You need to call `shutdown` to initiate a shutdown of the queue. + #[cfg(test)] + fn join(mut self) -> Result<(), DispatchError> { + if let Some(worker) = self.worker.take() { + worker.join().map_err(|_| DispatchError::WorkerPanic)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; + use std::sync::{Arc, Mutex}; + use std::{thread, time::Duration}; + + fn enable_test_logging() { + // When testing we want all logs to go to stdout/stderr by default, + // without requiring each individual test to activate it. + let _ = env_logger::builder().is_test(true).try_init(); + } + + #[test] + fn tasks_run_off_the_main_thread() { + enable_test_logging(); + + let main_thread_id = thread::current().id(); + let thread_canary = Arc::new(AtomicBool::new(false)); + + let dispatcher = Dispatcher::new(100); + + // Force the Dispatcher out of the pre-init queue mode. + dispatcher + .guard() + .flush_init() + .expect("Failed to get out of preinit queue mode"); + + let canary_clone = thread_canary.clone(); + dispatcher + .guard() + .launch(move || { + assert!(thread::current().id() != main_thread_id); + // Use the canary bool to make sure this is getting called before + // the test completes. + assert!(!canary_clone.load(Ordering::SeqCst)); + canary_clone.store(true, Ordering::SeqCst); + }) + .expect("Failed to dispatch the test task"); + + dispatcher.guard().block_on_queue(); + assert!(thread_canary.load(Ordering::SeqCst)); + assert_eq!(main_thread_id, thread::current().id()); + } + + #[test] + fn launch_correctly_adds_tasks_to_preinit_queue() { + enable_test_logging(); + + let main_thread_id = thread::current().id(); + let thread_canary = Arc::new(AtomicU8::new(0)); + + let dispatcher = Dispatcher::new(100); + + // Add 3 tasks to queue each one increasing thread_canary by 1 to + // signal that the tasks ran. + for _ in 0..3 { + let canary_clone = thread_canary.clone(); + dispatcher + .guard() + .launch(move || { + // Make sure the task is flushed off-the-main thread. 
+ assert!(thread::current().id() != main_thread_id);
+ canary_clone.fetch_add(1, Ordering::SeqCst);
+ })
+ .expect("Failed to dispatch the test task");
+ }
+
+ // Ensure that no task ran.
+ assert_eq!(0, thread_canary.load(Ordering::SeqCst));
+
+ // Flush the queue and wait for the tasks to complete.
+ dispatcher
+ .guard()
+ .flush_init()
+ .expect("Failed to get out of preinit queue mode");
+ // Validate that we have the expected canary value.
+ assert_eq!(3, thread_canary.load(Ordering::SeqCst));
+ }
+
+ #[test]
+ fn preinit_tasks_are_processed_after_flush() {
+ enable_test_logging();
+
+ let dispatcher = Dispatcher::new(10);
+
+ let result = Arc::new(Mutex::new(vec![]));
+ for i in 1..=5 {
+ let result = Arc::clone(&result);
+ dispatcher
+ .guard()
+ .launch(move || {
+ result.lock().unwrap().push(i);
+ })
+ .unwrap();
+ }
+
+ result.lock().unwrap().push(0);
+ dispatcher.guard().flush_init().unwrap();
+ for i in 6..=10 {
+ let result = Arc::clone(&result);
+ dispatcher
+ .guard()
+ .launch(move || {
+ result.lock().unwrap().push(i);
+ })
+ .unwrap();
+ }
+
+ dispatcher.guard().block_on_queue();
+
+ // This additionally checks that tasks were executed in order.
+ assert_eq!(
+ &*result.lock().unwrap(),
+ &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ );
+ }
+
+ #[test]
+ fn tasks_after_shutdown_are_not_processed() {
+ enable_test_logging();
+
+ let dispatcher = Dispatcher::new(10);
+
+ let result = Arc::new(Mutex::new(vec![]));
+
+ dispatcher.guard().flush_init().unwrap();
+
+ dispatcher.guard().shutdown().unwrap();
+ {
+ let result = Arc::clone(&result);
+ // This might fail because the shutdown is quick enough,
+ // or it might succeed and still send the task.
+ // In any case that task should not be executed.
+ let _ = dispatcher.guard().launch(move || {
+ result.lock().unwrap().push(0);
+ });
+ }
+
+ dispatcher.join().unwrap();
+
+ assert!(result.lock().unwrap().is_empty());
+ }
+
+ #[test]
+ fn preinit_buffer_fills_up() {
+ enable_test_logging();
+
+ let dispatcher = Dispatcher::new(5);
+
+ let result = Arc::new(Mutex::new(vec![]));
+
+ for i in 1..=5 {
+ let result = Arc::clone(&result);
+ dispatcher
+ .guard()
+ .launch(move || {
+ result.lock().unwrap().push(i);
+ })
+ .unwrap();
+ }
+
+ {
+ let result = Arc::clone(&result);
+ let err = dispatcher.guard().launch(move || {
+ result.lock().unwrap().push(10);
+ });
+ assert_eq!(Err(DispatchError::QueueFull), err);
+ }
+
+ dispatcher.guard().flush_init().unwrap();
+
+ {
+ let result = Arc::clone(&result);
+ dispatcher
+ .guard()
+ .launch(move || {
+ result.lock().unwrap().push(20);
+ })
+ .unwrap();
+ }
+
+ dispatcher.guard().block_on_queue();
+
+ assert_eq!(&*result.lock().unwrap(), &[1, 2, 3, 4, 5, 20]);
+ }
+
+ #[test]
+ fn normal_queue_is_unbounded() {
+ enable_test_logging();
+
+ // Note: We can't actually test that it's fully unbounded,
+ // but we can quickly queue more slow tasks than the pre-init buffer holds
+ // and then guarantee they all run.
+
+ let dispatcher = Dispatcher::new(5);
+
+ let result = Arc::new(Mutex::new(vec![]));
+
+ for i in 1..=5 {
+ let result = Arc::clone(&result);
+ dispatcher
+ .guard()
+ .launch(move || {
+ result.lock().unwrap().push(i);
+ })
+ .unwrap();
+ }
+
+ dispatcher.guard().flush_init().unwrap();
+
+ // Queue more than 5 tasks.
+ // Each one is slow to process, so we should be faster at queueing
+ // them up than they are processed.
+ for i in 6..=20 { + let result = Arc::clone(&result); + dispatcher + .guard() + .launch(move || { + thread::sleep(Duration::from_millis(50)); + result.lock().unwrap().push(i); + }) + .unwrap(); + } + + dispatcher.guard().shutdown().unwrap(); + dispatcher.join().unwrap(); + + let expected = (1..=20).collect::<Vec<_>>(); + assert_eq!(&*result.lock().unwrap(), &expected); + } +} diff --git a/third_party/rust/glean-core/src/error.rs b/third_party/rust/glean-core/src/error.rs new file mode 100644 index 0000000000..9c2c445a4c --- /dev/null +++ b/third_party/rust/glean-core/src/error.rs @@ -0,0 +1,169 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::ffi::OsString; +use std::fmt::{self, Display}; +use std::io; +use std::result; + +use rkv::StoreError; + +/// A specialized [`Result`] type for this crate's operations. +/// +/// This is generally used to avoid writing out [`Error`] directly and +/// is otherwise a direct mapping to [`Result`]. +/// +/// [`Result`]: https://doc.rust-lang.org/stable/std/result/enum.Result.html +/// [`Error`]: std.struct.Error.html +pub type Result<T, E = Error> = result::Result<T, E>; + +/// A list enumerating the categories of errors in this crate. +/// +/// [`Error`]: https://doc.rust-lang.org/stable/std/error/trait.Error.html +/// +/// This list is intended to grow over time and it is not recommended to +/// exhaustively match against it. +#[derive(Debug)] +#[non_exhaustive] +pub enum ErrorKind { + /// Lifetime conversion failed + Lifetime(i32), + + /// IO error + IoError(io::Error), + + /// IO error + Rkv(StoreError), + + /// JSON error + Json(serde_json::error::Error), + + /// TimeUnit conversion failed + TimeUnit(i32), + + /// MemoryUnit conversion failed + MemoryUnit(i32), + + /// HistogramType conversion failed + HistogramType(i32), + + /// [`OsString`] conversion failed + OsString(OsString), + + /// Unknown error + Utf8Error, + + /// Glean initialization was attempted with an invalid configuration + InvalidConfig, + + /// Glean not initialized + NotInitialized, + + /// Ping request body size overflowed + PingBodyOverflow(usize), +} + +/// A specialized [`Error`] type for this crate's operations. +/// +/// [`Error`]: https://doc.rust-lang.org/stable/std/error/trait.Error.html +#[derive(Debug)] +pub struct Error { + kind: ErrorKind, +} + +impl Error { + /// Returns a new UTF-8 error + /// + /// This is exposed in order to expose conversion errors on the FFI layer. + pub fn utf8_error() -> Error { + Error { + kind: ErrorKind::Utf8Error, + } + } + + /// Indicates an error that no requested global object is initialized + pub fn not_initialized() -> Error { + Error { + kind: ErrorKind::NotInitialized, + } + } + + /// Returns the kind of the current error instance. 
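+ ///
+ /// A minimal sketch of matching on the kind (the handling here is
+ /// illustrative):
+ ///
+ /// ```rust,ignore
+ /// if let ErrorKind::NotInitialized = err.kind() {
+ ///     // Glean wasn't initialized yet; handle accordingly.
+ /// }
+ /// ```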
+ pub fn kind(&self) -> &ErrorKind {
+ &self.kind
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use ErrorKind::*;
+ match self.kind() {
+ Lifetime(l) => write!(f, "Lifetime conversion from {} failed", l),
+ IoError(e) => write!(f, "An I/O error occurred: {}", e),
+ Rkv(e) => write!(f, "An Rkv error occurred: {}", e),
+ Json(e) => write!(f, "A JSON error occurred: {}", e),
+ TimeUnit(t) => write!(f, "TimeUnit conversion from {} failed", t),
+ MemoryUnit(m) => write!(f, "MemoryUnit conversion from {} failed", m),
+ HistogramType(h) => write!(f, "HistogramType conversion from {} failed", h),
+ OsString(s) => write!(f, "OsString conversion from {:?} failed", s),
+ Utf8Error => write!(f, "Invalid UTF-8 byte sequence in string"),
+ InvalidConfig => write!(f, "Invalid Glean configuration provided"),
+ NotInitialized => write!(f, "Global Glean object missing"),
+ PingBodyOverflow(s) => write!(
+ f,
+ "Ping request body size exceeded maximum size allowed: {}kB.",
+ s / 1024
+ ),
+ }
+ }
+}
+
+impl From<ErrorKind> for Error {
+ fn from(kind: ErrorKind) -> Error {
+ Error { kind }
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(error: io::Error) -> Error {
+ Error {
+ kind: ErrorKind::IoError(error),
+ }
+ }
+}
+
+impl From<StoreError> for Error {
+ fn from(error: StoreError) -> Error {
+ Error {
+ kind: ErrorKind::Rkv(error),
+ }
+ }
+}
+
+impl From<serde_json::error::Error> for Error {
+ fn from(error: serde_json::error::Error) -> Error {
+ Error {
+ kind: ErrorKind::Json(error),
+ }
+ }
+}
+
+impl From<OsString> for Error {
+ fn from(error: OsString) -> Error {
+ Error {
+ kind: ErrorKind::OsString(error),
+ }
+ }
+}
+
+/// To satisfy integer conversion done by the macros on the FFI side, we need to be able to turn
+/// something infallible into an error.
+/// This will never actually be reached, as an integer-to-integer conversion is infallible.
+impl From<std::convert::Infallible> for Error {
+ fn from(_: std::convert::Infallible) -> Error {
+ unreachable!()
+ }
+}
diff --git a/third_party/rust/glean-core/src/error_recording.rs b/third_party/rust/glean-core/src/error_recording.rs
new file mode 100644
index 0000000000..aaf850d019
--- /dev/null
+++ b/third_party/rust/glean-core/src/error_recording.rs
@@ -0,0 +1,239 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! # Error Recording
+//!
+//! Glean keeps track of errors that occurred due to invalid labels or invalid values when recording
+//! other metrics.
+//!
+//! Error counts are stored in labeled counters in the `glean.error` category.
+//! The labeled counter metrics that store the errors are defined in the `metrics.yaml` for documentation purposes,
+//! but are not actually used directly, since the `send_in_pings` value needs to match the pings of the metric that is erroring (plus the "metrics" ping),
+//! not some constant value that we could define in `metrics.yaml`.
+
+use std::convert::TryFrom;
+use std::fmt::Display;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error::{Error, ErrorKind};
+use crate::metrics::labeled::{combine_base_identifier_and_label, strip_label};
+use crate::metrics::CounterMetric;
+use crate::CommonMetricData;
+use crate::Glean;
+use crate::Lifetime;
+
+/// The possible error types for metric recording.
+/// Note: the cases in this enum must be kept in sync with the ones +/// in the platform-specific code (e.g. `ErrorType.kt`) and with the +/// metrics in the registry files. +// When adding a new error type ensure it's also added to `ErrorType::iter()` below. +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ErrorType { + /// For when the value to be recorded does not match the metric-specific restrictions + InvalidValue, + /// For when the label of a labeled metric does not match the restrictions + InvalidLabel, + /// For when the metric caught an invalid state while recording + InvalidState, + /// For when the value to be recorded overflows the metric-specific upper range + InvalidOverflow, +} + +impl ErrorType { + /// The error type's metric id + pub fn as_str(&self) -> &'static str { + match self { + ErrorType::InvalidValue => "invalid_value", + ErrorType::InvalidLabel => "invalid_label", + ErrorType::InvalidState => "invalid_state", + ErrorType::InvalidOverflow => "invalid_overflow", + } + } + + /// Return an iterator over all possible error types. + /// + /// ``` + /// # use glean_core::ErrorType; + /// let errors = ErrorType::iter(); + /// let all_errors = errors.collect::<Vec<_>>(); + /// assert_eq!(4, all_errors.len()); + /// ``` + pub fn iter() -> impl Iterator<Item = Self> { + // N.B.: This has no compile-time guarantees that it is complete. + // New `ErrorType` variants will need to be added manually. + [ + ErrorType::InvalidValue, + ErrorType::InvalidLabel, + ErrorType::InvalidState, + ErrorType::InvalidOverflow, + ] + .iter() + .copied() + } +} + +impl TryFrom<i32> for ErrorType { + type Error = Error; + + fn try_from(value: i32) -> Result<ErrorType, Self::Error> { + match value { + 0 => Ok(ErrorType::InvalidValue), + 1 => Ok(ErrorType::InvalidLabel), + 2 => Ok(ErrorType::InvalidState), + 3 => Ok(ErrorType::InvalidOverflow), + e => Err(ErrorKind::Lifetime(e).into()), + } + } +} + +/// For a given metric, get the metric in which to record errors +fn get_error_metric_for_metric(meta: &CommonMetricDataInternal, error: ErrorType) -> CounterMetric { + // Can't use meta.identifier here, since that might cause infinite recursion + // if the label on this metric needs to report an error. + let identifier = meta.base_identifier(); + let name = strip_label(&identifier); + + // Record errors in the pings the metric is in, as well as the metrics ping. + let mut send_in_pings = meta.inner.send_in_pings.clone(); + let ping_name = "metrics".to_string(); + if !send_in_pings.contains(&ping_name) { + send_in_pings.push(ping_name); + } + + CounterMetric::new(CommonMetricData { + name: combine_base_identifier_and_label(error.as_str(), name), + category: "glean.error".into(), + lifetime: Lifetime::Ping, + send_in_pings, + ..Default::default() + }) +} + +/// Records an error into Glean. +/// +/// Errors are recorded as labeled counters in the `glean.error` category. +/// +/// *Note*: We do make assumptions here how labeled metrics are encoded, namely by having the name +/// `<name>/<label>`. +/// Errors do not adhere to the usual "maximum label" restriction. +/// +/// # Arguments +/// +/// * `glean` - The Glean instance containing the database +/// * `meta` - The metric's meta data +/// * `error` - The error type to record +/// * `message` - The message to log. This message is not sent with the ping. +/// It does not need to include the metric id, as that is automatically prepended to the message. +/// * `num_errors` - The number of errors of the same type to report. 
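+///
+/// A minimal sketch of a typical call (the metric and message here are
+/// illustrative, not taken from the codebase):
+///
+/// ```rust,ignore
+/// record_error(
+///     glean,
+///     metric.meta(),
+///     ErrorType::InvalidValue,
+///     "value exceeded maximum length",
+///     None, // defaults to reporting a single error
+/// );
+/// ```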
+pub fn record_error<O: Into<Option<i32>>>( + glean: &Glean, + meta: &CommonMetricDataInternal, + error: ErrorType, + message: impl Display, + num_errors: O, +) { + let metric = get_error_metric_for_metric(meta, error); + + log::warn!("{}: {}", meta.base_identifier(), message); + let to_report = num_errors.into().unwrap_or(1); + debug_assert!(to_report > 0); + metric.add_sync(glean, to_report); +} + +/// Gets the number of recorded errors for the given metric and error type. +/// +/// *Notes: This is a **test-only** API, but we need to expose it to be used in integration tests. +/// +/// # Arguments +/// +/// * `glean` - The Glean object holding the database +/// * `meta` - The metadata of the metric instance +/// * `error` - The type of error +/// +/// # Returns +/// +/// The number of errors reported. +pub fn test_get_num_recorded_errors( + glean: &Glean, + meta: &CommonMetricDataInternal, + error: ErrorType, +) -> Result<i32, String> { + let metric = get_error_metric_for_metric(meta, error); + + metric.get_value(glean, Some("metrics")).ok_or_else(|| { + format!( + "No error recorded for {} in 'metrics' store", + meta.base_identifier(), + ) + }) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::metrics::*; + use crate::tests::new_glean; + + #[test] + fn error_type_i32_mapping() { + let error: ErrorType = std::convert::TryFrom::try_from(0).unwrap(); + assert_eq!(error, ErrorType::InvalidValue); + let error: ErrorType = std::convert::TryFrom::try_from(1).unwrap(); + assert_eq!(error, ErrorType::InvalidLabel); + let error: ErrorType = std::convert::TryFrom::try_from(2).unwrap(); + assert_eq!(error, ErrorType::InvalidState); + let error: ErrorType = std::convert::TryFrom::try_from(3).unwrap(); + assert_eq!(error, ErrorType::InvalidOverflow); + } + + #[test] + fn recording_of_all_error_types() { + let (glean, _t) = new_glean(None); + + let string_metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into(), "store2".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + let expected_invalid_values_errors: i32 = 1; + let expected_invalid_labels_errors: i32 = 2; + + record_error( + &glean, + string_metric.meta(), + ErrorType::InvalidValue, + "Invalid value", + None, + ); + + record_error( + &glean, + string_metric.meta(), + ErrorType::InvalidLabel, + "Invalid label", + expected_invalid_labels_errors, + ); + + let invalid_val = + get_error_metric_for_metric(string_metric.meta(), ErrorType::InvalidValue); + let invalid_label = + get_error_metric_for_metric(string_metric.meta(), ErrorType::InvalidLabel); + for &store in &["store1", "store2", "metrics"] { + assert_eq!( + Some(expected_invalid_values_errors), + invalid_val.get_value(&glean, Some(store)) + ); + + assert_eq!( + Some(expected_invalid_labels_errors), + invalid_label.get_value(&glean, Some(store)) + ); + } + } +} diff --git a/third_party/rust/glean-core/src/event_database/mod.rs b/third_party/rust/glean-core/src/event_database/mod.rs new file mode 100644 index 0000000000..e4e64e04d8 --- /dev/null +++ b/third_party/rust/glean-core/src/event_database/mod.rs @@ -0,0 +1,1299 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
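+
+//! In-memory and on-disk storage for event metrics; see `EventDatabase`
+//! below for the storage and timestamp-normalization logic.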
+ +use std::cmp::Ordering; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::fs; +use std::fs::{create_dir_all, File, OpenOptions}; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::sync::RwLock; + +use chrono::{DateTime, FixedOffset}; + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value as JsonValue}; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::coverage::record_coverage; +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::{DatetimeMetric, TimeUnit}; +use crate::storage::INTERNAL_STORAGE; +use crate::util::get_iso_time_string; +use crate::Glean; +use crate::Result; +use crate::{CommonMetricData, CounterMetric, Lifetime}; + +/// Represents the recorded data for a single event. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +#[cfg_attr(test, derive(Default))] +pub struct RecordedEvent { + /// The timestamp of when the event was recorded. + /// + /// This allows to order events from a single process run. + pub timestamp: u64, + + /// The event's category. + /// + /// This is defined by users in the metrics file. + pub category: String, + + /// The event's name. + /// + /// This is defined by users in the metrics file. + pub name: String, + + /// A map of all extra data values. + /// + /// The set of allowed extra keys is defined by users in the metrics file. + #[serde(skip_serializing_if = "Option::is_none")] + pub extra: Option<HashMap<String, String>>, +} + +/// Represents the stored data for a single event. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +struct StoredEvent { + #[serde(flatten)] + event: RecordedEvent, + + /// The monotonically-increasing execution counter. + /// + /// Included to allow sending of events across Glean restarts (bug 1716725). + /// Is i32 because it is stored in a CounterMetric. + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub execution_counter: Option<i32>, +} + +/// This struct handles the in-memory and on-disk storage logic for events. +/// +/// So that the data survives shutting down of the application, events are stored +/// in an append-only file on disk, in addition to the store in memory. Each line +/// of this file records a single event in JSON, exactly as it will be sent in the +/// ping. There is one file per store. +/// +/// When restarting the application, these on-disk files are checked, and if any are +/// found, they are loaded, and a `glean.restarted` event is added before any +/// further events are collected. This is because the timestamps for these events +/// may have come from a previous boot of the device, and therefore will not be +/// compatible with any newly-collected events. +/// +/// Normalizing all these timestamps happens on serialization for submission (see +/// `serialize_as_json`) where the client time between restarts is calculated using +/// data stored in the `glean.startup.date` extra of the `glean.restarted` event, plus +/// the `execution_counter` stored in events on disk. +/// +/// Neither `execution_counter` nor `glean.startup.date` is submitted in pings. +/// The `glean.restarted` event is, though. +/// (See [bug 1716725](https://bugzilla.mozilla.org/show_bug.cgi?id=1716725).) 
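+///
+/// A worked sketch of that normalization (illustrative values): events at
+/// timestamps 20 and 40 in one run serialize as 0 and 20; after a restart,
+/// the `glean.restarted` event is rebased on
+/// `glean.startup.date - ping_info.start_time`, and later events are offset
+/// relative to it.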
+#[derive(Debug)]
+pub struct EventDatabase {
+ /// Path to directory of on-disk event files
+ pub path: PathBuf,
+ /// The in-memory list of events
+ event_stores: RwLock<HashMap<String, Vec<StoredEvent>>>,
+ /// A lock to be held when doing operations on the filesystem
+ file_lock: RwLock<()>,
+}
+
+impl EventDatabase {
+ /// Creates a new event database.
+ ///
+ /// # Arguments
+ ///
+ /// * `data_path` - The directory to store events in. A new directory
+ /// `events` will be created inside of this directory.
+ pub fn new(data_path: &Path) -> Result<Self> {
+ let path = data_path.join("events");
+ create_dir_all(&path)?;
+
+ Ok(Self {
+ path,
+ event_stores: RwLock::new(HashMap::new()),
+ file_lock: RwLock::new(()),
+ })
+ }
+
+ /// Initializes events storage after Glean is fully initialized and ready to send pings.
+ ///
+ /// This must be called once on application startup, e.g. from
+ /// [Glean.initialize], but after we are ready to send pings, since this
+ /// could potentially collect and send the "events" ping.
+ ///
+ /// If there are any events queued on disk, it loads them into memory so
+ /// that the memory and disk representations are in sync.
+ ///
+ /// If event records for the "events" ping are present, they are assembled into
+ /// an "events" ping which is submitted immediately with reason "startup".
+ ///
+ /// If event records for custom pings are present, we increment the custom pings'
+ /// stores' `execution_counter` and record a `glean.restarted`
+ /// event with the current client clock in its `glean.startup.date` extra.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - The Glean instance.
+ /// * `trim_data_to_registered_pings` - Whether we should trim the event storage of
+ /// any events not belonging to pings previously registered via `register_ping_type`.
+ ///
+ /// # Returns
+ ///
+ /// Whether the "events" ping was submitted.
+ pub fn flush_pending_events_on_startup(
+ &self,
+ glean: &Glean,
+ trim_data_to_registered_pings: bool,
+ ) -> bool {
+ match self.load_events_from_disk(glean, trim_data_to_registered_pings) {
+ Ok(_) => {
+ let stores_with_events: Vec<String> = {
+ self.event_stores
+ .read()
+ .unwrap() // safe unwrap, only error case is poisoning
+ .keys()
+ .map(|x| x.to_owned())
+ .collect()
+ };
+ // We do not want to be holding the event stores lock when
+ // submitting a ping or recording new events.
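+ // (The store names were collected in the block above precisely so
+ // the read lock is released before we record or submit below.)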
+ let has_events_events = stores_with_events.contains(&"events".to_owned()); + let glean_restarted_stores = if has_events_events { + stores_with_events + .into_iter() + .filter(|store| store != "events") + .collect() + } else { + stores_with_events + }; + if !glean_restarted_stores.is_empty() { + for store_name in glean_restarted_stores.iter() { + CounterMetric::new(CommonMetricData { + name: "execution_counter".into(), + category: store_name.into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::Ping, + ..Default::default() + }) + .add_sync(glean, 1); + } + let glean_restarted = CommonMetricData { + name: "restarted".into(), + category: "glean".into(), + send_in_pings: glean_restarted_stores, + lifetime: Lifetime::Ping, + ..Default::default() + }; + let startup = get_iso_time_string(glean.start_time(), TimeUnit::Minute); + let extra = [("glean.startup.date".into(), startup)].into(); + self.record( + glean, + &glean_restarted.into(), + crate::get_timestamp_ms(), + Some(extra), + ); + } + has_events_events && glean.submit_ping_by_name("events", Some("startup")) + } + Err(err) => { + log::warn!("Error loading events from disk: {}", err); + false + } + } + } + + fn load_events_from_disk( + &self, + glean: &Glean, + trim_data_to_registered_pings: bool, + ) -> Result<()> { + // NOTE: The order of locks here is important. + // In other code parts we might acquire the `file_lock` when we already have acquired + // a lock on `event_stores`. + // This is a potential lock-order-inversion. + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + let _lock = self.file_lock.write().unwrap(); // safe unwrap, only error case is poisoning + + for entry in fs::read_dir(&self.path)? { + let entry = entry?; + if entry.file_type()?.is_file() { + let store_name = entry.file_name().into_string()?; + log::info!("Loading events for {}", store_name); + if trim_data_to_registered_pings && glean.get_ping_by_name(&store_name).is_none() { + log::warn!("Trimming {}'s events", store_name); + if let Err(err) = fs::remove_file(entry.path()) { + match err.kind() { + std::io::ErrorKind::NotFound => { + // silently drop this error, the file was already non-existing + } + _ => log::warn!("Error trimming events file '{}': {}", store_name, err), + } + } + continue; + } + let file = BufReader::new(File::open(entry.path())?); + db.insert( + store_name, + file.lines() + .map_while(Result::ok) + .filter_map(|line| serde_json::from_str::<StoredEvent>(&line).ok()) + .collect(), + ); + } + } + Ok(()) + } + + /// Records an event in the desired stores. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance. + /// * `meta` - The metadata about the event metric. Used to get the category, + /// name and stores for the metric. + /// * `timestamp` - The timestamp of the event, in milliseconds. Must use a + /// monotonically increasing timer (this value is obtained on the + /// platform-specific side). + /// * `extra` - Extra data values, mapping strings to strings. + /// + /// ## Returns + /// + /// `true` if a ping was submitted and should be uploaded. + /// `false` otherwise. + pub fn record( + &self, + glean: &Glean, + meta: &CommonMetricDataInternal, + timestamp: u64, + extra: Option<HashMap<String, String>>, + ) -> bool { + // If upload is disabled we don't want to record. 
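+ // (The early return below also skips the on-disk write, so nothing
+ // is persisted while upload is disabled.)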
+ if !glean.is_upload_enabled() { + return false; + } + + let mut submit_max_capacity_event_ping = false; + { + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + for store_name in meta.inner.send_in_pings.iter() { + let store = db.entry(store_name.to_string()).or_insert_with(Vec::new); + let execution_counter = CounterMetric::new(CommonMetricData { + name: "execution_counter".into(), + category: store_name.into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::Ping, + ..Default::default() + }) + .get_value(glean, INTERNAL_STORAGE); + // Create StoredEvent object, and its JSON form for serialization on disk. + let event = StoredEvent { + event: RecordedEvent { + timestamp, + category: meta.inner.category.to_string(), + name: meta.inner.name.to_string(), + extra: extra.clone(), + }, + execution_counter, + }; + let event_json = serde_json::to_string(&event).unwrap(); // safe unwrap, event can always be serialized + store.push(event); + self.write_event_to_disk(store_name, &event_json); + if store_name == "events" && store.len() == glean.get_max_events() { + submit_max_capacity_event_ping = true; + } + } + } + if submit_max_capacity_event_ping { + glean.submit_ping_by_name("events", Some("max_capacity")); + true + } else { + false + } + } + + /// Writes an event to a single store on disk. + /// + /// # Arguments + /// + /// * `store_name` - The name of the store. + /// * `event_json` - The event content, as a single-line JSON-encoded string. + fn write_event_to_disk(&self, store_name: &str, event_json: &str) { + let _lock = self.file_lock.write().unwrap(); // safe unwrap, only error case is poisoning + if let Err(err) = OpenOptions::new() + .create(true) + .append(true) + .open(self.path.join(store_name)) + .and_then(|mut file| writeln!(file, "{}", event_json)) + { + log::warn!("IO error writing event to store '{}': {}", store_name, err); + } + } + + /// Normalizes the store in-place. + /// + /// A store may be in any order and contain any number of `glean.restarted` events, + /// whose values must be taken into account, along with `execution_counter` values, + /// to come up with the correct events with correct `timestamp` values, + /// on which we then sort. + /// + /// 1. Sort by `execution_counter` and `timestamp`, + /// breaking ties so that `glean.restarted` comes first. + /// 2. Remove all initial and final `glean.restarted` events + /// 3. For each group of events that share a `execution_counter`, + /// i. calculate the initial `glean.restarted` event's `timestamp`s to be + /// clamp(glean.startup.date - ping_info.start_time, biggest_timestamp_of_previous_group + 1) + /// ii. normalize each non-`glean-restarted` event's `timestamp` + /// relative to the `glean.restarted` event's uncalculated `timestamp` + /// 4. Remove `execution_counter` and `glean.startup.date` extra keys + /// 5. Sort by `timestamp` + /// + /// In the event that something goes awry, this will record an invalid_state on + /// glean.restarted if it is due to internal inconsistencies, or invalid_value + /// on client clock weirdness. + /// + /// # Arguments + /// + /// * `glean` - Used to report errors + /// * `store_name` - The name of the store we're normalizing. + /// * `store` - The store we're to normalize. + /// * `glean_start_time` - Used if the glean.startup.date or ping_info.start_time aren't available. Passed as a parameter to ease unit-testing. 
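+ ///
+ /// A worked sketch (illustrative values): with first-run timestamps
+ /// [20, 40] and a post-restart event at 12, the result is [0, 20],
+ /// then the `glean.restarted` event, then the final event offset
+ /// relative to the restart's computed timestamp.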
+ fn normalize_store( + &self, + glean: &Glean, + store_name: &str, + store: &mut Vec<StoredEvent>, + glean_start_time: DateTime<FixedOffset>, + ) { + let is_glean_restarted = + |event: &RecordedEvent| event.category == "glean" && event.name == "restarted"; + let glean_restarted_meta = |store_name: &str| CommonMetricData { + name: "restarted".into(), + category: "glean".into(), + send_in_pings: vec![store_name.into()], + lifetime: Lifetime::Ping, + ..Default::default() + }; + // Step 1 + store.sort_by(|a, b| { + a.execution_counter + .cmp(&b.execution_counter) + .then_with(|| a.event.timestamp.cmp(&b.event.timestamp)) + .then_with(|| { + if is_glean_restarted(&a.event) { + Ordering::Less + } else { + Ordering::Greater + } + }) + }); + // Step 2 + // Find the index of the first and final non-`glean.restarted` events. + // Remove events before the first and after the final. + let final_event = match store + .iter() + .rposition(|event| !is_glean_restarted(&event.event)) + { + Some(idx) => idx + 1, + _ => 0, + }; + store.drain(final_event..); + let first_event = store + .iter() + .position(|event| !is_glean_restarted(&event.event)) + .unwrap_or(store.len()); + store.drain(..first_event); + if store.is_empty() { + // There was nothing but `glean.restarted` events. Job's done! + return; + } + // Step 3 + // It is allowed that there might not be any `glean.restarted` event, nor + // `execution_counter` extra values. (This should always be the case for the + // "events" ping, for instance). + // Other inconsistencies are evidence of errors, and so are logged. + let mut cur_ec = 0; + // The offset within a group of events with the same `execution_counter`. + let mut intra_group_offset = store[0].event.timestamp; + // The offset between this group and ping_info.start_date. + let mut inter_group_offset = 0; + let mut highest_ts = 0; + for event in store.iter_mut() { + let execution_counter = event.execution_counter.take().unwrap_or(0); + if is_glean_restarted(&event.event) { + // We've entered the next "event group". + // We need a new epoch based on glean.startup.date - ping_info.start_date + cur_ec = execution_counter; + let glean_startup_date = event + .event + .extra + .as_mut() + .and_then(|extra| { + extra.remove("glean.startup.date").and_then(|date_str| { + DateTime::parse_from_str(&date_str, TimeUnit::Minute.format_pattern()) + .map_err(|_| { + record_error( + glean, + &glean_restarted_meta(store_name).into(), + ErrorType::InvalidState, + format!("Unparseable glean.startup.date '{}'", date_str), + None, + ); + }) + .ok() + }) + }) + .unwrap_or(glean_start_time); + if event + .event + .extra + .as_ref() + .map_or(false, |extra| extra.is_empty()) + { + // Small optimization to save us sending empty dicts. 
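+ // (`glean.startup.date` was removed from the extras just above; if it
+ // was the only key, drop the map entirely instead of sending `{}`.)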
+ event.event.extra = None; + } + let ping_start = DatetimeMetric::new( + CommonMetricData { + name: format!("{}#start", store_name), + category: "".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::User, + ..Default::default() + }, + TimeUnit::Minute, + ); + let ping_start = ping_start + .get_value(glean, INTERNAL_STORAGE) + .unwrap_or(glean_start_time); + let time_from_ping_start_to_glean_restarted = + (glean_startup_date - ping_start).num_milliseconds(); + intra_group_offset = event.event.timestamp; + inter_group_offset = + u64::try_from(time_from_ping_start_to_glean_restarted).unwrap_or(0); + if inter_group_offset < highest_ts { + record_error( + glean, + &glean_restarted_meta(store_name).into(), + ErrorType::InvalidValue, + format!("Time between restart and ping start {} indicates client clock weirdness.", time_from_ping_start_to_glean_restarted), + None, + ); + // The client's clock went backwards enough that this event group's + // glean.restarted looks like it happened _before_ the final event of the previous group. + // Or, it went ahead enough to overflow u64. + // Adjust things so this group starts 1ms after the previous one. + inter_group_offset = highest_ts + 1; + } + } else if cur_ec == 0 { + // bug 1811872 - cur_ec might need initialization. + cur_ec = execution_counter; + } + event.event.timestamp = event.event.timestamp - intra_group_offset + inter_group_offset; + if execution_counter != cur_ec { + record_error( + glean, + &glean_restarted_meta(store_name).into(), + ErrorType::InvalidState, + format!( + "Inconsistent execution counter {} (expected {})", + execution_counter, cur_ec + ), + None, + ); + // Let's fix cur_ec up and hope this isn't a sign something big is broken. + cur_ec = execution_counter; + } + if highest_ts > event.event.timestamp { + // Even though we sorted everything, something in the + // execution_counter or glean.startup.date math went awry. + record_error( + glean, + &glean_restarted_meta(store_name).into(), + ErrorType::InvalidState, + format!( + "Inconsistent previous highest timestamp {} (expected <= {})", + highest_ts, event.event.timestamp + ), + None, + ); + // Let the highest_ts regress to event.timestamp to hope this minimizes weirdness. + } + highest_ts = event.event.timestamp + } + } + + /// Gets a snapshot of the stored event data as a JsonValue. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance. + /// * `store_name` - The name of the desired store. + /// * `clear_store` - Whether to clear the store after snapshotting. + /// + /// # Returns + /// + /// A array of events, JSON encoded, if any. Otherwise `None`. + pub fn snapshot_as_json( + &self, + glean: &Glean, + store_name: &str, + clear_store: bool, + ) -> Option<JsonValue> { + let result = { + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + db.get_mut(&store_name.to_string()).and_then(|store| { + if !store.is_empty() { + // Normalization happens in-place, so if we're not clearing, + // operate on a clone. + let mut clone; + let store = if clear_store { + store + } else { + clone = store.clone(); + &mut clone + }; + // We may need to normalize event timestamps across multiple restarts. 
+ self.normalize_store(glean, store_name, store, glean.start_time());
+ Some(json!(store))
+ } else {
+ log::warn!("Unexpectedly got empty event store for '{}'", store_name);
+ None
+ }
+ })
+ };
+
+ if clear_store {
+ self.event_stores
+ .write()
+ .unwrap() // safe unwrap, only error case is poisoning
+ .remove(&store_name.to_string());
+
+ let _lock = self.file_lock.write().unwrap(); // safe unwrap, only error case is poisoning
+ if let Err(err) = fs::remove_file(self.path.join(store_name)) {
+ match err.kind() {
+ std::io::ErrorKind::NotFound => {
+ // silently drop this error, the file was already non-existing
+ }
+ _ => log::warn!("Error removing events queue file '{}': {}", store_name, err),
+ }
+ }
+ }
+
+ result
+ }
+
+ /// Clears all stored events, both in memory and on-disk.
+ pub fn clear_all(&self) -> Result<()> {
+ // safe unwrap, only error case is poisoning
+ self.event_stores.write().unwrap().clear();
+
+ // safe unwrap, only error case is poisoning
+ let _lock = self.file_lock.write().unwrap();
+ std::fs::remove_dir_all(&self.path)?;
+ create_dir_all(&self.path)?;
+
+ Ok(())
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the vector of currently stored events for the given event metric in
+ /// the given store.
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value<'a>(
+ &'a self,
+ meta: &'a CommonMetricDataInternal,
+ store_name: &str,
+ ) -> Option<Vec<RecordedEvent>> {
+ record_coverage(&meta.base_identifier());
+
+ let value: Vec<RecordedEvent> = self
+ .event_stores
+ .read()
+ .unwrap() // safe unwrap, only error case is poisoning
+ .get(&store_name.to_string())
+ .into_iter()
+ .flatten()
+ .map(|stored_event| stored_event.event.clone())
+ .filter(|event| event.name == meta.inner.name && event.category == meta.inner.category)
+ .collect();
+ if !value.is_empty() {
+ Some(value)
+ } else {
+ None
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::tests::new_glean;
+ use crate::{test_get_num_recorded_errors, CommonMetricData};
+ use chrono::{TimeZone, Timelike};
+
+ #[test]
+ fn handle_truncated_events_on_disk() {
+ let (glean, t) = new_glean(None);
+
+ {
+ let db = EventDatabase::new(t.path()).unwrap();
+ db.write_event_to_disk("events", "{\"timestamp\": 500");
+ db.write_event_to_disk("events", "{\"timestamp\"");
+ db.write_event_to_disk(
+ "events",
+ "{\"timestamp\": 501, \"category\": \"ui\", \"name\": \"click\"}",
+ );
+ }
+
+ {
+ let db = EventDatabase::new(t.path()).unwrap();
+ db.load_events_from_disk(&glean, false).unwrap();
+ let events = &db.event_stores.read().unwrap()["events"];
+ assert_eq!(1, events.len());
+ }
+ }
+
+ #[test]
+ fn stable_serialization() {
+ let event_empty = RecordedEvent {
+ timestamp: 2,
+ category: "cat".to_string(),
+ name: "name".to_string(),
+ extra: None,
+ };
+
+ let mut data = HashMap::new();
+ data.insert("a key".to_string(), "a value".to_string());
+ let event_data = RecordedEvent {
+ timestamp: 2,
+ category: "cat".to_string(),
+ name: "name".to_string(),
+ extra: Some(data),
+ };
+
+ let event_empty_json = ::serde_json::to_string_pretty(&event_empty).unwrap();
+ let event_data_json = ::serde_json::to_string_pretty(&event_data).unwrap();
+
+ assert_eq!(
+ StoredEvent {
+ event: event_empty,
+ execution_counter: None
+ },
+ serde_json::from_str(&event_empty_json).unwrap()
+ );
+ assert_eq!(
+ StoredEvent {
+ event: event_data,
+ execution_counter: None
+ },
+ serde_json::from_str(&event_data_json).unwrap()
+ );
+ }
+
+ #[test]
+ fn deserialize_existing_data() {
+ let event_empty_json = r#" +{ + "timestamp": 2, + "category": "cat", + "name": "name" +} + "#; + + let event_data_json = r#" +{ + "timestamp": 2, + "category": "cat", + "name": "name", + "extra": { + "a key": "a value" + } +} + "#; + + let event_empty = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: None, + }; + + let mut data = HashMap::new(); + data.insert("a key".to_string(), "a value".to_string()); + let event_data = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: Some(data), + }; + + assert_eq!( + StoredEvent { + event: event_empty, + execution_counter: None + }, + serde_json::from_str(event_empty_json).unwrap() + ); + assert_eq!( + StoredEvent { + event: event_data, + execution_counter: None + }, + serde_json::from_str(event_data_json).unwrap() + ); + } + + #[test] + fn doesnt_record_when_upload_is_disabled() { + let (mut glean, dir) = new_glean(None); + let db = EventDatabase::new(dir.path()).unwrap(); + + let test_storage = "test-storage"; + let test_category = "category"; + let test_name = "name"; + let test_timestamp = 2; + let test_meta = CommonMetricDataInternal::new(test_category, test_name, test_storage); + let event_data = RecordedEvent { + timestamp: test_timestamp, + category: test_category.to_string(), + name: test_name.to_string(), + extra: None, + }; + + // Upload is not yet disabled, + // so let's check that everything is getting recorded as expected. + db.record(&glean, &test_meta, 2, None); + { + let event_stores = db.event_stores.read().unwrap(); + assert_eq!( + &StoredEvent { + event: event_data, + execution_counter: None + }, + &event_stores.get(test_storage).unwrap()[0] + ); + assert_eq!(event_stores.get(test_storage).unwrap().len(), 1); + } + + glean.set_upload_enabled(false); + + // Now that upload is disabled, let's check nothing is recorded. + db.record(&glean, &test_meta, 2, None); + { + let event_stores = db.event_stores.read().unwrap(); + assert_eq!(event_stores.get(test_storage).unwrap().len(), 1); + } + } + + #[test] + fn normalize_store_of_glean_restarted() { + // Make sure stores empty of anything but glean.restarted events normalize without issue. + let (glean, _dir) = new_glean(None); + + let store_name = "store-name"; + let glean_restarted = StoredEvent { + event: RecordedEvent { + timestamp: 2, + category: "glean".into(), + name: "restarted".into(), + extra: None, + }, + execution_counter: None, + }; + let mut store = vec![glean_restarted.clone()]; + let glean_start_time = glean.start_time(); + + glean + .event_storage() + .normalize_store(&glean, store_name, &mut store, glean_start_time); + assert!(store.is_empty()); + + let mut store = vec![glean_restarted.clone(), glean_restarted.clone()]; + glean + .event_storage() + .normalize_store(&glean, store_name, &mut store, glean_start_time); + assert!(store.is_empty()); + + let mut store = vec![ + glean_restarted.clone(), + glean_restarted.clone(), + glean_restarted, + ]; + glean + .event_storage() + .normalize_store(&glean, store_name, &mut store, glean_start_time); + assert!(store.is_empty()); + } + + #[test] + fn normalize_store_of_glean_restarted_on_both_ends() { + // Make sure stores with non-glean.restarted events don't get drained too far. 
+ let (glean, _dir) = new_glean(None); + + let store_name = "store-name"; + let glean_restarted = StoredEvent { + event: RecordedEvent { + timestamp: 2, + category: "glean".into(), + name: "restarted".into(), + extra: None, + }, + execution_counter: None, + }; + let not_glean_restarted = StoredEvent { + event: RecordedEvent { + timestamp: 20, + category: "category".into(), + name: "name".into(), + extra: None, + }, + execution_counter: None, + }; + let mut store = vec![ + glean_restarted.clone(), + not_glean_restarted.clone(), + glean_restarted, + ]; + let glean_start_time = glean.start_time(); + + glean + .event_storage() + .normalize_store(&glean, store_name, &mut store, glean_start_time); + assert_eq!(1, store.len()); + assert_eq!( + StoredEvent { + event: RecordedEvent { + timestamp: 0, + ..not_glean_restarted.event + }, + execution_counter: None + }, + store[0] + ); + } + + #[test] + fn normalize_store_single_run_timestamp_math() { + // With a single run of events (no non-initial or non-terminal `glean.restarted`), + // ensure the timestamp math works. + // (( works = Initial event gets to be 0, subsequent events get normalized to that 0 )) + let (glean, _dir) = new_glean(None); + + let store_name = "store-name"; + let glean_restarted = StoredEvent { + event: RecordedEvent { + timestamp: 2, + category: "glean".into(), + name: "restarted".into(), + extra: None, + }, + execution_counter: None, + }; + let timestamps = vec![20, 40, 200]; + let not_glean_restarted = StoredEvent { + event: RecordedEvent { + timestamp: timestamps[0], + category: "category".into(), + name: "name".into(), + extra: None, + }, + execution_counter: None, + }; + let mut store = vec![ + glean_restarted.clone(), + not_glean_restarted.clone(), + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[1], + ..not_glean_restarted.event.clone() + }, + execution_counter: None, + }, + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[2], + ..not_glean_restarted.event.clone() + }, + execution_counter: None, + }, + glean_restarted, + ]; + + glean + .event_storage() + .normalize_store(&glean, store_name, &mut store, glean.start_time()); + assert_eq!(3, store.len()); + for (timestamp, event) in timestamps.iter().zip(store.iter()) { + assert_eq!( + &StoredEvent { + event: RecordedEvent { + timestamp: timestamp - timestamps[0], + ..not_glean_restarted.clone().event + }, + execution_counter: None + }, + event + ); + } + } + + #[test] + fn normalize_store_multi_run_timestamp_math() { + // With multiple runs of events (separated by `glean.restarted`), + // ensure the timestamp math works. + // (( works = Initial event gets to be 0, subsequent events get normalized to that 0. + // Subsequent runs figure it out via glean.restarted.date and ping_info.start_time )) + let (glean, _dir) = new_glean(None); + + let store_name = "store-name"; + let glean_restarted = StoredEvent { + event: RecordedEvent { + category: "glean".into(), + name: "restarted".into(), + ..Default::default() + }, + execution_counter: None, + }; + let not_glean_restarted = StoredEvent { + event: RecordedEvent { + category: "category".into(), + name: "name".into(), + ..Default::default() + }, + execution_counter: None, + }; + + // This scenario represents a run of three events followed by an hour between runs, + // followed by one final event. 
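+ // (Expected after normalization: timestamps 0, 20, 180, then
+ // `glean.restarted` at one hour, then the final event at one hour
+ // plus 10ms.)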
+ let timestamps = vec![20, 40, 200, 12]; + let ecs = vec![0, 1]; + let some_hour = 16; + let startup_date = FixedOffset::east(0) + .ymd(2022, 11, 24) + .and_hms(some_hour, 29, 0); // TimeUnit::Minute -- don't put seconds + let glean_start_time = startup_date.with_hour(some_hour - 1); + let restarted_ts = 2; + let mut store = vec![ + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[0], + ..not_glean_restarted.event.clone() + }, + execution_counter: Some(ecs[0]), + }, + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[1], + ..not_glean_restarted.event.clone() + }, + execution_counter: Some(ecs[0]), + }, + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[2], + ..not_glean_restarted.event.clone() + }, + execution_counter: Some(ecs[0]), + }, + StoredEvent { + event: RecordedEvent { + extra: Some( + [( + "glean.startup.date".into(), + get_iso_time_string(startup_date, TimeUnit::Minute), + )] + .into(), + ), + timestamp: restarted_ts, + ..glean_restarted.event.clone() + }, + execution_counter: Some(ecs[1]), + }, + StoredEvent { + event: RecordedEvent { + timestamp: timestamps[3], + ..not_glean_restarted.event.clone() + }, + execution_counter: Some(ecs[1]), + }, + ]; + + glean.event_storage().normalize_store( + &glean, + store_name, + &mut store, + glean_start_time.unwrap(), + ); + assert_eq!(5, store.len()); // 4 "real" events plus 1 `glean.restarted` + + // Let's check the first three. + for (timestamp, event) in timestamps[..timestamps.len() - 1].iter().zip(store.clone()) { + assert_eq!( + StoredEvent { + event: RecordedEvent { + timestamp: timestamp - timestamps[0], + ..not_glean_restarted.event.clone() + }, + execution_counter: None, + }, + event + ); + } + // The fourth should be a glean.restarted and have a realtime-based timestamp. + let hour_in_millis = 3600000; + assert_eq!( + store[3], + StoredEvent { + event: RecordedEvent { + timestamp: hour_in_millis, + ..glean_restarted.event + }, + execution_counter: None, + } + ); + // The fifth should have a timestamp based on the new origin. + assert_eq!( + store[4], + StoredEvent { + event: RecordedEvent { + timestamp: hour_in_millis + timestamps[3] - restarted_ts, + ..not_glean_restarted.event + }, + execution_counter: None, + } + ); + } + + #[test] + fn normalize_store_multi_run_client_clocks() { + // With multiple runs of events (separated by `glean.restarted`), + // ensure the timestamp math works. Even when the client clock goes backwards. + let (glean, _dir) = new_glean(None); + + let store_name = "store-name"; + let glean_restarted = StoredEvent { + event: RecordedEvent { + category: "glean".into(), + name: "restarted".into(), + ..Default::default() + }, + execution_counter: None, + }; + let not_glean_restarted = StoredEvent { + event: RecordedEvent { + category: "category".into(), + name: "name".into(), + ..Default::default() + }, + execution_counter: None, + }; + + // This scenario represents a run of two events followed by negative one hours between runs, + // followed by two more events. 
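+ // (Expected after normalization: 0 and 20, then `glean.restarted`
+ // clamped to 21, then 31 and 219, plus an InvalidValue error on
+ // `glean.restarted`.)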
+ let timestamps = vec![20, 40, 12, 200];
+ let ecs = vec![0, 1];
+ let some_hour = 10;
+ let startup_date = FixedOffset::east(0)
+ .ymd(2022, 11, 25)
+ .and_hms(some_hour, 37, 0); // TimeUnit::Minute -- don't put seconds
+ let glean_start_time = startup_date.with_hour(some_hour + 1);
+ let restarted_ts = 2;
+ let mut store = vec![
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamps[0],
+ ..not_glean_restarted.event.clone()
+ },
+ execution_counter: Some(ecs[0]),
+ },
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamps[1],
+ ..not_glean_restarted.event.clone()
+ },
+ execution_counter: Some(ecs[0]),
+ },
+ StoredEvent {
+ event: RecordedEvent {
+ extra: Some(
+ [(
+ "glean.startup.date".into(),
+ get_iso_time_string(startup_date, TimeUnit::Minute),
+ )]
+ .into(),
+ ),
+ timestamp: restarted_ts,
+ ..glean_restarted.event.clone()
+ },
+ execution_counter: Some(ecs[1]),
+ },
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamps[2],
+ ..not_glean_restarted.event.clone()
+ },
+ execution_counter: Some(ecs[1]),
+ },
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamps[3],
+ ..not_glean_restarted.event.clone()
+ },
+ execution_counter: Some(ecs[1]),
+ },
+ ];
+
+ glean.event_storage().normalize_store(
+ &glean,
+ store_name,
+ &mut store,
+ glean_start_time.unwrap(),
+ );
+ assert_eq!(5, store.len()); // 4 "real" events plus 1 `glean.restarted`
+
+ // Let's check the first two.
+ for (timestamp, event) in timestamps[..timestamps.len() - 2].iter().zip(store.clone()) {
+ assert_eq!(
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamp - timestamps[0],
+ ..not_glean_restarted.event.clone()
+ },
+ execution_counter: None,
+ },
+ event
+ );
+ }
+ // The third should be a glean.restarted. Its timestamp should be
+ // one larger than the largest timestamp seen so far (because that's
+ // how we ensure monotonic timestamps when client clocks go backwards).
+ assert_eq!(
+ store[2],
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: store[1].event.timestamp + 1,
+ ..glean_restarted.event
+ },
+ execution_counter: None,
+ }
+ );
+ // The fourth should have a timestamp based on the new origin.
+ assert_eq!(
+ store[3],
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: timestamps[2] - restarted_ts + store[2].event.timestamp,
+ ..not_glean_restarted.event
+ },
+ execution_counter: None,
+ }
+ );
+ // And we should have an InvalidValue on glean.restarted to show for it.
+ assert_eq!(
+ Ok(1),
+ test_get_num_recorded_errors(
+ &glean,
+ &CommonMetricData {
+ name: "restarted".into(),
+ category: "glean".into(),
+ send_in_pings: vec![store_name.into()],
+ lifetime: Lifetime::Ping,
+ ..Default::default()
+ }
+ .into(),
+ ErrorType::InvalidValue
+ )
+ );
+ }
+
+ #[test]
+ fn normalize_store_non_zero_ec() {
+ // After the first run, execution_counter will likely be non-zero.
+ // Ensure normalizing a store that begins with non-zero ec works.
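+ // (Here "ec" is `execution_counter`: the per-run counter carried by stored
+ // events. As the asserts below show, normalization strips it from the output.)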
+ let (glean, _dir) = new_glean(None);
+
+ let store_name = "store-name";
+ let glean_restarted = StoredEvent {
+ event: RecordedEvent {
+ timestamp: 2,
+ category: "glean".into(),
+ name: "restarted".into(),
+ extra: None,
+ },
+ execution_counter: Some(2),
+ };
+ let not_glean_restarted = StoredEvent {
+ event: RecordedEvent {
+ timestamp: 20,
+ category: "category".into(),
+ name: "name".into(),
+ extra: None,
+ },
+ execution_counter: Some(2),
+ };
+ let glean_restarted_2 = StoredEvent {
+ event: RecordedEvent {
+ timestamp: 2,
+ category: "glean".into(),
+ name: "restarted".into(),
+ extra: None,
+ },
+ execution_counter: Some(3),
+ };
+ let mut store = vec![
+ glean_restarted,
+ not_glean_restarted.clone(),
+ glean_restarted_2,
+ ];
+ let glean_start_time = glean.start_time();
+
+ glean
+ .event_storage()
+ .normalize_store(&glean, store_name, &mut store, glean_start_time);
+
+ assert_eq!(1, store.len());
+ assert_eq!(
+ StoredEvent {
+ event: RecordedEvent {
+ timestamp: 0,
+ ..not_glean_restarted.event
+ },
+ execution_counter: None
+ },
+ store[0]
+ );
+ // And we should have no InvalidState errors on glean.restarted.
+ assert!(test_get_num_recorded_errors(
+ &glean,
+ &CommonMetricData {
+ name: "restarted".into(),
+ category: "glean".into(),
+ send_in_pings: vec![store_name.into()],
+ lifetime: Lifetime::Ping,
+ ..Default::default()
+ }
+ .into(),
+ ErrorType::InvalidState
+ )
+ .is_err());
+ // (and, just because we're here, double-check there are no InvalidValue either).
+ assert!(test_get_num_recorded_errors(
+ &glean,
+ &CommonMetricData {
+ name: "restarted".into(),
+ category: "glean".into(),
+ send_in_pings: vec![store_name.into()],
+ lifetime: Lifetime::Ping,
+ ..Default::default()
+ }
+ .into(),
+ ErrorType::InvalidValue
+ )
+ .is_err());
+ }
+}
diff --git a/third_party/rust/glean-core/src/fd_logger.rs b/third_party/rust/glean-core/src/fd_logger.rs
new file mode 100644
index 0000000000..46b219e040
--- /dev/null
+++ b/third_party/rust/glean-core/src/fd_logger.rs
@@ -0,0 +1,85 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::fs::File;
+use std::io::Write;
+use std::sync::RwLock;
+
+#[cfg(target_os = "windows")]
+use std::os::windows::io::FromRawHandle;
+
+#[cfg(target_os = "windows")]
+use std::ffi::c_void;
+
+#[cfg(not(target_os = "windows"))]
+use std::os::unix::io::FromRawFd;
+
+use serde::Serialize;
+
+/// An implementation of log::Log that writes log messages in JSON format to a
+/// file descriptor/handle. The logging level is ignored in this implementation:
+/// it is up to the receiver of these log messages (on the language binding
+/// side) to filter the log messages based on their level.
+/// The JSON payload of each message is an object with the following keys:
+/// - `level` (string): One of the logging levels defined here:
+/// https://docs.rs/log/0.4.11/log/enum.Level.html
+/// - `message` (string): The logging message.
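+/// - `target` (string): The target (module path) of the log record.
+///
+/// An illustrative line as written to the file descriptor (values made up):
+/// `{"level":"INFO","message":"ping submitted","target":"glean_core::upload"}`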
+pub struct FdLogger { + pub file: RwLock<File>, +} + +#[derive(Serialize)] +struct FdLoggingRecord { + level: String, + message: String, + target: String, +} + +#[cfg(target_os = "windows")] +unsafe fn get_file_from_fd(fd: u64) -> File { + File::from_raw_handle(fd as *mut c_void) +} + +#[cfg(not(target_os = "windows"))] +unsafe fn get_file_from_fd(fd: u64) -> File { + File::from_raw_fd(fd as i32) +} + +impl FdLogger { + pub unsafe fn new(fd: u64) -> Self { + FdLogger { + file: RwLock::new(get_file_from_fd(fd)), + } + } +} + +impl log::Log for FdLogger { + fn enabled(&self, _metadata: &log::Metadata) -> bool { + // This logger always emits logging messages of any level, and the + // language binding consuming these messages is responsible for + // filtering and routing them. + true + } + + fn log(&self, record: &log::Record) { + // Normally, classes implementing the Log trait would filter based on + // the log level here. But in this case, we want to emit all log + // messages and let the logging system in the language binding filter + // and route them. + let payload = FdLoggingRecord { + level: record.level().to_string(), + message: record.args().to_string(), + target: record.target().to_string(), + }; + let _ = writeln!( + self.file.write().unwrap(), + "{}", + serde_json::to_string(&payload).unwrap() + ); + } + + fn flush(&self) { + let _ = self.file.write().unwrap().flush(); + } +} diff --git a/third_party/rust/glean-core/src/glean.udl b/third_party/rust/glean-core/src/glean.udl new file mode 100644 index 0000000000..67466b4640 --- /dev/null +++ b/third_party/rust/glean-core/src/glean.udl @@ -0,0 +1,601 @@ +namespace glean { + void glean_enable_logging(); + + // Initialize the logging system to send JSON messages to a file descriptor + // (Unix) or file handle (Windows). + // + // No-op on Android and iOS. Use `glean_enable_logging` instead. + void glean_enable_logging_to_fd(u64 fd); + + // Initializes Glean. + // + // This will fully initialize Glean in a separate thread. + // It will return immediately. + void glean_initialize(InternalConfiguration cfg, ClientInfoMetrics client_info, OnGleanEvents callbacks); + + // Creates and initializes a new Glean object for use in a subprocess. + // + // Importantly, this will not send any pings at startup, since that + // sort of management should only happen in the main process. + // + // Must only be used for an uploader process. + // The general API or any metrics API **will not work**. + boolean glean_initialize_for_subprocess(InternalConfiguration cfg); + + void glean_set_upload_enabled(boolean enabled); + + // Experiment reporting API + void glean_set_experiment_active(string experiment_id, string branch, record<DOMString, string> extra); + void glean_set_experiment_inactive(string experiment_id); + RecordedExperiment? glean_test_get_experiment_data(string experiment_id); + + // Server Knobs API + void glean_set_metrics_enabled_config(string json); + + boolean glean_set_debug_view_tag(string tag); + boolean glean_set_source_tags(sequence<string> tags); + void glean_set_log_pings(boolean value); + + void glean_handle_client_active(); + void glean_handle_client_inactive(); + + void glean_submit_ping_by_name(string ping_name, optional string? reason = null); + boolean glean_submit_ping_by_name_sync(string ping_name, optional string? reason = null); + + void glean_set_test_mode(boolean enabled); + void glean_test_destroy_glean(boolean clear_stores, optional string? 
data_path = null); + + void glean_set_dirty_flag(boolean flag); + + PingUploadTask glean_get_upload_task(); + UploadTaskAction glean_process_ping_upload_response(string uuid, UploadResult result); +}; + +// A `Cow<'static, str>`, but really it's always the owned part. +[Custom] +typedef string CowString; + +// The Glean configuration. +// +// This exposes all configurable parameters to the SDK side. +// They should not be exposed directly to users of the SDK (except `upload_enabled`). +dictionary InternalConfiguration { + string data_path; + string application_id; + string language_binding_name; + boolean upload_enabled; + u32? max_events; + boolean delay_ping_lifetime_io; + string app_build; + boolean use_core_mps; + boolean trim_data_to_registered_pings; + LevelFilter? log_level; +}; + +// An enum representing the different logging levels for the `log` crate. +enum LevelFilter { + "Off", + "Error", + "Warn", + "Info", + "Debug", + "Trace", +}; + +// Values for the `client_info` metrics. +// The language SDK should collect them on `initialize` once. +// They will be re-used, e.g. when upload is toggled from off to on, to re-set them. +// +// See https://mozilla.github.io/glean/book/user/pings/index.html#the-client_info-section for details. +dictionary ClientInfoMetrics { + string app_build; + string app_display_version; + Datetime app_build_date; + string architecture; + string os_version; + + string? channel = null; + string? locale = null; + string? device_manufacturer = null; + string? device_model = null; + string? android_sdk_version = null; + i64? windows_build_number = null; +}; + +[Error] +enum CallbackError { + "UnexpectedError", +}; + +// A callback object, that is stored within the core logic for the entire lifetime of the application. +// +// This is used to trigger certain actions that need to happen on the foreign-language side. +callback interface OnGleanEvents { + // Initialization finished. + // + // The language SDK can do additional things from within the same initializer thread, + // e.g. starting to observe application events for foreground/background behavior. + // The observer then needs to call the respective client activity API. + void initialize_finished(); + + // Trigger the uploader whenever a ping was submitted. + // + // This should not block. + // The uploader needs to asynchronously poll Glean for new pings to upload. + [Throws=CallbackError] + void trigger_upload(); + + // Start the Metrics Ping Scheduler. + // + // *Note*: The implementor + // * DOES NOT need to schedule the uploader. + // * MUST NOT use a dispatched call in the immediate invocation. + // + // Returns whether it submitted a ping immediately. + boolean start_metrics_ping_scheduler(); + + // Called when upload is disabled and uploads should be stopped + [Throws=CallbackError] + void cancel_uploads(); + + // Called on shutdown, before Glean is fully shutdown. + // + // * This MUST NOT put any new tasks on the dispatcher. + // * New tasks will be ignored. + // * This SHOULD NOT block arbitrarily long. + // * Shutdown waits for a maximum of 30 seconds. + [Throws=CallbackError] + void shutdown(); +}; + +// Deserialized experiment data. +dictionary RecordedExperiment { + // The experiment's branch. + string branch; + // Any extra data associated with this experiment. + record<DOMString, string>? extra; +}; + +// Represents a request to upload a ping. +dictionary PingRequest { + // The Job ID to identify this request, + // this is the same as the ping UUID. 
+ string document_id;
+ // The path for the server to upload the ping to.
+ string path;
+ // The body of the request, as a byte array.
+ // If gzip encoded, then the `headers` list will
+ // contain a `Content-Encoding` header with the value `gzip`.
+ sequence<u8> body;
+ // A map with all the headers to be sent with the request.
+ record<DOMString, string> headers;
+};
+
+// An enum representing the possible upload tasks to be performed by an uploader.
+[Enum]
+interface PingUploadTask {
+ // An upload task.
+ //
+ // * request: the ping request for upload
+ Upload(PingRequest request);
+ // A flag signaling that the pending pings directories are not done being processed,
+ // thus the requester should wait and come back later.
+ //
+ // * time: The time in milliseconds the requester should wait before requesting a new task.
+ Wait(u64 time);
+
+ // A flag signaling that the requester doesn't need to request
+ // any more upload tasks at this moment.
+ //
+ // * unused: _ignored_.
+ Done(i8 unused);
+};
+
+// The result of an attempted ping upload.
+[Enum]
+interface UploadResult {
+ // A recoverable failure.
+ //
+ // During upload something went wrong, e.g. the network connection failed.
+ // The upload should be retried at a later time.
+ //
+ // * unused: _ignored_.
+ RecoverableFailure(i8 unused);
+
+ // An unrecoverable upload failure.
+ //
+ // A possible cause might be a malformed URL.
+ //
+ // * unused: _ignored_.
+ UnrecoverableFailure(i8 unused);
+
+ // An HTTP response code.
+ //
+ // This can still indicate an error, depending on the status code.
+ //
+ // * code: The HTTP status code
+ HttpStatus(i32 code);
+
+ // Signal that this uploader is done with work
+ // and won't accept new work.
+ Done(i8 unused);
+};
+
+// Communicating back whether the uploader loop should continue.
+enum UploadTaskAction {
+ // Instruct the caller to continue with work.
+ "Next",
+
+ // Instruct the caller to end work.
+ "End",
+};
+
+// The supported metrics' lifetimes.
+//
+// A metric's lifetime determines when its stored data gets reset.
+enum Lifetime {
+ // The metric is reset with each sent ping
+ "Ping",
+ // The metric is reset on application restart
+ "Application",
+ // The metric is reset with each user profile
+ "User",
+};
+
+// The possible error types for metric recording.
+enum ErrorType {
+ // For when the value to be recorded does not match the metric-specific restrictions
+ "InvalidValue",
+ // For when the label of a labeled metric does not match the restrictions
+ "InvalidLabel",
+ // For when the metric caught an invalid state while recording
+ "InvalidState",
+ // For when the value to be recorded overflows the metric-specific upper range
+ "InvalidOverflow",
+};
+
+interface PingType {
+ constructor(string name, boolean include_client_id, boolean send_if_empty, sequence<string> reason_codes);
+ void submit(optional string? reason = null);
+};
+
+// The common set of data shared across all different metric types.
+dictionary CommonMetricData {
+ // The metric's category.
+ string category;
+ // The metric's name.
+ string name;
+
+ // List of ping names to include this metric in.
+ sequence<string> send_in_pings;
+ // The metric's lifetime.
+ Lifetime lifetime;
+
+ // Whether or not the metric is disabled.
+ //
+ // Disabled metrics are never recorded.
+ boolean disabled;
+
+ // Dynamic label.
+ //
+ // When a labeled metric factory creates the specific metric to be recorded to,
+ // dynamic labels are stored in the specific label so that
+ // we can validate them when the Glean singleton is available.
+ string? dynamic_label = null;
+};
+
+interface CounterMetric {
+ constructor(CommonMetricData meta);
+
+ void add(optional i32 amount = 1);
+
+ i32? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+// Different resolutions supported by the time related metric types
+// (e.g. DatetimeMetric).
+enum TimeUnit {
+ // Represents nanosecond precision.
+ "Nanosecond",
+ // Represents microsecond precision.
+ "Microsecond",
+ // Represents millisecond precision.
+ "Millisecond",
+ // Represents second precision.
+ "Second",
+ // Represents minute precision.
+ "Minute",
+ // Represents hour precision.
+ "Hour",
+ // Represents day precision.
+ "Day",
+};
+
+interface TimespanMetric {
+ constructor(CommonMetricData meta, TimeUnit time_unit);
+
+ void start();
+
+ void stop();
+
+ void cancel();
+
+ void set_raw_nanos(i64 elapsed);
+
+ i64? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface BooleanMetric {
+ constructor(CommonMetricData meta);
+
+ void set(boolean value);
+
+ boolean? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface StringMetric {
+ constructor(CommonMetricData meta);
+
+ void set(string value);
+
+ string? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface LabeledCounter {
+ constructor(CommonMetricData meta, sequence<CowString>? labels);
+
+ CounterMetric get(string label);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface LabeledBoolean {
+ constructor(CommonMetricData meta, sequence<CowString>? labels);
+
+ BooleanMetric get(string label);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface LabeledString {
+ constructor(CommonMetricData meta, sequence<CowString>? labels);
+
+ StringMetric get(string label);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface StringListMetric {
+ constructor(CommonMetricData meta);
+
+ void add(string value);
+
+ void set(sequence<string> value);
+
+ sequence<string>? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface UrlMetric {
+ constructor(CommonMetricData meta);
+
+ void set(string value);
+
+ string? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface UuidMetric {
+ constructor(CommonMetricData meta);
+
+ void set(string value);
+
+ string generate_and_set();
+
+ string? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+interface QuantityMetric {
+ constructor(CommonMetricData meta);
+
+ void set(i64 value);
+
+ i64? test_get_value(optional string? ping_name = null);
+
+ i32 test_get_num_recorded_errors(ErrorType error);
+};
+
+// A snapshot of all buckets and the accumulated sum of a distribution.
+dictionary DistributionData {
+ // A map containing the bucket index mapped to the accumulated count.
+ record<i64, i64> values;
+
+ // The accumulated sum of all the samples in the distribution.
+ i64 sum;
+
+ // The total number of entries in the distribution.
+ i64 count; +}; + +// Identifier for a running timer. +// +// Its internals are considered private, +// but due to UniFFI's behavior we expose it as a dictionary for now. +dictionary TimerId { + u64 id; +}; + +interface TimingDistributionMetric { + constructor(CommonMetricData meta, TimeUnit time_unit); + + TimerId start(); + + void stop_and_accumulate(TimerId timer_id); + + void cancel(TimerId timer_id); + + void accumulate_samples(sequence<i64> samples); + + DistributionData? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +// Different resolutions supported by the memory related metric types +// (e.g. MemoryDistributionMetric). +enum MemoryUnit { + // 1 byte + "Byte", + // 2^10 bytes + "Kilobyte", + // 2^20 bytes + "Megabyte", + // 2^30 bytes + "Gigabyte", +}; + +interface MemoryDistributionMetric { + constructor(CommonMetricData meta, MemoryUnit memory_unit); + + void accumulate(i64 sample); + + void accumulate_samples(sequence<i64> samples); + + DistributionData? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +// Different kinds of histograms. +enum HistogramType { + // A histogram with linear distributed buckets. + "Linear", + // A histogram with exponential distributed buckets. + "Exponential", +}; + +interface CustomDistributionMetric { + constructor(CommonMetricData meta, i64 range_min, i64 range_max, i64 bucket_count, HistogramType histogram_type); + + void accumulate_samples(sequence<i64> samples); + + DistributionData? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +// Representation of a date, time and timezone. +dictionary Datetime { + i32 year; + u32 month; + u32 day; + u32 hour; + u32 minute; + u32 second; + u32 nanosecond; + i32 offset_seconds; +}; + +interface DatetimeMetric { + constructor(CommonMetricData meta, TimeUnit time_unit); + + void set(optional Datetime? value = null); + + Datetime? test_get_value(optional string? ping_name = null); + + string? test_get_value_as_string(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +// Represents the recorded data for a single event. +dictionary RecordedEvent { + // The timestamp of when the event was recorded. + // + // This allows to order events from a single process run. + u64 timestamp; + + // The event's category. + // + // This is defined by users in the metrics file. + string category; + + // The event's name. + // + // This is defined by users in the metrics file. + string name; + + // A map of all extra data values. + // + // The set of allowed extra keys is defined by users in the metrics file. + record<DOMString, string>? extra; +}; + +interface EventMetric { + constructor(CommonMetricData meta, sequence<string> allowed_extra_keys); + + void record(record<DOMString, string> extra); + + sequence<RecordedEvent>? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +dictionary Rate { + i32 numerator; + i32 denominator; +}; + +interface RateMetric { + constructor(CommonMetricData meta); + + void add_to_numerator(i32 amount); + + void add_to_denominator(i32 amount); + + Rate? test_get_value(optional string? 
ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +interface DenominatorMetric { + constructor(CommonMetricData meta, sequence<CommonMetricData> numerators); + + void add(i32 amount); + + i32? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +interface NumeratorMetric { + constructor(CommonMetricData meta); + + void add_to_numerator(i32 amount); + + Rate? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; + +interface TextMetric { + constructor(CommonMetricData meta); + + void set(string value); + + string? test_get_value(optional string? ping_name = null); + + i32 test_get_num_recorded_errors(ErrorType error); +}; diff --git a/third_party/rust/glean-core/src/glean_metrics.rs b/third_party/rust/glean-core/src/glean_metrics.rs new file mode 100644 index 0000000000..a252a3b78b --- /dev/null +++ b/third_party/rust/glean-core/src/glean_metrics.rs @@ -0,0 +1,26 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// ** IMPORTANT ** +// +// This file is required in order to include the ones generated by +// 'glean-parser' from the SDK registry files. + +pub mod error { + use crate::metrics::CounterMetric; + use crate::{CommonMetricData, Lifetime}; + use once_cell::sync::Lazy; + + #[allow(non_upper_case_globals)] + pub static preinit_tasks_overflow: Lazy<CounterMetric> = Lazy::new(|| { + CounterMetric::new(CommonMetricData { + category: "glean.error".into(), + name: "preinit_tasks_overflow".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + ..Default::default() + }) + }); +} diff --git a/third_party/rust/glean-core/src/histogram/exponential.rs b/third_party/rust/glean-core/src/histogram/exponential.rs new file mode 100644 index 0000000000..5481c4feb9 --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/exponential.rs @@ -0,0 +1,206 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +use crate::util::floating_point_context::FloatingPointContext; + +/// Create the possible ranges in an exponential distribution from `min` to `max` with +/// `bucket_count` buckets. +/// +/// This algorithm calculates the bucket sizes using a natural log approach to get `bucket_count` number of buckets, +/// exponentially spaced between `min` and `max` +/// +/// Bucket limits are the minimal bucket value. +/// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`. +/// It will always contain an underflow bucket (`< 1`). 
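+///
+/// For example (mirroring `short_exponential_buckets_are_correct` below),
+/// `exponential_range(1, 100, 10)` yields `[0, 1, 2, 3, 5, 9, 16, 29, 54, 100]`.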
+fn exponential_range(min: u64, max: u64, bucket_count: usize) -> Vec<u64> {
+ // Set the FPU control flag to the required state within this function
+ let _fpc = FloatingPointContext::new();
+
+ let log_max = (max as f64).ln();
+
+ let mut ranges = Vec::with_capacity(bucket_count);
+ let mut current = min;
+ if current == 0 {
+ current = 1;
+ }
+
+ // underflow bucket
+ ranges.push(0);
+ ranges.push(current);
+
+ for i in 2..bucket_count {
+ let log_current = (current as f64).ln();
+ let log_ratio = (log_max - log_current) / (bucket_count - i) as f64;
+ let log_next = log_current + log_ratio;
+ let next_value = log_next.exp().round() as u64;
+ current = if next_value > current {
+ next_value
+ } else {
+ current + 1
+ };
+ ranges.push(current);
+ }
+
+ ranges
+}
+
+/// An exponential bucketing algorithm.
+///
+/// Buckets are pre-computed at instantiation with an exponential distribution from `min` to `max`
+/// and `bucket_count` buckets.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct PrecomputedExponential {
+ // Don't serialize the (potentially large) array of ranges, instead compute them on first
+ // access.
+ #[serde(skip)]
+ bucket_ranges: OnceCell<Vec<u64>>,
+ min: u64,
+ max: u64,
+ bucket_count: usize,
+}
+
+impl Bucketing for PrecomputedExponential {
+ /// Get the bucket for the sample.
+ ///
+ /// This uses a binary search to locate the index `i` of the bucket such that:
+ /// bucket[i] <= sample < bucket[i+1]
+ fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
+ let limit = match self.ranges().binary_search(&sample) {
+ // Found an exact match to fit it in
+ Ok(i) => i,
+ // No exact match: the sample sorts after this insertion point, so it belongs in the previous bucket
+ Err(i) => i - 1,
+ };
+
+ self.ranges()[limit]
+ }
+
+ fn ranges(&self) -> &[u64] {
+ // Create the exponential range on first access.
+ self.bucket_ranges
+ .get_or_init(|| exponential_range(self.min, self.max, self.bucket_count))
+ }
+}
+
+impl Histogram<PrecomputedExponential> {
+ /// Creates a histogram with `bucket_count` exponential buckets in the range `min` to `max`.
+ pub fn exponential(
+ min: u64,
+ max: u64,
+ bucket_count: usize,
+ ) -> Histogram<PrecomputedExponential> {
+ Histogram {
+ values: HashMap::new(),
+ count: 0,
+ sum: 0,
+ bucketing: PrecomputedExponential {
+ bucket_ranges: OnceCell::new(),
+ min,
+ max,
+ bucket_count,
+ },
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ const DEFAULT_BUCKET_COUNT: usize = 100;
+ const DEFAULT_RANGE_MIN: u64 = 0;
+ const DEFAULT_RANGE_MAX: u64 = 60_000;
+
+ #[test]
+ fn can_count() {
+ let mut hist = Histogram::exponential(1, 500, 10);
+ assert!(hist.is_empty());
+
+ for i in 1..=10 {
+ hist.accumulate(i);
+ }
+
+ assert_eq!(10, hist.count());
+ assert_eq!(55, hist.sum());
+ }
+
+ #[test]
+ fn overflow_values_accumulate_in_the_last_bucket() {
+ let mut hist =
+ Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT);
+
+ hist.accumulate(DEFAULT_RANGE_MAX + 100);
+ assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]);
+ }
+
+ #[test]
+ fn short_exponential_buckets_are_correct() {
+ let test_buckets = vec![0, 1, 2, 3, 5, 9, 16, 29, 54, 100];
+
+ assert_eq!(test_buckets, exponential_range(1, 100, 10));
+ // There's always a zero bucket, so we increase the lower limit.
+ assert_eq!(test_buckets, exponential_range(0, 100, 10));
+ }
+
+ #[test]
+ fn default_exponential_buckets_are_correct() {
+ // Hand calculated values using current default range 0 - 60000 and bucket count of 100.
+ // NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any + // values beyond the maximum (in this case the maximum is 60000) + let test_buckets = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 23, 25, 28, 31, 34, + 38, 42, 46, 51, 56, 62, 68, 75, 83, 92, 101, 111, 122, 135, 149, 164, 181, 200, 221, + 244, 269, 297, 328, 362, 399, 440, 485, 535, 590, 651, 718, 792, 874, 964, 1064, 1174, + 1295, 1429, 1577, 1740, 1920, 2118, 2337, 2579, 2846, 3140, 3464, 3822, 4217, 4653, + 5134, 5665, 6250, 6896, 7609, 8395, 9262, 10219, 11275, 12440, 13726, 15144, 16709, + 18436, 20341, 22443, 24762, 27321, 30144, 33259, 36696, 40488, 44672, 49288, 54381, + 60000, + ]; + + assert_eq!( + test_buckets, + exponential_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT) + ); + } + + #[test] + fn default_buckets_correctly_accumulate() { + let mut hist = + Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); + + for i in &[1, 10, 100, 1000, 10000] { + hist.accumulate(*i); + } + + assert_eq!(11111, hist.sum()); + assert_eq!(5, hist.count()); + + assert_eq!(None, hist.values.get(&0)); // underflow is empty + assert_eq!(1, hist.values[&1]); // bucket_ranges[1] = 1 + assert_eq!(1, hist.values[&10]); // bucket_ranges[10] = 10 + assert_eq!(1, hist.values[&92]); // bucket_ranges[33] = 92 + assert_eq!(1, hist.values[&964]); // bucket_ranges[57] = 964 + assert_eq!(1, hist.values[&9262]); // bucket_ranges[80] = 9262 + } + + #[test] + fn accumulate_large_numbers() { + let mut hist = Histogram::exponential(1, 500, 10); + + hist.accumulate(u64::max_value()); + hist.accumulate(u64::max_value()); + + assert_eq!(2, hist.count()); + // Saturate before overflowing + assert_eq!(u64::max_value(), hist.sum()); + assert_eq!(2, hist.values[&500]); + } +} diff --git a/third_party/rust/glean-core/src/histogram/functional.rs b/third_party/rust/glean-core/src/histogram/functional.rs new file mode 100644 index 0000000000..64df9a1a4d --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/functional.rs @@ -0,0 +1,174 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +use crate::util::floating_point_context::FloatingPointContext; + +/// A functional bucketing algorithm. +/// +/// Bucketing is performed by a function, rather than pre-computed buckets. +/// The bucket index of a given sample is determined with the following function: +/// +/// i = ⌊n log<sub>base</sub>(𝑥)⌋ +/// +/// In other words, there are n buckets for each power of `base` magnitude. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Functional { + exponent: f64, +} + +impl Functional { + /// Instantiate a new functional bucketing. + fn new(log_base: f64, buckets_per_magnitude: f64) -> Functional { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + let exponent = log_base.powf(1.0 / buckets_per_magnitude); + + Functional { exponent } + } + + /// Maps a sample to a "bucket index" that it belongs in. 
+ /// A "bucket index" is the consecutive integer index of each bucket, useful as a + /// mathematical concept, even though the internal representation is stored and + /// sent using the minimum value in each bucket. + fn sample_to_bucket_index(&self, sample: u64) -> u64 { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + ((sample.saturating_add(1)) as f64).log(self.exponent) as u64 + } + + /// Determines the minimum value of a bucket, given a bucket index. + fn bucket_index_to_bucket_minimum(&self, index: u64) -> u64 { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + self.exponent.powf(index as f64) as u64 + } +} + +impl Bucketing for Functional { + fn sample_to_bucket_minimum(&self, sample: u64) -> u64 { + if sample == 0 { + return 0; + } + + let index = self.sample_to_bucket_index(sample); + self.bucket_index_to_bucket_minimum(index) + } + + fn ranges(&self) -> &[u64] { + unimplemented!("Bucket ranges for functional bucketing are not precomputed") + } +} + +impl Histogram<Functional> { + /// Creates a histogram with functional buckets. + pub fn functional(log_base: f64, buckets_per_magnitude: f64) -> Histogram<Functional> { + Histogram { + values: HashMap::new(), + count: 0, + sum: 0, + bucketing: Functional::new(log_base, buckets_per_magnitude), + } + } + + /// Gets a snapshot of all contiguous values. + /// + /// **Caution** This is a more specific implementation of `snapshot_values` on functional + /// histograms. `snapshot_values` cannot be used with those, due to buckets not being + /// precomputed. + pub fn snapshot(&self) -> HashMap<u64, u64> { + if self.values.is_empty() { + return HashMap::new(); + } + + let mut min_key = None; + let mut max_key = None; + + // `Iterator#min` and `Iterator#max` would do the same job independently, + // but we want to avoid iterating the keys twice, so we loop ourselves. + for key in self.values.keys() { + let key = *key; + + // safe unwrap, we checked it's not none + if min_key.is_none() || key < min_key.unwrap() { + min_key = Some(key); + } + + // safe unwrap, we checked it's not none + if max_key.is_none() || key > max_key.unwrap() { + max_key = Some(key); + } + } + + // Non-empty values, therefore minimum/maximum exists. + // safe unwraps, we set it at least once. + let min_bucket = self.bucketing.sample_to_bucket_index(min_key.unwrap()); + let max_bucket = self.bucketing.sample_to_bucket_index(max_key.unwrap()) + 1; + + let mut values = self.values.clone(); + + for idx in min_bucket..=max_bucket { + // Fill in missing entries. 
+ let min_bucket = self.bucketing.bucket_index_to_bucket_minimum(idx); + let _ = values.entry(min_bucket).or_insert(0); + } + + values + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn can_count() { + let mut hist = Histogram::functional(2.0, 8.0); + assert!(hist.is_empty()); + + for i in 1..=10 { + hist.accumulate(i); + } + + assert_eq!(10, hist.count()); + assert_eq!(55, hist.sum()); + } + + #[test] + fn sample_to_bucket_minimum_correctly_rounds_down() { + let hist = Histogram::functional(2.0, 8.0); + + // Check each of the first 100 integers, where numerical accuracy of the round-tripping + // is most potentially problematic + for value in 0..100 { + let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value); + assert!(bucket_minimum <= value); + + assert_eq!( + bucket_minimum, + hist.bucketing.sample_to_bucket_minimum(bucket_minimum) + ); + } + + // Do an exponential sampling of higher numbers + for i in 11..500 { + let value = 1.5f64.powi(i); + let value = value as u64; + let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value); + assert!(bucket_minimum <= value); + assert_eq!( + bucket_minimum, + hist.bucketing.sample_to_bucket_minimum(bucket_minimum) + ); + } + } +} diff --git a/third_party/rust/glean-core/src/histogram/linear.rs b/third_party/rust/glean-core/src/histogram/linear.rs new file mode 100644 index 0000000000..7b30ea8f6c --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/linear.rs @@ -0,0 +1,178 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::cmp; +use std::collections::HashMap; + +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +/// Create the possible ranges in a linear distribution from `min` to `max` with +/// `bucket_count` buckets. +/// +/// This algorithm calculates `bucket_count` number of buckets of equal sizes between `min` and `max`. +/// +/// Bucket limits are the minimal bucket value. +/// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`. +/// It will always contain an underflow bucket (`< 1`). +fn linear_range(min: u64, max: u64, count: usize) -> Vec<u64> { + let mut ranges = Vec::with_capacity(count); + ranges.push(0); + + let min = cmp::max(1, min); + let count = count as u64; + for i in 1..count { + let range = (min * (count - 1 - i) + max * (i - 1)) / (count - 2); + ranges.push(range); + } + + ranges +} + +/// A linear bucketing algorithm. +/// +/// Buckets are pre-computed at instantiation with a linear distribution from `min` to `max` +/// and `bucket_count` buckets. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PrecomputedLinear { + // Don't serialize the (potentially large) array of ranges, instead compute them on first + // access. + #[serde(skip)] + bucket_ranges: OnceCell<Vec<u64>>, + min: u64, + max: u64, + bucket_count: usize, +} + +impl Bucketing for PrecomputedLinear { + /// Get the bucket for the sample. 
+ ///
+ /// This uses a binary search to locate the index `i` of the bucket such that:
+ /// bucket[i] <= sample < bucket[i+1]
+ fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
+ let limit = match self.ranges().binary_search(&sample) {
+ // Found an exact match to fit it in
+ Ok(i) => i,
+ // No exact match: the sample sorts after this insertion point, so it belongs in the previous bucket
+ Err(i) => i - 1,
+ };
+
+ self.ranges()[limit]
+ }
+
+ fn ranges(&self) -> &[u64] {
+ // Create the linear range on first access.
+ self.bucket_ranges
+ .get_or_init(|| linear_range(self.min, self.max, self.bucket_count))
+ }
+}
+
+impl Histogram<PrecomputedLinear> {
+ /// Creates a histogram with `bucket_count` linear buckets in the range `min` to `max`.
+ pub fn linear(min: u64, max: u64, bucket_count: usize) -> Histogram<PrecomputedLinear> {
+ Histogram {
+ values: HashMap::new(),
+ count: 0,
+ sum: 0,
+ bucketing: PrecomputedLinear {
+ bucket_ranges: OnceCell::new(),
+ min,
+ max,
+ bucket_count,
+ },
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ const DEFAULT_BUCKET_COUNT: usize = 100;
+ const DEFAULT_RANGE_MIN: u64 = 0;
+ const DEFAULT_RANGE_MAX: u64 = 100;
+
+ #[test]
+ fn can_count() {
+ let mut hist = Histogram::linear(1, 500, 10);
+ assert!(hist.is_empty());
+
+ for i in 1..=10 {
+ hist.accumulate(i);
+ }
+
+ assert_eq!(10, hist.count());
+ assert_eq!(55, hist.sum());
+ }
+
+ #[test]
+ fn overflow_values_accumulate_in_the_last_bucket() {
+ let mut hist =
+ Histogram::linear(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT);
+
+ hist.accumulate(DEFAULT_RANGE_MAX + 100);
+ assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]);
+ }
+
+ #[test]
+ fn short_linear_buckets_are_correct() {
+ let test_buckets = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10];
+
+ assert_eq!(test_buckets, linear_range(1, 10, 10));
+ // There's always a zero bucket, so we increase the lower limit.
+ assert_eq!(test_buckets, linear_range(0, 10, 10));
+ }
+
+ #[test]
+ fn long_linear_buckets_are_correct() {
+ // Hand calculated values using current default range 0 - 100 and bucket count of 100.
+ // NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any
+ // values beyond the maximum (in this case the maximum is 100)
+ let test_buckets = vec![
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
+ 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 100,
+ ];
+
+ assert_eq!(
+ test_buckets,
+ linear_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT)
+ );
+ }
+
+ #[test]
+ fn default_buckets_correctly_accumulate() {
+ let mut hist =
+ Histogram::linear(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT);
+
+ for i in &[1, 10, 100, 1000, 10000] {
+ hist.accumulate(*i);
+ }
+
+ assert_eq!(11111, hist.sum());
+ assert_eq!(5, hist.count());
+
+ assert_eq!(None, hist.values.get(&0));
+ assert_eq!(1, hist.values[&1]);
+ assert_eq!(1, hist.values[&10]);
+ assert_eq!(3, hist.values[&100]);
+ }
+
+ #[test]
+ fn accumulate_large_numbers() {
+ let mut hist = Histogram::linear(1, 500, 10);
+
+ hist.accumulate(u64::max_value());
+ hist.accumulate(u64::max_value());
+
+ assert_eq!(2, hist.count());
+ // Saturate before overflowing
+ assert_eq!(u64::max_value(), hist.sum());
+ assert_eq!(2, hist.values[&500]);
+ }
+}
diff --git a/third_party/rust/glean-core/src/histogram/mod.rs b/third_party/rust/glean-core/src/histogram/mod.rs
new file mode 100644
index 0000000000..282b02e0ab
--- /dev/null
+++ b/third_party/rust/glean-core/src/histogram/mod.rs
@@ -0,0 +1,139 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! A simple histogram implementation supporting exponential, linear and functional bucketing.
+
+use std::collections::HashMap;
+use std::convert::TryFrom;
+
+use serde::{Deserialize, Serialize};
+
+use crate::error::{Error, ErrorKind};
+
+pub use exponential::PrecomputedExponential;
+pub use functional::Functional;
+pub use linear::PrecomputedLinear;
+
+mod exponential;
+mod functional;
+mod linear;
+
+/// Different kinds of histograms.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum HistogramType {
+ /// A histogram with linear distributed buckets.
+ Linear,
+ /// A histogram with exponential distributed buckets.
+ Exponential,
+}
+
+impl TryFrom<i32> for HistogramType {
+ type Error = Error;
+
+ fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
+ match value {
+ 0 => Ok(HistogramType::Linear),
+ 1 => Ok(HistogramType::Exponential),
+ e => Err(ErrorKind::HistogramType(e).into()),
+ }
+ }
+}
+
+/// A histogram.
+///
+/// Stores the counts per bucket and tracks the count of added samples and the total sum.
+/// The bucketing algorithm can be changed.
+///
+/// ## Example
+///
+/// ```rust,ignore
+/// let mut hist = Histogram::exponential(1, 500, 10);
+///
+/// for i in 1..=10 {
+/// hist.accumulate(i);
+/// }
+///
+/// assert_eq!(10, hist.count());
+/// assert_eq!(55, hist.sum());
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct Histogram<B> {
+ /// Mapping bucket's minimum to sample count.
+ values: HashMap<u64, u64>,
+
+ /// The count of samples added.
+ count: u64,
+ /// The total sum of samples.
+ sum: u64, + + /// The bucketing algorithm used. + bucketing: B, +} + +/// A bucketing algorithm for histograms. +/// +/// It's responsible to calculate the bucket a sample goes into. +/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed. +pub trait Bucketing { + /// Get the bucket's minimum value the sample falls into. + fn sample_to_bucket_minimum(&self, sample: u64) -> u64; + + /// The computed bucket ranges for this bucketing algorithm. + fn ranges(&self) -> &[u64]; +} + +impl<B: Bucketing> Histogram<B> { + /// Gets the number of buckets in this histogram. + pub fn bucket_count(&self) -> usize { + self.values.len() + } + + /// Adds a single value to this histogram. + pub fn accumulate(&mut self, sample: u64) { + let bucket_min = self.bucketing.sample_to_bucket_minimum(sample); + let entry = self.values.entry(bucket_min).or_insert(0); + *entry += 1; + self.sum = self.sum.saturating_add(sample); + self.count += 1; + } + + /// Gets the total sum of values recorded in this histogram. + pub fn sum(&self) -> u64 { + self.sum + } + + /// Gets the total count of values recorded in this histogram. + pub fn count(&self) -> u64 { + self.count + } + + /// Gets the filled values. + pub fn values(&self) -> &HashMap<u64, u64> { + &self.values + } + + /// Checks if this histogram recorded any values. + pub fn is_empty(&self) -> bool { + self.count() == 0 + } + + /// Gets a snapshot of all values from the first bucket until one past the last filled bucket, + /// filling in empty buckets with 0. + pub fn snapshot_values(&self) -> HashMap<u64, u64> { + let mut res = self.values.clone(); + + let max_bucket = self.values.keys().max().cloned().unwrap_or(0); + + for &min_bucket in self.bucketing.ranges() { + // Fill in missing entries. + let _ = res.entry(min_bucket).or_insert(0); + // stop one after the last filled bucket + if min_bucket > max_bucket { + break; + } + } + res + } +} diff --git a/third_party/rust/glean-core/src/internal_metrics.rs b/third_party/rust/glean-core/src/internal_metrics.rs new file mode 100644 index 0000000000..1a1c5576f2 --- /dev/null +++ b/third_party/rust/glean-core/src/internal_metrics.rs @@ -0,0 +1,261 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::borrow::Cow; + +use super::{metrics::*, CommonMetricData, Lifetime}; + +#[derive(Debug)] +pub struct CoreMetrics { + pub client_id: UuidMetric, + pub first_run_date: DatetimeMetric, + pub os: StringMetric, +} + +#[derive(Debug)] +pub struct AdditionalMetrics { + /// The number of times we encountered an IO error + /// when writing a pending ping to disk. + pub io_errors: CounterMetric, + + /// A count of the pings submitted, by ping type. + pub pings_submitted: LabeledMetric<CounterMetric>, + + /// Time waited for the uploader at shutdown. + pub shutdown_wait: TimingDistributionMetric, + + /// Time waited for the dispatcher to unblock during shutdown. 
+ pub shutdown_dispatcher_wait: TimingDistributionMetric, +} + +impl CoreMetrics { + pub fn new() -> CoreMetrics { + CoreMetrics { + client_id: UuidMetric::new(CommonMetricData { + name: "client_id".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::User, + disabled: false, + dynamic_label: None, + }), + + first_run_date: DatetimeMetric::new( + CommonMetricData { + name: "first_run_date".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::User, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Day, + ), + + os: StringMetric::new(CommonMetricData { + name: "os".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }), + } + } +} + +impl AdditionalMetrics { + pub fn new() -> AdditionalMetrics { + AdditionalMetrics { + io_errors: CounterMetric::new(CommonMetricData { + name: "io".into(), + category: "glean.error".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + + pings_submitted: LabeledMetric::<CounterMetric>::new( + CommonMetricData { + name: "pings_submitted".into(), + category: "glean.validation".into(), + send_in_pings: vec!["metrics".into(), "baseline".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + None, + ), + + shutdown_wait: TimingDistributionMetric::new( + CommonMetricData { + name: "shutdown_wait".into(), + category: "glean.validation".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Millisecond, + ), + + shutdown_dispatcher_wait: TimingDistributionMetric::new( + CommonMetricData { + name: "shutdown_dispatcher_wait".into(), + category: "glean.validation".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Millisecond, + ), + } + } +} + +#[derive(Debug)] +pub struct UploadMetrics { + pub ping_upload_failure: LabeledMetric<CounterMetric>, + pub discarded_exceeding_pings_size: MemoryDistributionMetric, + pub pending_pings_directory_size: MemoryDistributionMetric, + pub deleted_pings_after_quota_hit: CounterMetric, + pub pending_pings: CounterMetric, + pub send_success: TimingDistributionMetric, + pub send_failure: TimingDistributionMetric, + pub in_flight_pings_dropped: CounterMetric, + pub missing_send_ids: CounterMetric, +} + +impl UploadMetrics { + pub fn new() -> UploadMetrics { + UploadMetrics { + ping_upload_failure: LabeledMetric::<CounterMetric>::new( + CommonMetricData { + name: "ping_upload_failure".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + Some(vec![ + Cow::from("status_code_4xx"), + Cow::from("status_code_5xx"), + Cow::from("status_code_unknown"), + Cow::from("unrecoverable"), + Cow::from("recoverable"), + ]), + ), + + discarded_exceeding_pings_size: MemoryDistributionMetric::new( + CommonMetricData { + name: "discarded_exceeding_ping_size".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Kilobyte, + ), + + pending_pings_directory_size: MemoryDistributionMetric::new( + CommonMetricData { + name: "pending_pings_directory_size".into(), + 
category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Kilobyte, + ), + + deleted_pings_after_quota_hit: CounterMetric::new(CommonMetricData { + name: "deleted_pings_after_quota_hit".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + + pending_pings: CounterMetric::new(CommonMetricData { + name: "pending_pings".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + + send_success: TimingDistributionMetric::new( + CommonMetricData { + name: "send_success".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Millisecond, + ), + + send_failure: TimingDistributionMetric::new( + CommonMetricData { + name: "send_failure".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Millisecond, + ), + + in_flight_pings_dropped: CounterMetric::new(CommonMetricData { + name: "in_flight_pings_dropped".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + + missing_send_ids: CounterMetric::new(CommonMetricData { + name: "missing_send_ids".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + } + } +} + +#[derive(Debug)] +pub struct DatabaseMetrics { + pub size: MemoryDistributionMetric, +} + +impl DatabaseMetrics { + pub fn new() -> DatabaseMetrics { + DatabaseMetrics { + size: MemoryDistributionMetric::new( + CommonMetricData { + name: "size".into(), + category: "glean.database".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Byte, + ), + } + } +} diff --git a/third_party/rust/glean-core/src/internal_pings.rs b/third_party/rust/glean-core/src/internal_pings.rs new file mode 100644 index 0000000000..660af6d1bd --- /dev/null +++ b/third_party/rust/glean-core/src/internal_pings.rs @@ -0,0 +1,64 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::PingType; + +/// Glean-provided pings, all enabled by default. +/// +/// These pings are defined in `glean-core/pings.yaml` and for now manually translated into Rust code. +/// This might get auto-generated when the Rust API lands ([Bug 1579146](https://bugzilla.mozilla.org/show_bug.cgi?id=1579146)). +/// +/// They are parsed and registered by the platform-specific wrappers, but might be used Glean-internal directly. 
+#[derive(Debug, Clone)] +pub struct InternalPings { + pub baseline: PingType, + pub metrics: PingType, + pub events: PingType, + pub deletion_request: PingType, +} + +impl InternalPings { + pub fn new() -> InternalPings { + InternalPings { + baseline: PingType::new( + "baseline", + true, + true, + vec![ + "active".to_string(), + "dirty_startup".to_string(), + "inactive".to_string(), + ], + ), + metrics: PingType::new( + "metrics", + true, + false, + vec![ + "overdue".to_string(), + "reschedule".to_string(), + "today".to_string(), + "tomorrow".to_string(), + "upgrade".to_string(), + ], + ), + events: PingType::new( + "events", + true, + false, + vec![ + "startup".to_string(), + "inactive".to_string(), + "max_capacity".to_string(), + ], + ), + deletion_request: PingType::new( + "deletion-request", + true, + true, + vec!["at_init".to_string(), "set_upload_enabled".to_string()], + ), + } + } +} diff --git a/third_party/rust/glean-core/src/lib.rs b/third_party/rust/glean-core/src/lib.rs new file mode 100644 index 0000000000..7d6ea2f179 --- /dev/null +++ b/third_party/rust/glean-core/src/lib.rs @@ -0,0 +1,1108 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(clippy::significant_drop_in_scrutinee)] +#![allow(clippy::uninlined_format_args)] +#![deny(rustdoc::broken_intra_doc_links)] +#![deny(missing_docs)] + +//! Glean is a modern approach for recording and sending Telemetry data. +//! +//! It's in use at Mozilla. +//! +//! All documentation can be found online: +//! +//! ## [The Glean SDK Book](https://mozilla.github.io/glean) + +use std::borrow::Cow; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::fmt; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Duration; + +use crossbeam_channel::unbounded; +use log::{self, LevelFilter}; +use once_cell::sync::{Lazy, OnceCell}; +use uuid::Uuid; + +use metrics::MetricsEnabledConfig; + +mod common_metric_data; +mod core; +mod core_metrics; +mod coverage; +mod database; +mod debug; +mod dispatcher; +mod error; +mod error_recording; +mod event_database; +mod glean_metrics; +mod histogram; +mod internal_metrics; +mod internal_pings; +pub mod metrics; +pub mod ping; +mod scheduler; +pub mod storage; +mod system; +pub mod traits; +pub mod upload; +mod util; + +#[cfg(all(not(target_os = "android"), not(target_os = "ios")))] +mod fd_logger; + +pub use crate::common_metric_data::{CommonMetricData, Lifetime}; +pub use crate::core::Glean; +pub use crate::core_metrics::ClientInfoMetrics; +pub use crate::error::{Error, ErrorKind, Result}; +pub use crate::error_recording::{test_get_num_recorded_errors, ErrorType}; +pub use crate::histogram::HistogramType; +pub use crate::metrics::labeled::{ + AllowLabeled, LabeledBoolean, LabeledCounter, LabeledMetric, LabeledString, +}; +pub use crate::metrics::{ + BooleanMetric, CounterMetric, CustomDistributionMetric, Datetime, DatetimeMetric, + DenominatorMetric, DistributionData, EventMetric, MemoryDistributionMetric, MemoryUnit, + NumeratorMetric, PingType, QuantityMetric, Rate, RateMetric, RecordedEvent, RecordedExperiment, + StringListMetric, StringMetric, TextMetric, TimeUnit, TimerId, TimespanMetric, + TimingDistributionMetric, UrlMetric, UuidMetric, +}; +pub use crate::upload::{PingRequest, PingUploadTask, UploadResult, UploadTaskAction}; + +const GLEAN_VERSION: &str = 
env!("CARGO_PKG_VERSION"); +const GLEAN_SCHEMA_VERSION: u32 = 1; +const DEFAULT_MAX_EVENTS: u32 = 500; +static KNOWN_CLIENT_ID: Lazy<Uuid> = + Lazy::new(|| Uuid::parse_str("c0ffeec0-ffee-c0ff-eec0-ffeec0ffeec0").unwrap()); + +// The names of the pings directories. +pub(crate) const PENDING_PINGS_DIRECTORY: &str = "pending_pings"; +pub(crate) const DELETION_REQUEST_PINGS_DIRECTORY: &str = "deletion_request"; + +/// Set when `glean::initialize()` returns. +/// This allows to detect calls that happen before `glean::initialize()` was called. +/// Note: The initialization might still be in progress, as it runs in a separate thread. +static INITIALIZE_CALLED: AtomicBool = AtomicBool::new(false); + +/// Keep track of the debug features before Glean is initialized. +static PRE_INIT_DEBUG_VIEW_TAG: OnceCell<Mutex<String>> = OnceCell::new(); +static PRE_INIT_LOG_PINGS: AtomicBool = AtomicBool::new(false); +static PRE_INIT_SOURCE_TAGS: OnceCell<Mutex<Vec<String>>> = OnceCell::new(); + +/// Keep track of pings registered before Glean is initialized. +static PRE_INIT_PING_REGISTRATION: OnceCell<Mutex<Vec<metrics::PingType>>> = OnceCell::new(); + +/// Global singleton of the handles of the glean.init threads. +/// For joining. For tests. +/// (Why a Vec? There might be more than one concurrent call to initialize.) +static INIT_HANDLES: Lazy<Arc<Mutex<Vec<std::thread::JoinHandle<()>>>>> = + Lazy::new(|| Arc::new(Mutex::new(Vec::new()))); + +/// Configuration for Glean +#[derive(Debug, Clone)] +pub struct InternalConfiguration { + /// Whether upload should be enabled. + pub upload_enabled: bool, + /// Path to a directory to store all data in. + pub data_path: String, + /// The application ID (will be sanitized during initialization). + pub application_id: String, + /// The name of the programming language used by the binding creating this instance of Glean. + pub language_binding_name: String, + /// The maximum number of events to store before sending a ping containing events. + pub max_events: Option<u32>, + /// Whether Glean should delay persistence of data from metrics with ping lifetime. + pub delay_ping_lifetime_io: bool, + /// The application's build identifier. If this is different from the one provided for a previous init, + /// and use_core_mps is `true`, we will trigger a "metrics" ping. + pub app_build: String, + /// Whether Glean should schedule "metrics" pings. + pub use_core_mps: bool, + /// Whether Glean should, on init, trim its event storage to only the registered pings. + pub trim_data_to_registered_pings: bool, + /// The internal logging level. + pub log_level: Option<LevelFilter>, +} + +/// Launches a new task on the global dispatch queue with a reference to the Glean singleton. +fn launch_with_glean(callback: impl FnOnce(&Glean) + Send + 'static) { + dispatcher::launch(|| core::with_glean(callback)); +} + +/// Launches a new task on the global dispatch queue with a mutable reference to the +/// Glean singleton. +fn launch_with_glean_mut(callback: impl FnOnce(&mut Glean) + Send + 'static) { + dispatcher::launch(|| core::with_glean_mut(callback)); +} + +/// Block on the dispatcher emptying. +/// +/// This will panic if called before Glean is initialized. +fn block_on_dispatcher() { + dispatcher::block_on_queue() +} + +/// Returns a timestamp corresponding to "now" with millisecond precision. +pub fn get_timestamp_ms() -> u64 { + const NANOS_PER_MILLI: u64 = 1_000_000; + zeitstempel::now() / NANOS_PER_MILLI +} + +/// State to keep track for the Rust Language bindings. 
+///
+/// This is useful for setting Glean SDK-owned metrics when
+/// the state of the upload is toggled.
+struct State {
+    /// Client info metrics set by the application.
+    client_info: ClientInfoMetrics,
+
+    callbacks: Box<dyn OnGleanEvents>,
+}
+
+/// A global singleton storing additional state for Glean.
+///
+/// Requires a Mutex, because in tests we can actually reset this.
+static STATE: OnceCell<Mutex<State>> = OnceCell::new();
+
+/// Get a reference to the global state object.
+///
+/// Panics if no global state object was set.
+#[track_caller] // If this fails we're interested in the caller.
+fn global_state() -> &'static Mutex<State> {
+    STATE.get().unwrap()
+}
+
+/// Set or replace the global bindings State object.
+fn setup_state(state: State) {
+    // The `OnceCell` type wrapping our state is thread-safe and can only be set once.
+    // Therefore even if our check for it being empty succeeds, setting it could fail if a
+    // concurrent thread is quicker in setting it.
+    // However this will not cause a bigger problem, as the second `set` operation will just fail.
+    // We can log it and move on.
+    //
+    // For all wrappers this is not a problem, as the State object is initialized exactly once on
+    // calling `initialize` on the global singleton and further operations check that it has been
+    // initialized.
+    if STATE.get().is_none() {
+        if STATE.set(Mutex::new(state)).is_err() {
+            log::error!(
+                "Global Glean state object is initialized already. This probably happened concurrently."
+            );
+        }
+    } else {
+        // We allow overriding the global State object to support test mode.
+        // In test mode the State object is fully destroyed and recreated.
+        // This all happens behind a mutex and is therefore also thread-safe.
+        let mut lock = STATE.get().unwrap().lock().unwrap();
+        *lock = state;
+    }
+}
+
+/// An error returned from callbacks.
+#[derive(Debug)]
+pub enum CallbackError {
+    /// An unexpected error occurred.
+    UnexpectedError,
+}
+
+impl fmt::Display for CallbackError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Unexpected error")
+    }
+}
+
+impl From<uniffi::UnexpectedUniFFICallbackError> for CallbackError {
+    fn from(_: uniffi::UnexpectedUniFFICallbackError) -> CallbackError {
+        CallbackError::UnexpectedError
+    }
+}
+
+/// A callback object used to trigger actions on the foreign-language side.
+///
+/// A callback object is stored in glean-core for the entire lifetime of the application.
+pub trait OnGleanEvents: Send {
+    /// Initialization finished.
+    ///
+    /// The language SDK can do additional things from within the same initializer thread,
+    /// e.g. starting to observe application events for foreground/background behavior.
+    /// The observer then needs to call the respective client activity API.
+    fn initialize_finished(&self);
+
+    /// Trigger the uploader whenever a ping was submitted.
+    ///
+    /// This should not block.
+    /// The uploader needs to asynchronously poll Glean for new pings to upload.
+    fn trigger_upload(&self) -> Result<(), CallbackError>;
+
+    /// Start the Metrics Ping Scheduler.
+    fn start_metrics_ping_scheduler(&self) -> bool;
+
+    /// Called when upload is disabled and uploads should be stopped.
+    fn cancel_uploads(&self) -> Result<(), CallbackError>;
+
+    /// Called on shutdown, before glean-core is fully shut down.
+    ///
+    /// * This MUST NOT put any new tasks on the dispatcher.
+    ///   * New tasks will be ignored.
+    /// * This SHOULD NOT block arbitrarily long.
+    ///   * Shutdown waits for a maximum of 30 seconds.
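+    // For orientation, a minimal no-op implementor of this trait might look
+    // like the following (illustrative sketch, not part of this crate):
+    //
+    //     struct NoopCallbacks;
+    //
+    //     impl OnGleanEvents for NoopCallbacks {
+    //         fn initialize_finished(&self) {}
+    //         fn trigger_upload(&self) -> Result<(), CallbackError> { Ok(()) }
+    //         fn start_metrics_ping_scheduler(&self) -> bool { false }
+    //         fn cancel_uploads(&self) -> Result<(), CallbackError> { Ok(()) }
+    //         // `shutdown` keeps its default implementation below.
+    //     }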
+    fn shutdown(&self) -> Result<(), CallbackError> {
+        // empty by default
+        Ok(())
+    }
+}
+
+/// Initializes Glean.
+///
+/// # Arguments
+///
+/// * `cfg` - the [`InternalConfiguration`] options to initialize with.
+/// * `client_info` - the [`ClientInfoMetrics`] values used to set Glean
+///   core metrics.
+/// * `callbacks` - A callback object, stored for the entire application lifetime.
+pub fn glean_initialize(
+    cfg: InternalConfiguration,
+    client_info: ClientInfoMetrics,
+    callbacks: Box<dyn OnGleanEvents>,
+) {
+    initialize_inner(cfg, client_info, callbacks);
+}
+
+/// Creates and initializes a new Glean object for use in a subprocess.
+///
+/// Importantly, this will not send any pings at startup, since that
+/// sort of management should only happen in the main process.
+pub fn glean_initialize_for_subprocess(cfg: InternalConfiguration) -> bool {
+    let glean = match Glean::new_for_subprocess(&cfg, true) {
+        Ok(glean) => glean,
+        Err(err) => {
+            log::error!("Failed to initialize Glean: {}", err);
+            return false;
+        }
+    };
+    if core::setup_glean(glean).is_err() {
+        return false;
+    }
+    log::info!("Glean initialized for subprocess");
+    true
+}
+
+fn initialize_inner(
+    cfg: InternalConfiguration,
+    client_info: ClientInfoMetrics,
+    callbacks: Box<dyn OnGleanEvents>,
+) {
+    if was_initialize_called() {
+        log::error!("Glean should not be initialized multiple times");
+        return;
+    }
+
+    let init_handle = std::thread::Builder::new()
+        .name("glean.init".into())
+        .spawn(move || {
+            let upload_enabled = cfg.upload_enabled;
+            let trim_data_to_registered_pings = cfg.trim_data_to_registered_pings;
+
+            // Set the internal logging level.
+            if let Some(level) = cfg.log_level {
+                log::set_max_level(level)
+            }
+
+            let glean = match Glean::new(cfg) {
+                Ok(glean) => glean,
+                Err(err) => {
+                    log::error!("Failed to initialize Glean: {}", err);
+                    return;
+                }
+            };
+            if core::setup_glean(glean).is_err() {
+                return;
+            }
+
+            log::info!("Glean initialized");
+
+            setup_state(State {
+                client_info,
+                callbacks,
+            });
+
+            let mut is_first_run = false;
+            let mut dirty_flag = false;
+            let mut pings_submitted = false;
+            core::with_glean_mut(|glean| {
+                // The debug view tag might have been set before initialize,
+                // get the cached value and set it.
+                if let Some(tag) = PRE_INIT_DEBUG_VIEW_TAG.get() {
+                    let lock = tag.try_lock();
+                    if let Ok(ref debug_tag) = lock {
+                        glean.set_debug_view_tag(debug_tag);
+                    }
+                }
+
+                // The log pings debug option might have been set before initialize,
+                // get the cached value and set it.
+                let log_pings = PRE_INIT_LOG_PINGS.load(Ordering::SeqCst);
+                if log_pings {
+                    glean.set_log_pings(log_pings);
+                }
+
+                // The source tags might have been set before initialize,
+                // get the cached value and set them.
+                if let Some(tags) = PRE_INIT_SOURCE_TAGS.get() {
+                    let lock = tags.try_lock();
+                    if let Ok(ref source_tags) = lock {
+                        glean.set_source_tags(source_tags.to_vec());
+                    }
+                }
+
+                // Get the current value of the dirty flag so we know whether to
+                // send a dirty startup baseline ping below. Immediately set it to
+                // `false` so that dirty startup pings won't be sent if Glean
+                // initialization does not complete successfully.
+                dirty_flag = glean.is_dirty_flag_set();
+                glean.set_dirty_flag(false);
+
+                // Perform registration of pings that were attempted to be
+                // registered before init.
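+                // This is the same cache-and-replay pattern used for the debug
+                // view tag, log pings flag and source tags above: before init
+                // the value is stashed in a static, here it is applied to the
+                // live Glean object. A condensed sketch of the pattern, using
+                // a hypothetical `PRE_INIT_VALUE` cell:
+                //
+                //     static PRE_INIT_VALUE: OnceCell<Mutex<Vec<String>>> = OnceCell::new();
+                //
+                //     // Called from API functions before init:
+                //     fn cache(v: String) {
+                //         let m = PRE_INIT_VALUE.get_or_init(Default::default);
+                //         m.lock().unwrap().push(v);
+                //     }
+                //
+                //     // Called once, with the Glean object, during init:
+                //     if let Some(cell) = PRE_INIT_VALUE.get() {
+                //         if let Ok(values) = cell.try_lock() {
+                //             for v in &*values { /* apply `v` to `glean` */ }
+                //         }
+                //     }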
+                if let Some(tags) = PRE_INIT_PING_REGISTRATION.get() {
+                    let lock = tags.try_lock();
+                    if let Ok(pings) = lock {
+                        for ping in &*pings {
+                            glean.register_ping_type(ping);
+                        }
+                    }
+                }
+
+                // If this is the first time ever the Glean SDK runs, make sure to set
+                // some initial core metrics in case we need to generate early pings.
+                // The next times we start, we would have them around already.
+                is_first_run = glean.is_first_run();
+                if is_first_run {
+                    let state = global_state().lock().unwrap();
+                    initialize_core_metrics(glean, &state.client_info);
+                }
+
+                // Deal with any pending events so we can start recording new ones
+                pings_submitted = glean.on_ready_to_submit_pings(trim_data_to_registered_pings);
+            });
+
+            {
+                let state = global_state().lock().unwrap();
+                // We need to kick off upload in these cases:
+                // 1. Pings were submitted through Glean and it is ready to upload those pings;
+                // 2. Upload is disabled, to upload a possible deletion-request ping.
+                if pings_submitted || !upload_enabled {
+                    if let Err(e) = state.callbacks.trigger_upload() {
+                        log::error!("Triggering upload failed. Error: {}", e);
+                    }
+                }
+            }
+
+            core::with_glean(|glean| {
+                // Start the MPS if it's handled within Rust.
+                glean.start_metrics_ping_scheduler();
+            });
+
+            // The metrics ping scheduler might _synchronously_ submit a ping
+            // so that it runs before we clear application-lifetime metrics further below.
+            // For that it needs access to the `Glean` object.
+            // Thus we need to unlock that by leaving the context above,
+            // then re-lock it afterwards.
+            // That's safe because user-visible functions will be queued and thus not execute until
+            // we unblock later anyway.
+            {
+                let state = global_state().lock().unwrap();
+
+                // Set up information and scheduling for Glean owned pings. Ideally, the "metrics"
+                // ping startup check should be performed before any other ping, since it relies
+                // on being dispatched to the API context before any other metric.
+                if state.callbacks.start_metrics_ping_scheduler() {
+                    if let Err(e) = state.callbacks.trigger_upload() {
+                        log::error!("Triggering upload failed. Error: {}", e);
+                    }
+                }
+            }
+
+            core::with_glean_mut(|glean| {
+                let state = global_state().lock().unwrap();
+
+                // Check if the "dirty flag" is set. That means the product was probably
+                // force-closed. If that's the case, submit a 'baseline' ping with the
+                // reason "dirty_startup". We only do that from the second run.
+                if !is_first_run && dirty_flag {
+                    // The `submit_ping_by_name_sync` function cannot be used, otherwise
+                    // startup will cause a deadlock, since that function requests a
+                    // write lock on the `glean` object.
+                    // Note that unwrapping below is safe: the function will return an
+                    // `Ok` value for a known ping.
+                    if glean.submit_ping_by_name("baseline", Some("dirty_startup")) {
+                        if let Err(e) = state.callbacks.trigger_upload() {
+                            log::error!("Triggering upload failed. Error: {}", e);
+                        }
+                    }
+                }
+
+                // From the second time we run, after all startup pings are generated,
+                // make sure to clear `lifetime: application` metrics and set them again.
+                // Any new value will be sent in newly generated pings after startup.
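+                // For example, a hypothetical metric declared with
+                //
+                //     StringMetric::new(CommonMetricData {
+                //         name: "example".into(),
+                //         category: "test".into(),
+                //         send_in_pings: vec!["metrics".into()],
+                //         lifetime: Lifetime::Application,
+                //         ..Default::default()
+                //     })
+                //
+                // keeps its value exactly until this point; only the
+                // Glean-owned core metrics are then re-populated, by
+                // `initialize_core_metrics` below.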
+                if !is_first_run {
+                    glean.clear_application_lifetime_metrics();
+                    initialize_core_metrics(glean, &state.client_info);
+                }
+            });
+
+            // Signal Dispatcher that init is complete
+            match dispatcher::flush_init() {
+                Ok(task_count) if task_count > 0 => {
+                    core::with_glean(|glean| {
+                        glean_metrics::error::preinit_tasks_overflow
+                            .add_sync(glean, task_count as i32);
+                    });
+                }
+                Ok(_) => {}
+                Err(err) => log::error!("Unable to flush the preinit queue: {}", err),
+            }
+
+            let state = global_state().lock().unwrap();
+            state.callbacks.initialize_finished();
+        })
+        .expect("Failed to spawn Glean's init thread");
+
+    // For test purposes, store the glean init thread's JoinHandle.
+    INIT_HANDLES.lock().unwrap().push(init_handle);
+
+    // Mark the initialization as called: this needs to happen outside of the
+    // dispatched block!
+    INITIALIZE_CALLED.store(true, Ordering::SeqCst);
+
+    // In test mode we wait for initialization to finish.
+    // This needs to run after we set `INITIALIZE_CALLED`, so it's similar to normal behavior.
+    if dispatcher::global::is_test_mode() {
+        join_init();
+    }
+}
+
+/// TEST ONLY FUNCTION
+/// Waits on all the glean.init threads' join handles.
+pub fn join_init() {
+    let mut handles = INIT_HANDLES.lock().unwrap();
+    for handle in handles.drain(..) {
+        handle.join().unwrap();
+    }
+}
+
+/// Call the `shutdown` callback.
+///
+/// This calls the shutdown in a separate thread and waits up to 30s for it to finish.
+/// If it has not finished within that time frame, we continue anyway.
+///
+/// Under normal operation that is fine, as the main process will end
+/// and thus the thread will get killed.
+fn uploader_shutdown() {
+    let timer_id = core::with_glean(|glean| glean.additional_metrics.shutdown_wait.start_sync());
+    let (tx, rx) = unbounded();
+
+    let handle = thread::Builder::new()
+        .name("glean.shutdown".to_string())
+        .spawn(move || {
+            let state = global_state().lock().unwrap();
+            if let Err(e) = state.callbacks.shutdown() {
+                log::error!("Shutdown callback failed: {e:?}");
+            }
+
+            // Best-effort sending. The other side might have timed out already.
+            let _ = tx.send(()).ok();
+        })
+        .expect("Unable to spawn thread to wait on shutdown");
+
+    // TODO: 30 seconds? What's a good default here? Should this be configurable?
+    // Reasoning:
+    // * If we shut down early we might still be processing pending pings.
+    //   In this case we wait at most 3 times for 1s = 3s before we upload.
+    // * If we're rate-limited the uploader sleeps for up to 60s.
+    //   Thus waiting 30s will rarely allow another upload.
+    // * We don't know how long uploads take until we get data from bug 1814592.
+    let result = rx.recv_timeout(Duration::from_secs(30));
+
+    let stop_time = time::precise_time_ns();
+    core::with_glean(|glean| {
+        glean
+            .additional_metrics
+            .shutdown_wait
+            .set_stop_and_accumulate(glean, timer_id, stop_time);
+    });
+
+    if result.is_err() {
+        log::warn!("Waiting for upload failed. We're shutting down.");
+    } else {
+        let _ = handle.join().ok();
+    }
+}
+
+/// Shuts down Glean in an orderly fashion.
+pub fn shutdown() {
+    // Either init was never called or Glean was not fully initialized
+    // (e.g. due to an error).
+    // There's the potential that Glean is not initialized _yet_, but initialization
+    // is still in progress. That's fine, we shut down either way before doing any work.
+    if !was_initialize_called() || core::global_glean().is_none() {
+        log::warn!("Shutdown called before Glean is initialized");
+        if let Err(e) = dispatcher::kill() {
+            log::error!("Can't kill dispatcher thread: {:?}", e);
+        }
+
+        return;
+    }
+
+    crate::launch_with_glean_mut(|glean| {
+        glean.cancel_metrics_ping_scheduler();
+        glean.set_dirty_flag(false);
+    });
+
+    // We need to wait for the above task to finish,
+    // but we also don't wait around forever.
+    //
+    // TODO: Make the timeout configurable?
+    // The default hang watchdog on Firefox waits 60s,
+    // Glean's `uploader_shutdown` further below waits up to 30s.
+    let timer_id = core::with_glean(|glean| {
+        glean
+            .additional_metrics
+            .shutdown_dispatcher_wait
+            .start_sync()
+    });
+    if dispatcher::block_on_queue_timeout(Duration::from_secs(10)).is_err() {
+        log::error!(
+            "Timeout while blocking on the dispatcher. No further shutdown cleanup will happen."
+        );
+        return;
+    }
+    let stop_time = time::precise_time_ns();
+    core::with_glean(|glean| {
+        glean
+            .additional_metrics
+            .shutdown_dispatcher_wait
+            .set_stop_and_accumulate(glean, timer_id, stop_time);
+    });
+
+    if let Err(e) = dispatcher::shutdown() {
+        log::error!("Can't shutdown dispatcher thread: {:?}", e);
+    }
+
+    uploader_shutdown();
+
+    // Be sure to call this _after_ draining the dispatcher
+    core::with_glean(|glean| {
+        if let Err(e) = glean.persist_ping_lifetime_data() {
+            log::error!("Can't persist ping lifetime data: {:?}", e);
+        }
+    });
+}
+
+/// Asks the database to persist ping-lifetime data to disk. Probably expensive to call.
+/// Only has an effect when Glean is configured with `delay_ping_lifetime_io: true`.
+/// The persist is dispatched asynchronously, so errors cannot be reported back
+/// to the caller; they are silently dropped.
+pub fn persist_ping_lifetime_data() {
+    // This is async, we can't get the Error back to the caller.
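+    // Only meaningful when Glean was configured with
+    // `delay_ping_lifetime_io: true`, e.g. a hypothetical configuration:
+    //
+    //     InternalConfiguration {
+    //         upload_enabled: true,
+    //         data_path: "/tmp/glean".into(),
+    //         application_id: "org.example.app".into(),
+    //         delay_ping_lifetime_io: true,
+    //         // ... remaining fields as appropriate ...
+    //     }
+    //
+    // Without that flag, ping-lifetime data is persisted as it is recorded.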
+ crate::launch_with_glean(|glean| { + let _ = glean.persist_ping_lifetime_data(); + }); +} + +fn initialize_core_metrics(glean: &Glean, client_info: &ClientInfoMetrics) { + core_metrics::internal_metrics::app_build.set_sync(glean, &client_info.app_build[..]); + core_metrics::internal_metrics::app_display_version + .set_sync(glean, &client_info.app_display_version[..]); + core_metrics::internal_metrics::app_build_date + .set_sync(glean, Some(client_info.app_build_date.clone())); + if let Some(app_channel) = client_info.channel.as_ref() { + core_metrics::internal_metrics::app_channel.set_sync(glean, app_channel); + } + + core_metrics::internal_metrics::os_version.set_sync(glean, &client_info.os_version); + core_metrics::internal_metrics::architecture.set_sync(glean, &client_info.architecture); + + if let Some(android_sdk_version) = client_info.android_sdk_version.as_ref() { + core_metrics::internal_metrics::android_sdk_version.set_sync(glean, android_sdk_version); + } + if let Some(windows_build_number) = client_info.windows_build_number.as_ref() { + core_metrics::internal_metrics::windows_build_number.set_sync(glean, *windows_build_number); + } + if let Some(device_manufacturer) = client_info.device_manufacturer.as_ref() { + core_metrics::internal_metrics::device_manufacturer.set_sync(glean, device_manufacturer); + } + if let Some(device_model) = client_info.device_model.as_ref() { + core_metrics::internal_metrics::device_model.set_sync(glean, device_model); + } + if let Some(locale) = client_info.locale.as_ref() { + core_metrics::internal_metrics::locale.set_sync(glean, locale); + } +} + +/// Checks if [`initialize`] was ever called. +/// +/// # Returns +/// +/// `true` if it was, `false` otherwise. +fn was_initialize_called() -> bool { + INITIALIZE_CALLED.load(Ordering::SeqCst) +} + +/// Initialize the logging system based on the target platform. This ensures +/// that logging is shown when executing the Glean SDK unit tests. +#[no_mangle] +pub extern "C" fn glean_enable_logging() { + #[cfg(target_os = "android")] + { + let _ = std::panic::catch_unwind(|| { + let filter = android_logger::FilterBuilder::new() + .filter_module("glean_ffi", log::LevelFilter::Debug) + .filter_module("glean_core", log::LevelFilter::Debug) + .filter_module("glean", log::LevelFilter::Debug) + .filter_module("glean_core::ffi", log::LevelFilter::Info) + .build(); + android_logger::init_once( + android_logger::Config::default() + .with_max_level(log::LevelFilter::Debug) + .with_filter(filter) + .with_tag("libglean_ffi"), + ); + log::trace!("Android logging should be hooked up!") + }); + } + + // On iOS enable logging with a level filter. + #[cfg(target_os = "ios")] + { + // Debug logging in debug mode. + // (Note: `debug_assertions` is the next best thing to determine if this is a debug build) + #[cfg(debug_assertions)] + let level = log::LevelFilter::Debug; + #[cfg(not(debug_assertions))] + let level = log::LevelFilter::Info; + + let logger = oslog::OsLogger::new("org.mozilla.glean") + .level_filter(level) + // Filter UniFFI log messages + .category_level_filter("glean_core::ffi", log::LevelFilter::Info); + + match logger.init() { + Ok(_) => log::trace!("os_log should be hooked up!"), + // Please note that this is only expected to fail during unit tests, + // where the logger might have already been initialized by a previous + // test. So it's fine to print with the "logger". 
+            Err(_) => log::warn!("os_log was already initialized"),
+        };
+    }
+
+    // When specifically requested make sure logging does something on non-Android platforms as well.
+    // Use the RUST_LOG environment variable to set the desired log level,
+    // e.g. setting RUST_LOG=debug sets the log level to debug.
+    #[cfg(all(
+        not(target_os = "android"),
+        not(target_os = "ios"),
+        feature = "enable_env_logger"
+    ))]
+    {
+        match env_logger::try_init() {
+            Ok(_) => log::trace!("stdout logging should be hooked up!"),
+            // Please note that this is only expected to fail during unit tests,
+            // where the logger might have already been initialized by a previous
+            // test. So it's fine to print with the "logger".
+            Err(_) => log::warn!("stdout logging was already initialized"),
+        };
+    }
+}
+
+/// Sets whether upload is enabled or not.
+pub fn glean_set_upload_enabled(enabled: bool) {
+    if !was_initialize_called() {
+        return;
+    }
+
+    crate::launch_with_glean_mut(move |glean| {
+        let state = global_state().lock().unwrap();
+        let original_enabled = glean.is_upload_enabled();
+
+        if !enabled {
+            // Stop the MPS if it's handled within Rust.
+            glean.cancel_metrics_ping_scheduler();
+            // Stop wrapper-controlled uploader.
+            if let Err(e) = state.callbacks.cancel_uploads() {
+                log::error!("Canceling upload failed. Error: {}", e);
+            }
+        }
+
+        glean.set_upload_enabled(enabled);
+
+        if !original_enabled && enabled {
+            initialize_core_metrics(glean, &state.client_info);
+        }
+
+        if original_enabled && !enabled {
+            if let Err(e) = state.callbacks.trigger_upload() {
+                log::error!("Triggering upload failed. Error: {}", e);
+            }
+        }
+    })
+}
+
+/// Register a new [`PingType`](PingType).
+pub(crate) fn register_ping_type(ping: &PingType) {
+    // If this happens after Glean.initialize is called (and returns),
+    // we dispatch ping registration on the thread pool.
+    // Registering a ping should not block the application.
+    // Submission itself is also dispatched, so it will always come after the registration.
+    if was_initialize_called() {
+        let ping = ping.clone();
+        crate::launch_with_glean_mut(move |glean| {
+            glean.register_ping_type(&ping);
+        })
+    } else {
+        // We need to keep track of pings, so they get re-registered after a reset or
+        // if ping registration is attempted before Glean initializes.
+        // This state is kept across Glean resets, which should only ever happen in test mode.
+        // It's a set and keeping them around forever should not have much of an impact.
+        let m = PRE_INIT_PING_REGISTRATION.get_or_init(Default::default);
+        let mut lock = m.lock().unwrap();
+        lock.push(ping.clone());
+    }
+}
+
+/// Indicate that an experiment is running. Glean will then add an
+/// experiment annotation to the environment which is sent with pings. This
+/// information is not persisted between runs.
+///
+/// See [`core::Glean::set_experiment_active`].
+pub fn glean_set_experiment_active(
+    experiment_id: String,
+    branch: String,
+    extra: HashMap<String, String>,
+) {
+    launch_with_glean(|glean| glean.set_experiment_active(experiment_id, branch, extra))
+}
+
+/// Indicate that an experiment is no longer running.
+///
+/// See [`core::Glean::set_experiment_inactive`].
+pub fn glean_set_experiment_inactive(experiment_id: String) {
+    launch_with_glean(|glean| glean.set_experiment_inactive(experiment_id))
+}
+
+/// TEST ONLY FUNCTION.
+/// Returns the [`RecordedExperiment`] for the given `experiment_id`
+/// or `None` if the id isn't found.
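+///
+/// Illustrative test-mode usage of the experiment API above (sketch):
+///
+/// ```ignore
+/// glean_set_experiment_active("exp-id".into(), "branch-a".into(), HashMap::new());
+/// let data = glean_test_get_experiment_data("exp-id".into());
+/// assert_eq!("branch-a", data.unwrap().branch);
+/// ```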
+pub fn glean_test_get_experiment_data(experiment_id: String) -> Option<RecordedExperiment> {
+    block_on_dispatcher();
+    core::with_glean(|glean| glean.test_get_experiment_data(experiment_id.to_owned()))
+}
+
+/// Sets a remote configuration to override metrics' default enabled/disabled
+/// state.
+///
+/// See [`core::Glean::set_metrics_enabled_config`].
+pub fn glean_set_metrics_enabled_config(json: String) {
+    match MetricsEnabledConfig::try_from(json) {
+        Ok(cfg) => launch_with_glean(|glean| {
+            glean.set_metrics_enabled_config(cfg);
+        }),
+        Err(e) => {
+            log::error!("Error setting metrics feature config: {:?}", e);
+        }
+    }
+}
+
+/// Sets a debug view tag.
+///
+/// When the debug view tag is set, pings are sent with a `X-Debug-ID` header with the
+/// value of the tag and are sent to the ["Ping Debug Viewer"](https://mozilla.github.io/glean/book/dev/core/internal/debug-pings.html).
+///
+/// # Arguments
+///
+/// * `tag` - A valid HTTP header value. Must match the regex: "[a-zA-Z0-9-]{1,20}".
+///
+/// # Returns
+///
+/// This will return `false` in case `tag` is not a valid tag and `true` otherwise.
+/// If called before Glean is initialized it will always return `true`.
+pub fn glean_set_debug_view_tag(tag: String) -> bool {
+    if was_initialize_called() {
+        crate::launch_with_glean_mut(move |glean| {
+            glean.set_debug_view_tag(&tag);
+        });
+        true
+    } else {
+        // Glean has not been initialized yet. Cache the provided tag value.
+        let m = PRE_INIT_DEBUG_VIEW_TAG.get_or_init(Default::default);
+        let mut lock = m.lock().unwrap();
+        *lock = tag;
+        // When setting the debug view tag before initialization,
+        // we don't validate the tag, thus this function always returns true.
+        true
+    }
+}
+
+/// Sets source tags.
+///
+/// Overrides any existing source tags.
+/// Source tags will show in the destination datasets, after ingestion.
+///
+/// **Note** If one or more tags are invalid, all tags are ignored.
+///
+/// # Arguments
+///
+/// * `tags` - A vector of at most 5 valid HTTP header values. Individual
+///   tags must match the regex: "[a-zA-Z0-9-]{1,20}".
+pub fn glean_set_source_tags(tags: Vec<String>) -> bool {
+    if was_initialize_called() {
+        crate::launch_with_glean_mut(|glean| {
+            glean.set_source_tags(tags);
+        });
+        true
+    } else {
+        // Glean has not been initialized yet. Cache the provided source tags.
+        let m = PRE_INIT_SOURCE_TAGS.get_or_init(Default::default);
+        let mut lock = m.lock().unwrap();
+        *lock = tags;
+        // When setting the source tags before initialization,
+        // we don't validate the tags, thus this function always returns true.
+        true
+    }
+}
+
+/// Sets the log pings debug option.
+///
+/// When the log pings debug option is `true`,
+/// we log the payload of all successfully assembled pings.
+///
+/// # Arguments
+///
+/// * `value` - The value of the log pings option.
+pub fn glean_set_log_pings(value: bool) {
+    if was_initialize_called() {
+        crate::launch_with_glean_mut(move |glean| {
+            glean.set_log_pings(value);
+        });
+    } else {
+        PRE_INIT_LOG_PINGS.store(value, Ordering::SeqCst);
+    }
+}
+
+/// Performs the collection/cleanup operations required by becoming active.
+///
+/// This function generates a baseline ping with reason `active`
+/// and then sets the dirty bit.
+/// This should be called whenever the consuming product becomes active (e.g.
+/// getting to foreground).
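+///
+/// A typical wrapper pairs it with [`glean_handle_client_inactive`]
+/// (illustrative sketch):
+///
+/// ```ignore
+/// // App came to the foreground:
+/// glean_handle_client_active();
+/// // ... user session ...
+/// // App went to the background:
+/// glean_handle_client_inactive();
+/// ```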
+pub fn glean_handle_client_active() {
+    dispatcher::launch(|| {
+        core::with_glean_mut(|glean| {
+            glean.handle_client_active();
+        });
+
+        // The above call may generate pings, so we need to trigger
+        // the uploader. It's fine to trigger it if no ping was generated:
+        // it will bail out.
+        let state = global_state().lock().unwrap();
+        if let Err(e) = state.callbacks.trigger_upload() {
+            log::error!("Triggering upload failed. Error: {}", e);
+        }
+    });
+
+    // The previous block of code may send a ping containing the `duration` metric,
+    // in `glean.handle_client_active`. We intentionally start recording a new
+    // `duration` after that happens, so that the measurement gets reported when
+    // calling `handle_client_inactive`.
+    core_metrics::internal_metrics::baseline_duration.start();
+}
+
+/// Performs the collection/cleanup operations required by becoming inactive.
+///
+/// This function generates a baseline and an events ping with reason
+/// `inactive` and then clears the dirty bit.
+/// This should be called whenever the consuming product becomes inactive (e.g.
+/// getting to background).
+pub fn glean_handle_client_inactive() {
+    // This needs to be called before the `handle_client_inactive` API: it stops
+    // measuring the duration of the previous activity time, before any ping is sent
+    // by the next call.
+    core_metrics::internal_metrics::baseline_duration.stop();
+
+    dispatcher::launch(|| {
+        core::with_glean_mut(|glean| {
+            glean.handle_client_inactive();
+        });
+
+        // The above call may generate pings, so we need to trigger
+        // the uploader. It's fine to trigger it if no ping was generated:
+        // it will bail out.
+        let state = global_state().lock().unwrap();
+        if let Err(e) = state.callbacks.trigger_upload() {
+            log::error!("Triggering upload failed. Error: {}", e);
+        }
+    })
+}
+
+/// Collect and submit a ping for eventual upload by name.
+pub fn glean_submit_ping_by_name(ping_name: String, reason: Option<String>) {
+    dispatcher::launch(|| {
+        let sent =
+            core::with_glean(move |glean| glean.submit_ping_by_name(&ping_name, reason.as_deref()));
+
+        if sent {
+            let state = global_state().lock().unwrap();
+            if let Err(e) = state.callbacks.trigger_upload() {
+                log::error!("Triggering upload failed. Error: {}", e);
+            }
+        }
+    })
+}
+
+/// Collect and submit a ping (by its name) for eventual upload, synchronously.
+///
+/// Note: This does not trigger the uploader. The caller is responsible for doing this.
+pub fn glean_submit_ping_by_name_sync(ping_name: String, reason: Option<String>) -> bool {
+    if !was_initialize_called() {
+        return false;
+    }
+
+    core::with_glean(|glean| glean.submit_ping_by_name(&ping_name, reason.as_deref()))
+}
+
+/// **TEST-ONLY Method**
+///
+/// Set test mode
+pub fn glean_set_test_mode(enabled: bool) {
+    dispatcher::global::TESTING_MODE.store(enabled, Ordering::SeqCst);
+}
+
+/// **TEST-ONLY Method**
+///
+/// Destroy the underlying database.
+pub fn glean_test_destroy_glean(clear_stores: bool, data_path: Option<String>) {
+    if was_initialize_called() {
+        // Just because initialize was called doesn't mean it's done.
+        join_init();
+
+        dispatcher::reset_dispatcher();
+
+        // Only useful if Glean initialization finished successfully
+        // and set up the storage.
+        let has_storage =
+            core::with_opt_glean(|glean| glean.storage_opt().is_some()).unwrap_or(false);
+        if has_storage {
+            uploader_shutdown();
+        }
+
+        if core::global_glean().is_some() {
+            core::with_glean_mut(|glean| {
+                if clear_stores {
+                    glean.test_clear_all_stores()
+                }
+                glean.destroy_db()
+            });
+        }
+
+        // Allow us to go through initialization again.
+        INITIALIZE_CALLED.store(false, Ordering::SeqCst);
+    } else if clear_stores {
+        if let Some(data_path) = data_path {
+            let _ = std::fs::remove_dir_all(data_path).ok();
+        } else {
+            log::warn!("Asked to clear stores before initialization, but no data path given.");
+        }
+    }
+}
+
+/// Get the next upload task.
+pub fn glean_get_upload_task() -> PingUploadTask {
+    core::with_opt_glean(|glean| glean.get_upload_task()).unwrap_or_else(PingUploadTask::done)
+}
+
+/// Processes the response from an attempt to upload a ping.
+pub fn glean_process_ping_upload_response(uuid: String, result: UploadResult) -> UploadTaskAction {
+    core::with_glean(|glean| glean.process_ping_upload_response(&uuid, result))
+}
+
+/// **TEST-ONLY Method**
+///
+/// Set the dirty flag.
+pub fn glean_set_dirty_flag(new_value: bool) {
+    core::with_glean(|glean| glean.set_dirty_flag(new_value))
+}
+
+#[cfg(all(not(target_os = "android"), not(target_os = "ios")))]
+static FD_LOGGER: OnceCell<fd_logger::FdLogger> = OnceCell::new();
+
+/// Initialize the logging system to send JSON messages to a file descriptor
+/// (Unix) or file handle (Windows).
+///
+/// Not available on Android and iOS.
+///
+/// `fd` is a writable file descriptor (on Unix) or file handle (on Windows).
+///
+/// # Safety
+///
+/// `fd` MUST be a valid open file descriptor (Unix) or file handle (Windows).
+/// This function is marked safe,
+/// because we can't call unsafe functions from generated UniFFI code.
+#[cfg(all(not(target_os = "android"), not(target_os = "ios")))]
+pub fn glean_enable_logging_to_fd(fd: u64) {
+    // SAFETY:
+    // This function is unsafe.
+    // Due to UniFFI restrictions we cannot mark it as such.
+    //
+    // `fd` MUST be a valid open file descriptor (Unix) or file handle (Windows).
+    unsafe {
+        // Set up logging to a file descriptor/handle. For this usage, the
+        // language binding should set up a pipe and pass in the descriptor to
+        // the writing side of the pipe as the `fd` parameter. Log messages are
+        // written as JSON to the file descriptor.
+        let logger = FD_LOGGER.get_or_init(|| fd_logger::FdLogger::new(fd));
+        // Set the level so everything goes through to the language
+        // binding side where it will be filtered by the language
+        // binding's logging system.
+        if log::set_logger(logger).is_ok() {
+            log::set_max_level(log::LevelFilter::Debug);
+        }
+    }
+}
+
+/// Unused no-op stub: logging to a file descriptor is not available on Android and iOS.
+#[cfg(any(target_os = "android", target_os = "ios"))]
+pub fn glean_enable_logging_to_fd(_fd: u64) {
+    // intentionally left empty
+}
+
+#[allow(missing_docs)]
+mod ffi {
+    use super::*;
+    uniffi::include_scaffolding!("glean");
+
+    type CowString = Cow<'static, str>;
+
+    impl UniffiCustomTypeConverter for CowString {
+        type Builtin = String;
+
+        fn into_custom(val: Self::Builtin) -> uniffi::Result<Self> {
+            Ok(Cow::from(val))
+        }
+
+        fn from_custom(obj: Self) -> Self::Builtin {
+            obj.into_owned()
+        }
+    }
+}
+pub use ffi::*;
+
+// Split unit tests to a separate file, to reduce the size of this one.
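+// For orientation: wrappers drive the upload API defined above
+// (`glean_get_upload_task` / `glean_process_ping_upload_response`) in a loop
+// roughly like the following (illustrative sketch; `do_upload` is a
+// hypothetical stand-in for the wrapper's real HTTP code, and the variant
+// shapes follow this crate's `upload` module):
+//
+//     loop {
+//         match glean_get_upload_task() {
+//             PingUploadTask::Upload { request } => {
+//                 let result: UploadResult = do_upload(&request);
+//                 match glean_process_ping_upload_response(request.document_id, result) {
+//                     UploadTaskAction::Next => continue,
+//                     UploadTaskAction::End => break,
+//                 }
+//             }
+//             // Rate-limited: wait `time` milliseconds before polling again.
+//             PingUploadTask::Wait { time } => {
+//                 std::thread::sleep(std::time::Duration::from_millis(time));
+//             }
+//             PingUploadTask::Done { .. } => break,
+//         }
+//     }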
+#[cfg(test)]
+#[path = "lib_unit_tests.rs"]
+mod tests;
diff --git a/third_party/rust/glean-core/src/lib_unit_tests.rs b/third_party/rust/glean-core/src/lib_unit_tests.rs
new file mode 100644
index 0000000000..6d67f6ab93
--- /dev/null
+++ b/third_party/rust/glean-core/src/lib_unit_tests.rs
@@ -0,0 +1,1080 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// NOTE: This is a test-only file that contains unit tests for
+// the lib.rs file.
+
+use std::collections::HashSet;
+use std::iter::FromIterator;
+
+use serde_json::json;
+
+use super::*;
+use crate::metrics::{StringMetric, TimeUnit, TimespanMetric, TimingDistributionMetric};
+
+const GLOBAL_APPLICATION_ID: &str = "org.mozilla.glean.test.app";
+pub fn new_glean(tempdir: Option<tempfile::TempDir>) -> (Glean, tempfile::TempDir) {
+    let _ = env_logger::builder().try_init();
+    let dir = match tempdir {
+        Some(tempdir) => tempdir,
+        None => tempfile::tempdir().unwrap(),
+    };
+    let tmpname = dir.path().display().to_string();
+    let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+    (glean, dir)
+}
+
+#[test]
+fn path_is_constructed_from_data() {
+    let (glean, _t) = new_glean(None);
+
+    assert_eq!(
+        "/submit/org-mozilla-glean-test-app/baseline/1/this-is-a-docid",
+        glean.make_path("baseline", "this-is-a-docid")
+    );
+}
+
+// Experiment's API tests: the next two tests come from glean-ac's
+// ExperimentsStorageEngineTest.kt.
+#[test]
+fn experiment_id_and_branch_get_truncated_if_too_long() {
+    let t = tempfile::tempdir().unwrap();
+    let name = t.path().display().to_string();
+    let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true);
+
+    // Generate long strings for the used ids.
+    let very_long_id = "test-experiment-id".repeat(10);
+    let very_long_branch_id = "test-branch-id".repeat(10);
+
+    // Mark the experiment as active.
+    glean.set_experiment_active(
+        very_long_id.clone(),
+        very_long_branch_id.clone(),
+        HashMap::new(),
+    );
+
+    // Generate the expected id and branch strings.
+    let mut expected_id = very_long_id;
+    expected_id.truncate(100);
+    let mut expected_branch_id = very_long_branch_id;
+    expected_branch_id.truncate(100);
+
+    assert!(
+        glean
+            .test_get_experiment_data(expected_id.clone())
+            .is_some(),
+        "An experiment with the truncated id should be available"
+    );
+
+    // Make sure the branch id was truncated as well.
+    let experiment_data = glean.test_get_experiment_data(expected_id);
+    assert!(
+        experiment_data.is_some(),
+        "Experiment data must be available"
+    );
+
+    let experiment_data = experiment_data.unwrap();
+    assert_eq!(expected_branch_id, experiment_data.branch);
+}
+
+#[test]
+fn limits_on_experiments_extras_are_applied_correctly() {
+    let t = tempfile::tempdir().unwrap();
+    let name = t.path().display().to_string();
+    let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true);
+
+    let experiment_id = "test-experiment_id".to_string();
+    let branch_id = "test-branch-id".to_string();
+    let mut extras = HashMap::new();
+
+    let too_long_key = "0123456789".repeat(11);
+    let too_long_value = "0123456789".repeat(11);
+
+    // Build an extras HashMap that's a little too long in every way
+    for n in 0..21 {
+        extras.insert(format!("{}-{}", n, too_long_key), too_long_value.clone());
+    }
+
+    // Mark the experiment as active.
+    glean.set_experiment_active(experiment_id.clone(), branch_id, extras);
+
+    // Make sure it is active
+    assert!(
+        glean
+            .test_get_experiment_data(experiment_id.clone())
+            .is_some(),
+        "An experiment with the truncated id should be available"
+    );
+
+    // Get the data
+    let experiment_data = glean.test_get_experiment_data(experiment_id);
+    assert!(
+        experiment_data.is_some(),
+        "Experiment data must be available"
+    );
+
+    // Parse the JSON and validate the lengths
+    let experiment_data = experiment_data.unwrap();
+    assert_eq!(
+        20,
+        experiment_data.extra.as_ref().unwrap().len(),
+        "Experiments extra must be less than max length"
+    );
+
+    for (key, value) in experiment_data.extra.as_ref().unwrap().iter() {
+        assert!(
+            key.len() <= 100,
+            "Experiments extra key must be less than max length"
+        );
+        assert!(
+            value.len() <= 100,
+            "Experiments extra value must be less than max length"
+        );
+    }
+}
+
+#[test]
+fn experiments_status_is_correctly_toggled() {
+    let t = tempfile::tempdir().unwrap();
+    let name = t.path().display().to_string();
+    let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true);
+
+    // Define the experiment's data.
+    let experiment_id: String = "test-toggle-experiment".into();
+    let branch_id: String = "test-branch-toggle".into();
+    let extra: HashMap<String, String> = [("test-key".into(), "test-value".into())]
+        .iter()
+        .cloned()
+        .collect();
+
+    // Activate an experiment.
+    glean.set_experiment_active(experiment_id.clone(), branch_id, extra.clone());
+
+    // Check that the experiment is marked as active.
+    assert!(
+        glean
+            .test_get_experiment_data(experiment_id.clone())
+            .is_some(),
+        "The experiment must be marked as active."
+    );
+
+    // Check that the extra data was stored.
+    let experiment_data = glean.test_get_experiment_data(experiment_id.clone());
+    assert!(
+        experiment_data.is_some(),
+        "Experiment data must be available"
+    );
+
+    let experiment_data = experiment_data.unwrap();
+    assert_eq!(experiment_data.extra.unwrap(), extra);
+
+    // Disable the experiment and check that it is no longer available.
+    glean.set_experiment_inactive(experiment_id.clone());
+    assert!(
+        glean.test_get_experiment_data(experiment_id).is_none(),
+        "The experiment must not be available any more."
+ ); +} + +#[test] +fn client_id_and_first_run_date_must_be_regenerated() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + glean.data_store.as_ref().unwrap().clear_all(); + + assert!(glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + .is_none()); + assert!(glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + .is_none()); + } + + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + assert!(glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + .is_some()); + assert!(glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + .is_some()); + } +} + +#[test] +fn basic_metrics_should_be_cleared_when_uploading_is_disabled() { + let (mut glean, _t) = new_glean(None); + let metric = StringMetric::new(CommonMetricData { + category: "category".to_string(), + name: "string_metric".to_string(), + send_in_pings: vec!["baseline".to_string()], + ..Default::default() + }); + + metric.set_sync(&glean, "TEST VALUE"); + assert!(metric.get_value(&glean, "baseline").is_some()); + + glean.set_upload_enabled(false); + assert!(metric.get_value(&glean, "baseline").is_none()); + + metric.set_sync(&glean, "TEST VALUE"); + assert!(metric.get_value(&glean, "baseline").is_none()); + + glean.set_upload_enabled(true); + assert!(metric.get_value(&glean, "baseline").is_none()); + + metric.set_sync(&glean, "TEST VALUE"); + assert!(metric.get_value(&glean, "baseline").is_some()); +} + +#[test] +fn first_run_date_is_managed_correctly_when_toggling_uploading() { + let (mut glean, _t) = new_glean(None); + + let original_first_run_date = glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info"); + + glean.set_upload_enabled(false); + assert_eq!( + original_first_run_date, + glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + ); + + glean.set_upload_enabled(true); + assert_eq!( + original_first_run_date, + glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + ); +} + +#[test] +fn client_id_is_managed_correctly_when_toggling_uploading() { + let (mut glean, _t) = new_glean(None); + + let original_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(original_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, original_client_id.unwrap()); + + glean.set_upload_enabled(false); + assert_eq!( + *KNOWN_CLIENT_ID, + glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + .unwrap() + ); + + glean.set_upload_enabled(true); + let current_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(current_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, current_client_id.unwrap()); + assert_ne!(original_client_id, current_client_id); +} + +#[test] +fn client_id_is_set_to_known_value_when_uploading_disabled_at_start() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, false); + + assert_eq!( + *KNOWN_CLIENT_ID, + glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + .unwrap() + ); +} + +#[test] +fn client_id_is_set_to_random_value_when_uploading_enabled_at_start() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + 
let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + let current_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(current_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, current_client_id.unwrap()); +} + +#[test] +fn enabling_when_already_enabled_is_a_noop() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let mut glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + assert!(!glean.set_upload_enabled(true)); +} + +#[test] +fn disabling_when_already_disabled_is_a_noop() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let mut glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, false); + + assert!(!glean.set_upload_enabled(false)); +} + +// Test that the enum variants keep a stable discriminant when serialized. +// Discriminant values are taken from a stable ordering from v20.0.0. +// New metrics after that should be added in order. +#[test] +#[rustfmt::skip] // Let's not add newlines unnecessary +fn correct_order() { + use histogram::Histogram; + use metrics::{Metric::*, TimeUnit}; + use std::time::Duration; + use util::local_now_with_offset; + + // Extract the discriminant of the serialized value, + // that is: the first 4 bytes. + fn discriminant(metric: &metrics::Metric) -> u32 { + let ser = bincode::serialize(metric).unwrap(); + (ser[0] as u32) + | (ser[1] as u32) << 8 + | (ser[2] as u32) << 16 + | (ser[3] as u32) << 24 + } + + // One of every metric type. The values are arbitrary and don't matter. + let long_string = "0123456789".repeat(200); + let all_metrics = vec![ + Boolean(false), + Counter(0), + CustomDistributionExponential(Histogram::exponential(1, 500, 10)), + CustomDistributionLinear(Histogram::linear(1, 500, 10)), + Datetime(local_now_with_offset(), TimeUnit::Second), + Experiment(RecordedExperiment { branch: "branch".into(), extra: None, }), + Quantity(0), + String("glean".into()), + StringList(vec!["glean".into()]), + Uuid("082c3e52-0a18-11ea-946f-0fe0c98c361c".into()), + Timespan(Duration::new(5, 0), TimeUnit::Second), + TimingDistribution(Histogram::functional(2.0, 8.0)), + MemoryDistribution(Histogram::functional(2.0, 8.0)), + Jwe("eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ.OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg.48V1_ALb6US04U3b.5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A.XFBoMYUZodetZdvTiFvSkQ".into()), + Rate(0, 0), + Text(long_string), + ]; + + for metric in all_metrics { + let disc = discriminant(&metric); + + // DO NOT TOUCH THE EXPECTED VALUE. + // If this test fails because of non-equal discriminants, that is a bug in the code, not + // the test. + + // We're matching here, thus fail the build if new variants are added. + match metric { + Boolean(..) => assert_eq!( 0, disc), + Counter(..) => assert_eq!( 1, disc), + CustomDistributionExponential(..) => assert_eq!( 2, disc), + CustomDistributionLinear(..) => assert_eq!( 3, disc), + Datetime(..) => assert_eq!( 4, disc), + Experiment(..) => assert_eq!( 5, disc), + Quantity(..) => assert_eq!( 6, disc), + String(..) => assert_eq!( 7, disc), + StringList(..) 
=> assert_eq!( 8, disc), + Uuid(..) => assert_eq!( 9, disc), + Timespan(..) => assert_eq!(10, disc), + TimingDistribution(..) => assert_eq!(11, disc), + MemoryDistribution(..) => assert_eq!(12, disc), + Jwe(..) => assert_eq!(13, disc), + Rate(..) => assert_eq!(14, disc), + Url(..) => assert_eq!(15, disc), + Text(..) => assert_eq!(16, disc), + } + } +} + +#[test] +#[rustfmt::skip] // Let's not merge lines +fn backwards_compatible_deserialization() { + use std::env; + use std::time::Duration; + use chrono::prelude::*; + use histogram::Histogram; + use metrics::{Metric::*, TimeUnit}; + + // Prepare some data to fill in + let dt = FixedOffset::east(9*3600).ymd(2014, 11, 28).and_hms_nano(21, 45, 59, 12); + + let mut custom_dist_exp = Histogram::exponential(1, 500, 10); + custom_dist_exp.accumulate(10); + + let mut custom_dist_linear = Histogram::linear(1, 500, 10); + custom_dist_linear.accumulate(10); + + let mut time_dist = Histogram::functional(2.0, 8.0); + time_dist.accumulate(10); + + let mut mem_dist = Histogram::functional(2.0, 16.0); + mem_dist.accumulate(10); + + // One of every metric type. The values are arbitrary, but stable. + let all_metrics = vec![ + ( + "boolean", + vec![0, 0, 0, 0, 1], + Boolean(true) + ), + ( + "counter", + vec![1, 0, 0, 0, 20, 0, 0, 0], + Counter(20) + ), + ( + "custom exponential distribution", + vec![2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 244, 1, 0, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0], + CustomDistributionExponential(custom_dist_exp) + ), + ( + "custom linear distribution", + vec![3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 244, 1, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0], + CustomDistributionLinear(custom_dist_linear) + ), + ( + "datetime", + vec![4, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 50, 48, 49, 52, 45, 49, 49, 45, + 50, 56, 84, 50, 49, 58, 52, 53, 58, 53, 57, 46, 48, 48, 48, 48, 48, + 48, 48, 49, 50, 43, 48, 57, 58, 48, 48, 3, 0, 0, 0], + Datetime(dt, TimeUnit::Second), + ), + ( + "experiment", + vec![5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104, 0], + Experiment(RecordedExperiment { branch: "branch".into(), extra: None, }), + ), + ( + "quantity", + vec![6, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0], + Quantity(17) + ), + ( + "string", + vec![7, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 103, 108, 101, 97, 110], + String("glean".into()) + ), + ( + "string list", + vec![8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 103, 108, 101, 97, 110], + StringList(vec!["glean".into()]) + ), + ( + "uuid", + vec![9, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 48, 56, 50, 99, 51, 101, 53, 50, + 45, 48, 97, 49, 56, 45, 49, 49, 101, 97, 45, 57, 52, 54, 102, 45, 48, + 102, 101, 48, 99, 57, 56, 99, 51, 54, 49, 99], + Uuid("082c3e52-0a18-11ea-946f-0fe0c98c361c".into()), + ), + ( + "timespan", + vec![10, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], + Timespan(Duration::new(5, 0), TimeUnit::Second), + ), + ( + "timing distribution", + vec![11, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 123, 81, 125, + 60, 184, 114, 241, 63], + TimingDistribution(time_dist), + ), + ( + "memory distribution", + vec![12, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 
15, 137, 249,
+                 108, 88, 181, 240, 63],
+            MemoryDistribution(mem_dist),
+        ),
+    ];
+
+    for (name, data, metric) in all_metrics {
+        // Helper to print serialization data if instructed by environment variable
+        // Run with:
+        //
+        // ```text
+        // PRINT_DATA=1 cargo test -p glean-core --lib -- --nocapture backwards
+        // ```
+        //
+        // This should not be necessary to re-run and change here, unless a bincode upgrade
+        // requires us to also migrate existing data.
+        if env::var("PRINT_DATA").is_ok() {
+            let bindata = bincode::serialize(&metric).unwrap();
+            println!("(\n {:?},\n vec!{:?},", name, bindata);
+        } else {
+            // Otherwise run the test
+            let deserialized = bincode::deserialize(&data).unwrap();
+            if let CustomDistributionExponential(hist) = &deserialized {
+                hist.snapshot_values(); // Force initialization of the ranges
+            }
+            if let CustomDistributionLinear(hist) = &deserialized {
+                hist.snapshot_values(); // Force initialization of the ranges
+            }
+
+            assert_eq!(
+                metric, deserialized,
+                "Expected properly deserialized {}",
+                name
+            );
+        }
+    }
+}
+
+#[test]
+fn test_first_run() {
+    let dir = tempfile::tempdir().unwrap();
+    let tmpname = dir.path().display().to_string();
+    {
+        let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+        // Check that this is indeed the first run.
+        assert!(glean.is_first_run());
+    }
+
+    {
+        // Other runs must not be marked as "first run".
+        let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+        assert!(!glean.is_first_run());
+    }
+}
+
+#[test]
+fn test_dirty_bit() {
+    let dir = tempfile::tempdir().unwrap();
+    let tmpname = dir.path().display().to_string();
+    {
+        let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+        // The dirty flag must not be set the first time Glean runs.
+        assert!(!glean.is_dirty_flag_set());
+
+        // Set the dirty flag and check that it gets correctly set.
+        glean.set_dirty_flag(true);
+        assert!(glean.is_dirty_flag_set());
+    }
+
+    {
+        // Check that next time Glean runs, it correctly picks up the "dirty flag".
+        // It is expected to be 'true'.
+        let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+        assert!(glean.is_dirty_flag_set());
+
+        // Set the dirty flag to false.
+        glean.set_dirty_flag(false);
+        assert!(!glean.is_dirty_flag_set());
+    }
+
+    {
+        // Check that next time Glean runs, it correctly picks up the "dirty flag".
+        // It is expected to be 'false'.
+        let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+        assert!(!glean.is_dirty_flag_set());
+    }
+}
+
+#[test]
+fn test_change_metric_type_runtime() {
+    let dir = tempfile::tempdir().unwrap();
+
+    let (glean, _t) = new_glean(Some(dir));
+
+    // We attempt to create two metrics: one with a 'string' type and the other
+    // with a 'timespan' type, both being sent in the same pings and having the
+    // same lifetime.
+    let metric_name = "type_swap";
+    let metric_category = "test";
+    let metric_lifetime = Lifetime::Ping;
+    let ping_name = "store1";
+
+    let string_metric = StringMetric::new(CommonMetricData {
+        name: metric_name.into(),
+        category: metric_category.into(),
+        send_in_pings: vec![ping_name.into()],
+        disabled: false,
+        lifetime: metric_lifetime,
+        ..Default::default()
+    });
+
+    let string_value = "definitely-a-string!";
+    string_metric.set_sync(&glean, string_value);
+
+    assert_eq!(
+        string_metric.get_value(&glean, ping_name).unwrap(),
+        string_value,
+        "Expected properly deserialized string"
+    );
+
+    let timespan_metric = TimespanMetric::new(
+        CommonMetricData {
+            name: metric_name.into(),
+            category: metric_category.into(),
+            send_in_pings: vec![ping_name.into()],
+            disabled: false,
+            lifetime: metric_lifetime,
+            ..Default::default()
+        },
+        TimeUnit::Nanosecond,
+    );
+
+    let duration = 60;
+    timespan_metric.set_start(&glean, 0);
+    timespan_metric.set_stop(&glean, duration);
+
+    assert_eq!(
+        timespan_metric.get_value(&glean, ping_name).unwrap(),
+        60,
+        "Expected properly deserialized time"
+    );
+
+    // We expect old data to be lost forever. See the following bug comment
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=1621757#c1 for more context.
+    assert_eq!(None, string_metric.get_value(&glean, ping_name));
+}
+
+#[test]
+fn timing_distribution_truncation() {
+    let dir = tempfile::tempdir().unwrap();
+
+    let (glean, _t) = new_glean(Some(dir));
+    let max_sample_time = 1000 * 1000 * 1000 * 60 * 10;
+
+    for (unit, expected_keys) in &[
+        (
+            TimeUnit::Nanosecond,
+            HashSet::<i64>::from_iter(vec![961_548, 939, 599_512_966_122, 1]),
+        ),
+        (
+            TimeUnit::Microsecond,
+            HashSet::<i64>::from_iter(vec![939, 562_949_953_421_318, 599_512_966_122, 961_548]),
+        ),
+        (
+            TimeUnit::Millisecond,
+            HashSet::<i64>::from_iter(vec![
+                961_548,
+                576_460_752_303_431_040,
+                599_512_966_122,
+                562_949_953_421_318,
+            ]),
+        ),
+    ] {
+        let dist = TimingDistributionMetric::new(
+            CommonMetricData {
+                name: format!("local_metric_{:?}", unit),
+                category: "local".into(),
+                send_in_pings: vec!["baseline".into()],
+                ..Default::default()
+            },
+            *unit,
+        );
+
+        for &value in &[
+            1,
+            1_000,
+            1_000_000,
+            max_sample_time,
+            max_sample_time * 1_000,
+            max_sample_time * 1_000_000,
+        ] {
+            let timer_id = 4u64.into();
+            dist.set_start(timer_id, 0);
+            dist.set_stop_and_accumulate(&glean, timer_id, value);
+        }
+
+        let snapshot = dist.get_value(&glean, "baseline").unwrap();
+
+        let mut keys = HashSet::new();
+        let mut recorded_values = 0;
+
+        for (&key, &value) in &snapshot.values {
+            // A snapshot potentially includes buckets with a 0 count.
+            // We can ignore them here.
+            if value > 0 {
+                assert!((key as u64) < max_sample_time * unit.as_nanos(1));
+                keys.insert(key);
+                recorded_values += 1;
+            }
+        }
+
+        assert_eq!(4, recorded_values);
+        assert_eq!(keys, *expected_keys);
+
+        // The number of buckets was originally designed around a range of
+        // 1 ns to 10 minutes, with 8 steps per power of 2, which works out
+        // to 316 items. This check ensures that still holds even when the
+        // time unit is changed.
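+        // (Roughly: 8 buckets per power of 2 over 1 ns .. 600 s, i.e. about
+        // 8 * log2(6e11) ≈ 313 buckets, hence the `< 316` bound below.)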
+ assert!(snapshot.values.len() < 316);
+ }
+}
+
+#[test]
+fn timing_distribution_truncation_accumulate() {
+ let dir = tempfile::tempdir().unwrap();
+
+ let (glean, _t) = new_glean(Some(dir));
+ let max_sample_time = 1000 * 1000 * 1000 * 60 * 10;
+
+ for &unit in &[
+ TimeUnit::Nanosecond,
+ TimeUnit::Microsecond,
+ TimeUnit::Millisecond,
+ ] {
+ let dist = TimingDistributionMetric::new(
+ CommonMetricData {
+ name: format!("local_metric_{:?}", unit),
+ category: "local".into(),
+ send_in_pings: vec!["baseline".into()],
+ ..Default::default()
+ },
+ unit,
+ );
+
+ let samples = [
+ 1,
+ 1000,
+ 100000,
+ max_sample_time,
+ max_sample_time * 1_000,
+ max_sample_time * 1_000_000,
+ ];
+ let timer_id = 4u64.into(); // xkcd#221
+
+ for sample in samples {
+ dist.set_start(timer_id, 0);
+ dist.set_stop_and_accumulate(&glean, timer_id, sample);
+ }
+
+ let snapshot = dist.get_value(&glean, "baseline").unwrap();
+
+ // The bucketing was originally designed around 1ns to
+ // 10 minutes, with 8 steps per power of 2, which works out to 316 buckets.
+ // This is to ensure that this still holds even when the time unit is changed.
+ assert!(snapshot.values.len() < 316);
+ }
+}
+
+#[test]
+fn test_setting_debug_view_tag() {
+ let dir = tempfile::tempdir().unwrap();
+
+ let (mut glean, _t) = new_glean(Some(dir));
+
+ let valid_tag = "valid-tag";
+ assert!(glean.set_debug_view_tag(valid_tag));
+ assert_eq!(valid_tag, glean.debug_view_tag().unwrap());
+
+ let invalid_tag = "invalid tag";
+ assert!(!glean.set_debug_view_tag(invalid_tag));
+ assert_eq!(valid_tag, glean.debug_view_tag().unwrap());
+}
+
+#[test]
+fn test_setting_log_pings() {
+ let dir = tempfile::tempdir().unwrap();
+
+ let (mut glean, _t) = new_glean(Some(dir));
+ assert!(!glean.log_pings());
+
+ glean.set_log_pings(true);
+ assert!(glean.log_pings());
+
+ glean.set_log_pings(false);
+ assert!(!glean.log_pings());
+}
+
+#[test]
+fn test_set_metrics_disabled() {
+ let (glean, _t) = new_glean(None);
+ let metric = StringMetric::new(CommonMetricData {
+ category: "category".to_string(),
+ name: "string_metric".to_string(),
+ send_in_pings: vec!["baseline".to_string()],
+ ..Default::default()
+ });
+ let another_metric = LabeledString::new(
+ CommonMetricData {
+ category: "category".to_string(),
+ name: "labeled_string_metric".to_string(),
+ send_in_pings: vec!["baseline".to_string()],
+ ..Default::default()
+ },
+ Some(vec!["label1".into()]),
+ );
+
+ // 1. Set the metrics with a "TEST_VALUE" and ensure they are set
+ metric.set_sync(&glean, "TEST_VALUE");
+ assert_eq!(
+ "TEST_VALUE",
+ metric.get_value(&glean, "baseline").unwrap(),
+ "Initial value must match"
+ );
+ another_metric.get("label1").set_sync(&glean, "TEST_VALUE");
+ assert_eq!(
+ "TEST_VALUE",
+ another_metric
+ .get("label1")
+ .get_value(&glean, "baseline")
+ .unwrap(),
+ "Initial value must match"
+ );
+
+ // 2. Set a configuration to disable the metrics
+ let mut metrics_enabled_config = json!(
+ {
+ "category.string_metric": false,
+ "category.labeled_string_metric": false,
+ }
+ )
+ .to_string();
+ glean.set_metrics_enabled_config(
+ MetricsEnabledConfig::try_from(metrics_enabled_config).unwrap(),
+ );
+
+ // 3.
Since the metrics were disabled, setting a new value will be ignored + metric.set_sync(&glean, "VALUE_AFTER_DISABLED"); + assert_eq!( + "TEST_VALUE", + metric.get_value(&glean, "baseline").unwrap(), + "Shouldn't set when disabled" + ); + another_metric + .get("label1") + .set_sync(&glean, "VALUE_AFTER_DISABLED"); + assert_eq!( + "TEST_VALUE", + another_metric + .get("label1") + .get_value(&glean, "baseline") + .unwrap(), + "Shouldn't set when disabled" + ); + + // 4. Set a new configuration where the metrics are enabled + metrics_enabled_config = json!({}).to_string(); + glean.set_metrics_enabled_config( + MetricsEnabledConfig::try_from(metrics_enabled_config).unwrap(), + ); + + // 5. Since the metrics are now enabled, setting a new value should work + metric.set_sync(&glean, "VALUE_AFTER_REENABLED"); + assert_eq!( + "VALUE_AFTER_REENABLED", + metric.get_value(&glean, "baseline").unwrap(), + "Should set when re-enabled" + ); + another_metric + .get("label1") + .set_sync(&glean, "VALUE_AFTER_REENABLED"); + assert_eq!( + "VALUE_AFTER_REENABLED", + another_metric + .get("label1") + .get_value(&glean, "baseline") + .unwrap(), + "Should set when re-enabled" + ); +} + +#[test] +fn test_remote_settings_epoch() { + let (glean, _t) = new_glean(None); + + // 1. Ensure the starting epoch + let mut current_epoch = glean.remote_settings_epoch.load(Ordering::Acquire); + assert_eq!(0u8, current_epoch, "Current epoch must start at 0"); + + // 2. Set a configuration which will trigger incrementing the epoch + let metrics_enabled_config = json!( + { + "category.string_metric": false + } + ) + .to_string(); + glean.set_metrics_enabled_config( + MetricsEnabledConfig::try_from(metrics_enabled_config).unwrap(), + ); + + // 3. Ensure the epoch updated + current_epoch = glean.remote_settings_epoch.load(Ordering::Acquire); + assert_eq!(1u8, current_epoch, "Current epoch must match"); +} + +#[test] +fn test_remote_settings_epoch_updates_in_metric() { + let (glean, _t) = new_glean(None); + let metric = StringMetric::new(CommonMetricData { + category: "category".to_string(), + name: "string_metric".to_string(), + send_in_pings: vec!["baseline".to_string()], + ..Default::default() + }); + + // 1. Set the metric with a "TEST_VALUE" and ensure it was set + metric.set_sync(&glean, "TEST_VALUE"); + assert_eq!( + "TEST_VALUE", + metric.get_value(&glean, "baseline").unwrap(), + "Initial value must match" + ); + + // 2. Set a configuration to disable the `category.string_metric` + let metrics_enabled_config = json!( + { + "category.string_metric": false + } + ) + .to_string(); + glean.set_metrics_enabled_config( + MetricsEnabledConfig::try_from(metrics_enabled_config).unwrap(), + ); + + // 3. Ensure the epoch was updated + let current_epoch = glean.remote_settings_epoch.load(Ordering::Acquire); + assert_eq!(1u8, current_epoch, "Current epoch must update"); + + // 4. 
Since the metric was disabled, setting a new value will be ignored
+ // AND the metric should update its epoch to match the `current_epoch`
+ metric.set_sync(&glean, "VALUE_AFTER_DISABLED");
+ assert_eq!(
+ "TEST_VALUE",
+ metric.get_value(&glean, "baseline").unwrap(),
+ "Shouldn't set when disabled"
+ );
+
+ use crate::metrics::MetricType;
+ // The "epoch" resides in the upper nibble of the `inner.disabled` field
+ let epoch = metric.meta().disabled.load(Ordering::Acquire) >> 4;
+ assert_eq!(
+ current_epoch, epoch,
+ "Epoch must match between metric and Glean core"
+ );
+}
+
+#[test]
+#[should_panic]
+fn test_empty_application_id() {
+ let dir = tempfile::tempdir().unwrap();
+ let tmpname = dir.path().display().to_string();
+
+ let glean = Glean::with_options(&tmpname, "", true);
+ // Check that this is indeed the first run.
+ assert!(glean.is_first_run());
+}
+
+#[test]
+fn records_database_file_size() {
+ let _ = env_logger::builder().is_test(true).try_init();
+
+ // Note: We don't use `new_glean` because we need to re-use the database directory.
+
+ let dir = tempfile::tempdir().unwrap();
+ let tmpname = dir.path().display().to_string();
+
+ // Initialize Glean once to ensure we create the database.
+ let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+ let database_size = &glean.database_metrics.size;
+ let data = database_size.get_value(&glean, "metrics");
+ assert!(data.is_none());
+ drop(glean);
+
+ // Initialize Glean again to record file size.
+ let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true);
+
+ let database_size = &glean.database_metrics.size;
+ let data = database_size.get_value(&glean, "metrics");
+ assert!(data.is_some());
+ let data = data.unwrap();
+
+ // We should see the database containing some data.
+ assert!(data.sum > 0);
+}
+
+#[cfg(not(target_os = "windows"))]
+#[test]
+fn records_io_errors() {
+ use std::fs;
+ let _ = env_logger::builder().is_test(true).try_init();
+
+ let (glean, _data_dir) = new_glean(None);
+ let pending_pings_dir = glean.get_data_path().join(crate::PENDING_PINGS_DIRECTORY);
+ fs::create_dir_all(&pending_pings_dir).unwrap();
+ let attr = fs::metadata(&pending_pings_dir).unwrap();
+ let original_permissions = attr.permissions();
+
+ // Remove write permissions on the pending_pings directory.
+ let mut permissions = original_permissions.clone();
+ permissions.set_readonly(true);
+ fs::set_permissions(&pending_pings_dir, permissions).unwrap();
+
+ // Writing the ping file should fail.
+ let submitted = glean.internal_pings.baseline.submit_sync(&glean, None);
+ // But the return value is still `true` because we enqueue the ping anyway.
+ assert!(submitted);
+
+ let metric = &glean.additional_metrics.io_errors;
+ assert_eq!(
+ 1,
+ metric.get_value(&glean, Some("metrics")).unwrap(),
+ "Should have recorded an IO error"
+ );
+
+ // Restore write permissions.
+ fs::set_permissions(&pending_pings_dir, original_permissions).unwrap();
+
+ // Now we can submit a ping.
+ let submitted = glean.internal_pings.metrics.submit_sync(&glean, None);
+ assert!(submitted);
+}
+
+#[test]
+fn test_activity_api() {
+ let _ = env_logger::builder().is_test(true).try_init();
+
+ let dir = tempfile::tempdir().unwrap();
+ let (mut glean, _t) = new_glean(Some(dir));
+
+ // Signal that the client was active.
+ glean.handle_client_active();
+
+ // Check that we set everything we needed for the 'active' status.
+ assert!(glean.is_dirty_flag_set());
+
+ // Signal back that the client is inactive.
+ glean.handle_client_inactive();
+
+ // Check that we set everything we needed for the 'inactive' status.
+ assert!(!glean.is_dirty_flag_set());
+}
diff --git a/third_party/rust/glean-core/src/metrics/boolean.rs b/third_party/rust/glean-core/src/metrics/boolean.rs
new file mode 100644
index 0000000000..71ed2372c2
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/boolean.rs
@@ -0,0 +1,134 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{test_get_num_recorded_errors, ErrorType};
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A boolean metric.
+///
+/// Records a simple flag.
+#[derive(Clone, Debug)]
+pub struct BooleanMetric {
+ meta: Arc<CommonMetricDataInternal>,
+}
+
+impl MetricType for BooleanMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+
+ fn with_name(&self, name: String) -> Self {
+ let mut meta = (*self.meta).clone();
+ meta.inner.name = name;
+ Self {
+ meta: Arc::new(meta),
+ }
+ }
+
+ fn with_dynamic_label(&self, label: String) -> Self {
+ let mut meta = (*self.meta).clone();
+ meta.inner.dynamic_label = Some(label);
+ Self {
+ meta: Arc::new(meta),
+ }
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl BooleanMetric {
+ /// Creates a new boolean metric.
+ pub fn new(meta: CommonMetricData) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ }
+ }
+
+ /// Sets to the specified boolean value.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - the Glean instance this metric belongs to.
+ /// * `value` - the value to set.
+ #[doc(hidden)]
+ pub fn set_sync(&self, glean: &Glean, value: bool) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ let value = Metric::Boolean(value);
+ glean.storage().record(glean, &self.meta, &value)
+ }
+
+ /// Sets to the specified boolean value.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - the Glean instance this metric belongs to.
+ /// * `value` - the value to set.
+ pub fn set(&self, value: bool) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.set_sync(glean, value))
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a boolean.
+ ///
+ /// This doesn't clear the stored value.
+ #[doc(hidden)]
+ pub fn get_value(&self, glean: &Glean, ping_name: Option<&str>) -> Option<bool> {
+ let queried_ping_name = ping_name.unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::Boolean(b)) => Some(b),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a boolean.
+ ///
+ /// This doesn't clear the stored value.
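+ ///
+ /// A minimal usage sketch (illustrative only, not part of the upstream
+ /// sources; assumes an initialized Glean and a running dispatcher):
+ ///
+ /// ```ignore
+ /// let flag = BooleanMetric::new(CommonMetricData {
+ ///     name: "flag".into(),
+ ///     category: "example".into(),
+ ///     send_in_pings: vec!["store1".into()],
+ ///     ..Default::default()
+ /// });
+ /// flag.set(true);
+ /// assert_eq!(Some(true), flag.test_get_value(None));
+ /// ```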
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<bool> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/counter.rs b/third_party/rust/glean-core/src/metrics/counter.rs
new file mode 100644
index 0000000000..ac544df95f
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/counter.rs
@@ -0,0 +1,163 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A counter metric.
+///
+/// Used to count things.
+/// The value can only be incremented, not decremented.
+#[derive(Clone, Debug)]
+pub struct CounterMetric {
+ meta: Arc<CommonMetricDataInternal>,
+}
+
+impl MetricType for CounterMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+
+ fn with_name(&self, name: String) -> Self {
+ let mut meta = (*self.meta).clone();
+ meta.inner.name = name;
+ Self {
+ meta: Arc::new(meta),
+ }
+ }
+
+ fn with_dynamic_label(&self, label: String) -> Self {
+ let mut meta = (*self.meta).clone();
+ meta.inner.dynamic_label = Some(label);
+ Self {
+ meta: Arc::new(meta),
+ }
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl CounterMetric {
+ /// Creates a new counter metric.
+ pub fn new(meta: CommonMetricData) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ }
+ }
+
+ /// Increases the counter by `amount` synchronously.
+ #[doc(hidden)]
+ pub fn add_sync(&self, glean: &Glean, amount: i32) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ if amount <= 0 {
+ record_error(
+ glean,
+ &self.meta,
+ ErrorType::InvalidValue,
+ format!("Added negative or zero value {}", amount),
+ None,
+ );
+ return;
+ }
+
+ // Let's be defensive here:
+ // The uploader tries to store a counter metric,
+ // but in tests that storage might be gone already.
+ // Let's just ignore those.
+ // This should never happen in real app usage.
+ if let Some(storage) = glean.storage_opt() {
+ storage.record_with(glean, &self.meta, |old_value| match old_value {
+ Some(Metric::Counter(old_value)) => {
+ Metric::Counter(old_value.saturating_add(amount))
+ }
+ _ => Metric::Counter(amount),
+ })
+ } else {
+ log::warn!(
+ "Couldn't get storage. Can't record counter '{}'.",
+ self.meta.base_identifier()
+ );
+ }
+ }
+
+ /// Increases the counter by `amount`.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - The Glean instance this metric belongs to.
+ /// * `amount` - The amount to increase by. Should be positive.
+ ///
+ /// ## Notes
+ ///
+ /// Logs an error if the `amount` is 0 or negative.
+ pub fn add(&self, amount: i32) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.add_sync(glean, amount))
+ }
+
+ /// Gets the current value.
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<i32> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::Counter(i)) => Some(i),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as an integer.
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<i32> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/custom_distribution.rs b/third_party/rust/glean-core/src/metrics/custom_distribution.rs
new file mode 100644
index 0000000000..929e4863ec
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/custom_distribution.rs
@@ -0,0 +1,222 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::histogram::{Bucketing, Histogram, HistogramType};
+use crate::metrics::{DistributionData, Metric, MetricType};
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A custom distribution metric.
+///
+/// Custom distributions are used to accumulate and store values bucketed
+/// according to the metric's configured range, bucket count, and histogram type.
+#[derive(Clone, Debug)]
+pub struct CustomDistributionMetric {
+ meta: Arc<CommonMetricDataInternal>,
+ range_min: u64,
+ range_max: u64,
+ bucket_count: u64,
+ histogram_type: HistogramType,
+}
+
+/// Create a snapshot of the histogram.
+///
+/// The snapshot can be serialized into the payload format.
+pub(crate) fn snapshot<B: Bucketing>(hist: &Histogram<B>) -> DistributionData {
+ DistributionData {
+ values: hist
+ .snapshot_values()
+ .into_iter()
+ .map(|(k, v)| (k as i64, v as i64))
+ .collect(),
+ sum: hist.sum() as i64,
+ count: hist.count() as i64,
+ }
+}
+
+impl MetricType for CustomDistributionMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl CustomDistributionMetric {
+ /// Creates a new custom distribution metric.
+ pub fn new(
+ meta: CommonMetricData,
+ range_min: i64,
+ range_max: i64,
+ bucket_count: i64,
+ histogram_type: HistogramType,
+ ) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ range_min: range_min as u64,
+ range_max: range_max as u64,
+ bucket_count: bucket_count as u64,
+ histogram_type,
+ }
+ }
+
+ /// Accumulates the provided signed samples in the metric.
+ ///
+ /// This is required so that the platform-specific code can provide us with
+ /// 64 bit signed integers if no `u64` comparable type is available. This
+ /// will take care of filtering and reporting errors for any provided negative
+ /// sample.
+ ///
+ /// # Arguments
+ ///
+ /// - `samples` - The vector holding the samples to be recorded by the metric.
+ ///
+ /// ## Notes
+ ///
+ /// Discards any negative value in `samples` and reports an [`ErrorType::InvalidValue`]
+ /// for each of them.
+ pub fn accumulate_samples(&self, samples: Vec<i64>) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.accumulate_samples_sync(glean, samples))
+ }
+
+ /// Accumulates the provided samples in the metric synchronously.
+ ///
+ /// See [`accumulate_samples`](Self::accumulate_samples) for details.
+ #[doc(hidden)]
+ pub fn accumulate_samples_sync(&self, glean: &Glean, samples: Vec<i64>) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ let mut num_negative_samples = 0;
+
+ // Generic accumulation function to handle the different histogram types and count negative
+ // samples.
+ fn accumulate<B: Bucketing, F>(
+ samples: &[i64],
+ mut hist: Histogram<B>,
+ metric: F,
+ ) -> (i32, Metric)
+ where
+ F: Fn(Histogram<B>) -> Metric,
+ {
+ let mut num_negative_samples = 0;
+ for &sample in samples.iter() {
+ if sample < 0 {
+ num_negative_samples += 1;
+ } else {
+ let sample = sample as u64;
+ hist.accumulate(sample);
+ }
+ }
+ (num_negative_samples, metric(hist))
+ }
+
+ glean.storage().record_with(glean, &self.meta, |old_value| {
+ let (num_negative, hist) = match self.histogram_type {
+ HistogramType::Linear => {
+ let hist = if let Some(Metric::CustomDistributionLinear(hist)) = old_value {
+ hist
+ } else {
+ Histogram::linear(
+ self.range_min,
+ self.range_max,
+ self.bucket_count as usize,
+ )
+ };
+ accumulate(&samples, hist, Metric::CustomDistributionLinear)
+ }
+ HistogramType::Exponential => {
+ let hist = if let Some(Metric::CustomDistributionExponential(hist)) = old_value
+ {
+ hist
+ } else {
+ Histogram::exponential(
+ self.range_min,
+ self.range_max,
+ self.bucket_count as usize,
+ )
+ };
+ accumulate(&samples, hist, Metric::CustomDistributionExponential)
+ }
+ };
+
+ num_negative_samples = num_negative;
+ hist
+ });
+
+ if num_negative_samples > 0 {
+ let msg = format!("Accumulated {} negative samples", num_negative_samples);
+ record_error(
+ glean,
+ &self.meta,
+ ErrorType::InvalidValue,
+ msg,
+ num_negative_samples,
+ );
+ }
+ }
+
+ /// Gets the currently stored histogram.
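+ ///
+ /// A usage sketch (illustrative only, not part of the upstream sources;
+ /// assumes an initialized Glean and `meta` being the metric's
+ /// `CommonMetricData`):
+ ///
+ /// ```ignore
+ /// let dist = CustomDistributionMetric::new(meta, 1, 100, 10, HistogramType::Linear);
+ /// dist.accumulate_samples(vec![1, 10, 100]);
+ /// // Negative samples would be discarded and reported as `InvalidValue` errors.
+ /// ```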
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<DistributionData> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ // Boxing the value, in order to return either of the possible buckets
+ Some(Metric::CustomDistributionExponential(hist)) => Some(snapshot(&hist)),
+ Some(Metric::CustomDistributionLinear(hist)) => Some(snapshot(&hist)),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a [`DistributionData`].
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<DistributionData> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/datetime.rs b/third_party/rust/glean-core/src/metrics/datetime.rs
new file mode 100644
index 0000000000..3ef846a32c
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/datetime.rs
@@ -0,0 +1,327 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::fmt;
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::time_unit::TimeUnit;
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::util::{get_iso_time_string, local_now_with_offset};
+use crate::CommonMetricData;
+use crate::Glean;
+
+use chrono::{DateTime, Datelike, FixedOffset, TimeZone, Timelike};
+
+/// A datetime type.
+///
+/// Used to feed data to the `DatetimeMetric`.
+pub type ChronoDatetime = DateTime<FixedOffset>;
+
+/// Representation of a date, time and timezone.
+#[derive(Clone, PartialEq, Eq)]
+pub struct Datetime {
+ /// The year, e.g. 2021.
+ pub year: i32,
+ /// The month, 1=January.
+ pub month: u32,
+ /// The day of the month.
+ pub day: u32,
+ /// The hour. 0-23.
+ pub hour: u32,
+ /// The minute. 0-59.
+ pub minute: u32,
+ /// The second. 0-60.
+ pub second: u32,
+ /// The nanosecond part of the time.
+ pub nanosecond: u32,
+ /// The timezone offset from UTC in seconds.
+ /// Negative for west, positive for east of UTC.
+ pub offset_seconds: i32,
+}
+
+impl fmt::Debug for Datetime {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Use the absolute offset for the hour and minute parts; the sign is
+ // printed separately, so a negative offset isn't rendered twice.
+ let offset = self.offset_seconds.abs();
+ write!(
+ f,
+ "Datetime({:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}{:02}{:02})",
+ self.year,
+ self.month,
+ self.day,
+ self.hour,
+ self.minute,
+ self.second,
+ self.nanosecond,
+ if self.offset_seconds < 0 { "-" } else { "+" },
+ offset / 3600, // hour part
+ (offset % 3600) / 60, // minute part
+ )
+ }
+}
+
+impl Default for Datetime {
+ fn default() -> Self {
+ Datetime {
+ year: 1970,
+ month: 1,
+ day: 1,
+ hour: 0,
+ minute: 0,
+ second: 0,
+ nanosecond: 0,
+ offset_seconds: 0,
+ }
+ }
+}
+
+/// A datetime metric.
+///
+/// Used to record an absolute date and time, such as the time the user first ran
+/// the application.
+#[derive(Clone, Debug)]
+pub struct DatetimeMetric {
+ meta: Arc<CommonMetricDataInternal>,
+ time_unit: TimeUnit,
+}
+
+impl MetricType for DatetimeMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+impl From<ChronoDatetime> for Datetime {
+ fn from(dt: ChronoDatetime) -> Self {
+ let date = dt.date();
+ let time = dt.time();
+ let tz = dt.timezone();
+ Self {
+ year: date.year(),
+ month: date.month(),
+ day: date.day(),
+ hour: time.hour(),
+ minute: time.minute(),
+ second: time.second(),
+ nanosecond: time.nanosecond(),
+ offset_seconds: tz.local_minus_utc(),
+ }
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl DatetimeMetric {
+ /// Creates a new datetime metric.
+ pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ time_unit,
+ }
+ }
+
+ /// Sets the metric to a date/time including the timezone offset.
+ ///
+ /// # Arguments
+ ///
+ /// * `dt` - the optional datetime to set this to. If missing, the current date and time is used.
+ pub fn set(&self, dt: Option<Datetime>) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| {
+ metric.set_sync(glean, dt);
+ })
+ }
+
+ /// Sets the metric to a date/time, including the timezone offset, synchronously.
+ ///
+ /// Use [`set`](Self::set) instead.
+ #[doc(hidden)]
+ pub fn set_sync(&self, glean: &Glean, value: Option<Datetime>) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ let value = match value {
+ None => local_now_with_offset(),
+ Some(dt) => {
+ let timezone_offset = FixedOffset::east_opt(dt.offset_seconds);
+ if timezone_offset.is_none() {
+ let msg = format!(
+ "Invalid timezone offset {}. Not recording.",
+ dt.offset_seconds
+ );
+ record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
+ return;
+ };
+
+ let datetime_obj = FixedOffset::east(dt.offset_seconds)
+ .ymd_opt(dt.year, dt.month, dt.day)
+ .and_hms_nano_opt(dt.hour, dt.minute, dt.second, dt.nanosecond);
+
+ if let Some(dt) = datetime_obj.single() {
+ dt
+ } else {
+ record_error(
+ glean,
+ &self.meta,
+ ErrorType::InvalidValue,
+ "Invalid input data. Not recording.",
+ None,
+ );
+ return;
+ }
+ }
+ };
+
+ self.set_sync_chrono(glean, value);
+ }
+
+ pub(crate) fn set_sync_chrono(&self, glean: &Glean, value: ChronoDatetime) {
+ let value = Metric::Datetime(value, self.time_unit);
+ glean.storage().record(glean, &self.meta, &value)
+ }
+
+ /// Gets the stored datetime value.
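+ ///
+ /// A usage sketch (illustrative only, not part of the upstream sources;
+ /// assumes an initialized Glean and `meta` being the metric's
+ /// `CommonMetricData`):
+ ///
+ /// ```ignore
+ /// let metric = DatetimeMetric::new(meta, TimeUnit::Second);
+ /// // `None` records the current local date and time.
+ /// metric.set(None);
+ /// // Offsets outside of +/-24 hours are rejected with an `InvalidValue` error.
+ /// metric.set(Some(Datetime { offset_seconds: 999_999, ..Default::default() }));
+ /// ```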
+ #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<ChronoDatetime> { + let (d, tu) = self.get_value_inner(glean, ping_name.into())?; + + // The string version of the test function truncates using string + // parsing. Unfortunately `parse_from_str` errors with `NotEnough` if we + // try to truncate with `get_iso_time_string` and then parse it back + // in a `Datetime`. So we need to truncate manually. + let time = d.time(); + match tu { + TimeUnit::Nanosecond => d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond(), + ), + TimeUnit::Microsecond => { + eprintln!( + "microseconds. nanoseconds={}, nanoseconds/1000={}", + time.nanosecond(), + time.nanosecond() / 1000 + ); + d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond() / 1000, + ) + } + TimeUnit::Millisecond => d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond() / 1000000, + ), + TimeUnit::Second => { + d.date() + .and_hms_nano_opt(time.hour(), time.minute(), time.second(), 0) + } + TimeUnit::Minute => d.date().and_hms_nano_opt(time.hour(), time.minute(), 0, 0), + TimeUnit::Hour => d.date().and_hms_nano_opt(time.hour(), 0, 0, 0), + TimeUnit::Day => d.date().and_hms_nano_opt(0, 0, 0, 0), + } + } + + fn get_value_inner( + &self, + glean: &Glean, + ping_name: Option<&str>, + ) -> Option<(ChronoDatetime, TimeUnit)> { + let queried_ping_name = ping_name.unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Datetime(d, tu)) => Some((d, tu)), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the stored datetime value. + /// + /// The precision of this value is truncated to the `time_unit` precision. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `storage_name` - the storage name to look into. + /// + /// # Returns + /// + /// The stored value or `None` if nothing stored. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<Datetime> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| { + let dt = self.get_value(glean, ping_name.as_deref()); + dt.map(Datetime::from) + }) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the stored datetime value, formatted as an ISO8601 string. + /// + /// The precision of this value is truncated to the `time_unit` precision. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `storage_name` - the storage name to look into. + /// + /// # Returns + /// + /// The stored value or `None` if nothing stored. + pub fn test_get_value_as_string(&self, ping_name: Option<String>) -> Option<String> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value_as_string(glean, ping_name)) + } + + /// **Test-only API** + /// + /// Gets the stored datetime value, formatted as an ISO8601 string. + #[doc(hidden)] + pub fn get_value_as_string(&self, glean: &Glean, ping_name: Option<String>) -> Option<String> { + let value = self.get_value_inner(glean, ping_name.as_deref()); + value.map(|(dt, tu)| get_iso_time_string(dt, tu)) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. 
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/denominator.rs b/third_party/rust/glean-core/src/metrics/denominator.rs
new file mode 100644
index 0000000000..fb80874924
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/denominator.rs
@@ -0,0 +1,140 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::CounterMetric;
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::metrics::RateMetric;
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A Denominator metric (a kind of count shared among Rate metrics).
+///
+/// Used to count things.
+/// The value can only be incremented, not decremented.
+// This is essentially a counter metric,
+// which additionally forwards increments to the denominators of a list of associated rates.
+// The numerator is incremented through the corresponding `NumeratorMetric`.
+#[derive(Clone, Debug)]
+pub struct DenominatorMetric {
+ counter: CounterMetric,
+ numerators: Vec<RateMetric>,
+}
+
+impl MetricType for DenominatorMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ self.counter.meta()
+ }
+}
+
+impl DenominatorMetric {
+ /// Creates a new denominator metric.
+ pub fn new(meta: CommonMetricData, numerators: Vec<CommonMetricData>) -> Self {
+ Self {
+ counter: CounterMetric::new(meta),
+ numerators: numerators.into_iter().map(RateMetric::new).collect(),
+ }
+ }
+
+ /// Increases the denominator by `amount`.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - The Glean instance this metric belongs to.
+ /// * `amount` - The amount to increase by. Should be positive.
+ ///
+ /// ## Notes
+ ///
+ /// Logs an error if the `amount` is 0 or negative.
+ pub fn add(&self, amount: i32) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.add_sync(glean, amount))
+ }
+
+ #[doc(hidden)]
+ pub fn add_sync(&self, glean: &Glean, amount: i32) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ if amount <= 0 {
+ record_error(
+ glean,
+ self.meta(),
+ ErrorType::InvalidValue,
+ format!("Added negative or zero value {}", amount),
+ None,
+ );
+ return;
+ }
+
+ for num in &self.numerators {
+ num.add_to_denominator_sync(glean, amount);
+ }
+
+ glean
+ .storage()
+ .record_with(glean, self.counter.meta(), |old_value| match old_value {
+ Some(Metric::Counter(old_value)) => {
+ Metric::Counter(old_value.saturating_add(amount))
+ }
+ _ => Metric::Counter(amount),
+ })
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as an integer.
+ ///
+ /// This doesn't clear the stored value.
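+ ///
+ /// A usage sketch (illustrative only, not part of the upstream sources;
+ /// assumes an initialized Glean and a `denominator` built from the
+ /// metadata of its associated rate metrics):
+ ///
+ /// ```ignore
+ /// // Incrementing the denominator also increments the denominator
+ /// // of every associated rate metric.
+ /// denominator.add(1);
+ /// assert_eq!(Some(1), denominator.test_get_value(None));
+ /// ```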
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<i32> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<i32> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta().identifier(glean),
+ self.meta().inner.lifetime,
+ ) {
+ Some(Metric::Counter(i)) => Some(i),
+ _ => None,
+ }
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - the optional name of the ping to retrieve the metric
+ /// for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/event.rs b/third_party/rust/glean-core/src/metrics/event.rs
new file mode 100644
index 0000000000..2d5e2acdc3
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/event.rs
@@ -0,0 +1,189 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::collections::HashMap;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::event_database::RecordedEvent;
+use crate::metrics::MetricType;
+use crate::util::truncate_string_at_boundary_with_error;
+use crate::CommonMetricData;
+use crate::Glean;
+
+const MAX_LENGTH_EXTRA_KEY_VALUE: usize = 500;
+
+/// An event metric.
+///
+/// Events allow recording of e.g. individual occurrences of user actions, say
+/// every time a view was opened and from where. Each time you record an event, it
+/// records a timestamp, the event's name and a set of custom values.
+#[derive(Clone, Debug)]
+pub struct EventMetric {
+ meta: CommonMetricDataInternal,
+ allowed_extra_keys: Vec<String>,
+}
+
+impl MetricType for EventMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl EventMetric {
+ /// Creates a new event metric.
+ pub fn new(meta: CommonMetricData, allowed_extra_keys: Vec<String>) -> Self {
+ Self {
+ meta: meta.into(),
+ allowed_extra_keys,
+ }
+ }
+
+ /// Records an event.
+ ///
+ /// # Arguments
+ ///
+ /// * `extra` - A [`HashMap`] of `(key, value)` pairs.
+ /// Keys must be one of the allowed extra keys.
+ /// If any key is not allowed, an error is reported and no event is recorded.
+ pub fn record(&self, extra: HashMap<String, String>) {
+ let timestamp = crate::get_timestamp_ms();
+ self.record_with_time(timestamp, extra);
+ }
+
+ /// Records a new event with the provided timestamp.
+ ///
+ /// It's the caller's responsibility to ensure the timestamp comes from the same clock source.
+ ///
+ /// # Arguments
+ ///
+ /// * `timestamp` - The event timestamp, in milliseconds.
+ /// * `extra` - A [`HashMap`] of `(key, value)` pairs.
+ /// Keys must be one of the allowed extra keys.
+ /// If any key is not allowed, an error is reported and no event is recorded.
+ pub fn record_with_time(&self, timestamp: u64, extra: HashMap<String, String>) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| {
+ let sent = metric.record_sync(glean, timestamp, extra);
+ if sent {
+ let state = crate::global_state().lock().unwrap();
+ if let Err(e) = state.callbacks.trigger_upload() {
+ log::error!("Triggering upload failed. Error: {}", e);
+ }
+ }
+ });
+ }
+
+ /// Validate that extras are empty or all extra keys are allowed.
+ ///
+ /// If at least one key is not allowed, record an error and fail.
+ fn validate_extra(
+ &self,
+ glean: &Glean,
+ extra: HashMap<String, String>,
+ ) -> Result<Option<HashMap<String, String>>, ()> {
+ if extra.is_empty() {
+ return Ok(None);
+ }
+
+ let mut extra_strings = HashMap::new();
+ for (k, v) in extra.into_iter() {
+ if !self.allowed_extra_keys.contains(&k) {
+ let msg = format!("Invalid extra key {}", k);
+ record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
+ return Err(());
+ }
+
+ let value = truncate_string_at_boundary_with_error(
+ glean,
+ &self.meta,
+ v,
+ MAX_LENGTH_EXTRA_KEY_VALUE,
+ );
+ extra_strings.insert(k, value);
+ }
+
+ Ok(Some(extra_strings))
+ }
+
+ /// Records an event.
+ ///
+ /// ## Returns
+ ///
+ /// `true` if a ping was submitted and should be uploaded.
+ /// `false` otherwise.
+ #[doc(hidden)]
+ pub fn record_sync(
+ &self,
+ glean: &Glean,
+ timestamp: u64,
+ extra: HashMap<String, String>,
+ ) -> bool {
+ if !self.should_record(glean) {
+ return false;
+ }
+
+ let extra_strings = match self.validate_extra(glean, extra) {
+ Ok(extra) => extra,
+ Err(()) => return false,
+ };
+
+ glean
+ .event_storage()
+ .record(glean, &self.meta, timestamp, extra_strings)
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Get the vector of currently stored events for this event metric.
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<Vec<RecordedEvent>> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ glean
+ .event_storage()
+ .test_get_value(&self.meta, queried_ping_name)
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Get the vector of currently stored events for this event metric.
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<Vec<RecordedEvent>> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} diff --git a/third_party/rust/glean-core/src/metrics/experiment.rs b/third_party/rust/glean-core/src/metrics/experiment.rs new file mode 100644 index 0000000000..23e6c41ce2 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/experiment.rs @@ -0,0 +1,266 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::cmp; +use std::collections::HashMap; +use std::sync::atomic::AtomicU8; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::{Metric, MetricType, RecordedExperiment}; +use crate::storage::{StorageManager, INTERNAL_STORAGE}; +use crate::util::{truncate_string_at_boundary, truncate_string_at_boundary_with_error}; +use crate::Lifetime; +use crate::{CommonMetricData, Glean}; + +/// The maximum length of the experiment id, the branch id, and the keys of the +/// `extra` map. Identifiers longer than this number of characters are truncated. +const MAX_EXPERIMENTS_IDS_LEN: usize = 100; +/// The maximum length of the experiment `extra` values. Values longer than this +/// limit will be truncated. +const MAX_EXPERIMENT_VALUE_LEN: usize = MAX_EXPERIMENTS_IDS_LEN; +/// The maximum number of extras allowed in the `extra` hash map. Any items added +/// beyond this limit will be dropped. Note that truncation of a hash map is +/// nondeterministic in which items are truncated. +const MAX_EXPERIMENTS_EXTRAS_SIZE: usize = 20; + +/// An experiment metric. +/// +/// Used to store active experiments. +/// This is used through the `set_experiment_active`/`set_experiment_inactive` Glean SDK API. +#[derive(Clone, Debug)] +pub struct ExperimentMetric { + meta: CommonMetricDataInternal, +} + +impl MetricType for ExperimentMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +impl ExperimentMetric { + /// Creates a new experiment metric. + /// + /// # Arguments + /// + /// * `id` - the id of the experiment. Please note that this will be + /// truncated to `MAX_EXPERIMENTS_IDS_LEN`, if needed. + /// + /// # Implementation note + /// + /// This runs synchronously and queries the database to record potential errors. + pub fn new(glean: &Glean, id: String) -> Self { + let mut error = None; + + // Make sure that experiment id is within the expected limit. 
+ let truncated_id = if id.len() > MAX_EXPERIMENTS_IDS_LEN {
+ let msg = format!(
+ "Value length {} for experiment id exceeds maximum of {}",
+ id.len(),
+ MAX_EXPERIMENTS_IDS_LEN
+ );
+ error = Some(msg);
+ truncate_string_at_boundary(id, MAX_EXPERIMENTS_IDS_LEN)
+ } else {
+ id
+ };
+
+ let new_experiment = Self {
+ meta: CommonMetricDataInternal {
+ inner: CommonMetricData {
+ name: format!("{}#experiment", truncated_id),
+ // We don't need a category, the name is already unique
+ category: "".into(),
+ send_in_pings: vec![INTERNAL_STORAGE.into()],
+ lifetime: Lifetime::Application,
+ ..Default::default()
+ },
+ disabled: AtomicU8::new(0),
+ },
+ };
+
+ // Check for a truncation error to record
+ if let Some(msg) = error {
+ record_error(
+ glean,
+ &new_experiment.meta,
+ ErrorType::InvalidValue,
+ msg,
+ None,
+ );
+ }
+
+ new_experiment
+ }
+
+ /// Records an experiment as active.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - The Glean instance this metric belongs to.
+ /// * `branch` - the active branch of the experiment. Please note that this will be
+ /// truncated to `MAX_EXPERIMENTS_IDS_LEN`, if needed.
+ /// * `extra` - an optional, user defined String to String map used to provide richer
+ /// experiment context if needed.
+ pub fn set_active_sync(&self, glean: &Glean, branch: String, extra: HashMap<String, String>) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ // Make sure that branch id is within the expected limit.
+ let truncated_branch = if branch.len() > MAX_EXPERIMENTS_IDS_LEN {
+ truncate_string_at_boundary_with_error(
+ glean,
+ &self.meta,
+ branch,
+ MAX_EXPERIMENTS_IDS_LEN,
+ )
+ } else {
+ branch
+ };
+
+ // Apply limits to extras
+ if extra.len() > MAX_EXPERIMENTS_EXTRAS_SIZE {
+ let msg = format!(
+ "Extra hash map length {} exceeds maximum of {}",
+ extra.len(),
+ MAX_EXPERIMENTS_EXTRAS_SIZE
+ );
+ record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
+ }
+
+ let mut truncated_extras =
+ HashMap::with_capacity(cmp::min(extra.len(), MAX_EXPERIMENTS_EXTRAS_SIZE));
+ for (key, value) in extra.into_iter().take(MAX_EXPERIMENTS_EXTRAS_SIZE) {
+ let truncated_key = if key.len() > MAX_EXPERIMENTS_IDS_LEN {
+ truncate_string_at_boundary_with_error(
+ glean,
+ &self.meta,
+ key,
+ MAX_EXPERIMENTS_IDS_LEN,
+ )
+ } else {
+ key
+ };
+ let truncated_value = if value.len() > MAX_EXPERIMENT_VALUE_LEN {
+ truncate_string_at_boundary_with_error(
+ glean,
+ &self.meta,
+ value,
+ MAX_EXPERIMENT_VALUE_LEN,
+ )
+ } else {
+ value
+ };
+
+ truncated_extras.insert(truncated_key, truncated_value);
+ }
+ let truncated_extras = if truncated_extras.is_empty() {
+ None
+ } else {
+ Some(truncated_extras)
+ };
+
+ let value = Metric::Experiment(RecordedExperiment {
+ branch: truncated_branch,
+ extra: truncated_extras,
+ });
+ glean.storage().record(glean, &self.meta, &value)
+ }
+
+ /// Records an experiment as inactive.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - The Glean instance this metric belongs to.
+ pub fn set_inactive_sync(&self, glean: &Glean) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ if let Err(e) = glean.storage().remove_single_metric(
+ Lifetime::Application,
+ INTERNAL_STORAGE,
+ &self.meta.inner.name,
+ ) {
+ log::error!("Failed to set experiment as inactive: {:?}", e);
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored experiment data as a [`RecordedExperiment`].
+ ///
+ /// This doesn't clear the stored value.
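+ ///
+ /// A usage sketch (illustrative only, not part of the upstream sources;
+ /// `glean` is an initialized instance):
+ ///
+ /// ```ignore
+ /// let experiment = ExperimentMetric::new(&glean, "my-experiment".to_string());
+ /// experiment.set_active_sync(&glean, "treatment".into(), HashMap::new());
+ /// assert_eq!("treatment", experiment.test_get_value(&glean).unwrap().branch);
+ /// ```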
+ pub fn test_get_value(&self, glean: &Glean) -> Option<RecordedExperiment> {
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ INTERNAL_STORAGE,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::Experiment(e)) => Some(e),
+ _ => None,
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn stable_serialization() {
+ let experiment_empty = RecordedExperiment {
+ branch: "branch".into(),
+ extra: Default::default(),
+ };
+
+ let mut data = HashMap::new();
+ data.insert("a key".to_string(), "a value".to_string());
+ let experiment_data = RecordedExperiment {
+ branch: "branch".into(),
+ extra: Some(data),
+ };
+
+ let experiment_empty_bin = bincode::serialize(&experiment_empty).unwrap();
+ let experiment_data_bin = bincode::serialize(&experiment_data).unwrap();
+
+ assert_eq!(
+ experiment_empty,
+ bincode::deserialize(&experiment_empty_bin).unwrap()
+ );
+ assert_eq!(
+ experiment_data,
+ bincode::deserialize(&experiment_data_bin).unwrap()
+ );
+ }
+
+ #[test]
+ #[rustfmt::skip] // Let's not add newlines unnecessarily
+ fn deserialize_old_encoding() {
+ // generated by `bincode::serialize` as of Glean commit ac27fceb7c0d5a7288d7d569e8c5c5399a53afb2
+ // empty was generated from: `RecordedExperiment { branch: "branch".into(), extra: None, }`
+ let empty_bin = vec![6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104];
+ // data was generated from: RecordedExperiment { branch: "branch".into(), extra: Some({"a key": "a value"}), };
+ let data_bin = vec![6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0,
+ 97, 32, 107, 101, 121, 7, 0, 0, 0, 0, 0, 0, 0, 97,
+ 32, 118, 97, 108, 117, 101];
+
+
+ let mut data = HashMap::new();
+ data.insert("a key".to_string(), "a value".to_string());
+ let experiment_data = RecordedExperiment { branch: "branch".into(), extra: Some(data), };
+
+ // We can't actually decode old experiment data.
+ // Luckily Glean did not store experiments in the database before commit ac27fceb7c0d5a7288d7d569e8c5c5399a53afb2.
+ let experiment_empty: Result<RecordedExperiment, _> = bincode::deserialize(&empty_bin);
+ assert!(experiment_empty.is_err());
+
+ assert_eq!(experiment_data, bincode::deserialize(&data_bin).unwrap());
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/labeled.rs b/third_party/rust/glean-core/src/metrics/labeled.rs
new file mode 100644
index 0000000000..fa3e6a6a75
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/labeled.rs
@@ -0,0 +1,294 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::borrow::Cow;
+use std::collections::{hash_map::Entry, HashMap};
+use std::sync::{Arc, Mutex};
+
+use crate::common_metric_data::{CommonMetricData, CommonMetricDataInternal};
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::{BooleanMetric, CounterMetric, Metric, MetricType, StringMetric};
+use crate::Glean;
+
+const MAX_LABELS: usize = 16;
+const OTHER_LABEL: &str = "__other__";
+const MAX_LABEL_LENGTH: usize = 71;
+
+/// A labeled counter.
+pub type LabeledCounter = LabeledMetric<CounterMetric>;
+
+/// A labeled boolean.
+pub type LabeledBoolean = LabeledMetric<BooleanMetric>;
+
+/// A labeled string.
+pub type LabeledString = LabeledMetric<StringMetric>;
+
+/// A labeled metric.
+///
+/// Labeled metrics allow recording multiple sub-metrics of the same type under different string labels.
+#[derive(Debug)]
+pub struct LabeledMetric<T> {
+ labels: Option<Vec<Cow<'static, str>>>,
+ /// Type of the underlying metric.
+ /// We hold on to an instance of it, which is cloned to create new modified instances.
+ submetric: T,
+
+ /// A map from a unique ID for the labeled submetric to a handle of an instantiated
+ /// metric type.
+ label_map: Mutex<HashMap<String, Arc<T>>>,
+}
+
+/// Sealed traits protect against downstream implementations.
+///
+/// We wrap it in a private module that is inaccessible outside of this module.
+mod private {
+ use crate::{
+ metrics::BooleanMetric, metrics::CounterMetric, metrics::StringMetric, CommonMetricData,
+ };
+
+ /// The sealed labeled trait.
+ ///
+ /// This also allows us to hide methods that are only used internally
+ /// and should not be visible to users of the object implementing the
+ /// `Labeled<T>` trait.
+ pub trait Sealed {
+ /// Create a new `glean_core` metric from the metadata.
+ fn new_inner(meta: crate::CommonMetricData) -> Self;
+ }
+
+ impl Sealed for CounterMetric {
+ fn new_inner(meta: CommonMetricData) -> Self {
+ Self::new(meta)
+ }
+ }
+
+ impl Sealed for BooleanMetric {
+ fn new_inner(meta: CommonMetricData) -> Self {
+ Self::new(meta)
+ }
+ }
+
+ impl Sealed for StringMetric {
+ fn new_inner(meta: CommonMetricData) -> Self {
+ Self::new(meta)
+ }
+ }
+}
+
+/// Trait for metrics that can be nested inside a labeled metric.
+pub trait AllowLabeled: MetricType {
+ /// Create a new labeled metric.
+ fn new_labeled(meta: CommonMetricData) -> Self;
+}
+
+// Implement the trait for everything we marked as allowed.
+impl<T> AllowLabeled for T
+where
+ T: MetricType,
+ T: private::Sealed,
+{
+ fn new_labeled(meta: CommonMetricData) -> Self {
+ T::new_inner(meta)
+ }
+}
+
+impl<T> LabeledMetric<T>
+where
+ T: AllowLabeled + Clone,
+{
+ /// Creates a new labeled metric from the given metric instance and optional list of labels.
+ ///
+ /// See [`get`](LabeledMetric::get) for information on how static or dynamic labels are handled.
+ pub fn new(meta: CommonMetricData, labels: Option<Vec<Cow<'static, str>>>) -> LabeledMetric<T> {
+ let submetric = T::new_labeled(meta);
+ LabeledMetric::new_inner(submetric, labels)
+ }
+
+ fn new_inner(submetric: T, labels: Option<Vec<Cow<'static, str>>>) -> LabeledMetric<T> {
+ let label_map = Default::default();
+ LabeledMetric {
+ labels,
+ submetric,
+ label_map,
+ }
+ }
+
+ /// Creates a new metric with a specific label.
+ ///
+ /// This is used for static labels where we can just set the name to be `name/label`.
+ fn new_metric_with_name(&self, name: String) -> T {
+ self.submetric.with_name(name)
+ }
+
+ /// Creates a new metric with a specific label.
+ ///
+ /// This is used for dynamic labels where we have to actually validate and correct the
+ /// label later when we have a Glean object.
+ fn new_metric_with_dynamic_label(&self, label: String) -> T {
+ self.submetric.with_dynamic_label(label)
+ }
+
+ /// Creates a static label.
+ ///
+ /// # Safety
+ ///
+ /// Should only be called when static labels are available on this metric.
+ ///
+ /// # Arguments
+ ///
+ /// * `label` - The requested label
+ ///
+ /// # Returns
+ ///
+ /// The requested label if it is in the list of allowed labels.
+ /// Otherwise `OTHER_LABEL` is returned.
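+ ///
+ /// For example (illustrative): with allowed labels `["a", "b"]`,
+ /// requesting `"a"` returns `"a"`, while requesting `"c"` falls
+ /// back to `"__other__"`.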
+ fn static_label<'a>(&self, label: &'a str) -> &'a str {
+ debug_assert!(self.labels.is_some());
+ let labels = self.labels.as_ref().unwrap();
+ if labels.iter().any(|l| l == label) {
+ label
+ } else {
+ OTHER_LABEL
+ }
+ }
+
+ /// Gets a specific metric for a given label.
+ ///
+ /// If a set of acceptable labels were specified in the `metrics.yaml` file,
+ /// and the given label is not in the set, it will be recorded under the special `OTHER_LABEL` label.
+ ///
+ /// If a set of acceptable labels was not specified in the `metrics.yaml` file,
+ /// only the first 16 unique labels will be used.
+ /// After that, any additional labels will be recorded under the special `OTHER_LABEL` label.
+ ///
+ /// Dynamic labels must be printable ASCII and at most `MAX_LABEL_LENGTH` (71) characters long.
+ /// If an invalid label is used, the metric will be recorded in the special `OTHER_LABEL` label.
+ pub fn get<S: AsRef<str>>(&self, label: S) -> Arc<T> {
+ let label = label.as_ref();
+
+ // The handle is a unique number per metric.
+ // The label identifies the submetric.
+ let id = format!("{}/{}", self.submetric.meta().base_identifier(), label);
+
+ let mut map = self.label_map.lock().unwrap();
+ match map.entry(id) {
+ Entry::Occupied(entry) => Arc::clone(entry.get()),
+ Entry::Vacant(entry) => {
+ // We have 2 scenarios to consider:
+ // * Static labels. No database access needed. We just look at what is in memory.
+ // * Dynamic labels. We look up in the database all previously stored
+ // labels in order to keep a maximum of allowed labels. This is done later
+ // when the specific metric is actually recorded, when we are guaranteed to have
+ // an initialized Glean object.
+ let metric = match self.labels {
+ Some(_) => {
+ let label = self.static_label(label);
+ self.new_metric_with_name(combine_base_identifier_and_label(
+ &self.submetric.meta().inner.name,
+ label,
+ ))
+ }
+ None => self.new_metric_with_dynamic_label(label.to_string()),
+ };
+ let metric = Arc::new(metric);
+ entry.insert(Arc::clone(&metric));
+ metric
+ }
+ }
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.submetric.meta(), error).unwrap_or(0)
+ })
+ }
+}
+
+/// Combines a metric's base identifier and label.
+pub fn combine_base_identifier_and_label(base_identifier: &str, label: &str) -> String {
+ format!("{}/{}", base_identifier, label)
+}
+
+/// Strips the label off of a complete identifier.
+pub fn strip_label(identifier: &str) -> &str {
+ identifier.split_once('/').map_or(identifier, |s| s.0)
+}
+
+/// Validates a dynamic label, changing it to `OTHER_LABEL` if it's invalid.
+///
+/// Checks the requested label against limitations, such as the label length and allowed
+/// characters.
+///
+/// # Arguments
+///
+/// * `label` - The requested label
+///
+/// # Returns
+///
+/// The entire identifier for the metric, including the base identifier and the corrected label.
+/// The errors are logged.
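+///
+/// For example (illustrative): a valid label yields
+/// `"category.metric/my_label"`, while a label that is too long, is not
+/// printable ASCII, or arrives after `MAX_LABELS` distinct labels have been
+/// stored yields `"category.metric/__other__"`.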
+pub fn validate_dynamic_label( + glean: &Glean, + meta: &CommonMetricDataInternal, + base_identifier: &str, + label: &str, +) -> String { + let key = combine_base_identifier_and_label(base_identifier, label); + for store in &meta.inner.send_in_pings { + if glean.storage().has_metric(meta.inner.lifetime, store, &key) { + return key; + } + } + + let mut label_count = 0; + let prefix = &key[..=base_identifier.len()]; + let mut snapshotter = |_: &[u8], _: &Metric| { + label_count += 1; + }; + + let lifetime = meta.inner.lifetime; + for store in &meta.inner.send_in_pings { + glean + .storage() + .iter_store_from(lifetime, store, Some(prefix), &mut snapshotter); + } + + let error = if label_count >= MAX_LABELS { + true + } else if label.len() > MAX_LABEL_LENGTH { + let msg = format!( + "label length {} exceeds maximum of {}", + label.len(), + MAX_LABEL_LENGTH + ); + record_error(glean, meta, ErrorType::InvalidLabel, msg, None); + true + } else if label.chars().any(|c| !c.is_ascii() || c.is_ascii_control()) { + let msg = format!("label must be printable ascii, got '{}'", label); + record_error(glean, meta, ErrorType::InvalidLabel, msg, None); + true + } else { + false + }; + + if error { + combine_base_identifier_and_label(base_identifier, OTHER_LABEL) + } else { + key + } +} diff --git a/third_party/rust/glean-core/src/metrics/memory_distribution.rs b/third_party/rust/glean-core/src/metrics/memory_distribution.rs new file mode 100644 index 0000000000..ac9eda1a90 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/memory_distribution.rs @@ -0,0 +1,282 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType}; +use crate::histogram::{Functional, Histogram}; +use crate::metrics::memory_unit::MemoryUnit; +use crate::metrics::{DistributionData, Metric, MetricType}; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +// The base of the logarithm used to determine bucketing +const LOG_BASE: f64 = 2.0; + +// The buckets per each order of magnitude of the logarithm. +const BUCKETS_PER_MAGNITUDE: f64 = 16.0; + +// Set a maximum recordable value of 1 terabyte so the buckets aren't +// completely unbounded. +const MAX_BYTES: u64 = 1 << 40; + +/// A memory distribution metric. +/// +/// Memory distributions are used to accumulate and store memory sizes. +#[derive(Clone, Debug)] +pub struct MemoryDistributionMetric { + meta: Arc<CommonMetricDataInternal>, + memory_unit: MemoryUnit, +} + +/// Create a snapshot of the histogram. +/// +/// The snapshot can be serialized into the payload format. +pub(crate) fn snapshot(hist: &Histogram<Functional>) -> DistributionData { + DistributionData { + // **Caution**: This cannot use `Histogram::snapshot_values` and needs to use the more + // specialized snapshot function. + values: hist + .snapshot() + .into_iter() + .map(|(k, v)| (k as i64, v as i64)) + .collect(), + sum: hist.sum() as i64, + count: hist.count() as i64, + } +} + +impl MetricType for MemoryDistributionMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. 
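+
+// For intuition about the constants above: assuming the usual
+// functional-bucketing formula
+// `bucket_index = floor(BUCKETS_PER_MAGNITUDE * log2(sample))` (the exact
+// definition lives in `crate::histogram`), a 1 MiB sample (2^20 bytes) lands
+// in bucket index 16 * 20 = 320, and adjacent bucket boundaries differ by a
+// factor of 2^(1/16), i.e. roughly 4.4%.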
+impl MemoryDistributionMetric {
+    /// Creates a new memory distribution metric.
+    pub fn new(meta: CommonMetricData, memory_unit: MemoryUnit) -> Self {
+        Self {
+            meta: Arc::new(meta.into()),
+            memory_unit,
+        }
+    }
+
+    /// Accumulates the provided sample in the metric.
+    ///
+    /// # Arguments
+    ///
+    /// * `sample` - The sample to be recorded by the metric. The sample is assumed to be in the
+    /// configured memory unit of the metric.
+    ///
+    /// ## Notes
+    ///
+    /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated
+    /// and an [`ErrorType::InvalidValue`] error is recorded.
+    pub fn accumulate(&self, sample: i64) {
+        let metric = self.clone();
+        crate::launch_with_glean(move |glean| metric.accumulate_sync(glean, sample))
+    }
+
+    /// Accumulates the provided sample in the metric synchronously.
+    ///
+    /// See [`accumulate`](Self::accumulate) for details.
+    #[doc(hidden)]
+    pub fn accumulate_sync(&self, glean: &Glean, sample: i64) {
+        if !self.should_record(glean) {
+            return;
+        }
+
+        if sample < 0 {
+            record_error(
+                glean,
+                &self.meta,
+                ErrorType::InvalidValue,
+                "Accumulated a negative sample",
+                None,
+            );
+            return;
+        }
+
+        let mut sample = self.memory_unit.as_bytes(sample as u64);
+
+        if sample > MAX_BYTES {
+            let msg = "Sample is bigger than 1 terabyte";
+            record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
+            sample = MAX_BYTES;
+        }
+
+        // Let's be defensive here:
+        // The uploader tries to store some memory distribution metrics,
+        // but in tests that storage might be gone already.
+        // Let's just ignore those.
+        // We do the same for counters and timing distributions.
+        // This should never happen in real app usage.
+        if let Some(storage) = glean.storage_opt() {
+            storage.record_with(glean, &self.meta, |old_value| match old_value {
+                Some(Metric::MemoryDistribution(mut hist)) => {
+                    hist.accumulate(sample);
+                    Metric::MemoryDistribution(hist)
+                }
+                _ => {
+                    let mut hist = Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE);
+                    hist.accumulate(sample);
+                    Metric::MemoryDistribution(hist)
+                }
+            });
+        } else {
+            log::warn!(
+                "Couldn't get storage. Can't record memory distribution '{}'.",
+                self.meta.base_identifier()
+            );
+        }
+    }
+
+    /// Accumulates the provided signed samples in the metric.
+    ///
+    /// This is required so that the platform-specific code can provide us with
+    /// 64 bit signed integers if no `u64` comparable type is available. This
+    /// will take care of filtering and reporting errors for any provided negative
+    /// sample.
+    ///
+    /// Please note that this assumes that the provided samples are already in
+    /// the "unit" declared by the instance of the metric type (e.g. if the
+    /// instance this method was called on is using [`MemoryUnit::Kilobyte`], then
+    /// `samples` are assumed to be in that unit).
+    ///
+    /// # Arguments
+    ///
+    /// * `samples` - The vector holding the samples to be recorded by the metric.
+    ///
+    /// ## Notes
+    ///
+    /// Discards any negative value in `samples` and reports an [`ErrorType::InvalidValue`]
+    /// error for each of them.
+    ///
+    /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated
+    /// and an [`ErrorType::InvalidValue`] error is recorded.
+    pub fn accumulate_samples(&self, samples: Vec<i64>) {
+        let metric = self.clone();
+        crate::launch_with_glean(move |glean| metric.accumulate_samples_sync(glean, samples))
+    }
+
+    /// Accumulates the provided signed samples in the metric synchronously.
+    ///
+    /// See [`accumulate_samples`](Self::accumulate_samples) for details.
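+    ///
+    /// A minimal usage sketch (assuming a metric configured with
+    /// `MemoryUnit::Kilobyte`; the metric name is illustrative):
+    ///
+    /// ```rust,ignore
+    /// // -7 is discarded and recorded as an InvalidValue error;
+    /// // 2048 KB is converted to 2^21 bytes before bucketing.
+    /// heap_size.accumulate_samples(vec![-7, 2048]);
+    /// ```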
+    #[doc(hidden)]
+    pub fn accumulate_samples_sync(&self, glean: &Glean, samples: Vec<i64>) {
+        if !self.should_record(glean) {
+            return;
+        }
+
+        let mut num_negative_samples = 0;
+        let mut num_too_large_samples = 0;
+
+        glean.storage().record_with(glean, &self.meta, |old_value| {
+            let mut hist = match old_value {
+                Some(Metric::MemoryDistribution(hist)) => hist,
+                _ => Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE),
+            };
+
+            for &sample in samples.iter() {
+                if sample < 0 {
+                    num_negative_samples += 1;
+                } else {
+                    let sample = sample as u64;
+                    let mut sample = self.memory_unit.as_bytes(sample);
+                    if sample > MAX_BYTES {
+                        num_too_large_samples += 1;
+                        sample = MAX_BYTES;
+                    }
+
+                    hist.accumulate(sample);
+                }
+            }
+            Metric::MemoryDistribution(hist)
+        });
+
+        if num_negative_samples > 0 {
+            let msg = format!("Accumulated {} negative samples", num_negative_samples);
+            record_error(
+                glean,
+                &self.meta,
+                ErrorType::InvalidValue,
+                msg,
+                num_negative_samples,
+            );
+        }
+
+        if num_too_large_samples > 0 {
+            let msg = format!(
+                "Accumulated {} samples larger than 1TB",
+                num_too_large_samples
+            );
+            record_error(
+                glean,
+                &self.meta,
+                ErrorType::InvalidValue,
+                msg,
+                num_too_large_samples,
+            );
+        }
+    }
+
+    /// Gets the currently stored value synchronously.
+    #[doc(hidden)]
+    pub fn get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        glean: &Glean,
+        ping_name: S,
+    ) -> Option<DistributionData> {
+        let queried_ping_name = ping_name
+            .into()
+            .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+        match StorageManager.snapshot_metric_for_test(
+            glean.storage(),
+            queried_ping_name,
+            &self.meta.identifier(glean),
+            self.meta.inner.lifetime,
+        ) {
+            Some(Metric::MemoryDistribution(hist)) => Some(snapshot(&hist)),
+            _ => None,
+        }
+    }
+
+    /// **Test-only API (exported for FFI purposes).**
+    ///
+    /// Gets the currently stored value.
+    ///
+    /// This doesn't clear the stored value.
+    pub fn test_get_value(&self, ping_name: Option<String>) -> Option<DistributionData> {
+        crate::block_on_dispatcher();
+        crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+    }
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+        crate::block_on_dispatcher();
+
+        crate::core::with_glean(|glean| {
+            test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+        })
+    }
+}
diff --git a/third_party/rust/glean-core/src/metrics/memory_unit.rs b/third_party/rust/glean-core/src/metrics/memory_unit.rs
new file mode 100644
index 0000000000..ce51b975fa
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/memory_unit.rs
@@ -0,0 +1,64 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::convert::TryFrom;
+
+use serde::{Deserialize, Serialize};
+
+use crate::error::{Error, ErrorKind};
+
+/// Different resolutions supported by the memory related metric types (e.g.
+/// MemoryDistributionMetric).
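+///
+/// For example (see [`MemoryUnit::as_bytes`]):
+///
+/// ```rust,ignore
+/// assert_eq!(2048, MemoryUnit::Kilobyte.as_bytes(2));
+/// assert_eq!(1 << 30, MemoryUnit::Gigabyte.as_bytes(1));
+/// ```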
+#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
+#[serde(rename_all = "lowercase")]
+#[repr(i32)] // use i32 to be compatible with our JNA definition
+pub enum MemoryUnit {
+    /// 1 byte
+    Byte,
+    /// 2^10 bytes
+    Kilobyte,
+    /// 2^20 bytes
+    Megabyte,
+    /// 2^30 bytes
+    Gigabyte,
+}
+
+impl MemoryUnit {
+    /// Converts a value in the given unit to bytes.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - the value to convert.
+    ///
+    /// # Returns
+    ///
+    /// The integer representation of the byte value.
+    pub fn as_bytes(self, value: u64) -> u64 {
+        use MemoryUnit::*;
+        match self {
+            Byte => value,
+            Kilobyte => value << 10,
+            Megabyte => value << 20,
+            Gigabyte => value << 30,
+        }
+    }
+}
+
+/// Trait implementation for converting an integer value
+/// to a [`MemoryUnit`]. This is used in the FFI code. Please
+/// note that values should match the ordering of the platform
+/// specific side of things (e.g. Kotlin implementation).
+impl TryFrom<i32> for MemoryUnit {
+    type Error = Error;
+
+    fn try_from(value: i32) -> Result<MemoryUnit, Self::Error> {
+        match value {
+            0 => Ok(MemoryUnit::Byte),
+            1 => Ok(MemoryUnit::Kilobyte),
+            2 => Ok(MemoryUnit::Megabyte),
+            3 => Ok(MemoryUnit::Gigabyte),
+            e => Err(ErrorKind::MemoryUnit(e).into()),
+        }
+    }
+}
diff --git a/third_party/rust/glean-core/src/metrics/metrics_enabled_config.rs b/third_party/rust/glean-core/src/metrics/metrics_enabled_config.rs
new file mode 100644
index 0000000000..26d0deff31
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/metrics_enabled_config.rs
@@ -0,0 +1,46 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::{collections::HashMap, convert::TryFrom};
+
+use serde::{Deserialize, Serialize};
+
+/// Represents a list of metrics and an associated boolean property
+/// indicating if the metric is enabled from the remote-settings
+/// configuration store. The expected format of this data is stringified JSON
+/// in the following format:
+/// ```json
+/// {
+/// "category.metric_name": true
+/// }
+/// ```
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct MetricsEnabledConfig {
+    /// This is a `HashMap` consisting of base_identifiers as keys
+    /// and bool values representing an override for the `disabled`
+    /// property of the metric, only inverted to reduce confusion.
+    /// If a particular metric has a value of `true` here, it means
+    /// the default of the metric will be overridden and set to the
+    /// enabled state.
+    #[serde(flatten)]
+    pub metrics_enabled: HashMap<String, bool>,
+}
+
+impl MetricsEnabledConfig {
+    /// Creates a new MetricsEnabledConfig
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+impl TryFrom<String> for MetricsEnabledConfig {
+    type Error = crate::ErrorKind;
+
+    fn try_from(json: String) -> Result<Self, Self::Error> {
+        match serde_json::from_str(json.as_str()) {
+            Ok(config) => Ok(config),
+            Err(e) => Err(crate::ErrorKind::Json(e)),
+        }
+    }
+}
diff --git a/third_party/rust/glean-core/src/metrics/mod.rs b/third_party/rust/glean-core/src/metrics/mod.rs
new file mode 100644
index 0000000000..43253b9aa7
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/mod.rs
@@ -0,0 +1,285 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! The different metric types supported by the Glean SDK to handle data.
+
+use std::collections::HashMap;
+use std::sync::atomic::Ordering;
+
+use chrono::{DateTime, FixedOffset};
+use serde::{Deserialize, Serialize};
+use serde_json::{json, Value as JsonValue};
+
+mod boolean;
+mod counter;
+mod custom_distribution;
+mod datetime;
+mod denominator;
+mod event;
+mod experiment;
+pub(crate) mod labeled;
+mod memory_distribution;
+mod memory_unit;
+mod metrics_enabled_config;
+mod numerator;
+mod ping;
+mod quantity;
+mod rate;
+mod recorded_experiment;
+mod string;
+mod string_list;
+mod text;
+mod time_unit;
+mod timespan;
+mod timing_distribution;
+mod url;
+mod uuid;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+pub use crate::event_database::RecordedEvent;
+use crate::histogram::{Functional, Histogram, PrecomputedExponential, PrecomputedLinear};
+pub use crate::metrics::datetime::Datetime;
+use crate::util::get_iso_time_string;
+use crate::Glean;
+
+pub use self::boolean::BooleanMetric;
+pub use self::counter::CounterMetric;
+pub use self::custom_distribution::CustomDistributionMetric;
+pub use self::datetime::DatetimeMetric;
+pub use self::denominator::DenominatorMetric;
+pub use self::event::EventMetric;
+pub(crate) use self::experiment::ExperimentMetric;
+pub use self::labeled::{LabeledBoolean, LabeledCounter, LabeledMetric, LabeledString};
+pub use self::memory_distribution::MemoryDistributionMetric;
+pub use self::memory_unit::MemoryUnit;
+pub use self::numerator::NumeratorMetric;
+pub use self::ping::PingType;
+pub use self::quantity::QuantityMetric;
+pub use self::rate::{Rate, RateMetric};
+pub use self::string::StringMetric;
+pub use self::string_list::StringListMetric;
+pub use self::text::TextMetric;
+pub use self::time_unit::TimeUnit;
+pub use self::timespan::TimespanMetric;
+pub use self::timing_distribution::TimerId;
+pub use self::timing_distribution::TimingDistributionMetric;
+pub use self::url::UrlMetric;
+pub use self::uuid::UuidMetric;
+pub use crate::histogram::HistogramType;
+pub use recorded_experiment::RecordedExperiment;
+
+pub use self::metrics_enabled_config::MetricsEnabledConfig;
+
+/// A snapshot of all buckets and the accumulated sum of a distribution.
+//
+// Note: Be careful when changing this structure.
+// The serialized form ends up in the ping payload.
+// New fields might need to be skipped on serialization.
+#[derive(Debug, Serialize)]
+pub struct DistributionData {
+    /// A map containing the bucket index mapped to the accumulated count.
+    ///
+    /// This can contain buckets with a count of `0`.
+    pub values: HashMap<i64, i64>,
+
+    /// The accumulated sum of all the samples in the distribution.
+    pub sum: i64,
+
+    /// The total number of entries in the distribution.
+    #[serde(skip)]
+    pub count: i64,
+}
+
+/// The available metrics.
+///
+/// This is the in-memory and persisted layout of a metric.
+///
+/// ## Note
+///
+/// The order of metrics in this enum is important, as it is used for serialization.
+/// Do not reorder the variants.
+///
+/// **Any new metric must be added at the end.**
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
+pub enum Metric {
+    /// A boolean metric. See [`BooleanMetric`] for more information.
+    Boolean(bool),
+    /// A counter metric. See [`CounterMetric`] for more information.
+    Counter(i32),
+    /// A custom distribution with precomputed exponential bucketing.
+    /// See [`CustomDistributionMetric`] for more information.
+    CustomDistributionExponential(Histogram<PrecomputedExponential>),
+    /// A custom distribution with precomputed linear bucketing.
+    /// See [`CustomDistributionMetric`] for more information.
+    CustomDistributionLinear(Histogram<PrecomputedLinear>),
+    /// A datetime metric. See [`DatetimeMetric`] for more information.
+    Datetime(DateTime<FixedOffset>, TimeUnit),
+    /// An experiment metric. See `ExperimentMetric` for more information.
+    Experiment(recorded_experiment::RecordedExperiment),
+    /// A quantity metric. See [`QuantityMetric`] for more information.
+    Quantity(i64),
+    /// A string metric. See [`StringMetric`] for more information.
+    String(String),
+    /// A string list metric. See [`StringListMetric`] for more information.
+    StringList(Vec<String>),
+    /// A UUID metric. See [`UuidMetric`] for more information.
+    Uuid(String),
+    /// A timespan metric. See [`TimespanMetric`] for more information.
+    Timespan(std::time::Duration, TimeUnit),
+    /// A timing distribution. See [`TimingDistributionMetric`] for more information.
+    TimingDistribution(Histogram<Functional>),
+    /// A memory distribution. See [`MemoryDistributionMetric`] for more information.
+    MemoryDistribution(Histogram<Functional>),
+    /// **DEPRECATED**: A JWE metric.
+    /// Note: This variant MUST NOT be removed to avoid backwards-incompatible changes to the
+    /// serialization. This type has no underlying implementation anymore.
+    Jwe(String),
+    /// A rate metric. See [`RateMetric`] for more information.
+    Rate(i32, i32),
+    /// A URL metric. See [`UrlMetric`] for more information.
+    Url(String),
+    /// A Text metric. See [`TextMetric`] for more information.
+    Text(String),
+}
+
+/// A [`MetricType`] describes common behavior across all metrics.
+pub trait MetricType {
+    /// Access the stored metadata
+    fn meta(&self) -> &CommonMetricDataInternal;
+
+    /// Create a new metric from this with a new name.
+    fn with_name(&self, _name: String) -> Self
+    where
+        Self: Sized,
+    {
+        unimplemented!()
+    }
+
+    /// Create a new metric from this with a specific label.
+    fn with_dynamic_label(&self, _label: String) -> Self
+    where
+        Self: Sized,
+    {
+        unimplemented!()
+    }
+
+    /// Whether this metric should currently be recorded.
+    ///
+    /// This depends on the metric's own state, as determined by its metadata,
+    /// and whether upload is enabled on the Glean object.
+    fn should_record(&self, glean: &Glean) -> bool {
+        if !glean.is_upload_enabled() {
+            return false;
+        }
+
+        // Technically nothing prevents multiple calls to should_record() from running in parallel,
+        // meaning both are reading self.meta().disabled and later writing it. In between it can
+        // also read remote_settings_metrics_config, which also could be modified in between those 2 reads.
+        // This means we could write the wrong remote_settings_epoch | current_disabled value. All in all
+        // at worst we would see the metric wrongly enabled/disabled once.
+        // But since everything is tunneled through the dispatcher, this should never ever happen.
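+        //
+        // Concretely, the one-byte layout handled below is
+        // `(epoch << 4) | disabled`: e.g. epoch 3 with the metric disabled
+        // (lower nibble 1) is stored as 0b0011_0001.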
+
+        // Get the current disabled field from the metric metadata, including
+        // the encoded remote_settings epoch
+        let disabled_field = self.meta().disabled.load(Ordering::Relaxed);
+        // Grab the epoch from the upper nibble
+        let epoch = disabled_field >> 4;
+        // Get the disabled flag from the lower nibble
+        let disabled = disabled_field & 0xF;
+        // Get the current remote_settings epoch to see if we need to bother with the
+        // more expensive HashMap lookup
+        let remote_settings_epoch = glean.remote_settings_epoch.load(Ordering::Acquire);
+        if epoch == remote_settings_epoch {
+            return disabled == 0;
+        }
+        // The epochs didn't match, so we need to look up the disabled flag
+        // by the base_identifier from the in-memory HashMap
+        let metrics_enabled = &glean
+            .remote_settings_metrics_config
+            .lock()
+            .unwrap()
+            .metrics_enabled;
+        // Get the value from the remote configuration if it is there, otherwise return the default value.
+        let current_disabled = {
+            let base_id = self.meta().base_identifier();
+            let identifier = base_id
+                .split_once('/')
+                .map(|split| split.0)
+                .unwrap_or(&base_id);
+            // NOTE: The `!` preceding the `*is_enabled` is important for inverting the logic since the
+            // underlying property in the metrics.yaml is `disabled` and the outward API is treating it as
+            // if it were `enabled` to make it easier to understand.
+            if let Some(is_enabled) = metrics_enabled.get(identifier) {
+                u8::from(!*is_enabled)
+            } else {
+                u8::from(self.meta().inner.disabled)
+            }
+        };
+
+        // Re-encode the epoch and enabled status and update the metadata
+        let new_disabled = (remote_settings_epoch << 4) | (current_disabled & 0xF);
+        self.meta().disabled.store(new_disabled, Ordering::Relaxed);
+
+        // Return a boolean indicating whether or not the metric should be recorded
+        current_disabled == 0
+    }
+}
+
+impl Metric {
+    /// Gets the ping section the metric fits into.
+    ///
+    /// This determines the section of the ping to place the metric data in when
+    /// assembling the ping payload.
+    pub fn ping_section(&self) -> &'static str {
+        match self {
+            Metric::Boolean(_) => "boolean",
+            Metric::Counter(_) => "counter",
+            // Custom distributions are in the same section, no matter what bucketing.
+            Metric::CustomDistributionExponential(_) => "custom_distribution",
+            Metric::CustomDistributionLinear(_) => "custom_distribution",
+            Metric::Datetime(_, _) => "datetime",
+            Metric::Experiment(_) => panic!("Experiments should not be serialized through this"),
+            Metric::Quantity(_) => "quantity",
+            Metric::Rate(..) => "rate",
+            Metric::String(_) => "string",
+            Metric::StringList(_) => "string_list",
+            Metric::Timespan(..)
=> "timespan", + Metric::TimingDistribution(_) => "timing_distribution", + Metric::Url(_) => "url", + Metric::Uuid(_) => "uuid", + Metric::MemoryDistribution(_) => "memory_distribution", + Metric::Jwe(_) => "jwe", + Metric::Text(_) => "text", + } + } + + /// The JSON representation of the metric's data + pub fn as_json(&self) -> JsonValue { + match self { + Metric::Boolean(b) => json!(b), + Metric::Counter(c) => json!(c), + Metric::CustomDistributionExponential(hist) => { + json!(custom_distribution::snapshot(hist)) + } + Metric::CustomDistributionLinear(hist) => json!(custom_distribution::snapshot(hist)), + Metric::Datetime(d, time_unit) => json!(get_iso_time_string(*d, *time_unit)), + Metric::Experiment(e) => e.as_json(), + Metric::Quantity(q) => json!(q), + Metric::Rate(num, den) => { + json!({"numerator": num, "denominator": den}) + } + Metric::String(s) => json!(s), + Metric::StringList(v) => json!(v), + Metric::Timespan(time, time_unit) => { + json!({"value": time_unit.duration_convert(*time), "time_unit": time_unit}) + } + Metric::TimingDistribution(hist) => json!(timing_distribution::snapshot(hist)), + Metric::Url(s) => json!(s), + Metric::Uuid(s) => json!(s), + Metric::MemoryDistribution(hist) => json!(memory_distribution::snapshot(hist)), + Metric::Jwe(s) => json!(s), + Metric::Text(s) => json!(s), + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/numerator.rs b/third_party/rust/glean-core/src/metrics/numerator.rs new file mode 100644 index 0000000000..3c340cab1d --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/numerator.rs @@ -0,0 +1,94 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::ErrorType; +use crate::metrics::MetricType; +use crate::metrics::Rate; +use crate::metrics::RateMetric; +use crate::CommonMetricData; +use crate::Glean; + +/// Developer-facing API for recording rate metrics with external denominators. +/// +/// Instances of this class type are automatically generated by the parsers +/// at build time, allowing developers to record values that were previously +/// registered in the metrics.yaml file. +#[derive(Clone)] +pub struct NumeratorMetric(pub(crate) Arc<RateMetric>); + +impl MetricType for NumeratorMetric { + fn meta(&self) -> &CommonMetricDataInternal { + self.0.meta() + } +} + +impl NumeratorMetric { + /// The public constructor used by automatically generated metrics. + pub fn new(meta: CommonMetricData) -> Self { + Self(Arc::new(RateMetric::new(meta))) + } + + /// Increases the numerator by `amount`. + /// + /// # Arguments + /// + /// * `amount` - The amount to increase by. Should be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is negative. + pub fn add_to_numerator(&self, amount: i32) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.add_to_numerator_sync(glean, amount)); + } + + #[doc(hidden)] + pub fn add_to_numerator_sync(&self, glean: &Glean, amount: i32) { + self.0.add_to_numerator_sync(glean, amount) + } + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a pair of integers. + /// + /// # Arguments + /// + /// * `ping_name` - the optional name of the ping to retrieve the metric + /// for. Defaults to the first value in `send_in_pings`. 
+ /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<Rate> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<Rate> { + self.0.get_value(glean, ping_name) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - the optional name of the ping to retrieve the metric + /// for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + self.0.test_get_num_recorded_errors(error) + } +} diff --git a/third_party/rust/glean-core/src/metrics/ping.rs b/third_party/rust/glean-core/src/metrics/ping.rs new file mode 100644 index 0000000000..238a2af628 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/ping.rs @@ -0,0 +1,201 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use std::sync::Arc; + +use crate::ping::PingMaker; +use crate::Glean; + +use uuid::Uuid; + +/// Stores information about a ping. +/// +/// This is required so that given metric data queued on disk we can send +/// pings with the correct settings, e.g. whether it has a client_id. +#[derive(Clone)] +pub struct PingType(Arc<InnerPing>); + +struct InnerPing { + /// The name of the ping. + pub name: String, + /// Whether the ping should include the client ID. + pub include_client_id: bool, + /// Whether the ping should be sent if it is empty + pub send_if_empty: bool, + /// The "reason" codes that this ping can send + pub reason_codes: Vec<String>, +} + +impl fmt::Debug for PingType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("PingType") + .field("name", &self.0.name) + .field("include_client_id", &self.0.include_client_id) + .field("send_if_empty", &self.0.send_if_empty) + .field("reason_codes", &self.0.reason_codes) + .finish() + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl PingType { + /// Creates a new ping type for the given name, whether to include the client ID and whether to + /// send this ping empty. + /// + /// # Arguments + /// + /// * `name` - The name of the ping. + /// * `include_client_id` - Whether to include the client ID in the assembled ping when submitting. + /// * `send_if_empty` - Whether the ping should be sent empty or not. + /// * `reason_codes` - The valid reason codes for this ping. + pub fn new<A: Into<String>>( + name: A, + include_client_id: bool, + send_if_empty: bool, + reason_codes: Vec<String>, + ) -> Self { + let this = Self(Arc::new(InnerPing { + name: name.into(), + include_client_id, + send_if_empty, + reason_codes, + })); + + // Register this ping. + // That will happen asynchronously and not block operation. 
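+        // For reference, a generated custom ping is typically instantiated as
+        // `PingType::new("prototype", true, false, vec![])` (illustrative
+        // values only) and later submitted via `submit()`.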
+        crate::register_ping_type(&this);
+
+        this
+    }
+
+    pub(crate) fn name(&self) -> &str {
+        &self.0.name
+    }
+
+    pub(crate) fn include_client_id(&self) -> bool {
+        self.0.include_client_id
+    }
+
+    pub(crate) fn send_if_empty(&self) -> bool {
+        self.0.send_if_empty
+    }
+
+    /// Submits the ping for eventual uploading.
+    ///
+    /// The ping content is assembled as soon as possible, but upload is not
+    /// guaranteed to happen immediately, as that depends on the upload policies.
+    ///
+    /// If the ping currently contains no content, it will not be sent,
+    /// unless it is configured to be sent if empty.
+    ///
+    /// # Arguments
+    ///
+    /// * `reason` - the reason the ping was triggered. Included in the
+    /// `ping_info.reason` part of the payload.
+    pub fn submit(&self, reason: Option<String>) {
+        let ping = PingType(Arc::clone(&self.0));
+
+        // Need to separate access to the Glean object from access to global state.
+        // `trigger_upload` itself might lock the Glean object and we need to avoid that deadlock.
+        crate::dispatcher::launch(|| {
+            let sent =
+                crate::core::with_glean(move |glean| ping.submit_sync(glean, reason.as_deref()));
+            if sent {
+                let state = crate::global_state().lock().unwrap();
+                if let Err(e) = state.callbacks.trigger_upload() {
+                    log::error!("Triggering upload failed. Error: {}", e);
+                }
+            }
+        })
+    }
+
+    /// Collects and submits a ping for eventual uploading.
+    ///
+    /// # Returns
+    ///
+    /// Whether the ping was successfully assembled and queued.
+    #[doc(hidden)]
+    pub fn submit_sync(&self, glean: &Glean, reason: Option<&str>) -> bool {
+        if !glean.is_upload_enabled() {
+            log::info!("Glean disabled: not submitting any pings.");
+            return false;
+        }
+
+        let ping = &self.0;
+
+        // Allowing `clippy::manual_filter`.
+        // This causes a false positive.
+        // We have a side-effect in the `else` branch,
+        // so shouldn't delete it.
+        #[allow(unknown_lints)]
+        #[allow(clippy::manual_filter)]
+        let corrected_reason = match reason {
+            Some(reason) => {
+                if ping.reason_codes.contains(&reason.to_string()) {
+                    Some(reason)
+                } else {
+                    log::error!("Invalid reason code {} for ping {}", reason, ping.name);
+                    None
+                }
+            }
+            None => None,
+        };
+
+        let ping_maker = PingMaker::new();
+        let doc_id = Uuid::new_v4().to_string();
+        let url_path = glean.make_path(&ping.name, &doc_id);
+        match ping_maker.collect(glean, self, corrected_reason, &doc_id, &url_path) {
+            None => {
+                log::info!(
+                    "No content for ping '{}', therefore no ping queued.",
+                    ping.name
+                );
+                false
+            }
+            Some(ping) => {
+                // This metric is recorded *after* the ping is collected (since
+                // that is the only way to know *if* it will be submitted). The
+                // implication of this is that the count for a metrics ping will
+                // be included in the *next* metrics ping.
+                glean
+                    .additional_metrics
+                    .pings_submitted
+                    .get(ping.name)
+                    .add_sync(glean, 1);
+
+                if let Err(e) = ping_maker.store_ping(glean.get_data_path(), &ping) {
+                    log::warn!("IO error while writing ping to file: {}. Enqueuing upload of what we have in memory.", e);
+                    glean.additional_metrics.io_errors.add_sync(glean, 1);
+                    // `serde_json::to_string` only fails if serialization of the content
+                    // fails or it contains maps with non-string keys.
+                    // However `ping.content` is already a `JsonValue`,
+                    // so both scenarios should be impossible.
+ let content = + ::serde_json::to_string(&ping.content).expect("ping serialization failed"); + glean.upload_manager.enqueue_ping( + glean, + ping.doc_id, + ping.url_path, + &content, + Some(ping.headers), + ); + return true; + } + + glean.upload_manager.enqueue_ping_from_file(glean, &doc_id); + + log::info!( + "The ping '{}' was submitted and will be sent as soon as possible", + ping.name + ); + + true + } + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/quantity.rs b/third_party/rust/glean-core/src/metrics/quantity.rs new file mode 100644 index 0000000000..c59d3a4a21 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/quantity.rs @@ -0,0 +1,126 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A quantity metric. +/// +/// Used to store explicit non-negative integers. +#[derive(Clone, Debug)] +pub struct QuantityMetric { + meta: CommonMetricDataInternal, +} + +impl MetricType for QuantityMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl QuantityMetric { + /// Creates a new quantity metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta: meta.into() } + } + + /// Sets the value. Must be non-negative. + /// + /// # Arguments + /// + /// * `value` - The value. Must be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `value` is negative. + pub fn set(&self, value: i64) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_sync(glean, value)) + } + + /// Sets the value synchronously. Must be non-negative. + #[doc(hidden)] + pub fn set_sync(&self, glean: &Glean, value: i64) { + if !self.should_record(glean) { + return; + } + + if value < 0 { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + format!("Set negative value {}", value), + None, + ); + return; + } + + glean + .storage() + .record(glean, &self.meta, &Metric::Quantity(value)) + } + + /// Get current value. + #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<i64> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Quantity(i)) => Some(i), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<i64> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. 
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+        crate::block_on_dispatcher();
+
+        crate::core::with_glean(|glean| {
+            test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+        })
+    }
+}
diff --git a/third_party/rust/glean-core/src/metrics/rate.rs b/third_party/rust/glean-core/src/metrics/rate.rs
new file mode 100644
index 0000000000..ba7f085b55
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/rate.rs
@@ -0,0 +1,191 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A rate value as given by its numerator and denominator.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct Rate {
+    /// A rate's numerator
+    pub numerator: i32,
+    /// A rate's denominator
+    pub denominator: i32,
+}
+
+impl From<(i32, i32)> for Rate {
+    fn from((num, den): (i32, i32)) -> Self {
+        Self {
+            numerator: num,
+            denominator: den,
+        }
+    }
+}
+
+/// A rate metric.
+///
+/// Used to determine the proportion of things via two counts:
+/// * A numerator defining the number of times something happened,
+/// * A denominator counting the number of times something could have happened.
+///
+/// Both numerator and denominator can only be incremented, not decremented.
+#[derive(Clone, Debug)]
+pub struct RateMetric {
+    meta: CommonMetricDataInternal,
+}
+
+impl MetricType for RateMetric {
+    fn meta(&self) -> &CommonMetricDataInternal {
+        &self.meta
+    }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl RateMetric {
+    /// Creates a new rate metric.
+    pub fn new(meta: CommonMetricData) -> Self {
+        Self { meta: meta.into() }
+    }
+
+    /// Increases the numerator by `amount`.
+    ///
+    /// # Arguments
+    ///
+    /// * `glean` - The Glean instance this metric belongs to.
+    /// * `amount` - The amount to increase by. Should be non-negative.
+    ///
+    /// ## Notes
+    ///
+    /// Logs an error if the `amount` is negative.
+    pub fn add_to_numerator(&self, amount: i32) {
+        let metric = self.clone();
+        crate::launch_with_glean(move |glean| metric.add_to_numerator_sync(glean, amount))
+    }
+
+    #[doc(hidden)]
+    pub fn add_to_numerator_sync(&self, glean: &Glean, amount: i32) {
+        if !self.should_record(glean) {
+            return;
+        }
+
+        if amount < 0 {
+            record_error(
+                glean,
+                &self.meta,
+                ErrorType::InvalidValue,
+                format!("Added negative value {} to numerator", amount),
+                None,
+            );
+            return;
+        }
+
+        glean
+            .storage()
+            .record_with(glean, &self.meta, |old_value| match old_value {
+                Some(Metric::Rate(num, den)) => Metric::Rate(num.saturating_add(amount), den),
+                _ => Metric::Rate(amount, 0), // Denominator will show up eventually. Probably.
+            });
+    }
+
+    /// Increases the denominator by `amount`.
+ /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `amount` - The amount to increase by. Should be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is negative. + pub fn add_to_denominator(&self, amount: i32) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.add_to_denominator_sync(glean, amount)) + } + + #[doc(hidden)] + pub fn add_to_denominator_sync(&self, glean: &Glean, amount: i32) { + if !self.should_record(glean) { + return; + } + + if amount < 0 { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + format!("Added negative value {} to denominator", amount), + None, + ); + return; + } + + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::Rate(num, den)) => Metric::Rate(num, den.saturating_add(amount)), + _ => Metric::Rate(0, amount), + }); + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a pair of integers. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<Rate> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// Get current value + #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<Rate> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Rate(n, d)) => Some((n, d).into()), + _ => None, + } + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} diff --git a/third_party/rust/glean-core/src/metrics/recorded_experiment.rs b/third_party/rust/glean-core/src/metrics/recorded_experiment.rs new file mode 100644 index 0000000000..8b9dc35d98 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/recorded_experiment.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Map as JsonMap, Value as JsonValue}; + +/// Deserialized experiment data. +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct RecordedExperiment { + /// The experiment's branch as set through [`set_experiment_active`](crate::glean_set_experiment_active). + pub branch: String, + /// Any extra data associated with this experiment through [`set_experiment_active`](crate::glean_set_experiment_active). + /// Note: `Option` required to keep backwards-compatibility. 
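+    /// When set, the experiment serializes as e.g.
+    /// `{"branch": "treatment", "extra": {"bucket": "a"}}` (hypothetical
+    /// values); when `None`, the `extra` key is omitted entirely (see
+    /// [`RecordedExperiment::as_json`]).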
+    pub extra: Option<HashMap<String, String>>,
+}
+
+impl RecordedExperiment {
+    /// Gets the recorded experiment data as a JSON value.
+    ///
+    /// For JSON, we don't want to include `{"extra": null}` -- we just want to skip
+    /// `extra` entirely. Unfortunately, we can't use a serde field annotation for this,
+    /// since that would break bincode serialization, which doesn't support skipping
+    /// fields. Therefore, we use a custom serialization function just for JSON here.
+    pub fn as_json(&self) -> JsonValue {
+        let mut value = JsonMap::new();
+        value.insert("branch".to_string(), json!(self.branch));
+        if self.extra.is_some() {
+            value.insert("extra".to_string(), json!(self.extra));
+        }
+        JsonValue::Object(value)
+    }
+}
diff --git a/third_party/rust/glean-core/src/metrics/string.rs b/third_party/rust/glean-core/src/metrics/string.rs
new file mode 100644
index 0000000000..5ed7b2c7f1
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/string.rs
@@ -0,0 +1,176 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{test_get_num_recorded_errors, ErrorType};
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::util::truncate_string_at_boundary_with_error;
+use crate::CommonMetricData;
+use crate::Glean;
+
+const MAX_LENGTH_VALUE: usize = 100;
+
+/// A string metric.
+///
+/// Records a Unicode string value with arbitrary content.
+/// Strings are length-limited to `MAX_LENGTH_VALUE` bytes.
+#[derive(Clone, Debug)]
+pub struct StringMetric {
+    meta: Arc<CommonMetricDataInternal>,
+}
+
+impl MetricType for StringMetric {
+    fn meta(&self) -> &CommonMetricDataInternal {
+        &self.meta
+    }
+
+    fn with_name(&self, name: String) -> Self {
+        let mut meta = (*self.meta).clone();
+        meta.inner.name = name;
+        Self {
+            meta: Arc::new(meta),
+        }
+    }
+
+    fn with_dynamic_label(&self, label: String) -> Self {
+        let mut meta = (*self.meta).clone();
+        meta.inner.dynamic_label = Some(label);
+        Self {
+            meta: Arc::new(meta),
+        }
+    }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl StringMetric {
+    /// Creates a new string metric.
+    pub fn new(meta: CommonMetricData) -> Self {
+        Self {
+            meta: Arc::new(meta.into()),
+        }
+    }
+
+    /// Sets to the specified value.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The string to set the metric to.
+    ///
+    /// ## Notes
+    ///
+    /// Truncates the value if it is longer than `MAX_LENGTH_VALUE` bytes and logs an error.
+    pub fn set(&self, value: String) {
+        let metric = self.clone();
+        crate::launch_with_glean(move |glean| metric.set_sync(glean, &value))
+    }
+
+    /// Sets to the specified value synchronously.
+    #[doc(hidden)]
+    pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) {
+        if !self.should_record(glean) {
+            return;
+        }
+
+        let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_LENGTH_VALUE);
+
+        let value = Metric::String(s);
+        glean.storage().record(glean, &self.meta, &value)
+    }
+
+    /// Gets the currently stored value as a string, or `None` if there is no value.
+ #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<String> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::String(s)) => Some(s), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<String> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test_get_num_recorded_errors; + use crate::tests::new_glean; + use crate::util::truncate_string_at_boundary; + use crate::ErrorType; + use crate::Lifetime; + + #[test] + fn setting_a_long_string_records_an_error() { + let (glean, _t) = new_glean(None); + + let metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let sample_string = "0123456789".repeat(11); + metric.set_sync(&glean, sample_string.clone()); + + let truncated = truncate_string_at_boundary(sample_string, MAX_LENGTH_VALUE); + assert_eq!(truncated, metric.get_value(&glean, "store1").unwrap()); + + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow) + .unwrap() + ); + } +} diff --git a/third_party/rust/glean-core/src/metrics/string_list.rs b/third_party/rust/glean-core/src/metrics/string_list.rs new file mode 100644 index 0000000000..ab8657d3a5 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/string_list.rs @@ -0,0 +1,199 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +// Maximum length of any list +const MAX_LIST_LENGTH: usize = 20; +// Maximum length of any string in the list +const MAX_STRING_LENGTH: usize = 50; + +/// A string list metric. +/// +/// This allows appending a string value with arbitrary content to a list. 
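+///
+/// Lists are capped at `MAX_LIST_LENGTH` entries and each entry is truncated
+/// to `MAX_STRING_LENGTH` bytes, recording an error in either case. A usage
+/// sketch (the metric name is illustrative):
+///
+/// ```rust,ignore
+/// engines.add("duckduckgo".into());
+/// engines.set(vec!["google".into(), "bing".into()]);
+/// ```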
+#[derive(Clone, Debug)] +pub struct StringListMetric { + meta: Arc<CommonMetricDataInternal>, +} + +impl MetricType for StringListMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl StringListMetric { + /// Creates a new string list metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { + meta: Arc::new(meta.into()), + } + } + + /// Adds a new string to the list. + /// + /// # Arguments + /// + /// * `value` - The string to add. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_STRING_LENGTH` bytes and logs an error. + pub fn add(&self, value: String) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.add_sync(glean, value)) + } + + /// Adds a new string to the list synchronously + #[doc(hidden)] + pub fn add_sync<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let value = + truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_STRING_LENGTH); + let mut error = None; + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::StringList(mut old_value)) => { + if old_value.len() == MAX_LIST_LENGTH { + let msg = format!( + "String list length of {} exceeds maximum of {}", + old_value.len() + 1, + MAX_LIST_LENGTH + ); + error = Some(msg); + } else { + old_value.push(value.clone()); + } + Metric::StringList(old_value) + } + _ => Metric::StringList(vec![value.clone()]), + }); + + if let Some(msg) = error { + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + } + } + + /// Sets to a specific list of strings. + /// + /// # Arguments + /// + /// * `value` - The list of string to set the metric to. + /// + /// ## Notes + /// + /// If passed an empty list, records an error and returns. + /// + /// Truncates the list if it is longer than `MAX_LIST_LENGTH` and logs an error. + /// + /// Truncates any value in the list if it is longer than `MAX_STRING_LENGTH` and logs an error. + pub fn set(&self, values: Vec<String>) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_sync(glean, values)) + } + + /// Sets to a specific list of strings synchronously. + #[doc(hidden)] + pub fn set_sync(&self, glean: &Glean, value: Vec<String>) { + if !self.should_record(glean) { + return; + } + + let value = if value.len() > MAX_LIST_LENGTH { + let msg = format!( + "StringList length {} exceeds maximum of {}", + value.len(), + MAX_LIST_LENGTH + ); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + value[0..MAX_LIST_LENGTH].to_vec() + } else { + value + }; + + let value = value + .into_iter() + .map(|elem| { + truncate_string_at_boundary_with_error(glean, &self.meta, elem, MAX_STRING_LENGTH) + }) + .collect(); + + let value = Metric::StringList(value); + glean.storage().record(glean, &self.meta, &value); + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored values. + /// + /// This doesn't clear the stored value. 
+ #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<Vec<String>> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::StringList(values)) => Some(values), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored values. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<Vec<String>> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} diff --git a/third_party/rust/glean-core/src/metrics/text.rs b/third_party/rust/glean-core/src/metrics/text.rs new file mode 100644 index 0000000000..06ad5c0d78 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/text.rs @@ -0,0 +1,180 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{test_get_num_recorded_errors, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +// The maximum number of characters for text. +const MAX_LENGTH_VALUE: usize = 200 * 1024; + +/// A text metric. +/// +/// Records a single long Unicode text, +/// used when the limits on `String` are too low. +/// Text is length-limited to `MAX_LENGTH_VALUE` bytes. +#[derive(Clone, Debug)] +pub struct TextMetric { + meta: Arc<CommonMetricDataInternal>, +} + +impl MetricType for TextMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } + + fn with_name(&self, name: String) -> Self { + let mut meta = (*self.meta).clone(); + meta.inner.name = name; + Self { + meta: Arc::new(meta), + } + } + + fn with_dynamic_label(&self, label: String) -> Self { + let mut meta = (*self.meta).clone(); + meta.inner.dynamic_label = Some(label); + Self { + meta: Arc::new(meta), + } + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl TextMetric { + /// Creates a new text metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { + meta: Arc::new(meta.into()), + } + } + + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `value` - The text to set the metric to. 
+ /// + /// ## Notes + /// + /// Truncates the value (at codepoint boundaries) if it is longer than `MAX_LENGTH_VALUE` bytes + /// and logs an error. + pub fn set(&self, value: String) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_sync(glean, &value)) + } + + /// Sets to the specified value synchronously, + /// truncating and recording an error if longer than `MAX_LENGTH_VALUE`. + #[doc(hidden)] + pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_LENGTH_VALUE); + + let value = Metric::Text(s); + glean.storage().record(glean, &self.meta, &value) + } + + /// Gets the currently-stored value as a string, or None if there is no value. + #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<String> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Text(s)) => Some(s), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<String> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test_get_num_recorded_errors; + use crate::tests::new_glean; + use crate::util::truncate_string_at_boundary; + use crate::ErrorType; + use crate::Lifetime; + + #[test] + fn setting_a_long_string_records_an_error() { + let (glean, _t) = new_glean(None); + + let metric = TextMetric::new(CommonMetricData { + name: "text_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let sample_string = "0123456789".repeat(200 * 1024); + metric.set_sync(&glean, sample_string.clone()); + + let truncated = truncate_string_at_boundary(sample_string, MAX_LENGTH_VALUE); + assert_eq!(truncated, metric.get_value(&glean, "store1").unwrap()); + + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow) + .unwrap() + ); + } +} diff --git a/third_party/rust/glean-core/src/metrics/time_unit.rs b/third_party/rust/glean-core/src/metrics/time_unit.rs new file mode 100644 index 0000000000..6d61a8a242 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/time_unit.rs @@ -0,0 +1,117 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::convert::TryFrom; +use std::time::Duration; + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, ErrorKind}; + +/// Different resolutions supported by the time related +/// metric types (e.g. DatetimeMetric). +#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +#[repr(i32)] // use i32 to be compatible with our JNA definition +pub enum TimeUnit { + /// Truncate to nanosecond precision. + Nanosecond, + /// Truncate to microsecond precision. + Microsecond, + /// Truncate to millisecond precision. + Millisecond, + /// Truncate to second precision. + Second, + /// Truncate to minute precision. + Minute, + /// Truncate to hour precision. + Hour, + /// Truncate to day precision. + Day, +} + +impl TimeUnit { + /// Formats the given time unit, truncating the time if needed. + pub fn format_pattern(self) -> &'static str { + use TimeUnit::*; + match self { + Nanosecond => "%Y-%m-%dT%H:%M:%S%.f%:z", + Microsecond => "%Y-%m-%dT%H:%M:%S%.6f%:z", + Millisecond => "%Y-%m-%dT%H:%M:%S%.3f%:z", + Second => "%Y-%m-%dT%H:%M:%S%:z", + Minute => "%Y-%m-%dT%H:%M%:z", + Hour => "%Y-%m-%dT%H%:z", + Day => "%Y-%m-%d%:z", + } + } + + /// Converts a duration to the requested time unit. + /// + /// # Arguments + /// + /// * `duration` - the duration to convert. + /// + /// # Returns + /// + /// The integer representation of the converted duration. + pub fn duration_convert(self, duration: Duration) -> u64 { + use TimeUnit::*; + match self { + Nanosecond => duration.as_nanos() as u64, + Microsecond => duration.as_micros() as u64, + Millisecond => duration.as_millis() as u64, + Second => duration.as_secs(), + Minute => duration.as_secs() / 60, + Hour => duration.as_secs() / 60 / 60, + Day => duration.as_secs() / 60 / 60 / 24, + } + } + + /// Converts a duration in the given unit to nanoseconds. + /// + /// # Arguments + /// + /// * `duration` - the duration to convert. + /// + /// # Returns + /// + /// The integer representation of the nanosecond duration. + pub fn as_nanos(self, duration: u64) -> u64 { + use TimeUnit::*; + let duration = match self { + Nanosecond => Duration::from_nanos(duration), + Microsecond => Duration::from_micros(duration), + Millisecond => Duration::from_millis(duration), + Second => Duration::from_secs(duration), + Minute => Duration::from_secs(duration * 60), + Hour => Duration::from_secs(duration * 60 * 60), + Day => Duration::from_secs(duration * 60 * 60 * 24), + }; + + duration.as_nanos() as u64 + } +} + +/// Trait implementation for converting an integer value to a TimeUnit. +/// +/// This is used in the FFI code. +/// +/// Please note that values should match the ordering of the +/// platform specific side of things (e.g. Kotlin implementation). 
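+///
+/// A small illustrative sketch of the mapping (values taken from the match below):
+///
+/// ```ignore
+/// use std::convert::TryFrom;
+/// assert_eq!(TimeUnit::Second, TimeUnit::try_from(3).unwrap());
+/// assert!(TimeUnit::try_from(7).is_err()); // out of range
+/// ```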
+impl TryFrom<i32> for TimeUnit {
+ type Error = Error;
+
+ fn try_from(value: i32) -> Result<TimeUnit, Self::Error> {
+ match value {
+ 0 => Ok(TimeUnit::Nanosecond),
+ 1 => Ok(TimeUnit::Microsecond),
+ 2 => Ok(TimeUnit::Millisecond),
+ 3 => Ok(TimeUnit::Second),
+ 4 => Ok(TimeUnit::Minute),
+ 5 => Ok(TimeUnit::Hour),
+ 6 => Ok(TimeUnit::Day),
+ e => Err(ErrorKind::TimeUnit(e).into()),
+ }
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/timespan.rs b/third_party/rust/glean-core/src/metrics/timespan.rs
new file mode 100644
index 0000000000..b4d3bd5902
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/timespan.rs
@@ -0,0 +1,308 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::convert::TryInto;
+use std::sync::{Arc, RwLock};
+use std::time::Duration;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::time_unit::TimeUnit;
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+/// A timespan metric.
+///
+/// Timespans are used to make a measurement of how much time is spent in a particular task.
+///
+// Implementation note:
+// Because we dispatch this, we handle this with interior mutability.
+// The whole struct is clonable, but that's comparably cheap, as it does not clone the data.
+// Cloning `CommonMetricData` is not free, as it contains strings, so we also wrap that in an Arc.
+#[derive(Clone, Debug)]
+pub struct TimespanMetric {
+ meta: Arc<CommonMetricDataInternal>,
+ time_unit: TimeUnit,
+ start_time: Arc<RwLock<Option<u64>>>,
+}
+
+impl MetricType for TimespanMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl TimespanMetric {
+ /// Creates a new timespan metric.
+ pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ time_unit,
+ start_time: Arc::new(RwLock::new(None)),
+ }
+ }
+
+ /// Starts tracking time for the provided metric.
+ ///
+ /// This records an error if it's already tracking time (i.e. start was
+ /// already called with no corresponding
+ /// [`set_stop`](TimespanMetric::set_stop)): in that case the original start
+ /// time will be preserved.
+ pub fn start(&self) {
+ let start_time = time::precise_time_ns();
+
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.set_start(glean, start_time));
+ }
+
+ /// Set start time synchronously.
+ #[doc(hidden)]
+ pub fn set_start(&self, glean: &Glean, start_time: u64) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ let mut lock = self
+ .start_time
+ .write()
+ .expect("Lock poisoned for timespan metric on start.");
+
+ if lock.is_some() {
+ record_error(
+ glean,
+ &self.meta,
+ ErrorType::InvalidState,
+ "Timespan already started",
+ None,
+ );
+ return;
+ }
+
+ *lock = Some(start_time);
+ }
+
+ /// Stops tracking time for the provided metric. Sets the metric to the elapsed time.
+ ///
+ /// This will record an error if no [`set_start`](TimespanMetric::set_start) was called.
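+ ///
+ /// A typical measurement pairs this with [`start`](TimespanMetric::start)
+ /// (illustrative sketch; metric construction elided):
+ ///
+ /// ```ignore
+ /// timespan.start();
+ /// // ... the task being measured ...
+ /// timespan.stop();
+ /// ```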
+ pub fn stop(&self) { + let stop_time = time::precise_time_ns(); + + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_stop(glean, stop_time)); + } + + /// Set stop time synchronously. + #[doc(hidden)] + pub fn set_stop(&self, glean: &Glean, stop_time: u64) { + // Need to write in either case, so get the lock first. + let mut lock = self + .start_time + .write() + .expect("Lock poisoned for timespan metric on stop."); + + if !self.should_record(glean) { + // Reset timer when disabled, so that we don't record timespans across + // disabled/enabled toggling. + *lock = None; + return; + } + + if lock.is_none() { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan not running", + None, + ); + return; + } + + let start_time = lock.take().unwrap(); + let duration = match stop_time.checked_sub(start_time) { + Some(duration) => duration, + None => { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + "Timespan was negative", + None, + ); + return; + } + }; + let duration = Duration::from_nanos(duration); + self.set_raw_inner(glean, duration); + } + + /// Aborts a previous [`set_start`](TimespanMetric::set_start) call. No + /// error is recorded if no [`set_start`](TimespanMetric::set_start) was + /// called. + pub fn cancel(&self) { + let metric = self.clone(); + crate::dispatcher::launch(move || { + let mut lock = metric + .start_time + .write() + .expect("Lock poisoned for timespan metric on cancel."); + *lock = None; + }); + } + + /// Explicitly sets the timespan value. + /// + /// This API should only be used if your library or application requires + /// recording times in a way that can not make use of + /// [`set_start`](TimespanMetric::set_start)/[`set_stop`](TimespanMetric::set_stop)/[`cancel`](TimespanMetric::cancel). + /// + /// Care should be taken using this if the ping lifetime might contain more + /// than one timespan measurement. To be safe, + /// [`set_raw`](TimespanMetric::set_raw) should generally be followed by + /// sending a custom ping containing the timespan. + /// + /// # Arguments + /// + /// * `elapsed` - The elapsed time to record. + pub fn set_raw(&self, elapsed: Duration) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_raw_sync(glean, elapsed)); + } + + /// Explicitly sets the timespan value in nanoseconds. + /// + /// This API should only be used if your library or application requires + /// recording times in a way that can not make use of + /// [`set_start`](TimespanMetric::set_start)/[`set_stop`](TimespanMetric::set_stop)/[`cancel`](TimespanMetric::cancel). + /// + /// Care should be taken using this if the ping lifetime might contain more + /// than one timespan measurement. To be safe, + /// [`set_raw`](TimespanMetric::set_raw) should generally be followed by + /// sending a custom ping containing the timespan. + /// + /// # Arguments + /// + /// * `elapsed_nanos` - The elapsed time to record, in nanoseconds. + pub fn set_raw_nanos(&self, elapsed_nanos: i64) { + let elapsed = Duration::from_nanos(elapsed_nanos.try_into().unwrap_or(0)); + self.set_raw(elapsed) + } + + /// Explicitly sets the timespan value synchronously. 
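+ ///
+ /// For example (a sketch, assuming a `glean` handle as in this crate's tests):
+ ///
+ /// ```ignore
+ /// use std::time::Duration;
+ /// timespan.set_raw_sync(&glean, Duration::from_millis(250));
+ /// ```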
+ #[doc(hidden)] + pub fn set_raw_sync(&self, glean: &Glean, elapsed: Duration) { + if !self.should_record(glean) { + return; + } + + let lock = self + .start_time + .read() + .expect("Lock poisoned for timespan metric on set_raw."); + + if lock.is_some() { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan already running. Raw value not recorded.", + None, + ); + return; + } + + self.set_raw_inner(glean, elapsed); + } + + fn set_raw_inner(&self, glean: &Glean, elapsed: Duration) { + let mut report_value_exists: bool = false; + glean.storage().record_with(glean, &self.meta, |old_value| { + match old_value { + Some(old @ Metric::Timespan(..)) => { + // If some value already exists, report an error. + // We do this out of the storage since recording an + // error accesses the storage as well. + report_value_exists = true; + old + } + _ => Metric::Timespan(elapsed, self.time_unit), + } + }); + + if report_value_exists { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan value already recorded. New value discarded.", + None, + ); + }; + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<i64> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| { + self.get_value(glean, ping_name.as_deref()).map(|val| { + val.try_into() + .expect("Timespan can't be represented as i64") + }) + }) + } + + /// Get the current value + #[doc(hidden)] + pub fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<u64> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Timespan(time, time_unit)) => Some(time_unit.duration_convert(time)), + _ => None, + } + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} diff --git a/third_party/rust/glean-core/src/metrics/timing_distribution.rs b/third_party/rust/glean-core/src/metrics/timing_distribution.rs new file mode 100644 index 0000000000..e339ef8882 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/timing_distribution.rs @@ -0,0 +1,557 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::histogram::{Functional, Histogram};
+use crate::metrics::time_unit::TimeUnit;
+use crate::metrics::{DistributionData, Metric, MetricType};
+use crate::storage::StorageManager;
+use crate::CommonMetricData;
+use crate::Glean;
+
+// The base of the logarithm used to determine bucketing
+const LOG_BASE: f64 = 2.0;
+
+// The number of buckets per order of magnitude of the logarithm.
+const BUCKETS_PER_MAGNITUDE: f64 = 8.0;
+
+// Maximum time, which means we retain a maximum of 316 buckets.
+// It is automatically adjusted based on the `time_unit` parameter
+// so that:
+//
+// - `nanosecond` - 10 minutes
+// - `microsecond` - ~6.94 days
+// - `millisecond` - ~19 years
+const MAX_SAMPLE_TIME: u64 = 1000 * 1000 * 1000 * 60 * 10;
+
+/// Identifier for a running timer.
+///
+/// Its internals are considered private,
+/// but due to UniFFI's behavior we expose its field for now.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub struct TimerId {
+ /// This timer's id.
+ pub id: u64,
+}
+
+impl From<u64> for TimerId {
+ fn from(val: u64) -> TimerId {
+ TimerId { id: val }
+ }
+}
+
+impl From<usize> for TimerId {
+ fn from(val: usize) -> TimerId {
+ TimerId { id: val as u64 }
+ }
+}
+
+/// A timing distribution metric.
+///
+/// Timing distributions are used to accumulate and store time measurements, for analyzing distributions of the timing data.
+#[derive(Clone, Debug)]
+pub struct TimingDistributionMetric {
+ meta: Arc<CommonMetricDataInternal>,
+ time_unit: TimeUnit,
+ next_id: Arc<AtomicUsize>,
+ start_times: Arc<Mutex<HashMap<TimerId, u64>>>,
+}
+
+/// Create a snapshot of the histogram with a time unit.
+///
+/// The snapshot can be serialized into the payload format.
+pub(crate) fn snapshot(hist: &Histogram<Functional>) -> DistributionData {
+ DistributionData {
+ // **Caution**: This cannot use `Histogram::snapshot_values` and needs to use the more
+ // specialized snapshot function.
+ values: hist
+ .snapshot()
+ .into_iter()
+ .map(|(k, v)| (k as i64, v as i64))
+ .collect(),
+ sum: hist.sum() as i64,
+ count: hist.count() as i64,
+ }
+}
+
+impl MetricType for TimingDistributionMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl TimingDistributionMetric {
+ /// Creates a new timing distribution metric.
+ pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ time_unit,
+ next_id: Arc::new(AtomicUsize::new(0)),
+ start_times: Arc::new(Mutex::new(Default::default())),
+ }
+ }
+
+ /// Starts tracking time for the provided metric.
+ ///
+ /// This records an error if it’s already tracking time (i.e.
+ /// [`set_start`](TimingDistributionMetric::set_start) was already called with no
+ /// corresponding [`set_stop_and_accumulate`](TimingDistributionMetric::set_stop_and_accumulate)): in
+ /// that case the original start time will be preserved.
+ ///
+ /// # Returns
+ ///
+ /// A unique [`TimerId`] for the new timer.
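+ ///
+ /// Multiple timers may run concurrently (illustrative sketch):
+ ///
+ /// ```ignore
+ /// let id1 = distribution.start();
+ /// let id2 = distribution.start();
+ /// distribution.stop_and_accumulate(id1); // this sample is recorded
+ /// distribution.cancel(id2); // this one is discarded
+ /// ```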
+ pub fn start(&self) -> TimerId { + let start_time = time::precise_time_ns(); + let id = self.next_id.fetch_add(1, Ordering::SeqCst).into(); + let metric = self.clone(); + crate::launch_with_glean(move |_glean| metric.set_start(id, start_time)); + id + } + + pub(crate) fn start_sync(&self) -> TimerId { + let start_time = time::precise_time_ns(); + let id = self.next_id.fetch_add(1, Ordering::SeqCst).into(); + let metric = self.clone(); + metric.set_start(id, start_time); + id + } + + /// **Test-only API (exported for testing purposes).** + /// + /// Set start time for this metric synchronously. + /// + /// Use [`start`](Self::start) instead. + #[doc(hidden)] + pub fn set_start(&self, id: TimerId, start_time: u64) { + let mut map = self.start_times.lock().expect("can't lock timings map"); + map.insert(id, start_time); + } + + /// Stops tracking time for the provided metric and associated timer id. + /// + /// Adds a count to the corresponding bucket in the timing distribution. + /// This will record an error if no + /// [`set_start`](TimingDistributionMetric::set_start) was called. + /// + /// # Arguments + /// + /// * `id` - The [`TimerId`] to associate with this timing. This allows + /// for concurrent timing of events associated with different ids to the + /// same timespan metric. + /// * `stop_time` - Timestamp in nanoseconds. + pub fn stop_and_accumulate(&self, id: TimerId) { + let stop_time = time::precise_time_ns(); + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_stop_and_accumulate(glean, id, stop_time)); + } + + fn set_stop(&self, id: TimerId, stop_time: u64) -> Result<u64, (ErrorType, &str)> { + let mut start_times = self.start_times.lock().expect("can't lock timings map"); + let start_time = match start_times.remove(&id) { + Some(start_time) => start_time, + None => return Err((ErrorType::InvalidState, "Timing not running")), + }; + + let duration = match stop_time.checked_sub(start_time) { + Some(duration) => duration, + None => { + return Err(( + ErrorType::InvalidValue, + "Timer stopped with negative duration", + )) + } + }; + + Ok(duration) + } + + /// **Test-only API (exported for testing purposes).** + /// + /// Set stop time for this metric synchronously. + /// + /// Use [`stop_and_accumulate`](Self::stop_and_accumulate) instead. + #[doc(hidden)] + pub fn set_stop_and_accumulate(&self, glean: &Glean, id: TimerId, stop_time: u64) { + if !self.should_record(glean) { + let mut start_times = self.start_times.lock().expect("can't lock timings map"); + start_times.remove(&id); + return; + } + + // Duration is in nanoseconds. + let mut duration = match self.set_stop(id, stop_time) { + Err((err_type, err_msg)) => { + record_error(glean, &self.meta, err_type, err_msg, None); + return; + } + Ok(duration) => duration, + }; + + let min_sample_time = self.time_unit.as_nanos(1); + let max_sample_time = self.time_unit.as_nanos(MAX_SAMPLE_TIME); + + duration = if duration < min_sample_time { + // If measurement is less than the minimum, just truncate. This is + // not recorded as an error. 
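+ // (For example, with `TimeUnit::Millisecond` the minimum sample is
+ // 1 ms in nanoseconds, so any shorter measurement is clamped up to 1 ms.)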
+ min_sample_time
+ } else if duration > max_sample_time {
+ let msg = format!(
+ "Sample is longer than the max for a time_unit of {:?} ({} ns)",
+ self.time_unit, max_sample_time
+ );
+ record_error(glean, &self.meta, ErrorType::InvalidOverflow, msg, None);
+ max_sample_time
+ } else {
+ duration
+ };
+
+ if !self.should_record(glean) {
+ return;
+ }
+
+ // Let's be defensive here:
+ // The uploader tries to store some timing distribution metrics,
+ // but in tests that storage might be gone already.
+ // Let's just ignore those.
+ // We do the same for counters.
+ // This should never happen in real app usage.
+ if let Some(storage) = glean.storage_opt() {
+ storage.record_with(glean, &self.meta, |old_value| match old_value {
+ Some(Metric::TimingDistribution(mut hist)) => {
+ hist.accumulate(duration);
+ Metric::TimingDistribution(hist)
+ }
+ _ => {
+ let mut hist = Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE);
+ hist.accumulate(duration);
+ Metric::TimingDistribution(hist)
+ }
+ });
+ } else {
+ log::warn!(
+ "Couldn't get storage. Can't record timing distribution '{}'.",
+ self.meta.base_identifier()
+ );
+ }
+ }
+
+ /// Aborts a previous [`start`](Self::start) call.
+ ///
+ /// No error is recorded if no [`start`](Self::start) was called.
+ ///
+ /// # Arguments
+ ///
+ /// * `id` - The [`TimerId`] to associate with this timing. This allows
+ /// for concurrent timing of events associated with different ids to the
+ /// same timing distribution metric.
+ pub fn cancel(&self, id: TimerId) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |_glean| metric.cancel_sync(id));
+ }
+
+ /// Aborts a previous [`start`](Self::start) call synchronously.
+ pub(crate) fn cancel_sync(&self, id: TimerId) {
+ let mut map = self.start_times.lock().expect("can't lock timings map");
+ map.remove(&id);
+ }
+
+ /// Accumulates the provided signed samples in the metric.
+ ///
+ /// This is required so that the platform-specific code can provide us with
+ /// 64 bit signed integers if no `u64` comparable type is available. This
+ /// will take care of filtering and reporting errors for any provided negative
+ /// sample.
+ ///
+ /// Please note that this assumes that the provided samples are already in
+ /// the "unit" declared by the instance of the metric type (e.g. if the
+ /// instance this method was called on is using [`TimeUnit::Second`], then
+ /// `samples` are assumed to be in that unit).
+ ///
+ /// # Arguments
+ ///
+ /// * `samples` - The vector holding the samples to be recorded by the metric.
+ ///
+ /// ## Notes
+ ///
+ /// Discards any negative value in `samples` and reports an [`ErrorType::InvalidValue`]
+ /// for each of them. Reports an [`ErrorType::InvalidOverflow`] error for samples that
+ /// are longer than `MAX_SAMPLE_TIME`.
+ pub fn accumulate_samples(&self, samples: Vec<i64>) {
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.accumulate_samples_sync(glean, samples))
+ }
+
+ /// **Test-only API (exported for testing purposes).**
+ /// Accumulates the provided signed samples in the metric.
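+ ///
+ /// A sketch of the public entry point this backs (values hypothetical;
+ /// with `TimeUnit::Millisecond` these are 10 ms, 20 ms and one invalid sample):
+ ///
+ /// ```ignore
+ /// distribution.accumulate_samples(vec![10, 20, -5]);
+ /// ```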
+ /// + /// Use [`accumulate_samples`](Self::accumulate_samples) + #[doc(hidden)] + pub fn accumulate_samples_sync(&self, glean: &Glean, samples: Vec<i64>) { + if !self.should_record(glean) { + return; + } + + let mut num_negative_samples = 0; + let mut num_too_long_samples = 0; + let max_sample_time = self.time_unit.as_nanos(MAX_SAMPLE_TIME); + + glean.storage().record_with(glean, &self.meta, |old_value| { + let mut hist = match old_value { + Some(Metric::TimingDistribution(hist)) => hist, + _ => Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE), + }; + + for &sample in samples.iter() { + if sample < 0 { + num_negative_samples += 1; + } else { + let mut sample = sample as u64; + + // Check the range prior to converting the incoming unit to + // nanoseconds, so we can compare against the constant + // MAX_SAMPLE_TIME. + if sample == 0 { + sample = 1; + } else if sample > MAX_SAMPLE_TIME { + num_too_long_samples += 1; + sample = MAX_SAMPLE_TIME; + } + + sample = self.time_unit.as_nanos(sample); + + hist.accumulate(sample); + } + } + + Metric::TimingDistribution(hist) + }); + + if num_negative_samples > 0 { + let msg = format!("Accumulated {} negative samples", num_negative_samples); + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + msg, + num_negative_samples, + ); + } + + if num_too_long_samples > 0 { + let msg = format!( + "{} samples are longer than the maximum of {}", + num_too_long_samples, max_sample_time + ); + record_error( + glean, + &self.meta, + ErrorType::InvalidOverflow, + msg, + num_too_long_samples, + ); + } + } + + /// Accumulates the provided samples in the metric. + /// + /// # Arguments + /// + /// * `samples` - A list of samples recorded by the metric. + /// Samples must be in nanoseconds. + /// ## Notes + /// + /// Reports an [`ErrorType::InvalidOverflow`] error for samples that + /// are longer than `MAX_SAMPLE_TIME`. + pub fn accumulate_raw_samples_nanos(&self, samples: Vec<u64>) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| { + metric.accumulate_raw_samples_nanos_sync(glean, &samples) + }) + } + + /// **Test-only API (exported for testing purposes).** + /// + /// Accumulates the provided samples in the metric. + /// + /// Use [`accumulate_raw_samples_nanos`](Self::accumulate_raw_samples_nanos) instead. + #[doc(hidden)] + pub fn accumulate_raw_samples_nanos_sync(&self, glean: &Glean, samples: &[u64]) { + if !self.should_record(glean) { + return; + } + + let mut num_too_long_samples = 0; + let min_sample_time = self.time_unit.as_nanos(1); + let max_sample_time = self.time_unit.as_nanos(MAX_SAMPLE_TIME); + + glean.storage().record_with(glean, &self.meta, |old_value| { + let mut hist = match old_value { + Some(Metric::TimingDistribution(hist)) => hist, + _ => Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE), + }; + + for &sample in samples.iter() { + let mut sample = sample; + + if sample < min_sample_time { + sample = min_sample_time; + } else if sample > max_sample_time { + num_too_long_samples += 1; + sample = max_sample_time; + } + + // `sample` is in nanoseconds. + hist.accumulate(sample); + } + + Metric::TimingDistribution(hist) + }); + + if num_too_long_samples > 0 { + let msg = format!( + "{} samples are longer than the maximum of {}", + num_too_long_samples, max_sample_time + ); + record_error( + glean, + &self.meta, + ErrorType::InvalidOverflow, + msg, + num_too_long_samples, + ); + } + } + + /// Gets the currently stored value as an integer. 
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<DistributionData> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::TimingDistribution(hist)) => Some(snapshot(&hist)),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a [`DistributionData`].
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<DistributionData> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn can_snapshot() {
+ use serde_json::json;
+
+ let mut hist = Histogram::functional(2.0, 8.0);
+
+ for i in 1..=10 {
+ hist.accumulate(i);
+ }
+
+ let snap = snapshot(&hist);
+
+ let expected_json = json!({
+ "sum": 55,
+ "values": {
+ "1": 1,
+ "2": 1,
+ "3": 1,
+ "4": 1,
+ "5": 1,
+ "6": 1,
+ "7": 1,
+ "8": 1,
+ "9": 1,
+ "10": 1,
+ "11": 0,
+ },
+ });
+
+ assert_eq!(expected_json, json!(snap));
+ }
+
+ #[test]
+ fn can_snapshot_sparse() {
+ use serde_json::json;
+
+ let mut hist = Histogram::functional(2.0, 8.0);
+
+ hist.accumulate(1024);
+ hist.accumulate(1024);
+ hist.accumulate(1116);
+ hist.accumulate(1448);
+
+ let snap = snapshot(&hist);
+
+ let expected_json = json!({
+ "sum": 4612,
+ "values": {
+ "1024": 2,
+ "1116": 1,
+ "1217": 0,
+ "1327": 0,
+ "1448": 1,
+ "1579": 0,
+ },
+ });
+
+ assert_eq!(expected_json, json!(snap));
+ }
+}
diff --git a/third_party/rust/glean-core/src/metrics/url.rs b/third_party/rust/glean-core/src/metrics/url.rs
new file mode 100644
index 0000000000..c9eb824a3e
--- /dev/null
+++ b/third_party/rust/glean-core/src/metrics/url.rs
@@ -0,0 +1,312 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
+use crate::metrics::Metric;
+use crate::metrics::MetricType;
+use crate::storage::StorageManager;
+use crate::util::truncate_string_at_boundary_with_error;
+use crate::CommonMetricData;
+use crate::Glean;
+
+// The maximum number of characters a URL Metric may have, before encoding.
+const MAX_URL_LENGTH: usize = 8192;
+
+/// A URL metric.
+///
+/// Records a Unicode string value as URL content.
+/// The URL is length-limited to `MAX_URL_LENGTH` bytes.
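+///
+/// An illustrative sketch of the validation behaviour (values hypothetical):
+///
+/// ```ignore
+/// url_metric.set("https://mozilla.org/glean"); // recorded
+/// url_metric.set("data:text/plain,hello"); // rejected: data URLs are not supported
+/// url_metric.set("glean//test"); // rejected: no valid scheme
+/// ```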
+#[derive(Clone, Debug)]
+pub struct UrlMetric {
+ meta: Arc<CommonMetricDataInternal>,
+}
+
+impl MetricType for UrlMetric {
+ fn meta(&self) -> &CommonMetricDataInternal {
+ &self.meta
+ }
+}
+
+// IMPORTANT:
+//
+// When changing this implementation, make sure all the operations are
+// also declared in the related trait in `../traits/`.
+impl UrlMetric {
+ /// Creates a new URL metric.
+ pub fn new(meta: CommonMetricData) -> Self {
+ Self {
+ meta: Arc::new(meta.into()),
+ }
+ }
+
+ fn is_valid_url_scheme(&self, value: String) -> bool {
+ let mut splits = value.split(':');
+ if let Some(scheme) = splits.next() {
+ if scheme.is_empty() {
+ return false;
+ }
+ let mut chars = scheme.chars();
+ // The list of characters allowed in the scheme is on
+ // the spec here: https://url.spec.whatwg.org/#url-scheme-string
+ return chars.next().unwrap().is_ascii_alphabetic()
+ && chars.all(|c| c.is_ascii_alphanumeric() || ['+', '-', '.'].contains(&c));
+ }
+
+ // No ':' found, this is not valid :)
+ false
+ }
+
+ /// Sets to the specified stringified URL.
+ ///
+ /// # Arguments
+ ///
+ /// * `value` - The stringified URL to set the metric to.
+ ///
+ /// ## Notes
+ ///
+ /// Truncates the value if it is longer than `MAX_URL_LENGTH` bytes and logs an error.
+ pub fn set<S: Into<String>>(&self, value: S) {
+ let value = value.into();
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.set_sync(glean, value))
+ }
+
+ /// Sets to the specified stringified URL synchronously.
+ #[doc(hidden)]
+ pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) {
+ if !self.should_record(glean) {
+ return;
+ }
+
+ let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_URL_LENGTH);
+
+ if s.starts_with("data:") {
+ record_error(
+ glean,
+ &self.meta,
+ ErrorType::InvalidValue,
+ "URL metric does not support data URLs.",
+ None,
+ );
+ return;
+ }
+
+ if !self.is_valid_url_scheme(s.clone()) {
+ let msg = format!("\"{}\" does not start with a valid URL scheme.", s);
+ record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
+ return;
+ }
+
+ let value = Metric::Url(s);
+ glean.storage().record(glean, &self.meta, &value)
+ }
+
+ #[doc(hidden)]
+ pub(crate) fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<String> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::Url(s)) => Some(s),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a string.
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<String> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test_get_num_recorded_errors; + use crate::tests::new_glean; + use crate::ErrorType; + use crate::Lifetime; + + #[test] + fn payload_is_correct() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let sample_url = "glean://test".to_string(); + metric.set_sync(&glean, sample_url.clone()); + assert_eq!(sample_url, metric.get_value(&glean, "store1").unwrap()); + } + + #[test] + fn does_not_record_url_exceeding_maximum_length() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + // MAX_URL_LENGTH. + // + // This 8-character string was chosen so we could have an even number that is + // a divisor of our MAX_URL_LENGTH. + let long_path_base = "abcdefgh"; + + // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + let test_url = format!("glean://{}", long_path_base.repeat(2000)); + metric.set_sync(&glean, test_url); + + // "glean://" is 8 characters + // "abcdefgh" (long_path_base) is 8 characters + // `long_path_base` is repeated 1023 times (8184) + // 8 + 8184 = 8192 (MAX_URL_LENGTH) + let expected = format!("glean://{}", long_path_base.repeat(1023)); + + assert_eq!(metric.get_value(&glean, "store1").unwrap(), expected); + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow) + .unwrap() + ); + } + + #[test] + fn does_not_record_data_urls() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let test_url = "data:application/json"; + metric.set_sync(&glean, test_url); + + assert!(metric.get_value(&glean, "store1").is_none()); + + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap() + ); + } + + #[test] + fn url_validation_works_and_records_errors() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let incorrects = vec![ + "", + // Scheme may only start with upper or lowercase ASCII alpha[^1] character. + // [1]: https://infra.spec.whatwg.org/#ascii-alpha + "1glean://test", + "-glean://test", + // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters. + "шеллы://test", + "g!lean://test", + "g=lean://test", + // Scheme must be followed by `:` character. 
+ "glean//test", + ]; + + let corrects = vec![ + // The minimum URL + "g:", + // Empty body is fine + "glean://", + // "//" is actually not even necessary + "glean:", + "glean:test", + "glean:test.com", + // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters. + "g-lean://test", + "g+lean://test", + "g.lean://test", + // Query parameters are fine + "glean://test?hello=world", + // Finally, some actual real world URLs + "https://infra.spec.whatwg.org/#ascii-alpha", + "https://infra.spec.whatwg.org/#ascii-alpha?test=for-glean", + ]; + + for incorrect in incorrects.clone().into_iter() { + metric.set_sync(&glean, incorrect); + assert!(metric.get_value(&glean, "store1").is_none()); + } + + assert_eq!( + incorrects.len(), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap() + as usize + ); + + for correct in corrects.into_iter() { + metric.set_sync(&glean, correct); + assert_eq!(metric.get_value(&glean, "store1").unwrap(), correct); + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/uuid.rs b/third_party/rust/glean-core/src/metrics/uuid.rs new file mode 100644 index 0000000000..e78d15ad3b --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/uuid.rs @@ -0,0 +1,159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use uuid::Uuid; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// An UUID metric. +/// +/// Stores UUID v4 (randomly generated) values. +#[derive(Clone, Debug)] +pub struct UuidMetric { + meta: Arc<CommonMetricDataInternal>, +} + +impl MetricType for UuidMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl UuidMetric { + /// Creates a new UUID metric + pub fn new(meta: CommonMetricData) -> Self { + Self { + meta: Arc::new(meta.into()), + } + } + + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `value` - The [`Uuid`] to set the metric to. + pub fn set(&self, value: String) { + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_sync(glean, &value)) + } + + /// Sets to the specified value synchronously. + #[doc(hidden)] + pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let value = value.into(); + + if let Ok(uuid) = uuid::Uuid::parse_str(&value) { + let value = Metric::Uuid(uuid.as_hyphenated().to_string()); + glean.storage().record(glean, &self.meta, &value) + } else { + let msg = format!("Unexpected UUID value '{}'", value); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + } + } + + /// Sets to the specified value, from a string. + /// + /// This should only be used from FFI. When calling directly from Rust, it + /// is better to use [`set`](UuidMetric::set). + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The [`Uuid`] to set the metric to. 
+ #[doc(hidden)]
+ pub fn set_from_uuid_sync(&self, glean: &Glean, value: Uuid) {
+ self.set_sync(glean, value.to_string())
+ }
+
+ /// Generates a new random [`Uuid`] and sets the metric to it.
+ pub fn generate_and_set(&self) -> String {
+ let uuid = Uuid::new_v4();
+
+ let value = uuid.to_string();
+ let metric = self.clone();
+ crate::launch_with_glean(move |glean| metric.set_sync(glean, value));
+
+ uuid.to_string()
+ }
+
+ /// Generates a new random [`Uuid`] and sets the metric to it synchronously.
+ #[doc(hidden)]
+ pub fn generate_and_set_sync(&self, storage: &Glean) -> Uuid {
+ let uuid = Uuid::new_v4();
+ self.set_sync(storage, uuid.to_string());
+ uuid
+ }
+
+ /// Gets the currently-stored value, or `None` if there is no value.
+ #[doc(hidden)]
+ pub fn get_value<'a, S: Into<Option<&'a str>>>(
+ &self,
+ glean: &Glean,
+ ping_name: S,
+ ) -> Option<Uuid> {
+ let queried_ping_name = ping_name
+ .into()
+ .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
+
+ match StorageManager.snapshot_metric_for_test(
+ glean.storage(),
+ queried_ping_name,
+ &self.meta.identifier(glean),
+ self.meta.inner.lifetime,
+ ) {
+ Some(Metric::Uuid(uuid)) => Uuid::parse_str(&uuid).ok(),
+ _ => None,
+ }
+ }
+
+ /// **Test-only API (exported for FFI purposes).**
+ ///
+ /// Gets the currently stored value as a string.
+ ///
+ /// This doesn't clear the stored value.
+ pub fn test_get_value(&self, ping_name: Option<String>) -> Option<String> {
+ crate::block_on_dispatcher();
+ crate::core::with_glean(|glean| {
+ self.get_value(glean, ping_name.as_deref())
+ .map(|uuid| uuid.to_string())
+ })
+ }
+
+ /// **Exported for test purposes.**
+ ///
+ /// Gets the number of recorded errors for the given metric and error type.
+ ///
+ /// # Arguments
+ ///
+ /// * `error` - The type of error
+ /// * `ping_name` - represents the optional name of the ping to retrieve the
+ /// metric for. Defaults to the first value in `send_in_pings`.
+ ///
+ /// # Returns
+ ///
+ /// The number of errors reported.
+ pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
+ crate::block_on_dispatcher();
+
+ crate::core::with_glean(|glean| {
+ test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
+ })
+ }
+}
diff --git a/third_party/rust/glean-core/src/ping/mod.rs b/third_party/rust/glean-core/src/ping/mod.rs
new file mode 100644
index 0000000000..6b342baf5c
--- /dev/null
+++ b/third_party/rust/glean-core/src/ping/mod.rs
@@ -0,0 +1,391 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Ping collection, assembly & submission.
+
+use std::fs::{create_dir_all, File};
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+use log::info;
+use serde_json::{json, Value as JsonValue};
+
+use crate::common_metric_data::{CommonMetricData, Lifetime};
+use crate::metrics::{CounterMetric, DatetimeMetric, Metric, MetricType, PingType, TimeUnit};
+use crate::storage::{StorageManager, INTERNAL_STORAGE};
+use crate::upload::HeaderMap;
+use crate::util::{get_iso_time_string, local_now_with_offset};
+use crate::{Glean, Result, DELETION_REQUEST_PINGS_DIRECTORY, PENDING_PINGS_DIRECTORY};
+
+/// Holds everything you need to store or send a ping.
+pub struct Ping<'a> {
+ /// The unique document id.
+ pub doc_id: &'a str,
+ /// The ping's name.
+ pub name: &'a str,
+ /// The path on the server to use when uploading this ping.
+ pub url_path: &'a str,
+ /// The payload, including `*_info` fields.
+ pub content: JsonValue,
+ /// The headers to upload with the payload.
+ pub headers: HeaderMap,
+}
+
+/// Collect a ping's data, assemble it into its full payload and store it on disk.
+pub struct PingMaker;
+
+fn merge(a: &mut JsonValue, b: &JsonValue) {
+ match (a, b) {
+ (&mut JsonValue::Object(ref mut a), JsonValue::Object(b)) => {
+ for (k, v) in b {
+ merge(a.entry(k.clone()).or_insert(JsonValue::Null), v);
+ }
+ }
+ (a, b) => {
+ *a = b.clone();
+ }
+ }
+}
+
+impl Default for PingMaker {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl PingMaker {
+ /// Creates a new [`PingMaker`].
+ pub fn new() -> Self {
+ Self
+ }
+
+ /// Gets, and then increments, the sequence number for a given ping.
+ fn get_ping_seq(&self, glean: &Glean, storage_name: &str) -> usize {
+ // Sequence numbers are stored as a counter under a name that includes the storage name
+ let seq = CounterMetric::new(CommonMetricData {
+ name: format!("{}#sequence", storage_name),
+ // We don't need a category, the name is already unique
+ category: "".into(),
+ send_in_pings: vec![INTERNAL_STORAGE.into()],
+ lifetime: Lifetime::User,
+ ..Default::default()
+ });
+
+ let current_seq = match StorageManager.snapshot_metric(
+ glean.storage(),
+ INTERNAL_STORAGE,
+ &seq.meta().identifier(glean),
+ seq.meta().inner.lifetime,
+ ) {
+ Some(Metric::Counter(i)) => i,
+ _ => 0,
+ };
+
+ // Increase to next sequence id
+ seq.add_sync(glean, 1);
+
+ current_seq as usize
+ }
+
+ /// Gets the formatted start and end times for this ping and updates them for the next ping.
+ fn get_start_end_times(&self, glean: &Glean, storage_name: &str) -> (String, String) {
+ let time_unit = TimeUnit::Minute;
+
+ let start_time = DatetimeMetric::new(
+ CommonMetricData {
+ name: format!("{}#start", storage_name),
+ category: "".into(),
+ send_in_pings: vec![INTERNAL_STORAGE.into()],
+ lifetime: Lifetime::User,
+ ..Default::default()
+ },
+ time_unit,
+ );
+
+ // "start_time" is the time the ping was last generated.
+ // If not available, we use the date the Glean object was initialized.
+ let start_time_data = start_time
+ .get_value(glean, INTERNAL_STORAGE)
+ .unwrap_or_else(|| glean.start_time());
+ let end_time_data = local_now_with_offset();
+
+ // Update the start time with the current time.
+ start_time.set_sync_chrono(glean, end_time_data);
+
+ // Format the times.
+ let start_time_data = get_iso_time_string(start_time_data, time_unit);
+ let end_time_data = get_iso_time_string(end_time_data, time_unit);
+ (start_time_data, end_time_data)
+ }
+
+ fn get_ping_info(&self, glean: &Glean, storage_name: &str, reason: Option<&str>) -> JsonValue {
+ let (start_time, end_time) = self.get_start_end_times(glean, storage_name);
+ let mut map = json!({
+ "seq": self.get_ping_seq(glean, storage_name),
+ "start_time": start_time,
+ "end_time": end_time,
+ });
+
+ if let Some(reason) = reason {
+ map.as_object_mut()
+ .unwrap() // safe unwrap, we created the object above
+ .insert("reason".to_string(), JsonValue::String(reason.to_string()));
+ };
+
+ // Get the experiment data, if available.
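+ // (When present, this adds an "experiments" object alongside "seq",
+ // "start_time" and "end_time" in the ping_info section.)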
+ if let Some(experiment_data) =
+ StorageManager.snapshot_experiments_as_json(glean.storage(), INTERNAL_STORAGE)
+ {
+ map.as_object_mut()
+ .unwrap() // safe unwrap, we created the object above
+ .insert("experiments".to_string(), experiment_data);
+ };
+
+ map
+ }
+
+ fn get_client_info(&self, glean: &Glean, include_client_id: bool) -> JsonValue {
+ // Add the "telemetry_sdk_build", which is the glean-core version.
+ let mut map = json!({
+ "telemetry_sdk_build": crate::GLEAN_VERSION,
+ });
+
+ // Flatten the whole thing.
+ if let Some(client_info) =
+ StorageManager.snapshot_as_json(glean.storage(), "glean_client_info", true)
+ {
+ let client_info_obj = client_info.as_object().unwrap(); // safe unwrap, snapshot always returns an object.
+ for (_key, value) in client_info_obj {
+ merge(&mut map, value);
+ }
+ } else {
+ log::warn!("Empty client info data.");
+ }
+
+ if !include_client_id {
+ // safe unwrap, we created the object above
+ map.as_object_mut().unwrap().remove("client_id");
+ }
+
+ json!(map)
+ }
+
+ /// Build the headers to be persisted and sent with a ping.
+ ///
+ /// Currently the only headers we persist are `X-Debug-ID` and `X-Source-Tags`.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - the [`Glean`] instance to collect headers from.
+ ///
+ /// # Returns
+ ///
+ /// A map of header names to header values.
+ /// Might be empty if there are no extra headers to send.
+ fn get_headers(&self, glean: &Glean) -> HeaderMap {
+ let mut headers_map = HeaderMap::new();
+
+ if let Some(debug_view_tag) = glean.debug_view_tag() {
+ headers_map.insert("X-Debug-ID".to_string(), debug_view_tag.to_string());
+ }
+
+ if let Some(source_tags) = glean.source_tags() {
+ headers_map.insert("X-Source-Tags".to_string(), source_tags.join(","));
+ }
+
+ headers_map
+ }
+
+ /// Collects a snapshot for the given ping from storage and attaches required meta information.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - the [`Glean`] instance to collect data from.
+ /// * `ping` - the ping to collect for.
+ /// * `reason` - an optional reason code to include in the ping.
+ /// * `doc_id` - the ping's unique document identifier.
+ /// * `url_path` - the path on the server to upload this ping to.
+ ///
+ /// # Returns
+ ///
+ /// A fully assembled representation of the ping payload and associated metadata.
+ /// If there is no data stored for the ping, `None` is returned.
+ pub fn collect<'a>(
+ &self,
+ glean: &Glean,
+ ping: &'a PingType,
+ reason: Option<&str>,
+ doc_id: &'a str,
+ url_path: &'a str,
+ ) -> Option<Ping<'a>> {
+ info!("Collecting {}", ping.name());
+
+ let metrics_data = StorageManager.snapshot_as_json(glean.storage(), ping.name(), true);
+ let events_data = glean
+ .event_storage()
+ .snapshot_as_json(glean, ping.name(), true);
+
+ let is_empty = metrics_data.is_none() && events_data.is_none();
+ if !ping.send_if_empty() && is_empty {
+ info!("Storage for {} empty. Bailing out.", ping.name());
+ return None;
+ } else if ping.name() == "events" && events_data.is_none() {
+ info!("No events for 'events' ping. Bailing out.");
+ return None;
+ } else if is_empty {
+ info!(
+ "Storage for {} empty. Ping will still be sent.",
+ ping.name()
+ );
+ }
+
+ let ping_info = self.get_ping_info(glean, ping.name(), reason);
+ let client_info = self.get_client_info(glean, ping.include_client_id());
+
+ let mut json = json!({
+ "ping_info": ping_info,
+ "client_info": client_info
+ });
+ let json_obj = json.as_object_mut()?;
+ if let Some(metrics_data) = metrics_data {
+ json_obj.insert("metrics".to_string(), metrics_data);
+ }
+ if let Some(events_data) = events_data {
+ json_obj.insert("events".to_string(), events_data);
+ }
+
+ Some(Ping {
+ content: json,
+ name: ping.name(),
+ doc_id,
+ url_path,
+ headers: self.get_headers(glean),
+ })
+ }
+
+ /// Collects a snapshot for the given ping from storage and attaches required meta information.
+ ///
+ /// # Arguments
+ ///
+ /// * `glean` - the [`Glean`] instance to collect data from.
+ /// * `ping` - the ping to collect for.
+ /// * `reason` - an optional reason code to include in the ping.
+ ///
+ /// # Returns
+ ///
+ /// A fully assembled ping payload as a JSON-encoded string.
+ /// If there is no data stored for the ping, `None` is returned.
+ pub fn collect_string(
+ &self,
+ glean: &Glean,
+ ping: &PingType,
+ reason: Option<&str>,
+ ) -> Option<String> {
+ self.collect(glean, ping, reason, "", "")
+ .map(|ping| ::serde_json::to_string_pretty(&ping.content).unwrap())
+ }
+
+ /// Gets the path to a directory for ping storage.
+ ///
+ /// The directory will be created inside the `data_path`.
+ /// The `pings` directory (and its parents) is created if it does not exist.
+ fn get_pings_dir(&self, data_path: &Path, ping_type: Option<&str>) -> std::io::Result<PathBuf> {
+ // Use a special directory for deletion-request pings
+ let pings_dir = match ping_type {
+ Some(ping_type) if ping_type == "deletion-request" => {
+ data_path.join(DELETION_REQUEST_PINGS_DIRECTORY)
+ }
+ _ => data_path.join(PENDING_PINGS_DIRECTORY),
+ };
+
+ create_dir_all(&pings_dir)?;
+ Ok(pings_dir)
+ }
+
+ /// Gets the path to a directory for temporary storage.
+ ///
+ /// The directory will be created inside the `data_path`.
+ /// The `tmp` directory (and its parents) is created if it does not exist.
+ fn get_tmp_dir(&self, data_path: &Path) -> std::io::Result<PathBuf> {
+ let pings_dir = data_path.join("tmp");
+ create_dir_all(&pings_dir)?;
+ Ok(pings_dir)
+ }
+
+ /// Stores a ping to disk in the pings directory.
+ pub fn store_ping(&self, data_path: &Path, ping: &Ping) -> std::io::Result<()> {
+ let pings_dir = self.get_pings_dir(data_path, Some(ping.name))?;
+ let temp_dir = self.get_tmp_dir(data_path)?;
+
+ // Write to a temporary location and then move when done,
+ // for transactional writes.
+ let temp_ping_path = temp_dir.join(ping.doc_id);
+ let ping_path = pings_dir.join(ping.doc_id);
+
+ log::debug!(
+ "Storing ping '{}' at '{}'",
+ ping.doc_id,
+ ping_path.display()
+ );
+
+ {
+ let mut file = File::create(&temp_ping_path)?;
+ file.write_all(ping.url_path.as_bytes())?;
+ file.write_all(b"\n")?;
+ file.write_all(::serde_json::to_string(&ping.content)?.as_bytes())?;
+ if !ping.headers.is_empty() {
+ file.write_all(b"\n{\"headers\":")?;
+ file.write_all(::serde_json::to_string(&ping.headers)?.as_bytes())?;
+ file.write_all(b"}")?;
+ }
+ }
+
+ if let Err(e) = std::fs::rename(&temp_ping_path, &ping_path) {
+ log::warn!(
+ "Unable to move '{}' to '{}'",
+ temp_ping_path.display(),
+ ping_path.display()
+ );
+ return Err(e);
+ }
+
+ Ok(())
+ }
+
+ /// Clears any pending pings in the queue.
+ pub fn clear_pending_pings(&self, data_path: &Path) -> Result<()> { + let pings_dir = self.get_pings_dir(data_path, None)?; + + std::fs::remove_dir_all(&pings_dir)?; + create_dir_all(&pings_dir)?; + + log::debug!("All pending pings deleted"); + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::tests::new_glean; + + #[test] + fn sequence_numbers_should_be_reset_when_toggling_uploading() { + let (mut glean, _t) = new_glean(None); + let ping_maker = PingMaker::new(); + + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(1, ping_maker.get_ping_seq(&glean, "custom")); + + glean.set_upload_enabled(false); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + + glean.set_upload_enabled(true); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(1, ping_maker.get_ping_seq(&glean, "custom")); + } +} diff --git a/third_party/rust/glean-core/src/scheduler.rs b/third_party/rust/glean-core/src/scheduler.rs new file mode 100644 index 0000000000..30fc956e25 --- /dev/null +++ b/third_party/rust/glean-core/src/scheduler.rs @@ -0,0 +1,560 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! # Metrics Ping Scheduler +//! +//! The Metrics Ping Scheduler (MPS) is responsible for scheduling "metrics" pings. +//! It implements the spec described in +//! [the docs](https://mozilla.github.io/glean/book/user/pings/metrics.html#scheduling) + +use crate::metrics::{DatetimeMetric, StringMetric, TimeUnit}; +use crate::storage::INTERNAL_STORAGE; +use crate::util::local_now_with_offset; +use crate::{CommonMetricData, Glean, Lifetime}; +use chrono::prelude::*; +use chrono::Duration; +use once_cell::sync::Lazy; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread::JoinHandle; + +const SCHEDULED_HOUR: u32 = 4; + +// Clippy thinks an AtomicBool would be preferred, but Condvar requires a full Mutex. +// See https://github.com/rust-lang/rust-clippy/issues/1516 +#[allow(clippy::mutex_atomic)] +static TASK_CONDVAR: Lazy<Arc<(Mutex<bool>, Condvar)>> = + Lazy::new(|| Arc::new((Mutex::new(false), Condvar::new()))); + +/// Describes the interface for a submitter of "metrics" pings. +/// Used to decouple the implementation so we can test it. +trait MetricsPingSubmitter { + /// Submits a metrics ping, updating the last sent time to `now` + /// (which might not be _right now_ due to processing delays (or in tests)) + fn submit_metrics_ping(&self, glean: &Glean, reason: Option<&str>, now: DateTime<FixedOffset>); +} + +/// Describes the interface for a scheduler of "metrics" pings. +/// Used to decouple the implementation so we can test it. +trait MetricsPingScheduler { + /// Begins a recurring schedule of "metrics" ping submissions, on another thread. + /// `now` is used with `when` to determine the first schedule interval and + /// may not be _right now_ due to processing delays (or in tests). + fn start_scheduler( + &self, + submitter: impl MetricsPingSubmitter + Send + 'static, + now: DateTime<FixedOffset>, + when: When, + ); +} + +/// Uses Glean to submit "metrics" pings directly. 
+struct GleanMetricsPingSubmitter {} +impl MetricsPingSubmitter for GleanMetricsPingSubmitter { + fn submit_metrics_ping(&self, glean: &Glean, reason: Option<&str>, now: DateTime<FixedOffset>) { + glean.submit_ping_by_name("metrics", reason); + // Always update the collection date, irrespective of the ping being sent. + get_last_sent_time_metric().set_sync_chrono(glean, now); + } +} + +/// Schedule "metrics" pings directly using the default behaviour. +struct GleanMetricsPingScheduler {} +impl MetricsPingScheduler for GleanMetricsPingScheduler { + fn start_scheduler( + &self, + submitter: impl MetricsPingSubmitter + Send + 'static, + now: DateTime<FixedOffset>, + when: When, + ) { + start_scheduler(submitter, now, when); + } +} + +/// Performs startup checks to decide when to schedule the next "metrics" ping collection. +/// **Must** be called before draining the preinit queue. +/// (We're at the Language Bindings' mercy for that) +pub fn schedule(glean: &Glean) { + let now = local_now_with_offset(); + + let (cancelled_lock, _condvar) = &**TASK_CONDVAR; + if *cancelled_lock.lock().unwrap() { + log::debug!("Told to schedule, but already cancelled. Are we in a test?"); + } + *cancelled_lock.lock().unwrap() = false; // Uncancel the thread. + + let submitter = GleanMetricsPingSubmitter {}; + let scheduler = GleanMetricsPingScheduler {}; + + schedule_internal(glean, submitter, scheduler, now) +} + +/// Tells the scheduler task to exit quickly and cleanly. +pub fn cancel() { + let (cancelled_lock, condvar) = &**TASK_CONDVAR; // One `*` for Lazy, the second for Arc + *cancelled_lock.lock().unwrap() = true; // Cancel the scheduler thread. + condvar.notify_all(); // Notify any/all listening schedulers to check whether they were cancelled. +} + +fn schedule_internal( + glean: &Glean, + submitter: impl MetricsPingSubmitter + Send + 'static, + scheduler: impl MetricsPingScheduler, + now: DateTime<FixedOffset>, +) { + let last_sent_build_metric = get_last_sent_build_metric(); + if let Some(last_sent_build) = last_sent_build_metric.get_value(glean, Some(INTERNAL_STORAGE)) { + // If `app_build` is longer than StringMetric's max length, we will always + // treat it as a changed build when really it isn't. + // This will be externally-observable as InvalidOverflow errors on both the core + // `client_info.app_build` metric and the scheduler's internal metric. + if last_sent_build != glean.app_build { + last_sent_build_metric.set_sync(glean, &glean.app_build); + log::info!("App build changed. Sending 'metrics' ping"); + submitter.submit_metrics_ping(glean, Some("upgrade"), now); + scheduler.start_scheduler(submitter, now, When::Reschedule); + return; + } + } else { + // No value in last_sent_build. Better set one. + last_sent_build_metric.set_sync(glean, &glean.app_build); + } + + let last_sent_time = get_last_sent_time_metric().get_value(glean, INTERNAL_STORAGE); + if let Some(last_sent) = last_sent_time { + log::info!("The 'metrics' ping was last sent on {}", last_sent); + } + + // We aim to cover 3 cases here: + // + // 1. The ping was already collected on the current calendar day; + // only schedule one for collection on the next calendar day at the due time. + // 2. The ping was NOT collected on the current calendar day AND we're later + // than today's due time; collect the ping immediately. + // 3. The ping was NOT collected on the current calendar day BUT we still have + // some time to the due time; schedule for submitting the current calendar day. 
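To make the three cases concrete, here is a self-contained sketch of the same decision (the `Case` enum and `decide` function are illustrative only, not part of glean-core; they mirror the logic that follows):

```rust
use chrono::{DateTime, FixedOffset};

enum Case {
    ScheduleTomorrow,        // Case #1: already collected today
    SubmitNowThenReschedule, // Case #2: overdue, submit immediately
    ScheduleToday,           // Case #3: due later today
}

fn decide(
    last_sent: Option<DateTime<FixedOffset>>,
    now: DateTime<FixedOffset>,
    scheduled_hour: u32,
) -> Case {
    let already_sent_today = last_sent.map_or(false, |d| d.date() == now.date());
    if already_sent_today {
        Case::ScheduleTomorrow
    } else if now > now.date().and_hms(scheduled_hour, 0, 0) {
        Case::SubmitNowThenReschedule
    } else {
        Case::ScheduleToday
    }
}
```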
+ + let already_sent_today = last_sent_time.map_or(false, |d| d.date() == now.date()); + if already_sent_today { + // Case #1 + log::info!("The 'metrics' ping was already sent today, {}", now); + scheduler.start_scheduler(submitter, now, When::Tomorrow); + } else if now > now.date().and_hms(SCHEDULED_HOUR, 0, 0) { + // Case #2 + log::info!("Sending the 'metrics' ping immediately, {}", now); + submitter.submit_metrics_ping(glean, Some("overdue"), now); + scheduler.start_scheduler(submitter, now, When::Reschedule); + } else { + // Case #3 + log::info!("The 'metrics' collection is scheduled for today, {}", now); + scheduler.start_scheduler(submitter, now, When::Today); + } +} + +/// "metrics" ping scheduling deadlines. +#[derive(Debug, PartialEq)] +enum When { + Today, + Tomorrow, + Reschedule, +} + +impl When { + /// Returns the duration from now until our deadline. + /// Note that std::time::Duration doesn't do negative time spans, so if + /// our deadline has passed, this will return zero. + fn until(&self, now: DateTime<FixedOffset>) -> std::time::Duration { + let fire_date = match self { + Self::Today => now.date().and_hms(SCHEDULED_HOUR, 0, 0), + // Doesn't actually save us from being an hour off on DST because + // chrono doesn't know when DST changes. : ( + Self::Tomorrow | Self::Reschedule => { + (now.date() + Duration::days(1)).and_hms(SCHEDULED_HOUR, 0, 0) + } + }; + // After rust-lang/rust#73544 can use std::time::Duration::ZERO + (fire_date - now) + .to_std() + .unwrap_or_else(|_| std::time::Duration::from_millis(0)) + } + + /// The "metrics" ping reason corresponding to our deadline. + fn reason(&self) -> &'static str { + match self { + Self::Today => "today", + Self::Tomorrow => "tomorrow", + Self::Reschedule => "reschedule", + } + } +} + +fn start_scheduler( + submitter: impl MetricsPingSubmitter + Send + 'static, + now: DateTime<FixedOffset>, + when: When, +) -> JoinHandle<()> { + let pair = Arc::clone(&TASK_CONDVAR); + std::thread::Builder::new() + .name("glean.mps".into()) + .spawn(move || { + let (cancelled_lock, condvar) = &*pair; + let mut when = when; + let mut now = now; + loop { + let dur = when.until(now); + log::info!("Scheduling for {:?} after {}, reason {:?}", dur, now, when); + let mut timed_out = false; + { + match condvar.wait_timeout_while(cancelled_lock.lock().unwrap(), dur, |cancelled| !*cancelled) { + Err(err) => { + log::warn!("Condvar wait failure. MPS exiting. {}", err); + break; + } + Ok((cancelled, wait_result)) => { + if *cancelled { + log::info!("Metrics Ping Scheduler cancelled. Exiting."); + break; + } else if wait_result.timed_out() { + // Can't get the global glean while holding cancelled's lock. + timed_out = true; + } else { + // This should be impossible. `cancelled_lock` is acquired, and + // `!*cancelled` is checked by the condvar before it is allowed + // to return from `wait_timeout_while` (I checked). + // So `Ok(_)` implies `*cancelled || wait_result.timed_out`. + log::warn!("Spurious wakeup of the MPS condvar should be impossible."); + } + } + } + } + // Safety: + // We are okay dropping the condvar's cancelled lock here because it only guards + // whether we're cancelled, and we've established that we weren't when we timed out. + // We might _now_ be cancelled at any time, in which case when we loop back over + // we'll immediately exit. But first we need to submit our "metrics" ping. 
+ if timed_out { + log::info!("Time to submit our metrics ping, {:?}", when); + let glean = crate::core::global_glean().expect("Global Glean not present when trying to send scheduled 'metrics' ping?!").lock().unwrap(); + submitter.submit_metrics_ping(&glean, Some(when.reason()), now); + when = When::Reschedule; + } + now = local_now_with_offset(); + } + }).expect("Unable to spawn Metrics Ping Scheduler thread.") +} + +fn get_last_sent_time_metric() -> DatetimeMetric { + DatetimeMetric::new( + CommonMetricData { + name: "last_sent_time".into(), + category: "mps".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::User, + ..Default::default() + }, + TimeUnit::Minute, + ) +} + +fn get_last_sent_build_metric() -> StringMetric { + StringMetric::new(CommonMetricData { + name: "last_sent_build".into(), + category: "mps".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::User, + ..Default::default() + }) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::tests::new_glean; + use std::sync::atomic::{AtomicU32, Ordering}; + + struct ValidatingSubmitter<F: Fn(DateTime<FixedOffset>, Option<&str>)> { + submit_validator: F, + validator_run_count: Arc<AtomicU32>, + } + struct ValidatingScheduler<F: Fn(DateTime<FixedOffset>, When)> { + schedule_validator: F, + validator_run_count: Arc<AtomicU32>, + } + impl<F: Fn(DateTime<FixedOffset>, Option<&str>)> MetricsPingSubmitter for ValidatingSubmitter<F> { + fn submit_metrics_ping( + &self, + _glean: &Glean, + reason: Option<&str>, + now: DateTime<FixedOffset>, + ) { + (self.submit_validator)(now, reason); + self.validator_run_count.fetch_add(1, Ordering::Relaxed); + } + } + impl<F: Fn(DateTime<FixedOffset>, When)> MetricsPingScheduler for ValidatingScheduler<F> { + fn start_scheduler( + &self, + _submitter: impl MetricsPingSubmitter + Send + 'static, + now: DateTime<FixedOffset>, + when: When, + ) { + (self.schedule_validator)(now, when); + self.validator_run_count.fetch_add(1, Ordering::Relaxed); + } + } + + fn new_proxies< + F1: Fn(DateTime<FixedOffset>, Option<&str>), + F2: Fn(DateTime<FixedOffset>, When), + >( + submit_validator: F1, + schedule_validator: F2, + ) -> ( + ValidatingSubmitter<F1>, + Arc<AtomicU32>, + ValidatingScheduler<F2>, + Arc<AtomicU32>, + ) { + let submitter_count = Arc::new(AtomicU32::new(0)); + let submitter = ValidatingSubmitter { + submit_validator, + validator_run_count: Arc::clone(&submitter_count), + }; + let scheduler_count = Arc::new(AtomicU32::new(0)); + let scheduler = ValidatingScheduler { + schedule_validator, + validator_run_count: Arc::clone(&scheduler_count), + }; + (submitter, submitter_count, scheduler, scheduler_count) + } + + // Ensure on first run that we actually set the last sent build metric. 
+ // (and that we send an "overdue" ping if it's after the scheduled hour) + #[test] + fn first_run_last_sent_build() { + let (mut glean, _t) = new_glean(None); + + glean.app_build = "a build".into(); + let lsb_metric = get_last_sent_build_metric(); + assert_eq!(None, lsb_metric.get_value(&glean, Some(INTERNAL_STORAGE))); + + let fake_now = FixedOffset::east(0) + .ymd(2022, 11, 15) + .and_hms(SCHEDULED_HOUR, 0, 1); + + let (submitter, submitter_count, scheduler, scheduler_count) = new_proxies( + |_, reason| assert_eq!(reason, Some("overdue")), + |_, when| assert_eq!(when, When::Reschedule), + ); + + schedule_internal(&glean, submitter, scheduler, fake_now); + assert_eq!(1, submitter_count.swap(0, Ordering::Relaxed)); + assert_eq!(1, scheduler_count.swap(0, Ordering::Relaxed)); + + assert_eq!( + Some(glean.app_build.to_string()), + lsb_metric.get_value(&glean, Some(INTERNAL_STORAGE)) + ); + } + + // Ensure that if we have a different build, we immediately submit an "upgrade" ping + // and schedule a "reschedule" ping for tomorrow. + #[test] + fn different_app_builds_submit_and_reschedule() { + let (mut glean, _t) = new_glean(None); + + glean.app_build = "a build".into(); + get_last_sent_build_metric().set_sync(&glean, "a different build"); + + let (submitter, submitter_count, scheduler, scheduler_count) = new_proxies( + |_, reason| assert_eq!(reason, Some("upgrade")), + |_, when| assert_eq!(when, When::Reschedule), + ); + + schedule_internal(&glean, submitter, scheduler, local_now_with_offset()); + assert_eq!(1, submitter_count.swap(0, Ordering::Relaxed)); + assert_eq!(1, scheduler_count.swap(0, Ordering::Relaxed)); + } + + // If we've already sent a ping today, ensure we don't send a ping but we + // do schedule a ping for tomorrow. ("Case #1" in schedule_internal) + #[test] + fn case_1_no_submit_but_schedule_tomorrow() { + let (glean, _t) = new_glean(None); + + let fake_now = FixedOffset::east(0).ymd(2021, 4, 30).and_hms(14, 36, 14); + get_last_sent_time_metric().set_sync_chrono(&glean, fake_now); + + let (submitter, submitter_count, scheduler, scheduler_count) = new_proxies( + |_, reason| panic!("Case #1 shouldn't submit a ping! reason: {:?}", reason), + |_, when| assert_eq!(when, When::Tomorrow), + ); + schedule_internal(&glean, submitter, scheduler, fake_now); + assert_eq!(0, submitter_count.swap(0, Ordering::Relaxed)); + assert_eq!(1, scheduler_count.swap(0, Ordering::Relaxed)); + } + + // If we haven't sent a ping today and we're after the scheduled time, + // ensure we send a ping and then schedule a "reschedule" ping for tomorrow. + // ("Case #2" in schedule_internal) + #[test] + fn case_2_submit_ping_and_reschedule() { + let (glean, _t) = new_glean(None); + + let fake_yesterday = FixedOffset::east(0) + .ymd(2021, 4, 29) + .and_hms(SCHEDULED_HOUR, 0, 1); + get_last_sent_time_metric().set_sync_chrono(&glean, fake_yesterday); + let fake_now = fake_yesterday + Duration::days(1); + + let (submitter, submitter_count, scheduler, scheduler_count) = new_proxies( + |_, reason| assert_eq!(reason, Some("overdue")), + |_, when| assert_eq!(when, When::Reschedule), + ); + schedule_internal(&glean, submitter, scheduler, fake_now); + assert_eq!(1, submitter_count.swap(0, Ordering::Relaxed)); + assert_eq!(1, scheduler_count.swap(0, Ordering::Relaxed)); + } + + // If we haven't sent a ping today and we're before the scheduled time, + // ensure we don't send a ping but schedule a "today" ping for today. 
+ // ("Case #3" in schedule_internal) + #[test] + fn case_3_no_submit_but_schedule_today() { + let (glean, _t) = new_glean(None); + + let fake_yesterday = + FixedOffset::east(0) + .ymd(2021, 4, 29) + .and_hms(SCHEDULED_HOUR - 1, 0, 1); + get_last_sent_time_metric().set_sync_chrono(&glean, fake_yesterday); + let fake_now = fake_yesterday + Duration::days(1); + + let (submitter, submitter_count, scheduler, scheduler_count) = new_proxies( + |_, reason| panic!("Case #3 shouldn't submit a ping! reason: {:?}", reason), + |_, when| assert_eq!(when, When::Today), + ); + schedule_internal(&glean, submitter, scheduler, fake_now); + assert_eq!(0, submitter_count.swap(0, Ordering::Relaxed)); + assert_eq!(1, scheduler_count.swap(0, Ordering::Relaxed)); + } + + // `When` is responsible for date math. Let's make sure it's correct. + #[test] + fn when_gets_at_least_some_date_math_correct() { + let now = FixedOffset::east(0).ymd(2021, 4, 30).and_hms(15, 2, 10); + // `now` is after `SCHEDULED_HOUR` so should be zero: + assert_eq!(std::time::Duration::from_secs(0), When::Today.until(now)); + // If we bring it back before `SCHEDULED_HOUR` it should give us the duration: + let earlier = now.date().and_hms(SCHEDULED_HOUR - 1, 0, 0); + assert_eq!( + std::time::Duration::from_secs(3600), + When::Today.until(earlier) + ); + + // `Tomorrow` and `Reschedule` should differ only in their `reason()` + // 46670s is 12h57m10s (aka, the time from 15:02:10 to 04:00:00 + // (when the timezone doesn't change between them)). + assert_eq!( + std::time::Duration::from_secs(46670), + When::Tomorrow.until(now) + ); + assert_eq!( + std::time::Duration::from_secs(46670), + When::Reschedule.until(now) + ); + assert_eq!(When::Tomorrow.until(now), When::Reschedule.until(now)); + assert_ne!(When::Tomorrow.reason(), When::Reschedule.reason()); + } + + // Scheduler tests mutate global state and thus must not be run in parallel. + // Otherwise one test could cancel the other. + // This Mutex aims to solve that. + static SCHEDULER_TEST_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(())); + + // The scheduler has been designed to be cancellable. Can we cancel it? + #[test] + fn cancellable_tasks_can_be_cancelled() { + // First and foremost, all scheduler tests must ensure they start uncancelled. + // Perils of having shared state. + let _test_lock = SCHEDULER_TEST_MUTEX.lock().unwrap(); + let (cancelled_lock, _condvar) = &**TASK_CONDVAR; // One `*` for Lazy, the second for Arc + *cancelled_lock.lock().unwrap() = false; + + // Pick a time at least two hours from the next scheduled submission. + // (So that this test will time out if cancellation fails). + let now = FixedOffset::east(0) + .ymd(2021, 4, 30) + .and_hms(SCHEDULED_HOUR - 2, 0, 0); + + let proxy_factory = || { + new_proxies( + |_, reason| { + panic!( + "Shouldn't submit when testing scheduler. reason: {:?}", + reason + ) + }, + |_, _| panic!("Not even using the scheduler this time."), + ) + }; + + // Test Today. + let (submitter, submitter_count, _, _) = proxy_factory(); + let handle = start_scheduler(submitter, now, When::Today); + super::cancel(); + handle.join().unwrap(); // Should complete immediately. + assert_eq!(0, submitter_count.swap(0, Ordering::Relaxed)); + + // Test Tomorrow. + let (submitter, submitter_count, _, _) = proxy_factory(); + *cancelled_lock.lock().unwrap() = false; // Uncancel. + let handle = start_scheduler(submitter, now, When::Tomorrow); + super::cancel(); + handle.join().unwrap(); // Should complete immediately. 
+ assert_eq!(0, submitter_count.swap(0, Ordering::Relaxed)); + + // Test Reschedule. + let (submitter, submitter_count, _, _) = proxy_factory(); + *cancelled_lock.lock().unwrap() = false; // Uncancel. + let handle = start_scheduler(submitter, now, When::Reschedule); + super::cancel(); + handle.join().unwrap(); // Should complete immediately. + assert_eq!(0, submitter_count.swap(0, Ordering::Relaxed)); + } + + // We're not keen to wait like the scheduler is, but we can test a quick schedule. + #[test] + fn immediate_task_runs_immediately() { + // First and foremost, all scheduler tests must ensure they start uncancelled. + // Perils of having shared state. + let _test_lock = SCHEDULER_TEST_MUTEX.lock().unwrap(); + let (cancelled_lock, _condvar) = &**TASK_CONDVAR; // One `*` for Lazy, the second for Arc + *cancelled_lock.lock().unwrap() = false; + + // We're actually going to submit a ping from the scheduler, which requires a global glean. + let (glean, _t) = new_glean(None); + assert!( + !glean.schedule_metrics_pings, + "Real schedulers not allowed in tests!" + ); + assert!(crate::core::setup_glean(glean).is_ok()); + + // We're choosing a time after SCHEDULED_HOUR so `When::Today` will give us a duration of 0. + let now = FixedOffset::east(0).ymd(2021, 4, 20).and_hms(15, 42, 0); + + let (submitter, submitter_count, _, _) = new_proxies( + move |_, reason| { + assert_eq!(reason, Some("today")); + // After submitting the ping we expect, let's cancel this scheduler so the thread exits. + // (But do it on another thread because the condvar loop is currently holding `cancelled`'s mutex) + std::thread::spawn(super::cancel); + }, + |_, _| panic!("Not using the scheduler this time."), + ); + + let handle = start_scheduler(submitter, now, When::Today); + handle.join().unwrap(); + assert_eq!(1, submitter_count.swap(0, Ordering::Relaxed)); + } +} diff --git a/third_party/rust/glean-core/src/storage/mod.rs b/third_party/rust/glean-core/src/storage/mod.rs new file mode 100644 index 0000000000..a1c17ffe5c --- /dev/null +++ b/third_party/rust/glean-core/src/storage/mod.rs @@ -0,0 +1,283 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(non_upper_case_globals)] + +//! Storage snapshotting. + +use std::collections::HashMap; + +use serde_json::{json, Value as JsonValue}; + +use crate::coverage::record_coverage; +use crate::database::Database; +use crate::metrics::Metric; +use crate::Lifetime; + +// An internal ping name, not to be touched by anything else +pub(crate) const INTERNAL_STORAGE: &str = "glean_internal_info"; + +/// Snapshot metrics from the underlying database. +pub struct StorageManager; + +/// Labeled metrics are stored as `<metric id>/<label>`. +/// They need to go into a nested object in the final snapshot. +/// +/// We therefore extract the metric id and the label from the key and construct the new object or +/// add to it. 
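For instance, two records stored under the keys `ui.clicks/toolbar` and `ui.clicks/menu` end up as one nested object under the `labeled_counter` section. A simplified, runnable sketch of the nesting step performed by the function below (using raw `serde_json` values instead of the crate's `Metric` type; names are illustrative):

```rust
use std::collections::HashMap;

use serde_json::{json, Value};

// Simplified re-implementation of the nesting step: split each
// "<metric id>/<label>" key and insert the value into a nested object.
fn nest_labeled(entries: &[(&str, Value)]) -> HashMap<String, Value> {
    let mut out: HashMap<String, Value> = HashMap::new();
    for (key, value) in entries {
        let (metric_id, label) = key.split_once('/').expect("labeled keys contain a '/'");
        out.entry(metric_id.to_string())
            .or_insert_with(|| json!({}))
            .as_object_mut()
            .unwrap() // safe: we only ever insert objects above
            .insert(label.to_string(), value.clone());
    }
    out
}

fn main() {
    let nested = nest_labeled(&[("ui.clicks/toolbar", json!(2)), ("ui.clicks/menu", json!(5))]);
    assert_eq!(nested["ui.clicks"], json!({"toolbar": 2, "menu": 5}));
}
```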
+fn snapshot_labeled_metrics( + snapshot: &mut HashMap<String, HashMap<String, JsonValue>>, + metric_id: &str, + metric: &Metric, +) { + let ping_section = format!("labeled_{}", metric.ping_section()); + let map = snapshot.entry(ping_section).or_insert_with(HashMap::new); + + // Safe unwrap, the function is only called when the id does contain a '/' + let (metric_id, label) = metric_id.split_once('/').unwrap(); + + let obj = map.entry(metric_id.into()).or_insert_with(|| json!({})); + let obj = obj.as_object_mut().unwrap(); // safe unwrap, we constructed the object above + obj.insert(label.into(), metric.as_json()); +} + +impl StorageManager { + /// Snapshots the given store and optionally clear it. + /// + /// # Arguments + /// + /// * `storage` - the database to read from. + /// * `store_name` - the store to snapshot. + /// * `clear_store` - whether to clear the data after snapshotting. + /// + /// # Returns + /// + /// The stored data in a string encoded as JSON. + /// If no data for the store exists, `None` is returned. + pub fn snapshot( + &self, + storage: &Database, + store_name: &str, + clear_store: bool, + ) -> Option<String> { + self.snapshot_as_json(storage, store_name, clear_store) + .map(|data| ::serde_json::to_string_pretty(&data).unwrap()) + } + + /// Snapshots the given store and optionally clear it. + /// + /// # Arguments + /// + /// * `storage` - the database to read from. + /// * `store_name` - the store to snapshot. + /// * `clear_store` - whether to clear the data after snapshotting. + /// + /// # Returns + /// + /// A JSON representation of the stored data. + /// If no data for the store exists, `None` is returned. + pub fn snapshot_as_json( + &self, + storage: &Database, + store_name: &str, + clear_store: bool, + ) -> Option<JsonValue> { + let mut snapshot: HashMap<String, HashMap<String, JsonValue>> = HashMap::new(); + + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + if metric_id.contains('/') { + snapshot_labeled_metrics(&mut snapshot, &metric_id, metric); + } else { + let map = snapshot + .entry(metric.ping_section().into()) + .or_insert_with(HashMap::new); + map.insert(metric_id, metric.as_json()); + } + }; + + storage.iter_store_from(Lifetime::Ping, store_name, None, &mut snapshotter); + storage.iter_store_from(Lifetime::Application, store_name, None, &mut snapshotter); + storage.iter_store_from(Lifetime::User, store_name, None, &mut snapshotter); + + if clear_store { + if let Err(e) = storage.clear_ping_lifetime_storage(store_name) { + log::warn!("Failed to clear lifetime storage: {:?}", e); + } + } + + if snapshot.is_empty() { + None + } else { + Some(json!(snapshot)) + } + } + + /// Gets the current value of a single metric identified by name. + /// + /// # Arguments + /// + /// * `storage` - The database to get data from. + /// * `store_name` - The store name to look into. + /// * `metric_id` - The full metric identifier. + /// + /// # Returns + /// + /// The decoded metric or `None` if no data is found. 
+ pub fn snapshot_metric( + &self, + storage: &Database, + store_name: &str, + metric_id: &str, + metric_lifetime: Lifetime, + ) -> Option<Metric> { + let mut snapshot: Option<Metric> = None; + + let mut snapshotter = |id: &[u8], metric: &Metric| { + let id = String::from_utf8_lossy(id).into_owned(); + if id == metric_id { + snapshot = Some(metric.clone()) + } + }; + + storage.iter_store_from(metric_lifetime, store_name, None, &mut snapshotter); + + snapshot + } + + /// Gets the current value of a single metric identified by name. + /// + /// Use this API, rather than `snapshot_metric` within the testing API, so + /// that the usage will be reported in coverage, if enabled. + /// + /// # Arguments + /// + /// * `storage` - The database to get data from. + /// * `store_name` - The store name to look into. + /// * `metric_id` - The full metric identifier. + /// + /// # Returns + /// + /// The decoded metric or `None` if no data is found. + pub fn snapshot_metric_for_test( + &self, + storage: &Database, + store_name: &str, + metric_id: &str, + metric_lifetime: Lifetime, + ) -> Option<Metric> { + record_coverage(metric_id); + self.snapshot_metric(storage, store_name, metric_id, metric_lifetime) + } + + /// Snapshots the experiments. + /// + /// # Arguments + /// + /// * `storage` - The database to get data from. + /// * `store_name` - The store name to look into. + /// + /// # Returns + /// + /// A JSON representation of the experiment data, in the following format: + /// + /// ```json + /// { + /// "experiment-id": { + /// "branch": "branch-id", + /// "extra": { + /// "additional": "property", + /// // ... + /// } + /// } + /// } + /// ``` + /// + /// If no data for the store exists, `None` is returned. + pub fn snapshot_experiments_as_json( + &self, + storage: &Database, + store_name: &str, + ) -> Option<JsonValue> { + let mut snapshot: HashMap<String, JsonValue> = HashMap::new(); + + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + if metric_id.ends_with("#experiment") { + let (name, _) = metric_id.split_once('#').unwrap(); // safe unwrap, we ensured there's a `#` in the string + snapshot.insert(name.to_string(), metric.as_json()); + } + }; + + storage.iter_store_from(Lifetime::Application, store_name, None, &mut snapshotter); + + if snapshot.is_empty() { + None + } else { + Some(json!(snapshot)) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::metrics::ExperimentMetric; + use crate::Glean; + + // Experiment's API tests: the next test comes from glean-ac's + // ExperimentsStorageEngineTest.kt. 
+ #[test] + fn test_experiments_json_serialization() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean", true); + + let extra: HashMap<String, String> = [("test-key".into(), "test-value".into())] + .iter() + .cloned() + .collect(); + + let metric = ExperimentMetric::new(&glean, "some-experiment".to_string()); + + metric.set_active_sync(&glean, "test-branch".to_string(), extra); + let snapshot = StorageManager + .snapshot_experiments_as_json(glean.storage(), "glean_internal_info") + .unwrap(); + assert_eq!( + json!({"some-experiment": {"branch": "test-branch", "extra": {"test-key": "test-value"}}}), + snapshot + ); + + metric.set_inactive_sync(&glean); + + let empty_snapshot = + StorageManager.snapshot_experiments_as_json(glean.storage(), "glean_internal_info"); + assert!(empty_snapshot.is_none()); + } + + #[test] + fn test_experiments_json_serialization_empty() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean", true); + + let metric = ExperimentMetric::new(&glean, "some-experiment".to_string()); + + metric.set_active_sync(&glean, "test-branch".to_string(), HashMap::new()); + let snapshot = StorageManager + .snapshot_experiments_as_json(glean.storage(), "glean_internal_info") + .unwrap(); + assert_eq!( + json!({"some-experiment": {"branch": "test-branch"}}), + snapshot + ); + + metric.set_inactive_sync(&glean); + + let empty_snapshot = + StorageManager.snapshot_experiments_as_json(glean.storage(), "glean_internal_info"); + assert!(empty_snapshot.is_none()); + } +} diff --git a/third_party/rust/glean-core/src/system.rs b/third_party/rust/glean-core/src/system.rs new file mode 100644 index 0000000000..eac309cf0b --- /dev/null +++ b/third_party/rust/glean-core/src/system.rs @@ -0,0 +1,82 @@ +// Copyright (c) 2017 The Rust Project Developers +// Copyright (c) 2018-2020 The Rust Secure Code Working Group +// Licensed under the MIT License. +// Original license: +// https://github.com/rustsec/rustsec/blob/2a080f173ad9d8ac7fa260f0a3a6aebf0000de06/platforms/LICENSE-MIT +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//! Detect and expose `target_os` as a constant. +//! +//! Code adopted from the "platforms" crate: <https://github.com/rustsec/rustsec/tree/2a080f173ad9d8ac7fa260f0a3a6aebf0000de06/platforms>. 
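Each constant below is selected by a `cfg` attribute, so the value is fixed at compile time. A small usage sketch (the helper is hypothetical; inside the crate the constant is reachable as `crate::system::OS`, which is how the OS name ends up in ping metadata):

```rust
/// Hypothetical helper: the OS name to report in assembled pings.
fn reported_os() -> &'static str {
    // Resolves to "Linux", "Darwin", "Windows", ... for the compilation
    // target, or "Unknown" for any target not listed below.
    crate::system::OS
}
```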
+ +#[cfg(target_os = "android")] +/// `target_os` when building this crate: `android` +pub const OS: &str = "Android"; + +#[cfg(target_os = "ios")] +/// `target_os` when building this crate: `ios` +pub const OS: &str = "iOS"; + +#[cfg(target_os = "linux")] +/// `target_os` when building this crate: `linux` +pub const OS: &str = "Linux"; + +#[cfg(target_os = "macos")] +/// `target_os` when building this crate: `macos` +pub const OS: &str = "Darwin"; + +#[cfg(target_os = "windows")] +/// `target_os` when building this crate: `windows` +pub const OS: &str = "Windows"; + +#[cfg(target_os = "freebsd")] +/// `target_os` when building this crate: `freebsd` +pub const OS: &str = "FreeBSD"; + +#[cfg(target_os = "netbsd")] +/// `target_os` when building this crate: `netbsd` +pub const OS: &str = "NetBSD"; + +#[cfg(target_os = "openbsd")] +/// `target_os` when building this crate: `openbsd` +pub const OS: &str = "OpenBSD"; + +#[cfg(target_os = "solaris")] +/// `target_os` when building this crate: `solaris` +pub const OS: &str = "Solaris"; + +#[cfg(not(any( + target_os = "android", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "windows", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "solaris", +)))] +pub const OS: &str = "Unknown"; diff --git a/third_party/rust/glean-core/src/traits/boolean.rs b/third_party/rust/glean-core/src/traits/boolean.rs new file mode 100644 index 0000000000..3c82ebe810 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/boolean.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`BooleanMetric`](crate::metrics::BooleanMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Boolean { + /// Sets to the specified boolean value. + /// + /// # Arguments + /// + /// * `value` - the value to set. + fn set(&self, value: bool); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a boolean. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<bool>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/counter.rs b/third_party/rust/glean-core/src/traits/counter.rs new file mode 100644 index 0000000000..4c95b60612 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/counter.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`CounterMetric`](crate::metrics::CounterMetric) type. 
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Counter {
+    /// Increases the counter by `amount`.
+    ///
+    /// # Arguments
+    ///
+    /// * `amount` - The amount to increase by. Should be positive.
+    ///
+    /// ## Notes
+    ///
+    /// Logs an error if the `amount` is 0 or negative.
+    fn add(&self, amount: i32);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as an integer.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<i32>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/custom_distribution.rs b/third_party/rust/glean-core/src/traits/custom_distribution.rs
new file mode 100644
index 0000000000..c0c80c028b
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/custom_distribution.rs
@@ -0,0 +1,58 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the
+/// [`CustomDistributionMetric`](crate::metrics::CustomDistributionMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait CustomDistribution {
+    /// Accumulates the provided signed samples in the metric.
+    ///
+    /// This is required so that the platform-specific code can provide us with
+    /// 64 bit signed integers if no `u64` comparable type is available. This
+    /// will take care of filtering and reporting errors for any provided negative
+    /// sample.
+    ///
+    /// # Arguments
+    ///
+    /// - `samples` - The vector holding the samples to be recorded by the metric.
+    ///
+    /// ## Notes
+    ///
+    /// Discards any negative value in `samples` and reports an
+    /// [`ErrorType::InvalidValue`](crate::ErrorType::InvalidValue) for each of
+    /// them.
+    fn accumulate_samples_signed(&self, samples: Vec<i64>);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored histogram.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<crate::metrics::DistributionData>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors recorded.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/datetime.rs b/third_party/rust/glean-core/src/traits/datetime.rs
new file mode 100644
index 0000000000..2f932a57e0
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/datetime.rs
@@ -0,0 +1,52 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#![allow(clippy::too_many_arguments)]
+
+use crate::ErrorType;
+
+/// A description for the [`DatetimeMetric`](crate::metrics::DatetimeMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Datetime {
+    /// Sets the metric to a date/time, including the timezone offset.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - Some [`Datetime`](crate::metrics::Datetime), with offset, to
+    /// set the metric to. If [`None`], the current local time is
+    /// used.
+    fn set(&self, value: Option<crate::metrics::Datetime>);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as a Datetime.
+    ///
+    /// The precision of this value is truncated to the `time_unit` precision.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<crate::metrics::Datetime>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/event.rs b/third_party/rust/glean-core/src/traits/event.rs
new file mode 100644
index 0000000000..aa84699b30
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/event.rs
@@ -0,0 +1,118 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use std::hash::Hash;
+
+use crate::event_database::RecordedEvent;
+use crate::ErrorType;
+
+/// Extra keys for events.
+///
+/// Extra keys need to be pre-defined and map to a string representation.
+///
+/// For user-defined `EventMetric`s these will be defined as `struct`s.
+/// Each extra key will be a field in that struct.
+/// Each field will correspond to an entry in the `ALLOWED_KEYS` list.
+/// The Glean SDK requires the keys as strings for submission in pings,
+/// whereas in code we want to provide users a type to work with
+/// (e.g. to avoid typos or misuse of the API).
+pub trait ExtraKeys {
+    /// List of allowed extra keys as strings.
+    const ALLOWED_KEYS: &'static [&'static str];
+
+    /// Converts the event extras into a key/value map:
+    ///
+    /// each entry maps the name of a set extra key to its value.
+    /// Unset keys will be skipped.
+    /// The allowed key names are listed in `ALLOWED_KEYS`.
+    fn into_ffi_extra(self) -> HashMap<String, String>;
+}
+
+/// Default of no extra keys for events.
+///
+/// An enum with no values for convenient use as the default set of extra keys
+/// that an [`EventMetric`](crate::metrics::EventMetric) can accept.
+///
+/// *Note*: No values exist for this enum, so it can never be instantiated.
+/// It is equivalent to the [`never / !` type](https://doc.rust-lang.org/std/primitive.never.html).
+#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
+pub enum NoExtraKeys {}
+
+impl ExtraKeys for NoExtraKeys {
+    const ALLOWED_KEYS: &'static [&'static str] = &[];
+
+    fn into_ffi_extra(self) -> HashMap<String, String> {
+        unimplemented!("non-existing extra keys can't be turned into a list")
+    }
+}
+
+/// The possible errors when parsing to an extra key.
+pub enum EventRecordingError {
+    /// The id doesn't correspond to a valid extra key
+    InvalidId,
+    /// The value doesn't correspond to a valid extra key
+    InvalidExtraKey,
+}
+
+impl TryFrom<i32> for NoExtraKeys {
+    type Error = EventRecordingError;
+
+    fn try_from(_value: i32) -> Result<Self, Self::Error> {
+        Err(EventRecordingError::InvalidExtraKey)
+    }
+}
+
+impl TryFrom<&str> for NoExtraKeys {
+    type Error = EventRecordingError;
+
+    fn try_from(_value: &str) -> Result<Self, Self::Error> {
+        Err(EventRecordingError::InvalidExtraKey)
+    }
+}
+
+/// A description for the [`EventMetric`](crate::metrics::EventMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Event {
+    /// The type of the allowed extra keys for this event.
+    type Extra: ExtraKeys;
+
+    /// Records an event.
+    ///
+    /// # Arguments
+    ///
+    /// * `extra` - (optional) An object for the extra keys.
+    fn record<M: Into<Option<Self::Extra>>>(&self, extra: M);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the vector of currently stored events for this event metric.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<Vec<RecordedEvent>>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/labeled.rs b/third_party/rust/glean-core/src/traits/labeled.rs
new file mode 100644
index 0000000000..2979ee2ee9
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/labeled.rs
@@ -0,0 +1,40 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the [`LabeledMetric`](crate::metrics::LabeledMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Labeled<T>
+where
+    T: Clone,
+{
+    /// Gets a specific metric for a given label.
+    ///
+    /// If a set of acceptable labels were specified in the `metrics.yaml` file,
+    /// and the given label is not in the set, it will be recorded under the special `OTHER_LABEL` label.
+ /// + /// If a set of acceptable labels was not specified in the `metrics.yaml` file, + /// only the first 16 unique labels will be used. + /// After that, any additional labels will be recorded under the special `OTHER_LABEL` label. + /// + /// Labels must be `snake_case` and less than 30 characters. + /// If an invalid label is used, the metric will be recorded in the special `OTHER_LABEL` label. + fn get(&self, label: &str) -> T; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/memory_distribution.rs b/third_party/rust/glean-core/src/traits/memory_distribution.rs new file mode 100644 index 0000000000..637b11ea14 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/memory_distribution.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::DistributionData; +use crate::ErrorType; + +/// A description for the +/// [`MemoryDistributionMetric`](crate::metrics::MemoryDistributionMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait MemoryDistribution { + /// Accumulates the provided sample in the metric. + /// + /// # Arguments + /// + /// * `sample` - The sample to be recorded by the metric. The sample is assumed to be in the + /// configured memory unit of the metric. + /// + /// ## Notes + /// + /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated + /// and an `ErrorType::InvalidValue` error is recorded. + fn accumulate(&self, sample: u64); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a DistributionData of the serialized value. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<DistributionData>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/mod.rs b/third_party/rust/glean-core/src/traits/mod.rs new file mode 100644 index 0000000000..c4bcf7cdd6 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/mod.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! API definitions for the different metric types supported by the Glean SDK. +//! +//! Individual metric types implement this trait to expose the specific metrics API. +//! It can be used by wrapping implementations to guarantee API conformance. 
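As an example of the conformance guarantee mentioned above, a wrapper type can implement one of these traits and the compiler then enforces the full metric API. A toy sketch against the `Counter` trait defined earlier (in-memory only and illustrative; it assumes the crate is consumed as `glean_core`, and a real language binding would instead delegate to the corresponding type in `../metrics/`):

```rust
use std::sync::atomic::{AtomicI32, Ordering};

use glean_core::traits::Counter;
use glean_core::ErrorType;

/// A toy in-memory counter, used only to show trait conformance.
struct InMemoryCounter {
    value: AtomicI32,
    errors: AtomicI32,
}

impl Counter for InMemoryCounter {
    fn add(&self, amount: i32) {
        if amount <= 0 {
            // The trait contract says an error is recorded for 0 or negative.
            self.errors.fetch_add(1, Ordering::Relaxed);
            return;
        }
        self.value.fetch_add(amount, Ordering::Relaxed);
    }

    fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, _ping_name: S) -> Option<i32> {
        Some(self.value.load(Ordering::Relaxed))
    }

    fn test_get_num_recorded_errors(&self, _error: ErrorType) -> i32 {
        self.errors.load(Ordering::Relaxed)
    }
}
```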
+ +mod boolean; +mod counter; +mod custom_distribution; +mod datetime; +mod event; +mod labeled; +mod memory_distribution; +mod numerator; +mod ping; +mod quantity; +mod rate; +mod string; +mod string_list; +mod text; +mod timespan; +mod timing_distribution; +mod url; +mod uuid; + +pub use self::boolean::Boolean; +pub use self::counter::Counter; +pub use self::custom_distribution::CustomDistribution; +pub use self::datetime::Datetime; +pub use self::event::Event; +pub use self::event::EventRecordingError; +pub use self::event::ExtraKeys; +pub use self::event::NoExtraKeys; +pub use self::labeled::Labeled; +pub use self::memory_distribution::MemoryDistribution; +pub use self::numerator::Numerator; +pub use self::ping::Ping; +pub use self::quantity::Quantity; +pub use self::rate::Rate; +pub use self::string::String; +pub use self::string_list::StringList; +pub use self::text::Text; +pub use self::timespan::Timespan; +pub use self::timing_distribution::TimingDistribution; +pub use self::url::Url; +pub use self::uuid::Uuid; +pub use crate::histogram::HistogramType; diff --git a/third_party/rust/glean-core/src/traits/numerator.rs b/third_party/rust/glean-core/src/traits/numerator.rs new file mode 100644 index 0000000000..e1de59b386 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/numerator.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; +use crate::Rate; + +// When changing this trait, ensure all operations are implemented in the +// related type in `../metrics`. (Except test_get_num_errors) +/// A description for the `NumeratorMetric` subtype of the [`RateMetric`](crate::metrics::RateMetric) type. +pub trait Numerator { + /// Increases the numerator by `amount`. + /// + /// # Arguments + /// + /// * `amount` - The amount to increase by. Should be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is negative. + fn add_to_numerator(&self, amount: i32); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a pair of integers. + /// + /// # Arguments + /// + /// * `ping_name` - the optional name of the ping to retrieve the metric + /// for. Defaults to the first value in `send_in_pings`. + /// + /// This doesn't clear the stored value. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<Rate>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/ping.rs b/third_party/rust/glean-core/src/traits/ping.rs new file mode 100644 index 0000000000..e94b3e72e7 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/ping.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// A description for the [`PingType`](crate::metrics::PingType) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. 
+pub trait Ping { + /// Submits the ping for eventual uploading + /// + /// # Arguments + /// + /// * `reason` - the reason the ping was triggered. Included in the + /// `ping_info.reason` part of the payload. + fn submit(&self, reason: Option<&str>); +} diff --git a/third_party/rust/glean-core/src/traits/quantity.rs b/third_party/rust/glean-core/src/traits/quantity.rs new file mode 100644 index 0000000000..53d01bc246 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/quantity.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`QuantityMetric`](crate::metrics::QuantityMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Quantity { + /// Sets the value. Must be non-negative. + /// + /// # Arguments + /// + /// * `value` - The value. Must be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `value` is negative. + fn set(&self, value: i64); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<i64>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/rate.rs b/third_party/rust/glean-core/src/traits/rate.rs new file mode 100644 index 0000000000..92517928ae --- /dev/null +++ b/third_party/rust/glean-core/src/traits/rate.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +// When changing this trait, ensure all operations are implemented in the +// related type in `../metrics`. (Except test_get_num_errors) +/// A description for the [`RateMetric`](crate::metrics::RateMetric) type. +pub trait Rate { + /// Increases the numerator by `amount`. + /// + /// # Arguments + /// + /// * `amount` - The amount to increase by. Should be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is negative. + fn add_to_numerator(&self, amount: i32); + + /// Increases the denominator by `amount`. + /// + /// # Arguments + /// + /// * `amount` - The amount to increase by. Should be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is negative. + fn add_to_denominator(&self, amount: i32); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a pair of integers. + /// + /// # Arguments + /// + /// * `ping_name` - the optional name of the ping to retrieve the metric + /// for. Defaults to the first value in `send_in_pings`. + /// + /// This doesn't clear the stored value. 
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<crate::Rate>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/string.rs b/third_party/rust/glean-core/src/traits/string.rs
new file mode 100644
index 0000000000..28d8e6835c
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/string.rs
@@ -0,0 +1,50 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the [`StringMetric`](crate::metrics::StringMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait String {
+    /// Sets to the specified value.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The string to set the metric to.
+    ///
+    /// ## Notes
+    ///
+    /// Truncates the value if it is longer than `MAX_LENGTH_VALUE` bytes and logs an error.
+    fn set<S: Into<std::string::String>>(&self, value: S);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as a string.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    /// metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<std::string::String>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/string_list.rs b/third_party/rust/glean-core/src/traits/string_list.rs
new file mode 100644
index 0000000000..5dcf27a979
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/string_list.rs
@@ -0,0 +1,60 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the [`StringListMetric`](crate::metrics::StringListMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait StringList {
+    /// Adds a new string to the list.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The string to add.
+    ///
+    /// ## Notes
+    ///
+    /// Truncates the value if it is longer than `MAX_STRING_LENGTH` bytes and logs an error.
+    fn add<S: Into<String>>(&self, value: S);
+
+    /// Sets to a specific list of strings.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The list of strings to set the metric to.
+    ///
+    /// ## Notes
+    ///
+    /// If passed an empty list, records an error and returns.
+    /// Truncates the list if it is longer than `MAX_LIST_LENGTH` and logs an error.
+ /// Truncates any value in the list if it is longer than `MAX_STRING_LENGTH` and logs an error. + fn set(&self, value: Vec<String>); + + /// **Exported for test purposes.** + /// + /// Gets the currently-stored values. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<Vec<String>>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/text.rs b/third_party/rust/glean-core/src/traits/text.rs new file mode 100644 index 0000000000..d8955092ab --- /dev/null +++ b/third_party/rust/glean-core/src/traits/text.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`TextMetric`](crate::metrics::TextMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Text { + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `value` - The string to set the metric to. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_LENGTH_VALUE` bytes and logs an error. + fn set<S: Into<std::string::String>>(&self, value: S); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<std::string::String>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/timespan.rs b/third_party/rust/glean-core/src/traits/timespan.rs new file mode 100644 index 0000000000..d72e4cc4c0 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/timespan.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; +use std::time::Duration; + +/// A description for the [`TimespanMetric`](crate::metrics::TimespanMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Timespan { + /// Starts tracking time for the provided metric. + /// + /// This uses an internal monotonic timer. + /// + /// This records an error if it's already tracking time (i.e. 
+    /// [`start`](Timespan::start) was already called with no corresponding
+    /// [`stop`](Timespan::stop)): in that case the original start time will be
+    /// preserved.
+    fn start(&self);
+
+    /// Stops tracking time for the provided metric. Sets the metric to the elapsed time.
+    ///
+    /// This will record an error if no [`start`](Timespan::start) was called.
+    fn stop(&self);
+
+    /// Aborts a previous [`start`](Timespan::start) call. No error is recorded
+    /// if no [`start`](Timespan::start) was called.
+    fn cancel(&self);
+
+    /// Explicitly sets the timespan value.
+    ///
+    /// This API should only be used if your library or application requires recording
+    /// spans of time in a way that cannot make use of
+    /// [`start`](Timespan::start)/[`stop`](Timespan::stop)/[`cancel`](Timespan::cancel).
+    ///
+    /// # Arguments
+    ///
+    /// * `elapsed` - The elapsed time to record.
+    fn set_raw(&self, elapsed: Duration);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as an integer.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    ///   metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<u64>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/timing_distribution.rs b/third_party/rust/glean-core/src/traits/timing_distribution.rs
new file mode 100644
index 0000000000..650a433e3d
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/timing_distribution.rs
@@ -0,0 +1,73 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::metrics::DistributionData;
+use crate::metrics::TimerId;
+use crate::ErrorType;
+
+/// A description for the [`TimingDistributionMetric`](crate::metrics::TimingDistributionMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait TimingDistribution {
+    /// Starts tracking time for the provided metric.
+    /// Multiple timers can run simultaneously.
+    ///
+    /// # Returns
+    ///
+    /// A unique [`TimerId`] for the new timer.
+    fn start(&self) -> TimerId;
+
+    /// Stops tracking time for the provided metric and associated timer id.
+    ///
+    /// Adds a count to the corresponding bucket in the timing distribution.
+    /// This will record an error if no [`start`](TimingDistribution::start) was
+    /// called.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - The [`TimerId`] to associate with this timing. This allows
+    ///   for concurrent timing of events associated with different ids to the
+    ///   same timing distribution metric.
+    fn stop_and_accumulate(&self, id: TimerId);
+
+    /// Aborts a previous [`start`](TimingDistribution::start) call. No
+    /// error is recorded if no [`start`](TimingDistribution::start) was
+    /// called.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - The [`TimerId`] to associate with this timing. This allows
+    ///   for concurrent timing of events associated with different ids to the
+    ///   same timing distribution metric.
+    fn cancel(&self, id: TimerId);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value of the metric.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    ///   metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<DistributionData>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors recorded.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/url.rs b/third_party/rust/glean-core/src/traits/url.rs
new file mode 100644
index 0000000000..9848ea7845
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/url.rs
@@ -0,0 +1,51 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the [`UrlMetric`](crate::metrics::UrlMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Url {
+    /// Sets to the specified stringified URL.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The stringified URL to set the metric to.
+    ///
+    /// ## Notes
+    ///
+    /// Truncates the value if it is longer than `MAX_URL_LENGTH` bytes and logs an error.
+    fn set<S: Into<std::string::String>>(&self, value: S);
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as a string.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    ///   metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(
+        &self,
+        ping_name: S,
+    ) -> Option<std::string::String>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/traits/uuid.rs b/third_party/rust/glean-core/src/traits/uuid.rs
new file mode 100644
index 0000000000..1ba487d231
--- /dev/null
+++ b/third_party/rust/glean-core/src/traits/uuid.rs
@@ -0,0 +1,46 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::ErrorType;
+
+/// A description for the [`UuidMetric`](crate::metrics::UuidMetric) type.
+///
+/// When changing this trait, make sure all the operations are
+/// implemented in the related type in `../metrics/`.
+pub trait Uuid {
+    /// Sets to the specified value.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The [`Uuid`](uuid::Uuid) to set the metric to.
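+    ///
+    /// A minimal usage sketch (illustrative; `metric` stands in for an
+    /// implementor of this trait and is not defined in this file):
+    ///
+    /// ```ignore
+    /// metric.set(uuid::Uuid::new_v4());
+    /// ```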
+    fn set(&self, value: uuid::Uuid);
+
+    /// Generates a new random [`Uuid`](uuid::Uuid) and sets the metric to it.
+    fn generate_and_set(&self) -> uuid::Uuid;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the currently stored value as a string.
+    ///
+    /// This doesn't clear the stored value.
+    ///
+    /// # Arguments
+    ///
+    /// * `ping_name` - represents the optional name of the ping to retrieve the
+    ///   metric for. Defaults to the first value in `send_in_pings`.
+    fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<uuid::Uuid>;
+
+    /// **Exported for test purposes.**
+    ///
+    /// Gets the number of recorded errors for the given metric and error type.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - The type of error
+    ///
+    /// # Returns
+    ///
+    /// The number of errors reported.
+    fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32;
+}
diff --git a/third_party/rust/glean-core/src/upload/directory.rs b/third_party/rust/glean-core/src/upload/directory.rs
new file mode 100644
index 0000000000..2bc3206569
--- /dev/null
+++ b/third_party/rust/glean-core/src/upload/directory.rs
@@ -0,0 +1,420 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Pings directory processing utilities.
+
+use std::cmp::Ordering;
+use std::fs::{self, File};
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+
+use serde::Deserialize;
+use uuid::Uuid;
+
+use super::request::HeaderMap;
+use crate::{DELETION_REQUEST_PINGS_DIRECTORY, PENDING_PINGS_DIRECTORY};
+
+/// A representation of the data extracted from a ping file.
+/// It contains the document_id, path, JSON-encoded body of a ping and the persisted headers.
+pub type PingPayload = (String, String, String, Option<HeaderMap>);
+
+/// A struct to hold the result of scanning all pings directories.
+#[derive(Clone, Debug, Default)]
+pub struct PingPayloadsByDirectory {
+    pub pending_pings: Vec<(u64, PingPayload)>,
+    pub deletion_request_pings: Vec<(u64, PingPayload)>,
+}
+
+impl PingPayloadsByDirectory {
+    /// Extends the data of this instance of PingPayloadsByDirectory
+    /// with the data from another instance of PingPayloadsByDirectory.
+    pub fn extend(&mut self, other: PingPayloadsByDirectory) {
+        self.pending_pings.extend(other.pending_pings);
+        self.deletion_request_pings
+            .extend(other.deletion_request_pings);
+    }
+
+    // Get the sum of the number of deletion request and regular pending pings.
+    pub fn len(&self) -> usize {
+        self.pending_pings.len() + self.deletion_request_pings.len()
+    }
+}
+
+/// Gets the file name from a path as a &str.
+///
+/// # Panics
+///
+/// Won't panic if unable to get the file name; it logs a warning and returns `None` instead.
+fn get_file_name_as_str(path: &Path) -> Option<&str> {
+    match path.file_name() {
+        None => {
+            log::warn!("Error getting file name from path: {}", path.display());
+            None
+        }
+        Some(file_name) => {
+            let file_name = file_name.to_str();
+            if file_name.is_none() {
+                log::warn!("File name is not valid unicode: {}", path.display());
+            }
+            file_name
+        }
+    }
+}
+
+/// Processes a ping's metadata.
+///
+/// The metadata is an optional third line in the ping file;
+/// currently it contains only additional headers to be added to each ping request.
+/// We therefore parse the contents of this line
+/// and return a HeaderMap of the persisted headers.
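+///
+/// For example (an illustrative metadata line, not taken from a real ping):
+/// `{"headers": {"X-Debug-ID": "test-tag"}}` parses into a `HeaderMap`
+/// holding a single `X-Debug-ID` entry.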
+fn process_metadata(path: &str, metadata: &str) -> Option<HeaderMap> { + #[derive(Deserialize)] + struct PingMetadata { + pub headers: HeaderMap, + } + + if let Ok(metadata) = serde_json::from_str::<PingMetadata>(metadata) { + return Some(metadata.headers); + } else { + log::warn!("Error while parsing ping metadata: {}", path); + } + None +} + +/// Manages the pings directories. +#[derive(Debug, Clone)] +pub struct PingDirectoryManager { + /// Path to the pending pings directory. + pending_pings_dir: PathBuf, + /// Path to the deletion-request pings directory. + deletion_request_pings_dir: PathBuf, +} + +impl PingDirectoryManager { + /// Creates a new directory manager. + /// + /// # Arguments + /// + /// * `data_path` - Path to the pending pings directory. + pub fn new<P: Into<PathBuf>>(data_path: P) -> Self { + let data_path = data_path.into(); + Self { + pending_pings_dir: data_path.join(PENDING_PINGS_DIRECTORY), + deletion_request_pings_dir: data_path.join(DELETION_REQUEST_PINGS_DIRECTORY), + } + } + + /// Attempts to delete a ping file. + /// + /// # Arguments + /// + /// * `uuid` - The UUID of the ping file to be deleted + /// + /// # Returns + /// + /// Whether the file was successfully deleted. + /// + /// # Panics + /// + /// Won't panic if unable to delete the file. + pub fn delete_file(&self, uuid: &str) -> bool { + let path = match self.get_file_path(uuid) { + Some(path) => path, + None => { + log::warn!("Cannot find ping file to delete {}", uuid); + return false; + } + }; + + match fs::remove_file(&path) { + Err(e) => { + log::warn!("Error deleting file {}. {}", path.display(), e); + return false; + } + _ => log::info!("File was deleted {}", path.display()), + }; + + true + } + + /// Reads a ping file and returns the data from it. + /// + /// If the file is not properly formatted, it will be deleted and `None` will be returned. + /// + /// # Arguments + /// + /// * `document_id` - The UUID of the ping file to be processed + pub fn process_file(&self, document_id: &str) -> Option<PingPayload> { + let path = match self.get_file_path(document_id) { + Some(path) => path, + None => { + log::warn!("Cannot find ping file to process {}", document_id); + return None; + } + }; + let file = match File::open(&path) { + Ok(file) => file, + Err(e) => { + log::warn!("Error reading ping file {}. {}", path.display(), e); + return None; + } + }; + + log::info!("Processing ping at: {}", path.display()); + + // The way the ping file is structured: + // first line should always have the path, + // second line should have the body with the ping contents in JSON format + // and third line might contain ping metadata e.g. additional headers. + let mut lines = BufReader::new(file).lines(); + if let (Some(Ok(path)), Some(Ok(body)), Ok(metadata)) = + (lines.next(), lines.next(), lines.next().transpose()) + { + let headers = metadata.and_then(|m| process_metadata(&path, &m)); + return Some((document_id.into(), path, body, headers)); + } else { + log::warn!( + "Error processing ping file: {}. Ping file is not formatted as expected.", + document_id + ); + } + self.delete_file(document_id); + None + } + + /// Processes both ping directories. 
+    pub fn process_dirs(&self) -> PingPayloadsByDirectory {
+        PingPayloadsByDirectory {
+            pending_pings: self.process_dir(&self.pending_pings_dir),
+            deletion_request_pings: self.process_dir(&self.deletion_request_pings_dir),
+        }
+    }
+
+    /// Processes one of the ping directories and returns a vector with the ping data
+    /// corresponding to each valid ping file in the directory.
+    /// This vector will be ordered by file `modified_date`.
+    ///
+    /// Any files whose names don't parse as a valid UUID will be deleted
+    /// to prevent files from polluting the pings directory.
+    ///
+    /// # Returns
+    ///
+    /// A vector of tuples with the file size and payload of each ping file in the directory.
+    fn process_dir(&self, dir: &Path) -> Vec<(u64, PingPayload)> {
+        log::trace!("Processing persisted pings.");
+
+        let entries = match dir.read_dir() {
+            Ok(entries) => entries,
+            Err(_) => {
+                // This may error simply because the directory doesn't exist,
+                // which is expected if no pings were stored yet.
+                return Vec::new();
+            }
+        };
+
+        let mut pending_pings: Vec<_> = entries
+            .filter_map(|entry| entry.ok())
+            .filter_map(|entry| {
+                let path = entry.path();
+                if let Some(file_name) = get_file_name_as_str(&path) {
+                    // Delete file if it doesn't match the pattern.
+                    if Uuid::parse_str(file_name).is_err() {
+                        log::warn!("Pattern mismatch. Deleting {}", path.display());
+                        self.delete_file(file_name);
+                        return None;
+                    }
+                    if let Some(data) = self.process_file(file_name) {
+                        let metadata = match fs::metadata(&path) {
+                            Ok(metadata) => metadata,
+                            Err(e) => {
+                                // There's a rare case where this races against a parallel deletion
+                                // of all pending ping files.
+                                // This could therefore fail, in which case we don't care about the
+                                // result and can ignore the ping, it's already been deleted.
+                                log::warn!(
+                                    "Unable to read metadata for file: {}, error: {:?}",
+                                    path.display(),
+                                    e
+                                );
+                                return None;
+                            }
+                        };
+                        return Some((metadata, data));
+                    }
+                };
+                None
+            })
+            .collect();
+
+        // This will sort the pings by date in ascending order (oldest -> newest).
+        pending_pings.sort_by(|(a, _), (b, _)| {
+            // We might not be able to get the modified date for a given file,
+            // in which case we just put it at the end.
+            if let (Ok(a), Ok(b)) = (a.modified(), b.modified()) {
+                a.cmp(&b)
+            } else {
+                Ordering::Less
+            }
+        });
+
+        pending_pings
+            .into_iter()
+            .map(|(metadata, data)| (metadata.len(), data))
+            .collect()
+    }
+
+    /// Gets the path for a ping file based on its document_id.
+    ///
+    /// Will look for files in each ping directory until something is found.
+    /// If nothing is found, returns `None`.
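+    ///
+    /// For example (illustrative), a document_id is looked up first under the
+    /// pending pings directory and then under the deletion-request pings
+    /// directory.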
+ fn get_file_path(&self, document_id: &str) -> Option<PathBuf> { + for dir in [&self.pending_pings_dir, &self.deletion_request_pings_dir].iter() { + let path = dir.join(document_id); + if path.exists() { + return Some(path); + } + } + None + } +} + +#[cfg(test)] +mod test { + use std::fs::File; + + use super::*; + use crate::metrics::PingType; + use crate::tests::new_glean; + + #[test] + fn doesnt_panic_if_no_pending_pings_directory() { + let dir = tempfile::tempdir().unwrap(); + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Verify that processing the directory didn't panic + let data = directory_manager.process_dirs(); + assert_eq!(data.pending_pings.len(), 0); + assert_eq!(data.deletion_request_pings.len(), 0); + } + + #[test] + fn gets_correct_data_from_valid_ping_file() { + let (mut glean, dir) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping to populate the pending_pings directory + ping_type.submit_sync(&glean, None); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + // Verify there is just the one request + assert_eq!(data.pending_pings.len(), 1); + assert_eq!(data.deletion_request_pings.len(), 0); + + // Verify request was returned for the "test" ping + let ping = &data.pending_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "test"); + } + + #[test] + fn non_uuid_files_are_deleted_and_ignored() { + let (mut glean, dir) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping to populate the pending_pings directory + ping_type.submit_sync(&glean, None); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + let not_uuid_path = dir + .path() + .join(PENDING_PINGS_DIRECTORY) + .join("not-uuid-file-name.txt"); + File::create(¬_uuid_path).unwrap(); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + // Verify there is just the one request + assert_eq!(data.pending_pings.len(), 1); + assert_eq!(data.deletion_request_pings.len(), 0); + + // Verify request was returned for the "test" ping + let ping = &data.pending_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "test"); + + // Verify that file was indeed deleted + assert!(!not_uuid_path.exists()); + } + + #[test] + fn wrongly_formatted_files_are_deleted_and_ignored() { + let (mut glean, dir) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping to populate the pending_pings directory + ping_type.submit_sync(&glean, None); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + let wrong_contents_file_path = dir + .path() + .join(PENDING_PINGS_DIRECTORY) + .join(Uuid::new_v4().to_string()); + File::create(&wrong_contents_file_path).unwrap(); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + // Verify there is just the one request + assert_eq!(data.pending_pings.len(), 1); + assert_eq!(data.deletion_request_pings.len(), 0); + + // Verify request was returned for the "test" ping + let ping = 
&data.pending_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "test"); + + // Verify that file was indeed deleted + assert!(!wrong_contents_file_path.exists()); + } + + #[test] + fn takes_deletion_request_pings_into_account_while_processing() { + let (glean, dir) = new_glean(None); + + // Submit a deletion request ping to populate deletion request folder. + glean + .internal_pings + .deletion_request + .submit_sync(&glean, None); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + assert_eq!(data.pending_pings.len(), 0); + assert_eq!(data.deletion_request_pings.len(), 1); + + // Verify request was returned for the "deletion-request" ping + let ping = &data.deletion_request_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "deletion-request"); + } +} diff --git a/third_party/rust/glean-core/src/upload/mod.rs b/third_party/rust/glean-core/src/upload/mod.rs new file mode 100644 index 0000000000..5c6c0b4f0f --- /dev/null +++ b/third_party/rust/glean-core/src/upload/mod.rs @@ -0,0 +1,1683 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Manages the pending pings queue and directory. +//! +//! * Keeps track of pending pings, loading any unsent ping from disk on startup; +//! * Exposes [`get_upload_task`](PingUploadManager::get_upload_task) API for +//! the platform layer to request next upload task; +//! * Exposes +//! [`process_ping_upload_response`](PingUploadManager::process_ping_upload_response) +//! API to check the HTTP response from the ping upload and either delete the +//! corresponding ping from disk or re-enqueue it for sending. + +use std::collections::HashMap; +use std::collections::VecDeque; +use std::convert::TryInto; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, RwLock, RwLockWriteGuard}; +use std::thread; +use std::time::{Duration, Instant}; + +use chrono::Utc; + +use crate::error::ErrorKind; +use crate::TimerId; +use crate::{internal_metrics::UploadMetrics, Glean}; +use directory::{PingDirectoryManager, PingPayloadsByDirectory}; +use policy::Policy; +use request::create_date_header_value; + +pub use request::{HeaderMap, PingRequest}; +pub use result::{UploadResult, UploadTaskAction}; + +mod directory; +mod policy; +mod request; +mod result; + +const WAIT_TIME_FOR_PING_PROCESSING: u64 = 1000; // in milliseconds + +#[derive(Debug)] +struct RateLimiter { + /// The instant the current interval has started. + started: Option<Instant>, + /// The count for the current interval. + count: u32, + /// The duration of each interval. + interval: Duration, + /// The maximum count per interval. + max_count: u32, +} + +/// An enum to represent the current state of the RateLimiter. +#[derive(PartialEq)] +enum RateLimiterState { + /// The RateLimiter has not reached the maximum count and is still incrementing. + Incrementing, + /// The RateLimiter has reached the maximum count for the current interval. + /// + /// This variant contains the remaining time (in milliseconds) + /// until the rate limiter is not throttled anymore. 
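+    ///
+    /// For example, `Throttled(1500)` (an illustrative value) tells the caller
+    /// to wait 1.5 seconds before asking for the next task.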
+    Throttled(u64),
+}
+
+impl RateLimiter {
+    pub fn new(interval: Duration, max_count: u32) -> Self {
+        Self {
+            started: None,
+            count: 0,
+            interval,
+            max_count,
+        }
+    }
+
+    fn reset(&mut self) {
+        self.started = Some(Instant::now());
+        self.count = 0;
+    }
+
+    fn elapsed(&self) -> Duration {
+        self.started.unwrap().elapsed()
+    }
+
+    // The counter should reset if
+    //
+    // 1. It has never started;
+    // 2. It has been started more than the interval time ago;
+    // 3. Something goes wrong while trying to calculate the elapsed time since the last reset.
+    fn should_reset(&self) -> bool {
+        if self.started.is_none() {
+            return true;
+        }
+
+        // Safe unwrap, we already stated that `self.started` is not `None` above.
+        if self.elapsed() > self.interval {
+            return true;
+        }
+
+        false
+    }
+
+    /// Tries to increment the internal counter.
+    ///
+    /// # Returns
+    ///
+    /// The current state of the RateLimiter.
+    pub fn get_state(&mut self) -> RateLimiterState {
+        if self.should_reset() {
+            self.reset();
+        }
+
+        if self.count == self.max_count {
+            // Note that `remaining` can't be negative: `should_reset` just ran
+            // above, so the elapsed time is within the interval.
+            let remaining = self.interval.as_millis() - self.elapsed().as_millis();
+            return RateLimiterState::Throttled(
+                remaining
+                    .try_into()
+                    .unwrap_or(self.interval.as_secs() * 1000),
+            );
+        }
+
+        self.count += 1;
+        RateLimiterState::Incrementing
+    }
+}
+
+/// An enum representing the possible upload tasks to be performed by an uploader.
+///
+/// When asking for the next ping request to upload,
+/// the requester may receive one out of three possible tasks.
+#[derive(PartialEq, Eq, Debug)]
+pub enum PingUploadTask {
+    /// An upload task
+    Upload {
+        /// The ping request for upload
+        /// See [`PingRequest`](struct.PingRequest.html) for more information.
+        request: PingRequest,
+    },
+
+    /// A flag signaling that the pending pings directories are not done being processed,
+    /// thus the requester should wait and come back later.
+    Wait {
+        /// The time in milliseconds
+        /// the requester should wait before requesting a new task.
+        time: u64,
+    },
+
+    /// A flag signaling that the requester doesn't need to request any more upload tasks at this moment.
+    ///
+    /// There are three possibilities for this scenario:
+    /// * Pending pings queue is empty, no more pings to request;
+    /// * Requester has gotten more than MAX_WAIT_ATTEMPTS (3, by default) `PingUploadTask::Wait` responses in a row;
+    /// * Requester has reported more than MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW
+    ///   recoverable upload failures on the same uploading window (see below)
+    ///   and should stop requesting at this moment.
+    ///
+    /// An "uploading window" starts when a requester gets a new
+    /// `PingUploadTask::Upload(PingRequest)` response and finishes when they
+    /// finally get a `PingUploadTask::Done` or `PingUploadTask::Wait` response.
+    Done {
+        #[doc(hidden)]
+        /// Unused field. Required because UniFFI can't handle variants without fields.
+        unused: i8,
+    },
+}
+
+impl PingUploadTask {
+    /// Whether the current task is an upload task.
+    pub fn is_upload(&self) -> bool {
+        matches!(self, PingUploadTask::Upload { .. })
+    }
+
+    /// Whether the current task is a wait task.
+    pub fn is_wait(&self) -> bool {
+        matches!(self, PingUploadTask::Wait { .. })
+    }
+
+    pub(crate) fn done() -> Self {
+        PingUploadTask::Done { unused: 0 }
+    }
+}
+
+/// Manages the pending pings queue and directory.
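+///
+/// A minimal usage sketch (illustrative; the path and binding name below are
+/// assumptions, not values used by this module):
+///
+/// ```ignore
+/// let manager = PingUploadManager::new("/path/to/glean/data", "Rust");
+/// // Scan pending pings off-thread; tasks are then handed out on demand
+/// // via `get_upload_task`.
+/// manager.scan_pending_pings_directories();
+/// ```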
+#[derive(Debug)]
+pub struct PingUploadManager {
+    /// A FIFO queue storing a `PingRequest` for each pending ping.
+    queue: RwLock<VecDeque<PingRequest>>,
+    /// A manager for the pending pings directories.
+    directory_manager: PingDirectoryManager,
+    /// A flag signaling if we are done processing the pending pings directories.
+    processed_pending_pings: Arc<AtomicBool>,
+    /// A vector to store the pending pings processed off-thread.
+    cached_pings: Arc<RwLock<PingPayloadsByDirectory>>,
+    /// The number of upload failures for the current uploading window.
+    recoverable_failure_count: AtomicU32,
+    /// The number of times in a row a user has received a `PingUploadTask::Wait` response.
+    wait_attempt_count: AtomicU32,
+    /// A ping counter to help rate limit the ping uploads.
+    ///
+    /// To keep resource usage in check,
+    /// we may want to limit the amount of pings sent in a given interval.
+    rate_limiter: Option<RwLock<RateLimiter>>,
+    /// The name of the programming language used by the binding creating this instance of PingUploadManager.
+    ///
+    /// This will be used to build the value of the User-Agent header for each ping request.
+    language_binding_name: String,
+    /// Metrics related to ping uploading.
+    upload_metrics: UploadMetrics,
+    /// Policies for ping storage, uploading and requests.
+    policy: Policy,
+
+    in_flight: RwLock<HashMap<String, (TimerId, TimerId)>>,
+}
+
+impl PingUploadManager {
+    /// Creates a new PingUploadManager.
+    ///
+    /// # Arguments
+    ///
+    /// * `data_path` - Path to the pending pings directory.
+    /// * `language_binding_name` - The name of the language binding calling this manager's instance.
+    ///
+    /// # Panics
+    ///
+    /// Will panic if unable to spawn a new thread.
+    pub fn new<P: Into<PathBuf>>(data_path: P, language_binding_name: &str) -> Self {
+        Self {
+            queue: RwLock::new(VecDeque::new()),
+            directory_manager: PingDirectoryManager::new(data_path),
+            processed_pending_pings: Arc::new(AtomicBool::new(false)),
+            cached_pings: Arc::new(RwLock::new(PingPayloadsByDirectory::default())),
+            recoverable_failure_count: AtomicU32::new(0),
+            wait_attempt_count: AtomicU32::new(0),
+            rate_limiter: None,
+            language_binding_name: language_binding_name.into(),
+            upload_metrics: UploadMetrics::new(),
+            policy: Policy::default(),
+            in_flight: RwLock::new(HashMap::default()),
+        }
+    }
+
+    /// Spawns a new thread and processes the pending pings directories,
+    /// filling up the queue with whatever pings are in there.
+    ///
+    /// # Returns
+    ///
+    /// The `JoinHandle` to the spawned thread
+    pub fn scan_pending_pings_directories(&self) -> std::thread::JoinHandle<()> {
+        let local_manager = self.directory_manager.clone();
+        let local_cached_pings = self.cached_pings.clone();
+        let local_flag = self.processed_pending_pings.clone();
+        thread::Builder::new()
+            .name("glean.ping_directory_manager.process_dir".to_string())
+            .spawn(move || {
+                let mut local_cached_pings = local_cached_pings
+                    .write()
+                    .expect("Can't write to pending pings cache.");
+                local_cached_pings.extend(local_manager.process_dirs());
+                local_flag.store(true, Ordering::SeqCst);
+            })
+            .expect("Unable to spawn thread to process pings directories.")
+    }
+
+    /// Creates a new upload manager with no limitations, for tests.
+    #[cfg(test)]
+    pub fn no_policy<P: Into<PathBuf>>(data_path: P) -> Self {
+        let mut upload_manager = Self::new(data_path, "Test");
+
+        // Disable all policies for tests; if necessary, individual tests can re-enable them.
+        upload_manager.policy.set_max_recoverable_failures(None);
+        upload_manager.policy.set_max_wait_attempts(None);
+        upload_manager.policy.set_max_ping_body_size(None);
+        upload_manager
+            .policy
+            .set_max_pending_pings_directory_size(None);
+        upload_manager.policy.set_max_pending_pings_count(None);
+
+        // When building for tests, always scan the pending pings directories and do it sync.
+        upload_manager
+            .scan_pending_pings_directories()
+            .join()
+            .unwrap();
+
+        upload_manager
+    }
+
+    fn processed_pending_pings(&self) -> bool {
+        self.processed_pending_pings.load(Ordering::SeqCst)
+    }
+
+    fn recoverable_failure_count(&self) -> u32 {
+        self.recoverable_failure_count.load(Ordering::SeqCst)
+    }
+
+    fn wait_attempt_count(&self) -> u32 {
+        self.wait_attempt_count.load(Ordering::SeqCst)
+    }
+
+    /// Attempts to build a ping request from a ping file payload.
+    ///
+    /// Returns the `PingRequest` or `None` if unable to build,
+    /// in which case it will delete the ping file and record an error.
+    fn build_ping_request(
+        &self,
+        glean: &Glean,
+        document_id: &str,
+        path: &str,
+        body: &str,
+        headers: Option<HeaderMap>,
+    ) -> Option<PingRequest> {
+        let mut request = PingRequest::builder(
+            &self.language_binding_name,
+            self.policy.max_ping_body_size(),
+        )
+        .document_id(document_id)
+        .path(path)
+        .body(body);
+
+        if let Some(headers) = headers {
+            request = request.headers(headers);
+        }
+
+        match request.build() {
+            Ok(request) => Some(request),
+            Err(e) => {
+                log::warn!("Error trying to build ping request: {}", e);
+                self.directory_manager.delete_file(document_id);
+
+                // Record the error.
+                // Currently the only possible error is PingBodyOverflow.
+                if let ErrorKind::PingBodyOverflow(s) = e.kind() {
+                    self.upload_metrics
+                        .discarded_exceeding_pings_size
+                        .accumulate_sync(glean, *s as i64 / 1024);
+                }
+
+                None
+            }
+        }
+    }
+
+    /// Enqueue a ping for upload.
+    pub fn enqueue_ping(
+        &self,
+        glean: &Glean,
+        document_id: &str,
+        path: &str,
+        body: &str,
+        headers: Option<HeaderMap>,
+    ) {
+        let mut queue = self
+            .queue
+            .write()
+            .expect("Can't write to pending pings queue.");
+
+        // Checks if a ping with this `document_id` is already enqueued.
+        if queue
+            .iter()
+            .any(|request| request.document_id == document_id)
+        {
+            log::warn!(
+                "Attempted to enqueue a duplicate ping {} at {}.",
+                document_id,
+                path
+            );
+            return;
+        }
+
+        {
+            let in_flight = self.in_flight.read().unwrap();
+            if in_flight.contains_key(document_id) {
+                log::warn!(
+                    "Attempted to enqueue an in-flight ping {} at {}.",
+                    document_id,
+                    path
+                );
+                // Count the dropped in-flight ping.
+                self.upload_metrics
+                    .in_flight_pings_dropped
+                    .add_sync(glean, 1);
+                return;
+            }
+        }
+
+        log::trace!("Enqueuing ping {} at {}", document_id, path);
+        if let Some(request) = self.build_ping_request(glean, document_id, path, body, headers) {
+            queue.push_back(request)
+        }
+    }
+
+    /// Enqueues pings that might have been cached.
+    ///
+    /// The size of the PENDING_PINGS_DIRECTORY directory will be calculated
+    /// (by accumulating each ping's size in that directory)
+    /// and in case we exceed the quota, defined by the directory-size policy,
+    /// outstanding pings get deleted and are not enqueued.
+    ///
+    /// The size of the DELETION_REQUEST_PINGS_DIRECTORY will not be calculated
+    /// and no deletion-request pings will be deleted. Deletion request pings
+    /// are not very common and usually don't contain any data,
+    /// so we don't expect that directory to ever reach quota.
+    /// Most importantly, we don't want to ever delete deletion-request pings.
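+    ///
+    /// For example (numbers mirroring the quota tests below): with
+    /// `max_pending_pings_count` set to 3 and ten cached pending pings,
+    /// the seven oldest pings are deleted and only the three newest are enqueued.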
+    ///
+    /// # Arguments
+    ///
+    /// * `glean` - The Glean object holding the database.
+    fn enqueue_cached_pings(&self, glean: &Glean) {
+        let mut cached_pings = self
+            .cached_pings
+            .write()
+            .expect("Can't write to pending pings cache.");
+
+        if cached_pings.len() > 0 {
+            let mut pending_pings_directory_size: u64 = 0;
+            let mut pending_pings_count = 0;
+            let mut deleting = false;
+
+            let total = cached_pings.pending_pings.len() as u64;
+            self.upload_metrics
+                .pending_pings
+                .add_sync(glean, total.try_into().unwrap_or(0));
+
+            if total > self.policy.max_pending_pings_count() {
+                log::warn!(
+                    "More than {} pending pings in the directory, will delete {} old pings.",
+                    self.policy.max_pending_pings_count(),
+                    total - self.policy.max_pending_pings_count()
+                );
+            }
+
+            // The pending pings vector is sorted by date in ascending order (oldest -> newest).
+            // We need to calculate the size of the pending pings directory
+            // and delete the **oldest** pings in case quota is reached.
+            // Thus, we reverse the order of the pending pings vector,
+            // so that we iterate in descending order (newest -> oldest).
+            cached_pings.pending_pings.reverse();
+            cached_pings.pending_pings.retain(|(file_size, (document_id, _, _, _))| {
+                pending_pings_count += 1;
+                pending_pings_directory_size += file_size;
+
+                // We don't want to spam the log for every ping over the quota.
+                if !deleting && pending_pings_directory_size > self.policy.max_pending_pings_directory_size() {
+                    log::warn!(
+                        "Pending pings directory has reached the size quota of {} bytes, outstanding pings will be deleted.",
+                        self.policy.max_pending_pings_directory_size()
+                    );
+                    deleting = true;
+                }
+
+                // Once we reach the number of allowed pings we start deleting,
+                // no matter what size.
+                // We already log this before the loop.
+                if pending_pings_count > self.policy.max_pending_pings_count() {
+                    deleting = true;
+                }
+
+                if deleting && self.directory_manager.delete_file(document_id) {
+                    self.upload_metrics
+                        .deleted_pings_after_quota_hit
+                        .add_sync(glean, 1);
+                    return false;
+                }
+
+                true
+            });
+            // After calculating the size of the pending pings directory,
+            // we record the calculated number and reverse the pings array back for enqueueing.
+            cached_pings.pending_pings.reverse();
+            self.upload_metrics
+                .pending_pings_directory_size
+                .accumulate_sync(glean, pending_pings_directory_size as i64 / 1024);
+
+            // Enqueue the remaining pending pings and
+            // enqueue all deletion-request pings.
+            let deletion_request_pings = cached_pings.deletion_request_pings.drain(..);
+            for (_, (document_id, path, body, headers)) in deletion_request_pings {
+                self.enqueue_ping(glean, &document_id, &path, &body, headers);
+            }
+            let pending_pings = cached_pings.pending_pings.drain(..);
+            for (_, (document_id, path, body, headers)) in pending_pings {
+                self.enqueue_ping(glean, &document_id, &path, &body, headers);
+            }
+        }
+    }
+
+    /// Adds rate limiting capability to this upload manager.
+    ///
+    /// The rate limiter will limit the amount of calls to `get_upload_task` per interval.
+    ///
+    /// Setting this will restart the count and timer in case there was a previous rate limiter set
+    /// (e.g. if we have reached the current limit and call this function, we start counting again
+    /// and the caller is allowed to ask for tasks).
+    ///
+    /// # Arguments
+    ///
+    /// * `interval` - the number of seconds in each rate-limiting window.
+    /// * `max_tasks` - the maximum number of task requests allowed per interval.
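+    ///
+    /// For example, `set_rate_limiter(3, 10)` (the values exercised in the
+    /// tests below) allows at most 10 upload tasks per 3-second window.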
+ pub fn set_rate_limiter(&mut self, interval: u64, max_tasks: u32) { + self.rate_limiter = Some(RwLock::new(RateLimiter::new( + Duration::from_secs(interval), + max_tasks, + ))); + } + + /// Reads a ping file, creates a `PingRequest` and adds it to the queue. + /// + /// Duplicate requests won't be added. + /// + /// # Arguments + /// + /// * `glean` - The Glean object holding the database. + /// * `document_id` - The UUID of the ping in question. + pub fn enqueue_ping_from_file(&self, glean: &Glean, document_id: &str) { + if let Some((doc_id, path, body, headers)) = + self.directory_manager.process_file(document_id) + { + self.enqueue_ping(glean, &doc_id, &path, &body, headers) + } + } + + /// Clears the pending pings queue, leaves the deletion-request pings. + pub fn clear_ping_queue(&self) -> RwLockWriteGuard<'_, VecDeque<PingRequest>> { + log::trace!("Clearing ping queue"); + let mut queue = self + .queue + .write() + .expect("Can't write to pending pings queue."); + + queue.retain(|ping| ping.is_deletion_request()); + log::trace!( + "{} pings left in the queue (only deletion-request expected)", + queue.len() + ); + queue + } + + fn get_upload_task_internal(&self, glean: &Glean, log_ping: bool) -> PingUploadTask { + // Helper to decide whether to return PingUploadTask::Wait or PingUploadTask::Done. + // + // We want to limit the amount of PingUploadTask::Wait returned in a row, + // in case we reach MAX_WAIT_ATTEMPTS we want to actually return PingUploadTask::Done. + let wait_or_done = |time: u64| { + self.wait_attempt_count.fetch_add(1, Ordering::SeqCst); + if self.wait_attempt_count() > self.policy.max_wait_attempts() { + PingUploadTask::done() + } else { + PingUploadTask::Wait { time } + } + }; + + if !self.processed_pending_pings() { + log::info!( + "Tried getting an upload task, but processing is ongoing. Will come back later." + ); + return wait_or_done(WAIT_TIME_FOR_PING_PROCESSING); + } + + // This is a no-op in case there are no cached pings. + self.enqueue_cached_pings(glean); + + if self.recoverable_failure_count() >= self.policy.max_recoverable_failures() { + log::warn!( + "Reached maximum recoverable failures for the current uploading window. You are done." + ); + return PingUploadTask::done(); + } + + let mut queue = self + .queue + .write() + .expect("Can't write to pending pings queue."); + match queue.front() { + Some(request) => { + if let Some(rate_limiter) = &self.rate_limiter { + let mut rate_limiter = rate_limiter + .write() + .expect("Can't write to the rate limiter."); + if let RateLimiterState::Throttled(remaining) = rate_limiter.get_state() { + log::info!( + "Tried getting an upload task, but we are throttled at the moment." + ); + return wait_or_done(remaining); + } + } + + log::info!( + "New upload task with id {} (path: {})", + request.document_id, + request.path + ); + + if log_ping { + if let Some(body) = request.pretty_body() { + chunked_log_info(&request.path, &body); + } else { + chunked_log_info(&request.path, "<invalid ping payload>"); + } + } + + { + // Synchronous timer starts. + // We're in the uploader thread anyway. + // But also: No data is stored on disk. + let mut in_flight = self.in_flight.write().unwrap(); + let success_id = self.upload_metrics.send_success.start_sync(); + let failure_id = self.upload_metrics.send_failure.start_sync(); + in_flight.insert(request.document_id.clone(), (success_id, failure_id)); + } + + let mut request = queue.pop_front().unwrap(); + + // Adding the `Date` header just before actual upload happens. 
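+                // e.g. "Mon, 23 Jan 2023 10:59:00 GMT" (illustrative value;
+                // the exact formatting comes from `create_date_header_value`).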
+                request
+                    .headers
+                    .insert("Date".to_string(), create_date_header_value(Utc::now()));
+
+                PingUploadTask::Upload { request }
+            }
+            None => {
+                log::info!("No more pings to upload! You are done.");
+                PingUploadTask::done()
+            }
+        }
+    }
+
+    /// Gets the next `PingUploadTask`.
+    ///
+    /// # Arguments
+    ///
+    /// * `glean` - The Glean object holding the database.
+    /// * `log_ping` - Whether to log the ping before returning.
+    ///
+    /// # Returns
+    ///
+    /// The next [`PingUploadTask`](enum.PingUploadTask.html).
+    pub fn get_upload_task(&self, glean: &Glean, log_ping: bool) -> PingUploadTask {
+        let task = self.get_upload_task_internal(glean, log_ping);
+
+        if !task.is_wait() && self.wait_attempt_count() > 0 {
+            self.wait_attempt_count.store(0, Ordering::SeqCst);
+        }
+
+        if !task.is_upload() && self.recoverable_failure_count() > 0 {
+            self.recoverable_failure_count.store(0, Ordering::SeqCst);
+        }
+
+        task
+    }
+
+    /// Processes the response from an attempt to upload a ping.
+    ///
+    /// Based on the HTTP status of said response,
+    /// the possible outcomes are:
+    ///
+    /// * **200 - 299 Success**
+    ///   Any status in the 2XX range is considered a successful upload,
+    ///   which means the corresponding ping file can be deleted.
+    ///   _Known 2XX status:_
+    ///   * 200 - OK. Request accepted into the pipeline.
+    ///
+    /// * **400 - 499 Unrecoverable error**
+    ///   Any status in the 4XX range means something our client did is not correct.
+    ///   It is unlikely that the client is going to recover from this by retrying,
+    ///   so in this case the corresponding ping file can also be deleted.
+    ///   _Known 4XX status:_
+    ///   * 404 - not found - POST/PUT to an unknown namespace
+    ///   * 405 - wrong request type (anything other than POST/PUT)
+    ///   * 411 - missing content-length header
+    ///   * 413 - request body too large (Note that if we have badly-behaved clients that
+    ///     retry on 4XX, we should send back 202 on body/path too long.)
+    ///   * 414 - request path too long (See above)
+    ///
+    /// * **Any other error**
+    ///   For any other error, a warning is logged and the ping is re-enqueued.
+    ///   _Known other errors:_
+    ///   * 500 - internal error
+    ///
+    /// # Note
+    ///
+    /// The disk I/O performed by this function is not done off-thread,
+    /// as it is expected to be called off-thread by the platform.
+    ///
+    /// # Arguments
+    ///
+    /// * `glean` - The Glean object holding the database.
+    /// * `document_id` - The UUID of the ping in question.
+    /// * `status` - The HTTP status of the response.
+    pub fn process_ping_upload_response(
+        &self,
+        glean: &Glean,
+        document_id: &str,
+        status: UploadResult,
+    ) -> UploadTaskAction {
+        use UploadResult::*;
+
+        let stop_time = time::precise_time_ns();
+
+        if let Some(label) = status.get_label() {
+            let metric = self.upload_metrics.ping_upload_failure.get(label);
+            metric.add_sync(glean, 1);
+        }
+
+        let send_ids = {
+            let mut lock = self.in_flight.write().unwrap();
+            lock.remove(document_id)
+        };
+
+        if send_ids.is_none() {
+            self.upload_metrics.missing_send_ids.add_sync(glean, 1);
+        }
+
+        match status {
+            HttpStatus { code } if (200..=299).contains(&code) => {
+                log::info!("Ping {} successfully sent {}.", document_id, code);
+                if let Some((success_id, failure_id)) = send_ids {
+                    self.upload_metrics
+                        .send_success
+                        .set_stop_and_accumulate(glean, success_id, stop_time);
+                    self.upload_metrics.send_failure.cancel_sync(failure_id);
+                }
+                self.directory_manager.delete_file(document_id);
+            }
+
+            UnrecoverableFailure { ..
} | HttpStatus { code: 400..=499 } => { + log::warn!( + "Unrecoverable upload failure while attempting to send ping {}. Error was {:?}", + document_id, + status + ); + if let Some((success_id, failure_id)) = send_ids { + self.upload_metrics.send_success.cancel_sync(success_id); + self.upload_metrics + .send_failure + .set_stop_and_accumulate(glean, failure_id, stop_time); + } + self.directory_manager.delete_file(document_id); + } + + RecoverableFailure { .. } | HttpStatus { .. } => { + log::warn!( + "Recoverable upload failure while attempting to send ping {}, will retry. Error was {:?}", + document_id, + status + ); + if let Some((success_id, failure_id)) = send_ids { + self.upload_metrics.send_success.cancel_sync(success_id); + self.upload_metrics + .send_failure + .set_stop_and_accumulate(glean, failure_id, stop_time); + } + self.enqueue_ping_from_file(glean, document_id); + self.recoverable_failure_count + .fetch_add(1, Ordering::SeqCst); + } + + Done { .. } => { + log::debug!("Uploader signaled Done. Exiting."); + if let Some((success_id, failure_id)) = send_ids { + self.upload_metrics.send_success.cancel_sync(success_id); + self.upload_metrics.send_failure.cancel_sync(failure_id); + } + return UploadTaskAction::End; + } + }; + + UploadTaskAction::Next + } +} + +/// Splits log message into chunks on Android. +#[cfg(target_os = "android")] +pub fn chunked_log_info(path: &str, payload: &str) { + // Since the logcat ring buffer size is configurable, but it's 'max payload' size is not, + // we must break apart long pings into chunks no larger than the max payload size of 4076b. + // We leave some head space for our prefix. + const MAX_LOG_PAYLOAD_SIZE_BYTES: usize = 4000; + + // If the length of the ping will fit within one logcat payload, then we can + // short-circuit here and avoid some overhead, otherwise we must split up the + // message so that we don't truncate it. + if path.len() + payload.len() <= MAX_LOG_PAYLOAD_SIZE_BYTES { + log::info!("Glean ping to URL: {}\n{}", path, payload); + return; + } + + // Otherwise we break it apart into chunks of smaller size, + // prefixing it with the path and a counter. + let mut start = 0; + let mut end = MAX_LOG_PAYLOAD_SIZE_BYTES; + let mut chunk_idx = 1; + // Might be off by 1 on edge cases, but do we really care? + let total_chunks = payload.len() / MAX_LOG_PAYLOAD_SIZE_BYTES + 1; + + while end < payload.len() { + // Find char boundary from the end. + // It's UTF-8, so it is within 4 bytes from here. + for _ in 0..4 { + if payload.is_char_boundary(end) { + break; + } + end -= 1; + } + + log::info!( + "Glean ping to URL: {} [Part {} of {}]\n{}", + path, + chunk_idx, + total_chunks, + &payload[start..end] + ); + + // Move on with the string + start = end; + end = end + MAX_LOG_PAYLOAD_SIZE_BYTES; + chunk_idx += 1; + } + + // Print any suffix left + if start < payload.len() { + log::info!( + "Glean ping to URL: {} [Part {} of {}]\n{}", + path, + chunk_idx, + total_chunks, + &payload[start..] + ); + } +} + +/// Logs payload in one go (all other OS). 
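+///
+/// Unlike the Android variant above, there is no logcat payload limit to work
+/// around here, so the whole pretty-printed ping body is emitted in a single
+/// `log::info!` call.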
+#[cfg(not(target_os = "android"))]
+pub fn chunked_log_info(_path: &str, payload: &str) {
+    log::info!("{}", payload)
+}
+
+#[cfg(test)]
+mod test {
+    use std::thread;
+    use std::time::Duration;
+
+    use uuid::Uuid;
+
+    use super::*;
+    use crate::metrics::PingType;
+    use crate::{tests::new_glean, PENDING_PINGS_DIRECTORY};
+
+    const PATH: &str = "/submit/app_id/ping_name/schema_version/doc_id";
+
+    #[test]
+    fn doesnt_error_when_there_are_no_pending_pings() {
+        let (glean, _t) = new_glean(None);
+
+        // Try and get the next request.
+        // Verify request was not returned
+        assert_eq!(glean.get_upload_task(), PingUploadTask::done());
+    }
+
+    #[test]
+    fn returns_ping_request_when_there_is_one() {
+        let (glean, dir) = new_glean(None);
+
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Enqueue a ping
+        upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);
+
+        // Try and get the next request.
+        // Verify request was returned
+        let task = upload_manager.get_upload_task(&glean, false);
+        assert!(task.is_upload());
+    }
+
+    #[test]
+    fn returns_as_many_ping_requests_as_there_are() {
+        let (glean, dir) = new_glean(None);
+
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Enqueue a ping multiple times
+        let n = 10;
+        for _ in 0..n {
+            upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);
+        }
+
+        // Verify a request is returned for each submitted ping
+        for _ in 0..n {
+            let task = upload_manager.get_upload_task(&glean, false);
+            assert!(task.is_upload());
+        }
+
+        // Verify that after all requests are returned, none are left
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+    }
+
+    #[test]
+    fn limits_the_number_of_pings_when_there_is_rate_limiting() {
+        let (glean, dir) = new_glean(None);
+
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Add a rate limiter to the upload manager with a max of 10 pings every 3 seconds.
+ let max_pings_per_interval = 10; + upload_manager.set_rate_limiter(3, 10); + + // Enqueue the max number of pings allowed per uploading window + for _ in 0..max_pings_per_interval { + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + } + + // Verify a request is returned for each submitted ping + for _ in 0..max_pings_per_interval { + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + // Enqueue just one more ping + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + + // Verify that we are indeed told to wait because we are at capacity + match upload_manager.get_upload_task(&glean, false) { + PingUploadTask::Wait { time } => { + // Wait for the uploading window to reset + thread::sleep(Duration::from_millis(time)); + } + _ => panic!("Expected upload manager to return a wait task!"), + }; + + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + #[test] + fn clearing_the_queue_works_correctly() { + let (glean, dir) = new_glean(None); + + let upload_manager = PingUploadManager::no_policy(dir.path()); + + // Enqueue a ping multiple times + for _ in 0..10 { + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + } + + // Clear the queue + drop(upload_manager.clear_ping_queue()); + + // Verify there really isn't any ping in the queue + assert_eq!( + upload_manager.get_upload_task(&glean, false), + PingUploadTask::done() + ); + } + + #[test] + fn clearing_the_queue_doesnt_clear_deletion_request_pings() { + let (mut glean, _t) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping multiple times + let n = 10; + for _ in 0..n { + ping_type.submit_sync(&glean, None); + } + + glean + .internal_pings + .deletion_request + .submit_sync(&glean, None); + + // Clear the queue + drop(glean.upload_manager.clear_ping_queue()); + + let upload_task = glean.get_upload_task(); + match upload_task { + PingUploadTask::Upload { request } => assert!(request.is_deletion_request()), + _ => panic!("Expected upload manager to return the next request!"), + } + + // Verify there really isn't any other pings in the queue + assert_eq!(glean.get_upload_task(), PingUploadTask::done()); + } + + #[test] + fn fills_up_queue_successfully_from_disk() { + let (mut glean, dir) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping multiple times + let n = 10; + for _ in 0..n { + ping_type.submit_sync(&glean, None); + } + + // Create a new upload manager pointing to the same data_path as the glean instance. 
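+        // `no_policy` scans the pings directories synchronously, so the
+        // pings submitted above are picked up before the assertions below run.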
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Verify the requests were properly enqueued
+        for _ in 0..n {
+            let task = upload_manager.get_upload_task(&glean, false);
+            assert!(task.is_upload());
+        }
+
+        // Verify that after all requests are returned, none are left
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+    }
+
+    #[test]
+    fn processes_correctly_success_upload_response() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit a ping
+        ping_type.submit_sync(&glean, None);
+
+        // Get the pending ping directory path
+        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);
+
+        // Get the submitted PingRequest
+        match glean.get_upload_task() {
+            PingUploadTask::Upload { request } => {
+                // Simulate the processing of a successful request
+                let document_id = request.document_id;
+                glean.process_ping_upload_response(&document_id, UploadResult::http_status(200));
+                // Verify file was deleted
+                assert!(!pending_pings_dir.join(document_id).exists());
+            }
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Verify that after request is returned, none are left
+        assert_eq!(glean.get_upload_task(), PingUploadTask::done());
+    }
+
+    #[test]
+    fn processes_correctly_client_error_upload_response() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit a ping
+        ping_type.submit_sync(&glean, None);
+
+        // Get the pending ping directory path
+        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);
+
+        // Get the submitted PingRequest
+        match glean.get_upload_task() {
+            PingUploadTask::Upload { request } => {
+                // Simulate the processing of a client error
+                let document_id = request.document_id;
+                glean.process_ping_upload_response(&document_id, UploadResult::http_status(404));
+                // Verify file was deleted
+                assert!(!pending_pings_dir.join(document_id).exists());
+            }
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Verify that after request is returned, none are left
+        assert_eq!(glean.get_upload_task(), PingUploadTask::done());
+    }
+
+    #[test]
+    fn processes_correctly_server_error_upload_response() {
+        let (mut glean, _t) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit a ping
+        ping_type.submit_sync(&glean, None);
+
+        // Get the submitted PingRequest
+        match glean.get_upload_task() {
+            PingUploadTask::Upload { request } => {
+                // Simulate the processing of a server error
+                let document_id = request.document_id;
+                glean.process_ping_upload_response(&document_id, UploadResult::http_status(500));
+                // Verify this ping was indeed re-enqueued
+                match glean.get_upload_task() {
+                    PingUploadTask::Upload { request } => {
+                        assert_eq!(document_id, request.document_id);
+                    }
+                    _ => panic!("Expected upload manager to return the next request!"),
+                }
+            }
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Verify that after request is returned, none are left
+        assert_eq!(glean.get_upload_task(), PingUploadTask::done());
+    }
+
+    #[test]
+    fn processes_correctly_unrecoverable_upload_response() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit a ping
+        ping_type.submit_sync(&glean, None);
+
+        // Get the pending ping directory path
+        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);
+
+        // Get the submitted PingRequest
+        match glean.get_upload_task() {
+            PingUploadTask::Upload { request } => {
+                // Simulate the processing of an unrecoverable error
+                let document_id = request.document_id;
+                glean.process_ping_upload_response(
+                    &document_id,
+                    UploadResult::unrecoverable_failure(),
+                );
+                // Verify file was deleted
+                assert!(!pending_pings_dir.join(document_id).exists());
+            }
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Verify that after request is returned, none are left
+        assert_eq!(glean.get_upload_task(), PingUploadTask::done());
+    }
+
+    #[test]
+    fn new_pings_are_added_while_upload_in_progress() {
+        let (glean, dir) = new_glean(None);
+
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        let doc1 = Uuid::new_v4().to_string();
+        let path1 = format!("/submit/app_id/test-ping/1/{}", doc1);
+
+        let doc2 = Uuid::new_v4().to_string();
+        let path2 = format!("/submit/app_id/test-ping/1/{}", doc2);
+
+        // Enqueue a ping
+        upload_manager.enqueue_ping(&glean, &doc1, &path1, "", None);
+
+        // Try and get the first request.
+        let req = match upload_manager.get_upload_task(&glean, false) {
+            PingUploadTask::Upload { request } => request,
+            _ => panic!("Expected upload manager to return the next request!"),
+        };
+        assert_eq!(doc1, req.document_id);
+
+        // Schedule the next one while the first one is "in progress"
+        upload_manager.enqueue_ping(&glean, &doc2, &path2, "", None);
+
+        // Mark as processed
+        upload_manager.process_ping_upload_response(
+            &glean,
+            &req.document_id,
+            UploadResult::http_status(200),
+        );
+
+        // Get the second request.
+        let req = match upload_manager.get_upload_task(&glean, false) {
+            PingUploadTask::Upload { request } => request,
+            _ => panic!("Expected upload manager to return the next request!"),
+        };
+        assert_eq!(doc2, req.document_id);
+
+        // Mark as processed
+        upload_manager.process_ping_upload_response(
+            &glean,
+            &req.document_id,
+            UploadResult::http_status(200),
+        );
+
+        // ... and then we're done.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+    }
+
+    #[test]
+    fn adds_debug_view_header_to_requests_when_tag_is_set() {
+        let (mut glean, _t) = new_glean(None);
+
+        glean.set_debug_view_tag("valid-tag");
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit a ping
+        ping_type.submit_sync(&glean, None);
+
+        // Get the submitted PingRequest
+        match glean.get_upload_task() {
+            PingUploadTask::Upload { request } => {
+                assert_eq!(request.headers.get("X-Debug-ID").unwrap(), "valid-tag")
+            }
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+    }
+
+    #[test]
+    fn duplicates_are_not_enqueued() {
+        let (glean, dir) = new_glean(None);
+
+        // Create a new upload manager so that we have access to its functions directly,
+        // make it synchronous so we don't have to manually wait for the scanning to finish.
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        let doc_id = Uuid::new_v4().to_string();
+        let path = format!("/submit/app_id/test-ping/1/{}", doc_id);
+
+        // Try to enqueue a ping with the same doc_id twice
+        upload_manager.enqueue_ping(&glean, &doc_id, &path, "", None);
+        upload_manager.enqueue_ping(&glean, &doc_id, &path, "", None);
+
+        // Get a task once
+        let task = upload_manager.get_upload_task(&glean, false);
+        assert!(task.is_upload());
+
+        // There should be no more queued tasks
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+    }
+
+    #[test]
+    fn maximum_of_recoverable_errors_is_enforced_for_uploading_window() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit the ping multiple times
+        let n = 5;
+        for _ in 0..n {
+            ping_type.submit_sync(&glean, None);
+        }
+
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Set a policy for max recoverable failures; this is usually disabled for tests.
+        let max_recoverable_failures = 3;
+        upload_manager
+            .policy
+            .set_max_recoverable_failures(Some(max_recoverable_failures));
+
+        // Return the max recoverable failures in a row
+        for _ in 0..max_recoverable_failures {
+            match upload_manager.get_upload_task(&glean, false) {
+                PingUploadTask::Upload { request } => {
+                    upload_manager.process_ping_upload_response(
+                        &glean,
+                        &request.document_id,
+                        UploadResult::recoverable_failure(),
+                    );
+                }
+                _ => panic!("Expected upload manager to return the next request!"),
+            }
+        }
+
+        // Verify that after returning the max amount of recoverable failures,
+        // we are done even though we haven't gotten all the enqueued requests.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Verify all requests are returned when we try again.
+        for _ in 0..n {
+            let task = upload_manager.get_upload_task(&glean, false);
+            assert!(task.is_upload());
+        }
+    }
+
+    #[test]
+    fn quota_is_enforced_when_enqueueing_cached_pings() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // Submit the ping multiple times
+        let n = 10;
+        for _ in 0..n {
+            ping_type.submit_sync(&glean, None);
+        }
+
+        let directory_manager = PingDirectoryManager::new(dir.path());
+        let pending_pings = directory_manager.process_dirs().pending_pings;
+        // The pending pings array is sorted by date in ascending order;
+        // the newest element is the last one.
+        let (_, newest_ping) = &pending_pings.last().unwrap();
+        let (newest_ping_id, _, _, _) = &newest_ping;
+
+        // Create a new upload manager pointing to the same data_path as the glean instance.
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Set the quota to just a little over the size of an empty ping file.
+        // This way we can check that one ping is kept and all others are deleted.
+        //
+        // From manual testing we figured out an empty ping file is 324 bytes,
+        // so we set this a little over that so minor changes to the ping structure
+        // don't immediately break this.
+        upload_manager
+            .policy
+            .set_max_pending_pings_directory_size(Some(500));
+
+        // Get a task once
+        // One ping should have been enqueued.
+        // Make sure it is the newest ping.
+        match upload_manager.get_upload_task(&glean, false) {
+            PingUploadTask::Upload { request } => assert_eq!(&request.document_id, newest_ping_id),
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Verify that no other requests were returned;
+        // they should all have been deleted because the pending pings quota was hit.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Verify that the correct number of deleted pings was recorded
+        assert_eq!(
+            n - 1,
+            upload_manager
+                .upload_metrics
+                .deleted_pings_after_quota_hit
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+        assert_eq!(
+            n,
+            upload_manager
+                .upload_metrics
+                .pending_pings
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+    }
+
+    #[test]
+    fn number_quota_is_enforced_when_enqueueing_cached_pings() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        // How many pings we allow at maximum
+        let count_quota = 3;
+        // The number of pings we fill the pending pings directory with.
+        let n = 10;
+
+        // Submit the ping multiple times
+        for _ in 0..n {
+            ping_type.submit_sync(&glean, None);
+        }
+
+        let directory_manager = PingDirectoryManager::new(dir.path());
+        let pending_pings = directory_manager.process_dirs().pending_pings;
+        // The pending pings array is sorted by date in ascending order;
+        // the newest element is the last one.
+        let expected_pings = pending_pings
+            .iter()
+            .rev()
+            .take(count_quota)
+            .map(|(_, ping)| ping.0.clone())
+            .collect::<Vec<_>>();
+
+        // Create a new upload manager pointing to the same data_path as the glean instance.
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        upload_manager
+            .policy
+            .set_max_pending_pings_count(Some(count_quota as u64));
+
+        // Get the expected number of tasks.
+        // Only `count_quota` pings should have been enqueued;
+        // make sure they are the newest ones.
+        for ping_id in expected_pings.iter().rev() {
+            match upload_manager.get_upload_task(&glean, false) {
+                PingUploadTask::Upload { request } => assert_eq!(&request.document_id, ping_id),
+                _ => panic!("Expected upload manager to return the next request!"),
+            }
+        }
+
+        // Verify that no other requests were returned;
+        // they should all have been deleted because the pending pings quota was hit.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Verify that the correct number of deleted pings was recorded
+        assert_eq!(
+            (n - count_quota) as i32,
+            upload_manager
+                .upload_metrics
+                .deleted_pings_after_quota_hit
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+        assert_eq!(
+            n as i32,
+            upload_manager
+                .upload_metrics
+                .pending_pings
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+    }
+
+    #[test]
+    fn size_and_count_quota_work_together_size_first() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        let expected_number_of_pings = 3;
+        // The number of pings we fill the pending pings directory with.
+        let n = 10;
+
+        // Submit the ping multiple times
+        for _ in 0..n {
+            ping_type.submit_sync(&glean, None);
+        }
+
+        let directory_manager = PingDirectoryManager::new(dir.path());
+        let pending_pings = directory_manager.process_dirs().pending_pings;
+        // The pending pings array is sorted by date in ascending order;
+        // the newest element is the last one.
+        let expected_pings = pending_pings
+            .iter()
+            .rev()
+            .take(expected_number_of_pings)
+            .map(|(_, ping)| ping.0.clone())
+            .collect::<Vec<_>>();
+
+        // Create a new upload manager pointing to the same data_path as the glean instance.
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // From manual testing we figured out an empty ping file is 324 bytes,
+        // so this size quota allows 3 pings.
+        upload_manager
+            .policy
+            .set_max_pending_pings_directory_size(Some(1000));
+        upload_manager.policy.set_max_pending_pings_count(Some(5));
+
+        // Get the expected number of tasks.
+        // Only `expected_number_of_pings` should have been enqueued;
+        // make sure they are the newest ones.
+        for ping_id in expected_pings.iter().rev() {
+            match upload_manager.get_upload_task(&glean, false) {
+                PingUploadTask::Upload { request } => assert_eq!(&request.document_id, ping_id),
+                _ => panic!("Expected upload manager to return the next request!"),
+            }
+        }
+
+        // Verify that no other requests were returned;
+        // they should all have been deleted because the pending pings quota was hit.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Verify that the correct number of deleted pings was recorded
+        assert_eq!(
+            (n - expected_number_of_pings) as i32,
+            upload_manager
+                .upload_metrics
+                .deleted_pings_after_quota_hit
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+        assert_eq!(
+            n as i32,
+            upload_manager
+                .upload_metrics
+                .pending_pings
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+    }
+
+    #[test]
+    fn size_and_count_quota_work_together_count_first() {
+        let (mut glean, dir) = new_glean(None);
+
+        // Register a ping for testing
+        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
+        glean.register_ping_type(&ping_type);
+
+        let expected_number_of_pings = 2;
+        // The number of pings we fill the pending pings directory with.
+        let n = 10;
+
+        // Submit the ping multiple times
+        for _ in 0..n {
+            ping_type.submit_sync(&glean, None);
+        }
+
+        let directory_manager = PingDirectoryManager::new(dir.path());
+        let pending_pings = directory_manager.process_dirs().pending_pings;
+        // The pending pings array is sorted by date in ascending order;
+        // the newest element is the last one.
+        let expected_pings = pending_pings
+            .iter()
+            .rev()
+            .take(expected_number_of_pings)
+            .map(|(_, ping)| ping.0.clone())
+            .collect::<Vec<_>>();
+
+        // Create a new upload manager pointing to the same data_path as the glean instance.
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // From manual testing we figured out an empty ping file is 324 bytes,
+        // so the size quota alone would allow 3 pings; the count quota of 2 kicks in first.
+        upload_manager
+            .policy
+            .set_max_pending_pings_directory_size(Some(1000));
+        upload_manager.policy.set_max_pending_pings_count(Some(2));
+
+        // Get the expected number of tasks.
+        // Only `expected_number_of_pings` should have been enqueued;
+        // make sure they are the newest ones.
+        for ping_id in expected_pings.iter().rev() {
+            match upload_manager.get_upload_task(&glean, false) {
+                PingUploadTask::Upload { request } => assert_eq!(&request.document_id, ping_id),
+                _ => panic!("Expected upload manager to return the next request!"),
+            }
+        }
+
+        // Verify that no other requests were returned;
+        // they should all have been deleted because the pending pings quota was hit.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Verify that the correct number of deleted pings was recorded
+        assert_eq!(
+            (n - expected_number_of_pings) as i32,
+            upload_manager
+                .upload_metrics
+                .deleted_pings_after_quota_hit
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+        assert_eq!(
+            n as i32,
+            upload_manager
+                .upload_metrics
+                .pending_pings
+                .get_value(&glean, Some("metrics"))
+                .unwrap()
+        );
+    }
+
+    #[test]
+    fn maximum_wait_attemps_is_enforced() {
+        let (glean, dir) = new_glean(None);
+
+        let mut upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Define a max_wait_attempts policy; this is disabled for tests by default.
+        let max_wait_attempts = 3;
+        upload_manager
+            .policy
+            .set_max_wait_attempts(Some(max_wait_attempts));
+
+        // Add a rate limiter to the upload manager with a maximum of 1 ping per 5 seconds.
+        //
+        // We arbitrarily set the maximum pings per interval to a very low number;
+        // when the rate limiter reaches its limit, get_upload_task returns a PingUploadTask::Wait,
+        // which will allow us to test the limitations around returning too many of those in a row.
+        let secs_per_interval = 5;
+        let max_pings_per_interval = 1;
+        upload_manager.set_rate_limiter(secs_per_interval, max_pings_per_interval);
+
+        // Enqueue two pings
+        upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);
+        upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);
+
+        // Get the first ping; it should be returned normally.
+        match upload_manager.get_upload_task(&glean, false) {
+            PingUploadTask::Upload { .. } => {}
+            _ => panic!("Expected upload manager to return the next request!"),
+        }
+
+        // Try to get the next ping;
+        // we should be throttled and thus get a PingUploadTask::Wait.
+        // Check that we are indeed allowed to get this response as many times as expected.
+        for _ in 0..max_wait_attempts {
+            let task = upload_manager.get_upload_task(&glean, false);
+            assert!(task.is_wait());
+        }
+
+        // Check that after we get PingUploadTask::Wait the allowed number of times,
+        // we then get PingUploadTask::Done.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Wait for the rate limiter to allow upload tasks again.
+        thread::sleep(Duration::from_secs(secs_per_interval));
+
+        // Check that we are allowed to get pings again.
+        let task = upload_manager.get_upload_task(&glean, false);
+        assert!(task.is_upload());
+
+        // And once we are done we don't need to wait anymore.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+    }
+
+    #[test]
+    fn wait_task_contains_expected_wait_time_when_pending_pings_dir_not_processed_yet() {
+        let (glean, dir) = new_glean(None);
+        let upload_manager = PingUploadManager::new(dir.path(), "test");
+        match upload_manager.get_upload_task(&glean, false) {
+            PingUploadTask::Wait { time } => {
+                assert_eq!(time, WAIT_TIME_FOR_PING_PROCESSING);
+            }
+            _ => panic!("Expected upload manager to return a wait task!"),
+        };
+    }
+
+    #[test]
+    fn cannot_enqueue_ping_while_its_being_processed() {
+        let (glean, dir) = new_glean(None);
+
+        let upload_manager = PingUploadManager::no_policy(dir.path());
+
+        // Enqueue a ping and start processing it
+        let identifier = &Uuid::new_v4().to_string();
+        upload_manager.enqueue_ping(&glean, identifier, PATH, "", None);
+        assert!(upload_manager.get_upload_task(&glean, false).is_upload());
+
+        // Attempt to re-enqueue the same ping
+        upload_manager.enqueue_ping(&glean, identifier, PATH, "", None);
+
+        // No new pings should have been enqueued, so the upload task is Done.
+        assert_eq!(
+            upload_manager.get_upload_task(&glean, false),
+            PingUploadTask::done()
+        );
+
+        // Process the upload response
+        upload_manager.process_ping_upload_response(
+            &glean,
+            identifier,
+            UploadResult::http_status(200),
+        );
+    }
+}
diff --git a/third_party/rust/glean-core/src/upload/policy.rs b/third_party/rust/glean-core/src/upload/policy.rs
new file mode 100644
index 0000000000..91467ebd82
--- /dev/null
+++ b/third_party/rust/glean-core/src/upload/policy.rs
@@ -0,0 +1,112 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Policies for ping storage, uploading and requests.
+
+const MAX_RECOVERABLE_FAILURES: u32 = 3;
+const MAX_WAIT_ATTEMPTS: u32 = 3;
+const MAX_PING_BODY_SIZE: usize = 1024 * 1024; // 1 MB
+const MAX_PENDING_PINGS_DIRECTORY_SIZE: u64 = 10 * 1024 * 1024; // 10 MB
+
+// The average number of baseline pings per client (on Fenix) is about 15 pings a day.
+// The P99 value is ~110.
+// With a maximum of (a nice round) 250 we can store about 2 days' worth of pings.
+// A baseline ping file averages about 600 bytes, so that's roughly 150 kB we store in total.
+// With the default rate limit of 15 pings per 60s it would take roughly 16 minutes
+// to send out all pending pings.
+const MAX_PENDING_PINGS_COUNT: u64 = 250;
+
+/// A struct holding the values for all the policies related to ping storage, uploading and requests.
+#[derive(Debug)]
+pub struct Policy {
+    /// The maximum number of recoverable failures allowed per uploading window.
+    ///
+    /// Limiting this is necessary to avoid infinite loops on requesting upload tasks.
+    max_recoverable_failures: Option<u32>,
+    /// The maximum number of [`PingUploadTask::Wait`] responses a user may get in a row
+    /// when calling [`get_upload_task`].
+    ///
+    /// Limiting this is necessary to avoid infinite loops on requesting upload tasks.
+    max_wait_attempts: Option<u32>,
+    /// The maximum size in bytes a ping body may have to be eligible for upload.
+    max_ping_body_size: Option<usize>,
+    /// The maximum size in bytes the pending pings directory may have on disk.
+    max_pending_pings_directory_size: Option<u64>,
+    /// The maximum number of pending pings on disk.
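+    ///
+    /// Defaults to [`MAX_PENDING_PINGS_COUNT`] (250); see the sizing rationale
+    /// in the comment on that constant above.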
+ max_pending_pings_count: Option<u64>, +} + +impl Default for Policy { + fn default() -> Self { + Policy { + max_recoverable_failures: Some(MAX_RECOVERABLE_FAILURES), + max_wait_attempts: Some(MAX_WAIT_ATTEMPTS), + max_ping_body_size: Some(MAX_PING_BODY_SIZE), + max_pending_pings_directory_size: Some(MAX_PENDING_PINGS_DIRECTORY_SIZE), + max_pending_pings_count: Some(MAX_PENDING_PINGS_COUNT), + } + } +} + +impl Policy { + pub fn max_recoverable_failures(&self) -> u32 { + match &self.max_recoverable_failures { + Some(v) => *v, + None => u32::MAX, + } + } + + #[cfg(test)] + pub fn set_max_recoverable_failures(&mut self, v: Option<u32>) { + self.max_recoverable_failures = v; + } + + pub fn max_wait_attempts(&self) -> u32 { + match &self.max_wait_attempts { + Some(v) => *v, + None => u32::MAX, + } + } + + #[cfg(test)] + pub fn set_max_wait_attempts(&mut self, v: Option<u32>) { + self.max_wait_attempts = v; + } + + pub fn max_ping_body_size(&self) -> usize { + match &self.max_ping_body_size { + Some(v) => *v, + None => usize::MAX, + } + } + + #[cfg(test)] + pub fn set_max_ping_body_size(&mut self, v: Option<usize>) { + self.max_ping_body_size = v; + } + + pub fn max_pending_pings_directory_size(&self) -> u64 { + match &self.max_pending_pings_directory_size { + Some(v) => *v, + None => u64::MAX, + } + } + + pub fn max_pending_pings_count(&self) -> u64 { + match &self.max_pending_pings_count { + Some(v) => *v, + None => u64::MAX, + } + } + + #[cfg(test)] + pub fn set_max_pending_pings_directory_size(&mut self, v: Option<u64>) { + self.max_pending_pings_directory_size = v; + } + + #[cfg(test)] + pub fn set_max_pending_pings_count(&mut self, v: Option<u64>) { + self.max_pending_pings_count = v; + } +} diff --git a/third_party/rust/glean-core/src/upload/request.rs b/third_party/rust/glean-core/src/upload/request.rs new file mode 100644 index 0000000000..0fd5ec5713 --- /dev/null +++ b/third_party/rust/glean-core/src/upload/request.rs @@ -0,0 +1,289 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Ping request representation. + +use std::collections::HashMap; + +use chrono::prelude::{DateTime, Utc}; +use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use serde_json::{self, Value as JsonValue}; +use std::io::prelude::*; + +use crate::error::{ErrorKind, Result}; +use crate::system; + +/// A representation for request headers. +pub type HeaderMap = HashMap<String, String>; + +/// Creates a formatted date string that can be used with Date headers. +pub(crate) fn create_date_header_value(current_time: DateTime<Utc>) -> String { + // Date headers are required to be in the following format: + // + // <day-name>, <day> <month> <year> <hour>:<minute>:<second> GMT + // + // as documented here: + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Date + // Unfortunately we can't use `current_time.to_rfc2822()` as it + // formats as "Mon, 22 Jun 2020 10:40:34 +0000", with an ending + // "+0000" instead of "GMT". That's why we need to go with manual + // formatting. + current_time.format("%a, %d %b %Y %T GMT").to_string() +} + +fn create_x_telemetry_agent_header_value( + version: &str, + language_binding_name: &str, + system: &str, +) -> String { + format!( + "Glean/{} ({} on {})", + version, language_binding_name, system + ) +} + +/// Attempt to gzip the contents of a ping. 
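+///
+/// Returns `None` if compression fails, in which case callers (see
+/// [`Builder::body`] below) fall back to the uncompressed content.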
+fn gzip_content(path: &str, content: &[u8]) -> Option<Vec<u8>> {
+    let mut gzipper = GzEncoder::new(Vec::new(), Compression::default());
+
+    // Attempt to add the content to the gzipper.
+    if let Err(e) = gzipper.write_all(content) {
+        log::warn!("Failed to write to the gzipper: {} - {:?}", path, e);
+        return None;
+    }
+
+    gzipper.finish().ok()
+}
+
+pub struct Builder {
+    document_id: Option<String>,
+    path: Option<String>,
+    body: Option<Vec<u8>>,
+    headers: HeaderMap,
+    body_max_size: usize,
+}
+
+impl Builder {
+    /// Creates a new builder for a PingRequest.
+    pub fn new(language_binding_name: &str, body_max_size: usize) -> Self {
+        let mut headers = HashMap::new();
+        headers.insert(
+            "X-Telemetry-Agent".to_string(),
+            create_x_telemetry_agent_header_value(
+                crate::GLEAN_VERSION,
+                language_binding_name,
+                system::OS,
+            ),
+        );
+        headers.insert(
+            "Content-Type".to_string(),
+            "application/json; charset=utf-8".to_string(),
+        );
+
+        Self {
+            document_id: None,
+            path: None,
+            body: None,
+            headers,
+            body_max_size,
+        }
+    }
+
+    /// Sets the document_id for this request.
+    pub fn document_id<S: Into<String>>(mut self, value: S) -> Self {
+        self.document_id = Some(value.into());
+        self
+    }
+
+    /// Sets the path for this request.
+    pub fn path<S: Into<String>>(mut self, value: S) -> Self {
+        self.path = Some(value.into());
+        self
+    }
+
+    /// Sets the body for this request.
+    ///
+    /// This method will also attempt to gzip the body contents
+    /// and add headers related to the body that was just added.
+    ///
+    /// Namely, these are the "Content-Length" header with the length of the body
+    /// and, if gzipping the contents succeeds, the "Content-Encoding"="gzip" header.
+    ///
+    /// **Important**
+    /// If we are unable to gzip we don't panic and instead just set the uncompressed body.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic in case we try to set the body before setting the path.
+    pub fn body<S: Into<String>>(mut self, value: S) -> Self {
+        // Attempt to gzip the body contents.
+        let original_as_string = value.into();
+        let gzipped_content = gzip_content(
+            self.path
+                .as_ref()
+                .expect("Path must be set before attempting to set the body"),
+            original_as_string.as_bytes(),
+        );
+        let add_gzip_header = gzipped_content.is_some();
+        let body = gzipped_content.unwrap_or_else(|| original_as_string.into_bytes());
+
+        // Include headers related to body
+        self = self.header("Content-Length", &body.len().to_string());
+        if add_gzip_header {
+            self = self.header("Content-Encoding", "gzip");
+        }
+
+        self.body = Some(body);
+        self
+    }
+
+    /// Sets a header for this request.
+    pub fn header<S: Into<String>>(mut self, key: S, value: S) -> Self {
+        self.headers.insert(key.into(), value.into());
+        self
+    }
+
+    /// Sets multiple headers for this request at once.
+    pub fn headers(mut self, values: HeaderMap) -> Self {
+        self.headers.extend(values);
+        self
+    }
+
+    /// Consumes the builder and creates a PingRequest.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if any of the required fields are missing:
+    /// `document_id`, `path` and `body`.
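+    ///
+    /// Illustrative usage (a sketch mirroring the tests at the end of this file;
+    /// the document id and path are placeholders):
+    ///
+    /// ```ignore
+    /// let request = PingRequest::builder("Rust", 1024 * 1024)
+    ///     .document_id("some-doc-id")
+    ///     .path("/submit/app_id/ping-name/1/some-doc-id")
+    ///     .body("{}")
+    ///     .build()
+    ///     .unwrap();
+    /// ```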
+    pub fn build(self) -> Result<PingRequest> {
+        let body = self
+            .body
+            .expect("body must be set before attempting to build PingRequest");
+
+        if body.len() > self.body_max_size {
+            return Err(ErrorKind::PingBodyOverflow(body.len()).into());
+        }
+
+        Ok(PingRequest {
+            document_id: self
+                .document_id
+                .expect("document_id must be set before attempting to build PingRequest"),
+            path: self
+                .path
+                .expect("path must be set before attempting to build PingRequest"),
+            body,
+            headers: self.headers,
+        })
+    }
+}
+
+/// Represents a request to upload a ping.
+#[derive(PartialEq, Eq, Debug, Clone)]
+pub struct PingRequest {
+    /// The Job ID to identify this request;
+    /// this is the same as the ping UUID.
+    pub document_id: String,
+    /// The path for the server to upload the ping to.
+    pub path: String,
+    /// The body of the request, as a byte array. If gzip encoded, then
+    /// the `headers` list will contain a `Content-Encoding` header with
+    /// the value `gzip`.
+    pub body: Vec<u8>,
+    /// A map with all the headers to be sent with the request.
+    pub headers: HeaderMap,
+}
+
+impl PingRequest {
+    /// Creates a new builder-style structure to help build a PingRequest.
+    ///
+    /// # Arguments
+    ///
+    /// * `language_binding_name` - The name of the language used by the binding that instantiated this Glean instance.
+    ///   This is used to build the X-Telemetry-Agent header value.
+    /// * `body_max_size` - The maximum size in bytes the compressed ping body may have to be eligible for upload.
+    pub fn builder(language_binding_name: &str, body_max_size: usize) -> Builder {
+        Builder::new(language_binding_name, body_max_size)
+    }
+
+    /// Verifies whether the current request is for a deletion-request ping.
+    pub fn is_deletion_request(&self) -> bool {
+        // The path format should be `/submit/<app_id>/<ping_name>/<schema_version>/<doc_id>`
+        self.path
+            .split('/')
+            .nth(3)
+            .map(|url| url == "deletion-request")
+            .unwrap_or(false)
+    }
+
+    /// Decompresses and pretty-formats the ping payload.
+    ///
+    /// Should be used for logging when required.
+    /// This decompresses the payload in memory.
+    pub fn pretty_body(&self) -> Option<String> {
+        let mut gz = GzDecoder::new(&self.body[..]);
+        let mut s = String::with_capacity(self.body.len());
+
+        gz.read_to_string(&mut s)
+            .ok()
+            .map(|_| &s[..])
+            .or_else(|| std::str::from_utf8(&self.body).ok())
+            .and_then(|payload| serde_json::from_str::<JsonValue>(payload).ok())
+            .and_then(|json| serde_json::to_string_pretty(&json).ok())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use chrono::offset::TimeZone;
+
+    #[test]
+    fn date_header_resolution() {
+        let date: DateTime<Utc> = Utc.ymd(2018, 2, 25).and_hms(11, 10, 37);
+        let test_value = create_date_header_value(date);
+        assert_eq!("Sun, 25 Feb 2018 11:10:37 GMT", test_value);
+    }
+
+    #[test]
+    fn x_telemetry_agent_header_resolution() {
+        let test_value = create_x_telemetry_agent_header_value("0.0.0", "Rust", "Windows");
+        assert_eq!("Glean/0.0.0 (Rust on Windows)", test_value);
+    }
+
+    #[test]
+    fn correctly_builds_ping_request() {
+        let request = PingRequest::builder(/* language_binding_name */ "Rust", 1024 * 1024)
+            .document_id("woop")
+            .path("/random/path/doesnt/matter")
+            .body("{}")
+            .build()
+            .unwrap();
+
+        assert_eq!(request.document_id, "woop");
+        assert_eq!(request.path, "/random/path/doesnt/matter");
+
+        // Make sure all the expected headers were added.
+        assert!(request.headers.contains_key("X-Telemetry-Agent"));
+        assert!(request.headers.contains_key("Content-Type"));
+        assert!(request.headers.contains_key("Content-Length"));
+
+        // The `Date` header is added by `get_upload_task` just before
+        // returning the upload request.
+    }
+
+    #[test]
+    fn errors_when_request_body_exceeds_max_size() {
+        // Create a new builder with an arbitrarily small value,
+        // so we can test that the builder errors when the body exceeds the maximum size.
+        let request = Builder::new(
+            /* language_binding_name */ "Rust", /* body_max_size */ 1,
+        )
+        .document_id("woop")
+        .path("/random/path/doesnt/matter")
+        .body("{}")
+        .build();
+
+        assert!(request.is_err());
+    }
+}
diff --git a/third_party/rust/glean-core/src/upload/result.rs b/third_party/rust/glean-core/src/upload/result.rs
new file mode 100644
index 0000000000..3097af9d64
--- /dev/null
+++ b/third_party/rust/glean-core/src/upload/result.rs
@@ -0,0 +1,98 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+/// The result of an attempted ping upload.
+#[derive(Debug)]
+pub enum UploadResult {
+    /// A recoverable failure.
+    ///
+    /// During upload something went wrong,
+    /// e.g. the network connection failed.
+    /// The upload should be retried at a later time.
+    RecoverableFailure {
+        #[doc(hidden)]
+        /// Unused field. Required because UniFFI can't handle variants without fields.
+        unused: i8,
+    },
+
+    /// An unrecoverable upload failure.
+    ///
+    /// A possible cause might be a malformed URL.
+    UnrecoverableFailure {
+        #[doc(hidden)]
+        /// Unused field. Required because UniFFI can't handle variants without fields.
+        unused: i8,
+    },
+
+    /// An HTTP response code.
+    ///
+    /// This can still indicate an error, depending on the status code.
+    HttpStatus {
+        /// The HTTP status code
+        code: i32,
+    },
+
+    /// Signal that this uploader is done with work
+    /// and won't accept new work.
+    Done {
+        #[doc(hidden)]
+        /// Unused field. Required because UniFFI can't handle variants without fields.
+        unused: i8,
+    },
+}
+
+impl UploadResult {
+    /// Gets the label to be used in recording error counts for upload.
+    ///
+    /// Returns `None` if the upload finished successfully.
+    /// Failures are recorded in the `ping_upload_failure` metric.
+    pub fn get_label(&self) -> Option<&str> {
+        match self {
+            UploadResult::HttpStatus { code: 200..=299 } => None,
+            UploadResult::HttpStatus { code: 400..=499 } => Some("status_code_4xx"),
+            UploadResult::HttpStatus { code: 500..=599 } => Some("status_code_5xx"),
+            UploadResult::HttpStatus { .. } => Some("status_code_unknown"),
+            UploadResult::UnrecoverableFailure { .. } => Some("unrecoverable"),
+            UploadResult::RecoverableFailure { .. } => Some("recoverable"),
+            UploadResult::Done { .. } => None,
+        }
+    }
+
+    /// A recoverable failure.
+    ///
+    /// During upload something went wrong,
+    /// e.g. the network connection failed.
+    /// The upload should be retried at a later time.
+    pub fn recoverable_failure() -> Self {
+        Self::RecoverableFailure { unused: 0 }
+    }
+
+    /// An unrecoverable upload failure.
+    ///
+    /// A possible cause might be a malformed URL.
+    pub fn unrecoverable_failure() -> Self {
+        Self::UnrecoverableFailure { unused: 0 }
+    }
+
+    /// An HTTP response code.
+    ///
+    /// This can still indicate an error, depending on the status code.
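+    ///
+    /// For illustration (see `get_label` above): `http_status(200)` yields no
+    /// error label, `http_status(404)` maps to `"status_code_4xx"`, and
+    /// `http_status(500)` maps to `"status_code_5xx"`.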
+    pub fn http_status(code: i32) -> Self {
+        Self::HttpStatus { code }
+    }
+
+    /// This uploader is done.
+    pub fn done() -> Self {
+        Self::Done { unused: 0 }
+    }
+}
+
+/// Communicates back whether the uploader loop should continue.
+#[derive(Debug)]
+pub enum UploadTaskAction {
+    /// Instruct the caller to continue with work.
+    Next,
+    /// Instruct the caller to end work.
+    End,
+}
diff --git a/third_party/rust/glean-core/src/util.rs b/third_party/rust/glean-core/src/util.rs
new file mode 100644
index 0000000000..52cc5d57ce
--- /dev/null
+++ b/third_party/rust/glean-core/src/util.rs
@@ -0,0 +1,312 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use chrono::{DateTime, FixedOffset, Local};
+
+use crate::common_metric_data::CommonMetricDataInternal;
+use crate::error_recording::{record_error, ErrorType};
+use crate::metrics::TimeUnit;
+use crate::Glean;
+
+/// Generates a pipeline-friendly string
+/// that replaces non-alphanumeric characters with dashes.
+pub fn sanitize_application_id(application_id: &str) -> String {
+    let mut last_dash = false;
+    application_id
+        .chars()
+        .filter_map(|x| match x {
+            'A'..='Z' | 'a'..='z' | '0'..='9' => {
+                last_dash = false;
+                Some(x.to_ascii_lowercase())
+            }
+            _ => {
+                let result = if last_dash { None } else { Some('-') };
+                last_dash = true;
+                result
+            }
+        })
+        .collect()
+}
+
+/// Generates an ISO8601 compliant date/time string for the given time,
+/// truncating it to the provided [`TimeUnit`].
+///
+/// # Arguments
+///
+/// * `datetime` - the [`DateTime`] object that holds the date, time and timezone information.
+/// * `truncate_to` - the desired resolution to use for the output string.
+///
+/// # Returns
+///
+/// A string representing the provided date/time truncated to the requested time unit.
+pub fn get_iso_time_string(datetime: DateTime<FixedOffset>, truncate_to: TimeUnit) -> String {
+    datetime.format(truncate_to.format_pattern()).to_string()
+}
+
+/// Gets the current date & time with a fixed-offset timezone.
+///
+/// This converts from the `Local` timezone into its fixed-offset equivalent.
+/// If a timezone outside of [-24h, +24h] is detected it corrects the timezone offset to UTC (+0).
+pub(crate) fn local_now_with_offset() -> DateTime<FixedOffset> {
+    #[cfg(target_os = "windows")]
+    {
+        // `Local::now` takes the user's timezone offset
+        // and panics if it's not within a range of [-24, +24] hours.
+        // This causes crashes in a small number of clients on Windows.
+        //
+        // We can't determine the faulty clients
+        // or the circumstances under which this happens,
+        // so the best we can do is have a workaround:
+        //
+        // We try getting the time and timezone first,
+        // then manually check that it is a valid timezone offset.
+        // If it is, we proceed and use that time and offset.
+        // If it isn't, we fall back to UTC.
+        //
+        // This has the small downside that it will use 2 calls to get the time,
+        // but only on Windows.
+        //
+        // See https://bugzilla.mozilla.org/show_bug.cgi?id=1611770.
+
+        use chrono::Utc;
+
+        // Get timespec, including the user's timezone.
+        let tm = time::now();
+        // Same as chrono:
+        // https://docs.rs/chrono/0.4.10/src/chrono/offset/local.rs.html#37
+        let offset = tm.tm_utcoff;
+        if FixedOffset::east_opt(offset).is_none() {
+            log::warn!(
+                "Detected invalid timezone offset: {}. Using UTC fallback.",
+                offset
+            );
+            let now: DateTime<Utc> = Utc::now();
+            let utc_offset = FixedOffset::east(0);
+            return now.with_timezone(&utc_offset);
+        }
+    }
+
+    let now: DateTime<Local> = Local::now();
+    now.with_timezone(now.offset())
+}
+
+/// Truncates a string, ensuring that it doesn't end in the middle of a codepoint.
+///
+/// # Arguments
+///
+/// * `value` - The string to truncate.
+/// * `length` - The length, in bytes, to truncate to. The resulting string will
+///   be at most this many bytes, but may be shorter to prevent ending in the middle
+///   of a codepoint.
+///
+/// # Returns
+///
+/// A string, with at most `length` bytes.
+pub(crate) fn truncate_string_at_boundary<S: Into<String>>(value: S, length: usize) -> String {
+    let s = value.into();
+    if s.len() > length {
+        for i in (0..=length).rev() {
+            if s.is_char_boundary(i) {
+                return s[0..i].to_string();
+            }
+        }
+        // If we never saw a character boundary, the safest thing we can do is
+        // return the empty string, though this should never happen in practice.
+        return "".to_string();
+    }
+    s
+}
+
+/// Truncates a string, ensuring that it doesn't end in the middle of a codepoint.
+/// If the string required truncation, records an error through the error
+/// reporting mechanism.
+///
+/// # Arguments
+///
+/// * `glean` - The Glean instance the metric doing the truncation belongs to.
+/// * `meta` - The metadata for the metric. Used for recording the error.
+/// * `value` - The String to truncate.
+/// * `length` - The length, in bytes, to truncate to. The resulting string will
+///   be at most this many bytes, but may be shorter to prevent ending in the middle
+///   of a codepoint.
+///
+/// # Returns
+///
+/// A string, with at most `length` bytes.
+pub(crate) fn truncate_string_at_boundary_with_error<S: Into<String>>(
+    glean: &Glean,
+    meta: &CommonMetricDataInternal,
+    value: S,
+    length: usize,
+) -> String {
+    let s = value.into();
+    if s.len() > length {
+        let msg = format!("Value length {} exceeds maximum of {}", s.len(), length);
+        record_error(glean, meta, ErrorType::InvalidOverflow, msg, None);
+        truncate_string_at_boundary(s, length)
+    } else {
+        s
+    }
+}
+
+// On i686 on Windows, the CPython interpreter sets the FPU precision control
+// flag to 53 bits of precision, rather than the 64 bit default. On x86_64 on
+// Windows, the CPython interpreter changes the rounding control settings. This
+// causes different floating point results than on other architectures. This
+// scope guard makes it easy to set the correct precision and rounding control
+// to match our other targets and platforms.
+//
+// See https://bugzilla.mozilla.org/show_bug.cgi?id=1623335 for additional context.
+#[cfg(all(target_os = "windows", target_env = "gnu"))]
+pub mod floating_point_context {
+    // `size_t` is "pointer size", which is equivalent to Rust's `usize`.
+    // It's defined as such in libc:
+    // * https://github.com/rust-lang/libc/blob/bcbfeb5516cd5bb055198dbfbddf8d626fa2be07/src/unix/mod.rs#L19
+    // * https://github.com/rust-lang/libc/blob/bcbfeb5516cd5bb055198dbfbddf8d626fa2be07/src/windows/mod.rs#L16
+    #[allow(non_camel_case_types)]
+    type size_t = usize;
+
+    #[link(name = "m")]
+    extern "C" {
+        // Gets and sets the floating point control word.
+        // See documentation here:
+        // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/controlfp-s
+        fn _controlfp_s(current: *mut size_t, new: size_t, mask: size_t) -> size_t;
+    }
+
+    // Rounding control mask
+    const MCW_RC: size_t = 0x00000300;
+    // Round by truncation
+    const RC_CHOP: size_t = 0x00000300;
+    // Precision control mask
+    const MCW_PC: size_t = 0x00030000;
+    // Values for 64-bit precision
+    const PC_64: size_t = 0x00000000;
+
+    pub struct FloatingPointContext {
+        original_value: size_t,
+    }
+
+    impl FloatingPointContext {
+        pub fn new() -> Self {
+            let mut current: size_t = 0;
+            let _err = unsafe { _controlfp_s(&mut current, PC_64 | RC_CHOP, MCW_PC | MCW_RC) };
+
+            FloatingPointContext {
+                original_value: current,
+            }
+        }
+    }
+
+    impl Drop for FloatingPointContext {
+        fn drop(&mut self) {
+            let mut current: size_t = 0;
+            let _err = unsafe { _controlfp_s(&mut current, self.original_value, MCW_PC | MCW_RC) };
+        }
+    }
+}
+
+#[cfg(not(all(target_os = "windows", target_env = "gnu")))]
+pub mod floating_point_context {
+    pub struct FloatingPointContext {}
+
+    impl FloatingPointContext {
+        pub fn new() -> Self {
+            FloatingPointContext {}
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use chrono::offset::TimeZone;
+
+    #[test]
+    fn test_sanitize_application_id() {
+        assert_eq!(
+            "org-mozilla-test-app",
+            sanitize_application_id("org.mozilla.test-app")
+        );
+        assert_eq!(
+            "org-mozilla-test-app",
+            sanitize_application_id("org.mozilla..test---app")
+        );
+        assert_eq!(
+            "org-mozilla-test-app",
+            sanitize_application_id("org-mozilla-test-app")
+        );
+        assert_eq!(
+            "org-mozilla-test-app",
+            sanitize_application_id("org.mozilla.Test.App")
+        );
+    }
+
+    #[test]
+    fn test_get_iso_time_string() {
+        // `1985-07-03T12:09:14.001560274+01:00`
+        let dt = FixedOffset::east(3600)
+            .ymd(1985, 7, 3)
+            .and_hms_nano(12, 9, 14, 1_560_274);
+        assert_eq!(
+            "1985-07-03T12:09:14.001560274+01:00",
+            get_iso_time_string(dt, TimeUnit::Nanosecond)
+        );
+        assert_eq!(
+            "1985-07-03T12:09:14.001560+01:00",
+            get_iso_time_string(dt, TimeUnit::Microsecond)
+        );
+        assert_eq!(
+            "1985-07-03T12:09:14.001+01:00",
+            get_iso_time_string(dt, TimeUnit::Millisecond)
+        );
+        assert_eq!(
+            "1985-07-03T12:09:14+01:00",
+            get_iso_time_string(dt, TimeUnit::Second)
+        );
+        assert_eq!(
+            "1985-07-03T12:09+01:00",
+            get_iso_time_string(dt, TimeUnit::Minute)
+        );
+        assert_eq!(
+            "1985-07-03T12+01:00",
+            get_iso_time_string(dt, TimeUnit::Hour)
+        );
+        assert_eq!("1985-07-03+01:00", get_iso_time_string(dt, TimeUnit::Day));
+    }
+
+    #[test]
+    fn local_now_gets_the_time() {
+        let now = Local::now();
+        let fixed_now = local_now_with_offset();
+
+        // We can't compare across differing timezones, so we just compare the UTC timestamps.
+        // The second timestamp should be just a few nanoseconds later.
+        assert!(
+            fixed_now.naive_utc() >= now.naive_utc(),
+            "Time mismatch. Local now: {:?}, Fixed now: {:?}",
+            now,
+            fixed_now
+        );
+    }
+
+    #[test]
+    fn truncate_safely_test() {
+        let value = "电脑坏了".to_string();
+        let truncated = truncate_string_at_boundary(value, 10);
+        assert_eq!("电脑坏", truncated);
+
+        let value = "0123456789abcdef".to_string();
+        let truncated = truncate_string_at_boundary(value, 10);
+        assert_eq!("0123456789", truncated);
+    }
+
+    #[test]
+    #[should_panic]
+    fn truncate_naive() {
+        // Ensure that truncating the naïve way on this string would panic
+        let value = "电脑坏了".to_string();
+        value[0..10].to_string();
+    }
+}