diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/glean-core/src/metrics/url.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/glean-core/src/metrics/url.rs')
-rw-r--r-- | third_party/rust/glean-core/src/metrics/url.rs | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/third_party/rust/glean-core/src/metrics/url.rs b/third_party/rust/glean-core/src/metrics/url.rs new file mode 100644 index 0000000000..c9eb824a3e --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/url.rs @@ -0,0 +1,312 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::Arc; + +use crate::common_metric_data::CommonMetricDataInternal; +use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +// The maximum number of characters a URL Metric may have, before encoding. +const MAX_URL_LENGTH: usize = 8192; + +/// A URL metric. +/// +/// Record an Unicode string value a URL content. +/// The URL is length-limited to `MAX_URL_LENGTH` bytes. +#[derive(Clone, Debug)] +pub struct UrlMetric { + meta: Arc<CommonMetricDataInternal>, +} + +impl MetricType for UrlMetric { + fn meta(&self) -> &CommonMetricDataInternal { + &self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl UrlMetric { + /// Creates a new string metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { + meta: Arc::new(meta.into()), + } + } + + fn is_valid_url_scheme(&self, value: String) -> bool { + let mut splits = value.split(':'); + if let Some(scheme) = splits.next() { + if scheme.is_empty() { + return false; + } + let mut chars = scheme.chars(); + // The list of characters allowed in the scheme is on + // the spec here: https://url.spec.whatwg.org/#url-scheme-string + return chars.next().unwrap().is_ascii_alphabetic() + && chars.all(|c| c.is_ascii_alphanumeric() || ['+', '-', '.'].contains(&c)); + } + + // No ':' found, this is not valid :) + false + } + + /// Sets to the specified stringified URL. + /// + /// # Arguments + /// + /// * `value` - The stringified URL to set the metric to. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_URL_LENGTH` bytes and logs an error. + pub fn set<S: Into<String>>(&self, value: S) { + let value = value.into(); + let metric = self.clone(); + crate::launch_with_glean(move |glean| metric.set_sync(glean, value)) + } + + /// Sets to the specified stringified URL synchronously. + #[doc(hidden)] + pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_URL_LENGTH); + + if s.starts_with("data:") { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + "URL metric does not support data URLs.", + None, + ); + return; + } + + if !self.is_valid_url_scheme(s.clone()) { + let msg = format!("\"{}\" does not start with a valid URL scheme.", s); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + return; + } + + let value = Metric::Url(s); + glean.storage().record(glean, &self.meta, &value) + } + + #[doc(hidden)] + pub(crate) fn get_value<'a, S: Into<Option<&'a str>>>( + &self, + glean: &Glean, + ping_name: S, + ) -> Option<String> { + let queried_ping_name = ping_name + .into() + .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]); + + match StorageManager.snapshot_metric_for_test( + glean.storage(), + queried_ping_name, + &self.meta.identifier(glean), + self.meta.inner.lifetime, + ) { + Some(Metric::Url(s)) => Some(s), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, ping_name: Option<String>) -> Option<String> { + crate::block_on_dispatcher(); + crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref())) + } + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 { + crate::block_on_dispatcher(); + + crate::core::with_glean(|glean| { + test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0) + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test_get_num_recorded_errors; + use crate::tests::new_glean; + use crate::ErrorType; + use crate::Lifetime; + + #[test] + fn payload_is_correct() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let sample_url = "glean://test".to_string(); + metric.set_sync(&glean, sample_url.clone()); + assert_eq!(sample_url, metric.get_value(&glean, "store1").unwrap()); + } + + #[test] + fn does_not_record_url_exceeding_maximum_length() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the + // MAX_URL_LENGTH. + // + // This 8-character string was chosen so we could have an even number that is + // a divisor of our MAX_URL_LENGTH. + let long_path_base = "abcdefgh"; + + // Using 2000 creates a string > 16000 characters, well over MAX_URL_LENGTH. + let test_url = format!("glean://{}", long_path_base.repeat(2000)); + metric.set_sync(&glean, test_url); + + // "glean://" is 8 characters + // "abcdefgh" (long_path_base) is 8 characters + // `long_path_base` is repeated 1023 times (8184) + // 8 + 8184 = 8192 (MAX_URL_LENGTH) + let expected = format!("glean://{}", long_path_base.repeat(1023)); + + assert_eq!(metric.get_value(&glean, "store1").unwrap(), expected); + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow) + .unwrap() + ); + } + + #[test] + fn does_not_record_data_urls() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let test_url = "data:application/json"; + metric.set_sync(&glean, test_url); + + assert!(metric.get_value(&glean, "store1").is_none()); + + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap() + ); + } + + #[test] + fn url_validation_works_and_records_errors() { + let (glean, _t) = new_glean(None); + + let metric = UrlMetric::new(CommonMetricData { + name: "url_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let incorrects = vec![ + "", + // Scheme may only start with upper or lowercase ASCII alpha[^1] character. + // [1]: https://infra.spec.whatwg.org/#ascii-alpha + "1glean://test", + "-glean://test", + // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters. + "шеллы://test", + "g!lean://test", + "g=lean://test", + // Scheme must be followed by `:` character. + "glean//test", + ]; + + let corrects = vec![ + // The minimum URL + "g:", + // Empty body is fine + "glean://", + // "//" is actually not even necessary + "glean:", + "glean:test", + "glean:test.com", + // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters. + "g-lean://test", + "g+lean://test", + "g.lean://test", + // Query parameters are fine + "glean://test?hello=world", + // Finally, some actual real world URLs + "https://infra.spec.whatwg.org/#ascii-alpha", + "https://infra.spec.whatwg.org/#ascii-alpha?test=for-glean", + ]; + + for incorrect in incorrects.clone().into_iter() { + metric.set_sync(&glean, incorrect); + assert!(metric.get_value(&glean, "store1").is_none()); + } + + assert_eq!( + incorrects.len(), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap() + as usize + ); + + for correct in corrects.into_iter() { + metric.set_sync(&glean, correct); + assert_eq!(metric.get_value(&glean, "store1").unwrap(), correct); + } + } +} |