diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:34:42 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:34:42 +0000 |
commit | da4c7e7ed675c3bf405668739c3012d140856109 (patch) | |
tree | cdd868dba063fecba609a1d819de271f0d51b23e /third_party/rust/relevancy/src/url_hash.rs | |
parent | Adding upstream version 125.0.3. (diff) | |
download | firefox-da4c7e7ed675c3bf405668739c3012d140856109.tar.xz firefox-da4c7e7ed675c3bf405668739c3012d140856109.zip |
Adding upstream version 126.0. upstream/126.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/relevancy/src/url_hash.rs')
-rw-r--r-- | third_party/rust/relevancy/src/url_hash.rs | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/third_party/rust/relevancy/src/url_hash.rs b/third_party/rust/relevancy/src/url_hash.rs new file mode 100644 index 0000000000..d31a45d06b --- /dev/null +++ b/third_party/rust/relevancy/src/url_hash.rs @@ -0,0 +1,63 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use md5::{Digest, Md5}; +use url::{Host, Url}; + +pub type UrlHash = [u8; 16]; + +/// Given a URL, extract the part of it that we want to use to identify it. +/// +/// We currently use the final 3 components of the URL domain. +/// +/// TODO: decide if this should be 3 or 3 components. +pub fn url_hash_source(url: &str) -> Option<String> { + let url = Url::parse(url).ok()?; + let domain = match url.host() { + Some(Host::Domain(d)) => d, + _ => return None, + }; + // This will store indexes of `.` chars as we search backwards. + let mut pos = domain.len(); + for _ in 0..3 { + match domain[0..pos].rfind('.') { + Some(p) => pos = p, + // The domain has less than 3 dots, return it all + None => return Some(domain.to_owned()), + } + } + Some(domain[pos + 1..].to_owned()) +} + +pub fn hash_url(url: &str) -> Option<UrlHash> { + url_hash_source(url).map(|hash_source| { + let mut hasher = Md5::new(); + hasher.update(hash_source); + let result = hasher.finalize(); + result.into() + }) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_url_hash_source() { + let table = [ + ("http://example.com/some-path", Some("example.com")), + ("http://foo.example.com/some-path", Some("foo.example.com")), + ( + "http://foo.bar.baz.example.com/some-path", + Some("baz.example.com"), + ), + ("http://foo.com.uk/some-path", Some("foo.com.uk")), + ("http://amazon.com/some-path", Some("amazon.com")), + ("http://192.168.0.1/some-path", None), + ]; + for (url, expected) in table { + assert_eq!(url_hash_source(url).as_deref(), expected) + } + } +} |