/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #![allow(unknown_lints)] #![warn(rust_2018_idioms)] // (It's tempting to avoid the utf8 checks, but they're easy to get wrong, so) #![deny(unsafe_code)] #[cfg(feature = "serde_support")] mod serde_support; #[cfg(feature = "rusqlite_support")] mod rusqlite_support; use std::{ cmp::Ordering, fmt, hash::{Hash, Hasher}, ops, str, }; /// This is a type intended to be used to represent the guids used by sync. It /// has several benefits over using a `String`: /// /// 1. It's more explicit about what is being stored, and could prevent bugs /// where a Guid is passed to a function expecting text. /// /// 2. Guids are guaranteed to be immutable. /// /// 3. It's optimized for the guids commonly used by sync. In particular, short guids /// (including the guids which would meet `PlacesUtils.isValidGuid`) do not incur /// any heap allocation, and are stored inline. #[derive(Clone)] pub struct Guid(Repr); // The internal representation of a GUID. Most Sync GUIDs are 12 bytes, // and contain only base64url characters; we can store them on the stack // without a heap allocation. However, arbitrary ascii guids of up to length 64 // are possible, in which case we fall back to a heap-allocated string. // // This is separate only because making `Guid` an enum would expose the // internals. #[derive(Clone)] enum Repr { // see FastGuid for invariants Fast(FastGuid), // invariants: // - _0.len() > MAX_FAST_GUID_LEN Slow(String), } /// Invariants: /// /// - `len <= MAX_FAST_GUID_LEN`. /// - `data[0..len]` encodes valid utf8. /// - `data[len..].iter().all(|&b| b == b'\0')` /// /// Note: None of these are required for memory safety, just correctness. #[derive(Clone)] struct FastGuid { len: u8, data: [u8; MAX_FAST_GUID_LEN], } // This is the maximum length (experimentally determined) we can make it before // `Repr::Fast` is larger than `Guid::Slow` on 32 bit systems. The important // thing is really that it's not too big, and is above 12 bytes. const MAX_FAST_GUID_LEN: usize = 14; impl FastGuid { #[inline] fn from_slice(bytes: &[u8]) -> Self { // Checked by the caller, so debug_assert is fine. debug_assert!( can_use_fast(bytes), "Bug: Caller failed to check can_use_fast: {:?}", bytes ); let mut data = [0u8; MAX_FAST_GUID_LEN]; data[0..bytes.len()].copy_from_slice(bytes); FastGuid { len: bytes.len() as u8, data, } } #[inline] fn as_str(&self) -> &str { // Note: we only use debug_assert! to enusre valid utf8-ness, so this need str::from_utf8(self.bytes()).expect("Invalid fast guid bytes!") } #[inline] fn len(&self) -> usize { self.len as usize } #[inline] fn bytes(&self) -> &[u8] { &self.data[0..self.len()] } } // Returns: // - true to use Repr::Fast // - false to use Repr::Slow #[inline] fn can_use_fast>(bytes: &T) -> bool { let bytes = bytes.as_ref(); // This is fine as a debug_assert since we'll still panic if it's ever used // in such a way where it would matter. debug_assert!(str::from_utf8(bytes).is_ok()); bytes.len() <= MAX_FAST_GUID_LEN } impl Guid { /// Create a guid from a `str`. #[inline] pub fn new(s: &str) -> Self { Guid::from_slice(s.as_ref()) } /// Create an empty guid. Usable as a constant. #[inline] pub const fn empty() -> Self { Guid(Repr::Fast(FastGuid { len: 0, data: [0u8; MAX_FAST_GUID_LEN], })) } /// Create a random guid (of 12 base64url characters). Requires the `random` /// feature. #[cfg(feature = "random")] pub fn random() -> Self { let bytes: [u8; 9] = rand::random(); // Note: only first 12 bytes are used, but remaining are required to // build the FastGuid let mut output = [0u8; MAX_FAST_GUID_LEN]; let bytes_written = base64::encode_config_slice(bytes, base64::URL_SAFE_NO_PAD, &mut output[..12]); debug_assert!(bytes_written == 12); Guid(Repr::Fast(FastGuid { len: 12, data: output, })) } /// Convert `b` into a `Guid`. #[inline] pub fn from_string(s: String) -> Self { Guid::from_vec(s.into_bytes()) } /// Convert `b` into a `Guid`. #[inline] pub fn from_slice(b: &[u8]) -> Self { if can_use_fast(b) { Guid(Repr::Fast(FastGuid::from_slice(b))) } else { Guid::new_slow(b.into()) } } /// Convert `v` to a `Guid`, consuming it. #[inline] pub fn from_vec(v: Vec) -> Self { if can_use_fast(&v) { Guid(Repr::Fast(FastGuid::from_slice(&v))) } else { Guid::new_slow(v) } } /// Get the data backing this `Guid` as a `&[u8]`. #[inline] pub fn as_bytes(&self) -> &[u8] { match &self.0 { Repr::Fast(rep) => rep.bytes(), Repr::Slow(rep) => rep.as_ref(), } } /// Get the data backing this `Guid` as a `&str`. #[inline] pub fn as_str(&self) -> &str { match &self.0 { Repr::Fast(rep) => rep.as_str(), Repr::Slow(rep) => rep.as_ref(), } } /// Convert this `Guid` into a `String`, consuming it in the process. #[inline] pub fn into_string(self) -> String { match self.0 { Repr::Fast(rep) => rep.as_str().into(), Repr::Slow(rep) => rep, } } /// Returns true for Guids that are deemed valid by the sync server. /// See https://github.com/mozilla-services/server-syncstorage/blob/d92ef07877aebd05b92f87f6ade341d6a55bffc8/syncstorage/bso.py#L24 pub fn is_valid_for_sync_server(&self) -> bool { !self.is_empty() && self.len() <= 64 && self .bytes() .all(|b| (b' '..=b'~').contains(&b) && b != b',') } /// Returns true for Guids that are valid places guids, and false for all others. pub fn is_valid_for_places(&self) -> bool { self.len() == 12 && self.bytes().all(Guid::is_valid_places_byte) } /// Returns true if the byte `b` is a valid base64url byte. #[inline] pub fn is_valid_places_byte(b: u8) -> bool { BASE64URL_BYTES[b as usize] == 1 } #[cold] fn new_slow(v: Vec) -> Self { assert!( !can_use_fast(&v), "Could use fast for guid (len = {})", v.len() ); Guid(Repr::Slow( String::from_utf8(v).expect("Invalid slow guid bytes!"), )) } } // This is used to implement the places tests. const BASE64URL_BYTES: [u8; 256] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; impl Ord for Guid { fn cmp(&self, other: &Self) -> Ordering { self.as_bytes().cmp(other.as_bytes()) } } impl PartialOrd for Guid { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl PartialEq for Guid { fn eq(&self, other: &Self) -> bool { self.as_bytes() == other.as_bytes() } } impl Eq for Guid {} impl Hash for Guid { fn hash(&self, state: &mut H) { self.as_bytes().hash(state); } } impl<'a> From<&'a str> for Guid { #[inline] fn from(s: &'a str) -> Guid { Guid::from_slice(s.as_ref()) } } impl<'a> From<&'a &str> for Guid { #[inline] fn from(s: &'a &str) -> Guid { Guid::from_slice(s.as_ref()) } } impl<'a> From<&'a [u8]> for Guid { #[inline] fn from(s: &'a [u8]) -> Guid { Guid::from_slice(s) } } impl From for Guid { #[inline] fn from(s: String) -> Guid { Guid::from_string(s) } } impl From> for Guid { #[inline] fn from(v: Vec) -> Guid { Guid::from_vec(v) } } impl From for String { #[inline] fn from(guid: Guid) -> String { guid.into_string() } } impl From for Vec { #[inline] fn from(guid: Guid) -> Vec { guid.into_string().into_bytes() } } impl AsRef for Guid { #[inline] fn as_ref(&self) -> &str { self.as_str() } } impl AsRef<[u8]> for Guid { #[inline] fn as_ref(&self) -> &[u8] { self.as_bytes() } } impl ops::Deref for Guid { type Target = str; #[inline] fn deref(&self) -> &str { self.as_str() } } // The default Debug impl is pretty unhelpful here. impl fmt::Debug for Guid { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Guid({:?})", self.as_str()) } } impl fmt::Display for Guid { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl std::default::Default for Guid { /// Create a default guid by calling `Guid::empty()` #[inline] fn default() -> Self { Guid::empty() } } macro_rules! impl_guid_eq { ($($other: ty),+) => {$( // This macro is used for items with and without lifetimes. #[allow(clippy::extra_unused_lifetimes)] impl<'a> PartialEq<$other> for Guid { #[inline] fn eq(&self, other: &$other) -> bool { PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other)) } } #[allow(clippy::extra_unused_lifetimes)] impl<'a> PartialEq for $other { #[inline] fn eq(&self, other: &Guid) -> bool { PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other)) } } )+} } // Implement direct comparison with some common types from the stdlib. impl_guid_eq![str, &'a str, String, [u8], &'a [u8], Vec]; #[cfg(test)] mod test { use super::*; #[test] fn test_base64url_bytes() { let mut expect = [0u8; 256]; for b in b'0'..=b'9' { expect[b as usize] = 1; } for b in b'a'..=b'z' { expect[b as usize] = 1; } for b in b'A'..=b'Z' { expect[b as usize] = 1; } expect[b'_' as usize] = 1; expect[b'-' as usize] = 1; assert_eq!(&BASE64URL_BYTES[..], &expect[..]); } #[test] fn test_valid_for_places() { assert!(Guid::from("aaaabbbbcccc").is_valid_for_places()); assert!(Guid::from_slice(b"09_az-AZ_09-").is_valid_for_places()); assert!(!Guid::from("aaaabbbbccccd").is_valid_for_places()); // too long assert!(!Guid::from("aaaabbbbccc").is_valid_for_places()); // too short assert!(!Guid::from("aaaabbbbccc=").is_valid_for_places()); // right length, bad character assert!(!Guid::empty().is_valid_for_places()); // empty isn't valid to insert. } #[test] fn test_valid_for_sync_server() { assert!(!Guid::empty().is_valid_for_sync_server()); // empty isn't valid remotely. } #[allow(clippy::cmp_owned)] // See clippy note below. #[test] fn test_comparison() { assert_eq!(Guid::from("abcdabcdabcd"), "abcdabcdabcd"); assert_ne!(Guid::from("abcdabcdabcd".to_string()), "ABCDabcdabcd"); assert_eq!(Guid::from("abcdabcdabcd"), &b"abcdabcdabcd"[..]); // b"abcdabcdabcd" has type &[u8; 12]... assert_ne!(Guid::from(&b"abcdabcdabcd"[..]), &b"ABCDabcdabcd"[..]); assert_eq!( Guid::from(b"abcdabcdabcd"[..].to_owned()), "abcdabcdabcd".to_string() ); assert_ne!(Guid::from("abcdabcdabcd"), "ABCDabcdabcd".to_string()); assert_eq!( Guid::from("abcdabcdabcd1234"), Vec::from(b"abcdabcdabcd1234".as_ref()) ); assert_ne!( Guid::from("abcdabcdabcd4321"), Vec::from(b"ABCDabcdabcd4321".as_ref()) ); // order by data instead of length // hrmph - clippy in 1.54-nightly complains about the below: // 'error: this creates an owned instance just for comparison' // '... help: try: `*"aaaaaa"`' // and suggests a change that's wrong - so we've ignored the lint above. assert!(Guid::from("zzz") > Guid::from("aaaaaa")); assert!(Guid::from("ThisIsASolowGuid") < Guid::from("zzz")); assert!(Guid::from("ThisIsASolowGuid") > Guid::from("AnotherSlowGuid")); } #[cfg(feature = "random")] #[test] fn test_random() { use std::collections::HashSet; // Used to verify uniqueness within our sample of 1000. Could cause // random failures, but desktop has the same test, and it's never caused // a problem AFAIK. let mut seen: HashSet = HashSet::new(); for _ in 0..1000 { let g = Guid::random(); assert_eq!(g.len(), 12); assert!(g.is_valid_for_places()); let decoded = base64::decode_config(&g, base64::URL_SAFE_NO_PAD).unwrap(); assert_eq!(decoded.len(), 9); let no_collision = seen.insert(g.clone().into_string()); assert!(no_collision, "{}", g); } } }