diff options
Diffstat (limited to '')
-rw-r--r-- | vendor/percent-encoding/src/lib.rs (renamed from vendor/percent-encoding/lib.rs) | 74 |
1 files changed, 50 insertions, 24 deletions
diff --git a/vendor/percent-encoding/lib.rs b/vendor/percent-encoding/src/lib.rs index 27eaf6740..46a5d747c 100644 --- a/vendor/percent-encoding/lib.rs +++ b/vendor/percent-encoding/src/lib.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! URLs use special chacters to indicate the parts of the request. +//! URLs use special characters to indicate the parts of the request. //! For example, a `?` question mark marks the end of a path and the start of a query string. //! In order for that character to exist inside a path, it needs to be encoded differently. //! @@ -21,9 +21,9 @@ //! The [`AsciiSet`] parameter of [`percent_encode`] and [`utf8_percent_encode`] //! lets callers configure this. //! -//! This crate delibarately does not provide many different sets. +//! This crate deliberately does not provide many different sets. //! Users should consider in what context the encoded string will be used, -//! real relevant specifications, and define their own set. +//! read relevant specifications, and define their own set. //! This is done by using the `add` method of an existing set. //! //! # Examples @@ -37,15 +37,22 @@ //! assert_eq!(utf8_percent_encode("foo <bar>", FRAGMENT).to_string(), "foo%20%3Cbar%3E"); //! ``` -use std::borrow::Cow; -use std::fmt; -use std::slice; -use std::str; +#![no_std] +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(feature = "alloc")] +use alloc::{ + borrow::{Cow, ToOwned}, + string::String, + vec::Vec, +}; +use core::{fmt, mem, slice, str}; /// Represents a set of characters or bytes in the ASCII range. /// -/// This used in [`percent_encode`] and [`utf8_percent_encode`]. -/// This is simlar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes). +/// This is used in [`percent_encode`] and [`utf8_percent_encode`]. +/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes). /// /// Use the `add` method of an existing set to define a new set. For example: /// @@ -63,7 +70,7 @@ type Chunk = u32; const ASCII_RANGE_LEN: usize = 0x80; -const BITS_PER_CHUNK: usize = 8 * std::mem::size_of::<Chunk>(); +const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>(); impl AsciiSet { /// Called with UTF-8 bytes rather than code points. @@ -109,7 +116,7 @@ macro_rules! static_assert { ($( $bool: expr, )+) => { fn _static_assert() { $( - let _ = std::mem::transmute::<[u8; $bool as usize], u8>; + let _ = mem::transmute::<[u8; $bool as usize], u8>; )+ } } @@ -252,6 +259,8 @@ impl<'a> Iterator for PercentEncode<'a> { self.bytes = remaining; Some(percent_encode_byte(first_byte)) } else { + // The unsafe blocks here are appropriate because the bytes are + // confirmed as a subset of UTF-8 in should_percent_encode. for (i, &byte) in remaining.iter().enumerate() { if self.ascii_set.should_percent_encode(byte) { // 1 for first_byte + i for previous iterations of this loop @@ -279,7 +288,7 @@ impl<'a> Iterator for PercentEncode<'a> { } impl<'a> fmt::Display for PercentEncode<'a> { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { for c in (*self).clone() { formatter.write_str(c)? } @@ -287,6 +296,7 @@ impl<'a> fmt::Display for PercentEncode<'a> { } } +#[cfg(feature = "alloc")] impl<'a> From<PercentEncode<'a>> for Cow<'a, str> { fn from(mut iter: PercentEncode<'a>) -> Self { match iter.next() { @@ -310,7 +320,7 @@ impl<'a> From<PercentEncode<'a>> for Cow<'a, str> { /// /// See [`percent_decode`] regarding the return type. #[inline] -pub fn percent_decode_str(input: &str) -> PercentDecode { +pub fn percent_decode_str(input: &str) -> PercentDecode<'_> { percent_decode(input.as_bytes()) } @@ -333,7 +343,7 @@ pub fn percent_decode_str(input: &str) -> PercentDecode { /// assert_eq!(percent_decode(b"foo%20bar%3f").decode_utf8().unwrap(), "foo bar?"); /// ``` #[inline] -pub fn percent_decode(input: &[u8]) -> PercentDecode { +pub fn percent_decode(input: &[u8]) -> PercentDecode<'_> { PercentDecode { bytes: input.iter(), } @@ -345,7 +355,7 @@ pub struct PercentDecode<'a> { bytes: slice::Iter<'a, u8>, } -fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> { +fn after_percent_sign(iter: &mut slice::Iter<'_, u8>) -> Option<u8> { let mut cloned_iter = iter.clone(); let h = char::from(*cloned_iter.next()?).to_digit(16)?; let l = char::from(*cloned_iter.next()?).to_digit(16)?; @@ -368,10 +378,11 @@ impl<'a> Iterator for PercentDecode<'a> { fn size_hint(&self) -> (usize, Option<usize>) { let bytes = self.bytes.len(); - (bytes / 3, Some(bytes)) + ((bytes + 2) / 3, Some(bytes)) } } +#[cfg(feature = "alloc")] impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> { fn from(iter: PercentDecode<'a>) -> Self { match iter.if_any() { @@ -383,6 +394,7 @@ impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> { impl<'a> PercentDecode<'a> { /// If the percent-decoding is different from the input, return it as a new bytes vector. + #[cfg(feature = "alloc")] fn if_any(&self) -> Option<Vec<u8>> { let mut bytes_iter = self.bytes.clone(); while bytes_iter.any(|&b| b == b'%') { @@ -402,6 +414,7 @@ impl<'a> PercentDecode<'a> { /// Decode the result of percent-decoding as UTF-8. /// /// This is return `Err` when the percent-decoded bytes are not well-formed in UTF-8. + #[cfg(feature = "alloc")] pub fn decode_utf8(self) -> Result<Cow<'a, str>, str::Utf8Error> { match self.clone().into() { Cow::Borrowed(bytes) => match str::from_utf8(bytes) { @@ -419,24 +432,37 @@ impl<'a> PercentDecode<'a> { /// /// Invalid UTF-8 percent-encoded byte sequences will be replaced � U+FFFD, /// the replacement character. + #[cfg(feature = "alloc")] pub fn decode_utf8_lossy(self) -> Cow<'a, str> { decode_utf8_lossy(self.clone().into()) } } -fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> { +#[cfg(feature = "alloc")] +fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { + // Note: This function is duplicated in `form_urlencoded/src/query_encoding.rs`. match input { Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), Cow::Owned(bytes) => { - let raw_utf8: *const [u8]; match String::from_utf8_lossy(&bytes) { - Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), - Cow::Owned(s) => return s.into(), + Cow::Borrowed(utf8) => { + // If from_utf8_lossy returns a Cow::Borrowed, then we can + // be sure our original bytes were valid UTF-8. This is because + // if the bytes were invalid UTF-8 from_utf8_lossy would have + // to allocate a new owned string to back the Cow so it could + // replace invalid bytes with a placeholder. + + // First we do a debug_assert to confirm our description above. + let raw_utf8: *const [u8] = utf8.as_bytes(); + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + + // Given we know the original input bytes are valid UTF-8, + // and we have ownership of those bytes, we re-use them and + // return a Cow::Owned here. + Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) + } + Cow::Owned(s) => Cow::Owned(s), } - // from_utf8_lossy returned a borrow of `bytes` unchanged. - debug_assert!(raw_utf8 == &*bytes as *const [u8]); - // Reuse the existing `Vec` allocation. - unsafe { String::from_utf8_unchecked(bytes) }.into() } } } |