diff options
Diffstat (limited to 'third_party/rust/form_urlencoded/src/lib.rs')
-rw-r--r-- | third_party/rust/form_urlencoded/src/lib.rs | 416 |
1 files changed, 416 insertions, 0 deletions
diff --git a/third_party/rust/form_urlencoded/src/lib.rs b/third_party/rust/form_urlencoded/src/lib.rs new file mode 100644 index 0000000000..477594bf71 --- /dev/null +++ b/third_party/rust/form_urlencoded/src/lib.rs @@ -0,0 +1,416 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax]( +//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded), +//! as used by HTML forms. +//! +//! Converts between a string (such as an URL’s query string) +//! and a sequence of (name, value) pairs. + +use percent_encoding::{percent_decode, percent_encode_byte}; +use std::borrow::{Borrow, Cow}; +use std::str; + +/// Convert a byte string in the `application/x-www-form-urlencoded` syntax +/// into a iterator of (name, value) pairs. +/// +/// Use `parse(input.as_bytes())` to parse a `&str` string. +/// +/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be +/// converted to `[("#first", "%try%")]`. +#[inline] +pub fn parse(input: &[u8]) -> Parse<'_> { + Parse { input } +} +/// The return type of `parse()`. +#[derive(Copy, Clone)] +pub struct Parse<'a> { + input: &'a [u8], +} + +impl<'a> Iterator for Parse<'a> { + type Item = (Cow<'a, str>, Cow<'a, str>); + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.input.is_empty() { + return None; + } + let mut split2 = self.input.splitn(2, |&b| b == b'&'); + let sequence = split2.next().unwrap(); + self.input = split2.next().unwrap_or(&[][..]); + if sequence.is_empty() { + continue; + } + let mut split2 = sequence.splitn(2, |&b| b == b'='); + let name = split2.next().unwrap(); + let value = split2.next().unwrap_or(&[][..]); + return Some((decode(name), decode(value))); + } + } +} + +fn decode(input: &[u8]) -> Cow<'_, str> { + let replaced = replace_plus(input); + decode_utf8_lossy(match percent_decode(&replaced).into() { + Cow::Owned(vec) => Cow::Owned(vec), + Cow::Borrowed(_) => replaced, + }) +} + +/// Replace b'+' with b' ' +fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> { + match input.iter().position(|&b| b == b'+') { + None => Cow::Borrowed(input), + Some(first_position) => { + let mut replaced = input.to_owned(); + replaced[first_position] = b' '; + for byte in &mut replaced[first_position + 1..] { + if *byte == b'+' { + *byte = b' '; + } + } + Cow::Owned(replaced) + } + } +} + +impl<'a> Parse<'a> { + /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`. + pub fn into_owned(self) -> ParseIntoOwned<'a> { + ParseIntoOwned { inner: self } + } +} + +/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`. +pub struct ParseIntoOwned<'a> { + inner: Parse<'a>, +} + +impl<'a> Iterator for ParseIntoOwned<'a> { + type Item = (String, String); + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|(k, v)| (k.into_owned(), v.into_owned())) + } +} + +/// The [`application/x-www-form-urlencoded` byte serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// +/// Return an iterator of `&str` slices. +pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { + ByteSerialize { bytes: input } +} + +/// Return value of `byte_serialize()`. +#[derive(Debug)] +pub struct ByteSerialize<'a> { + bytes: &'a [u8], +} + +fn byte_serialized_unchanged(byte: u8) -> bool { + matches!(byte, b'*' | b'-' | b'.' | b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z') +} + +impl<'a> Iterator for ByteSerialize<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some((&first, tail)) = self.bytes.split_first() { + if !byte_serialized_unchanged(first) { + self.bytes = tail; + return Some(if first == b' ' { + "+" + } else { + percent_encode_byte(first) + }); + } + let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); + let (unchanged_slice, remaining) = match position { + // 1 for first_byte + i unchanged in tail + Some(i) => self.bytes.split_at(1 + i), + None => (self.bytes, &[][..]), + }; + self.bytes = remaining; + // This unsafe is appropriate because we have already checked these + // bytes in byte_serialized_unchanged, which checks for a subset + // of UTF-8. So we know these bytes are valid UTF-8, and doing + // another UTF-8 check would be wasteful. + Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +/// The [`application/x-www-form-urlencoded` serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-serializer). +pub struct Serializer<'a, T: Target> { + target: Option<T>, + start_position: usize, + encoding: EncodingOverride<'a>, +} + +pub trait Target { + fn as_mut_string(&mut self) -> &mut String; + fn finish(self) -> Self::Finished; + type Finished; +} + +impl Target for String { + fn as_mut_string(&mut self) -> &mut String { + self + } + fn finish(self) -> Self { + self + } + type Finished = Self; +} + +impl<'a> Target for &'a mut String { + fn as_mut_string(&mut self) -> &mut String { + &mut **self + } + fn finish(self) -> Self { + self + } + type Finished = Self; +} + +impl<'a, T: Target> Serializer<'a, T> { + /// Create a new `application/x-www-form-urlencoded` serializer for the given target. + /// + /// If the target is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. + pub fn new(target: T) -> Self { + Self::for_suffix(target, 0) + } + + /// Create a new `application/x-www-form-urlencoded` serializer + /// for a suffix of the given target. + /// + /// If that suffix is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. + pub fn for_suffix(mut target: T, start_position: usize) -> Self { + if target.as_mut_string().len() < start_position { + panic!( + "invalid length {} for target of length {}", + start_position, + target.as_mut_string().len() + ); + } + + Serializer { + target: Some(target), + start_position, + encoding: None, + } + } + + /// Remove any existing name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn clear(&mut self) -> &mut Self { + string(&mut self.target).truncate(self.start_position); + self + } + + /// Set the character encoding to be used for names and values before percent-encoding. + pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { + self.encoding = new; + self + } + + /// Serialize and append a name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { + append_pair( + string(&mut self.target), + self.start_position, + self.encoding, + name, + value, + ); + self + } + + /// Serialize and append a name of parameter without any value. + /// + /// Panics if called after `.finish()`. + pub fn append_key_only(&mut self, name: &str) -> &mut Self { + append_key_only( + string(&mut self.target), + self.start_position, + self.encoding, + name, + ); + self + } + + /// Serialize and append a number of name/value pairs. + /// + /// This simply calls `append_pair` repeatedly. + /// This can be more convenient, so the user doesn’t need to introduce a block + /// to limit the scope of `Serializer`’s borrow of its string. + /// + /// Panics if called after `.finish()`. + pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef<str>, + V: AsRef<str>, + { + { + let string = string(&mut self.target); + for pair in iter { + let &(ref k, ref v) = pair.borrow(); + append_pair( + string, + self.start_position, + self.encoding, + k.as_ref(), + v.as_ref(), + ); + } + } + self + } + + /// Serialize and append a number of names without values. + /// + /// This simply calls `append_key_only` repeatedly. + /// This can be more convenient, so the user doesn’t need to introduce a block + /// to limit the scope of `Serializer`’s borrow of its string. + /// + /// Panics if called after `.finish()`. + pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self + where + I: IntoIterator, + I::Item: Borrow<K>, + K: AsRef<str>, + { + { + let string = string(&mut self.target); + for key in iter { + let k = key.borrow().as_ref(); + append_key_only(string, self.start_position, self.encoding, k); + } + } + self + } + + /// If this serializer was constructed with a string, take and return that string. + /// + /// ```rust + /// use form_urlencoded; + /// let encoded: String = form_urlencoded::Serializer::new(String::new()) + /// .append_pair("foo", "bar & baz") + /// .append_pair("saison", "Été+hiver") + /// .finish(); + /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver"); + /// ``` + /// + /// Panics if called more than once. + pub fn finish(&mut self) -> T::Finished { + self.target + .take() + .expect("url::form_urlencoded::Serializer double finish") + .finish() + } +} + +fn append_separator_if_needed(string: &mut String, start_position: usize) { + if string.len() > start_position { + string.push('&') + } +} + +fn string<T: Target>(target: &mut Option<T>) -> &mut String { + target + .as_mut() + .expect("url::form_urlencoded::Serializer finished") + .as_mut_string() +} + +fn append_pair( + string: &mut String, + start_position: usize, + encoding: EncodingOverride<'_>, + name: &str, + value: &str, +) { + append_separator_if_needed(string, start_position); + append_encoded(name, string, encoding); + string.push('='); + append_encoded(value, string, encoding); +} + +fn append_key_only( + string: &mut String, + start_position: usize, + encoding: EncodingOverride, + name: &str, +) { + append_separator_if_needed(string, start_position); + append_encoded(name, string, encoding); +} + +fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { + string.extend(byte_serialize(&encode(encoding, s))) +} + +pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { + if let Some(o) = encoding_override { + return o(input); + } + input.as_bytes().into() +} + +pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { + // Note: This function is duplicated in `percent_encoding/lib.rs`. + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => { + // If from_utf8_lossy returns a Cow::Borrowed, then we can + // be sure our original bytes were valid UTF-8. This is because + // if the bytes were invalid UTF-8 from_utf8_lossy would have + // to allocate a new owned string to back the Cow so it could + // replace invalid bytes with a placeholder. + + // First we do a debug_assert to confirm our description above. + let raw_utf8: *const [u8] = utf8.as_bytes(); + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + + // Given we know the original input bytes are valid UTF-8, + // and we have ownership of those bytes, we re-use them and + // return a Cow::Owned here. + Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) + } + Cow::Owned(s) => Cow::Owned(s), + } + } + } +} + +pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; |