summaryrefslogtreecommitdiffstats
path: root/third_party/rust/mime/src/parse.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/mime/src/parse.rs')
-rw-r--r--third_party/rust/mime/src/parse.rs363
1 files changed, 363 insertions, 0 deletions
diff --git a/third_party/rust/mime/src/parse.rs b/third_party/rust/mime/src/parse.rs
new file mode 100644
index 0000000000..d55e5494cb
--- /dev/null
+++ b/third_party/rust/mime/src/parse.rs
@@ -0,0 +1,363 @@
+#[allow(unused, deprecated)]
+use std::ascii::AsciiExt;
+use std::error::Error;
+use std::fmt;
+use std::iter::Enumerate;
+use std::str::Bytes;
+
+use super::{Mime, Source, ParamSource, Indexed, CHARSET, UTF_8};
+
+/// Reasons why a string could not be parsed into a `Mime`.
+#[derive(Debug)]
+pub enum ParseError {
+ /// The input ended before a `/` separating type and subtype was found.
+ MissingSlash,
+ /// The input ended while a parameter name was being read, before its `=`.
+ MissingEqual,
+ /// The input ended inside a quoted parameter value, before the closing `"`.
+ MissingQuote,
+ /// A byte that is not allowed at its position was encountered.
+ InvalidToken {
+ /// Zero-based byte offset of the offending byte within the input.
+ pos: usize,
+ /// The offending byte itself.
+ byte: u8,
+ },
+}
+
+impl ParseError {
+    /// Returns the fixed, human-readable description of this error kind.
+    ///
+    /// The position and byte carried by `InvalidToken` are not part of the
+    /// returned text.
+    fn s(&self) -> &str {
+        match *self {
+            ParseError::MissingSlash => "a slash (/) was missing between the type and subtype",
+            ParseError::MissingEqual => "an equals sign (=) was missing between a parameter and its value",
+            ParseError::MissingQuote => "a quote (\") was missing from a parameter value",
+            ParseError::InvalidToken { .. } => "an invalid token was encountered",
+        }
+    }
+}
+
+impl fmt::Display for ParseError {
+    /// Writes the description of the error; for `InvalidToken` the offending
+    /// byte (upper-case hexadecimal) and its position are appended.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            ParseError::InvalidToken { pos, byte } => {
+                write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
+            },
+            _ => f.write_str(self.s()),
+        }
+    }
+}
+
+impl Error for ParseError {
+    // `Error::description` is deprecated, but the minimum supported Rust
+    // version (1.15) still required it to be implemented.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        ParseError::s(self)
+    }
+}
+
+/// Parses `s` into a `Mime`.
+///
+/// The input is scanned byte by byte in three phases: the top-level type up
+/// to the `/`, the subtype (remembering the position of the last `+`) up to
+/// either the end of input or a `;`, and finally the parameter list, which is
+/// delegated to `params_from_str`. The stored source string is an ASCII
+/// lowercased copy of the input; how much of it is lowercased depends on the
+/// kind of parameters found (see the `match params` below).
+///
+/// # Errors
+///
+/// * `ParseError::MissingSlash` if the input ends before a `/` is seen.
+/// * `ParseError::InvalidToken` if a byte that is neither a token byte nor the
+///   delimiter expected at that position is found.
+/// * Any error returned by `params_from_str` for the parameter list.
+pub fn parse(s: &str) -> Result<Mime, ParseError> {
+ // Shortcut: the wildcard type maps straight to the crate-level constant.
+ if s == "*/*" {
+ return Ok(::STAR_STAR);
+ }
+
+ let mut iter = s.bytes().enumerate();
+ // toplevel: consume token bytes until the slash; the slash may not be the
+ // very first byte (`i > 0`), so an empty top-level type is rejected.
+ let mut start;
+ let slash;
+ loop {
+ match iter.next() {
+ Some((_, c)) if is_token(c) => (),
+ Some((i, b'/')) if i > 0 => {
+ slash = i;
+ start = i + 1;
+ break;
+ },
+ None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
+ Some((pos, byte)) => return Err(ParseError::InvalidToken {
+ pos: pos,
+ byte: byte,
+ })
+ };
+
+ }
+
+ // sublevel: `start` is the index of the first subtype byte. The guarded
+ // arms must stay ahead of the generic token arm: a `+` after the first
+ // subtype byte is recorded as the suffix separator (the last one wins),
+ // while a `+` at `start` falls through and is treated as a plain token byte.
+ let mut plus = None;
+ loop {
+ match iter.next() {
+ Some((i, b'+')) if i > start => {
+ plus = Some(i);
+ },
+ // A semicolon after at least one subtype byte starts the parameters;
+ // from here on `start` holds the index of that semicolon.
+ Some((i, b';')) if i > start => {
+ start = i;
+ break;
+ },
+ Some((_, c)) if is_token(c) => (),
+ // End of input without parameters.
+ // NOTE(review): an input that ends right after the slash (e.g. `text/`)
+ // also reaches this arm and is accepted with an empty subtype.
+ None => {
+ return Ok(Mime {
+ source: Source::Dynamic(s.to_ascii_lowercase()),
+ slash: slash,
+ plus: plus,
+ params: ParamSource::None,
+ });
+ },
+ Some((pos, byte)) => return Err(ParseError::InvalidToken {
+ pos: pos,
+ byte: byte,
+ })
+ };
+ }
+
+ // params
+ let params = params_from_str(s, &mut iter, start)?;
+
+ // Build the owned source string. With the `Utf8` shortcut the whole input
+ // is lowercased; with custom parameters only selected ranges are (see
+ // `lower_ascii_with_params`); with no parameters the input is cut off at
+ // the semicolon.
+ let src = match params {
+ ParamSource::Utf8(_) => s.to_ascii_lowercase(),
+ ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
+ ParamSource::None => {
+ // Chop off the empty list
+ s[..start].to_ascii_lowercase()
+ }
+ };
+
+ Ok(Mime {
+ source: Source::Dynamic(src),
+ slash: slash,
+ plus: plus,
+ params: params,
+ })
+}
+
+
+/// Parses the parameter list that follows the subtype of a media type.
+///
+/// * `s` is the complete input string.
+/// * `iter` is the byte iterator over `s`; `parse` hands it over right after
+///   it has consumed the first `;`.
+/// * `start` is the index of that `;` in `s`.
+///
+/// Parameters are recorded as pairs of `Indexed` byte ranges into `s` (name,
+/// value). For quoted values the range excludes the surrounding quotes.
+///
+/// Returns
+///
+/// * `ParamSource::None` if no parameter follows the semicolon,
+/// * `ParamSource::Utf8(semicolon)` if the only parameter starts exactly two
+///   bytes after the semicolon, its name compares equal to `CHARSET` and its
+///   value compares equal to `UTF_8`,
+/// * `ParamSource::Custom(semicolon, pairs)` in every other case.
+///
+/// # Errors
+///
+/// * `ParseError::MissingEqual` if the input ends inside a parameter name.
+/// * `ParseError::MissingQuote` if the input ends inside a quoted value.
+/// * `ParseError::InvalidToken` for any byte not allowed at its position.
+fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
+    let semicolon = start;
+    start += 1;
+    let mut params = ParamSource::None;
+    'params: while start < s.len() {
+        let name;
+        // name
+        'name: loop {
+            match iter.next() {
+                // A space sitting exactly where the name should begin is
+                // skipped, and the loop condition is re-checked so that a
+                // trailing "; " does not demand a parameter.
+                Some((i, b' ')) if i == start => {
+                    start = i + 1;
+                    continue 'params;
+                },
+                Some((_, c)) if is_token(c) => (),
+                // The `=` ends the name; the name must not be empty.
+                Some((i, b'=')) if i > start => {
+                    name = Indexed(start, i);
+                    start = i + 1;
+                    break 'name;
+                },
+                None => return Err(ParseError::MissingEqual),
+                Some((pos, byte)) => return Err(ParseError::InvalidToken {
+                    pos: pos,
+                    byte: byte,
+                }),
+            }
+        }
+
+        let value;
+        // values must be restrict-name-char or "anything goes"
+        let mut is_quoted = false;
+
+        'value: loop {
+            if is_quoted {
+                match iter.next() {
+                    // Closing quote; `start` already points past the opening
+                    // quote, so the recorded range excludes both quotes.
+                    Some((i, b'"')) if i > start => {
+                        value = Indexed(start, i);
+                        break 'value;
+                    },
+                    Some((_, c)) if is_restricted_quoted_char(c) => (),
+                    None => return Err(ParseError::MissingQuote),
+                    Some((pos, byte)) => return Err(ParseError::InvalidToken {
+                        pos: pos,
+                        byte: byte,
+                    }),
+                }
+            } else {
+                match iter.next() {
+                    // A quote as the very first byte switches to quoted mode.
+                    Some((i, b'"')) if i == start => {
+                        is_quoted = true;
+                        start = i + 1;
+                    },
+                    Some((_, c)) if is_token(c) => (),
+                    Some((i, b';')) if i > start => {
+                        value = Indexed(start, i);
+                        start = i + 1;
+                        break 'value;
+                    }
+                    None => {
+                        value = Indexed(start, s.len());
+                        start = s.len();
+                        break 'value;
+                    },
+
+                    Some((pos, byte)) => return Err(ParseError::InvalidToken {
+                        pos: pos,
+                        byte: byte,
+                    }),
+                }
+            }
+        }
+
+        // After a quoted value only spaces may precede the next `;` or the
+        // end of the input.
+        if is_quoted {
+            'ws: loop {
+                match iter.next() {
+                    Some((i, b';')) => {
+                        // next param
+                        start = i + 1;
+                        break 'ws;
+                    },
+                    Some((_, b' ')) => {
+                        // skip whitespace
+                    },
+                    None => {
+                        // eof
+                        start = s.len();
+                        break 'ws;
+                    },
+                    Some((pos, byte)) => return Err(ParseError::InvalidToken {
+                        pos: pos,
+                        byte: byte,
+                    }),
+                }
+            }
+        }
+
+        match params {
+            ParamSource::Utf8(i) => {
+                // A further parameter follows the `charset=utf-8` shortcut, so
+                // the shortcut is expanded into explicit index pairs.
+                let i = i + 2;
+                let charset = Indexed(i, "charset".len() + i);
+                // The shortcut is also taken for a quoted value
+                // (`charset="utf-8"`). In that case the value text starts one
+                // byte later, after the opening quote; without this
+                // adjustment the range would cover `"utf-` instead of `utf-8`.
+                let value_start = match s.as_bytes().get(charset.1 + 1) {
+                    Some(&b'"') => charset.1 + 2,
+                    _ => charset.1 + 1,
+                };
+                let utf8 = Indexed(value_start, value_start + "utf-8".len());
+                params = ParamSource::Custom(semicolon, vec![
+                    (charset, utf8),
+                    (name, value),
+                ]);
+            },
+            ParamSource::Custom(_, ref mut vec) => {
+                vec.push((name, value));
+            },
+            ParamSource::None => {
+                // First parameter: try the `charset=utf-8` shortcut, which
+                // requires the name to start exactly two bytes after the
+                // semicolon.
+                if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
+                    if UTF_8 == &s[value.0..value.1] {
+                        params = ParamSource::Utf8(semicolon);
+                        continue 'params;
+                    }
+                }
+                params = ParamSource::Custom(semicolon, vec![(name, value)]);
+            },
+        }
+    }
+    Ok(params)
+}
+
+/// Returns a copy of `s` in which selected byte ranges are ASCII lowercased.
+///
+/// Everything before index `semi` and every parameter name range is
+/// lowercased. A parameter value range is lowercased only when its
+/// (lowercased) name equals `CHARSET.source`; all other values keep their
+/// original case.
+fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
+    let mut lowered = String::from(s);
+    lowered[..semi].make_ascii_lowercase();
+
+    for pair in params.iter() {
+        let (ref key, ref val) = *pair;
+        lowered[key.0..key.1].make_ascii_lowercase();
+        // The name has just been lowercased in place, so a plain string
+        // comparison suffices here; the case-insensitive `Name == &str`
+        // comparison is not needed.
+        let names_charset = &lowered[key.0..key.1] == CHARSET.source;
+        if names_charset {
+            lowered[val.0..val.1].make_ascii_lowercase();
+        }
+    }
+
+    lowered
+}
+
+// From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
+//
+// > All registered media types MUST be assigned top-level type and
+// > subtype names. The combination of these names serves to uniquely
+// > identify the media type, and the subtype name facet (or the absence
+// > of one) identifies the registration tree. Both top-level type and
+// > subtype names are case-insensitive.
+// >
+// > Type and subtype names MUST conform to the following ABNF:
+// >
+// > type-name = restricted-name
+// > subtype-name = restricted-name
+// >
+// > restricted-name = restricted-name-first *126restricted-name-chars
+// > restricted-name-first = ALPHA / DIGIT
+// > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
+// > "$" / "&" / "-" / "^" / "_"
+// > restricted-name-chars =/ "." ; Characters before first dot always
+// > ; specify a facet name
+// > restricted-name-chars =/ "+" ; Characters after last plus always
+// > ; specify a structured syntax suffix
+
+// However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
+//
+// > media-type = type "/" subtype *( OWS ";" OWS parameter )
+// > type = token
+// > subtype = token
+// > parameter = token "=" ( token / quoted-string )
+//
+// Where token is defined as:
+//
+// > token = 1*tchar
+// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+// > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+//
+// So, clearly, ¯\_(ツ)_/¯
+
+// Expands a list of comma-terminated integer flags into an array literal of
+// `bool`s, where each element is `flag != 0`. Every flag, including the last
+// one, must be followed by a comma.
+macro_rules! byte_map {
+ ($($flag:expr,)*) => ([
+ $($flag != 0,)*
+ ])
+}
+
+/// Lookup table indexed by byte value: `true` for the bytes accepted as token
+/// characters (the `tchar` set quoted above: ASCII letters, digits and
+/// ``! # $ % & ' * + - . ^ _ ` | ~``), `false` for everything else.
+/// Each row below covers 16 consecutive byte values.
+static TOKEN_MAP: [bool; 256] = byte_map![
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0F: control bytes
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F: control bytes
+ 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, // 0x20-0x2F: SP ! " # $ % & ' ( ) * + , - . /
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30-0x3F: 0-9 : ; < = > ?
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F: @ A-O
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, // 0x50-0x5F: P-Z [ \ ] ^ _
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F: ` a-o
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, // 0x70-0x7F: p-z { | } ~ DEL
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0xFF: non-ASCII bytes
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+/// Returns `true` if `c` is marked as a token byte in `TOKEN_MAP`.
+fn is_token(c: u8) -> bool {
+    // Every `u8` value is a valid index into the 256-entry table.
+    let index = usize::from(c);
+    TOKEN_MAP[index]
+}
+
+/// Returns `true` if `c` may appear inside a quoted parameter value: any
+/// byte except the control bytes 0-31 and 127 (DEL).
+fn is_restricted_quoted_char(c: u8) -> bool {
+    let is_control = c <= 31 || c == 127;
+    !is_control
+}
+
+/// Checks every entry of `TOKEN_MAP` against the token character set,
+/// spelled out independently of the table.
+///
+/// The expected set is written with plain comparisons instead of the
+/// deprecated `...` range patterns, so no blanket `#[allow(warnings)]` is
+/// needed and the test builds on old and new toolchains alike.
+#[test]
+fn test_lookup_tables() {
+    // Punctuation accepted as token characters, besides letters and digits.
+    const PUNCTUATION: &'static [u8] = b"!#$%&'*+-.^_`|~";
+
+    for (i, &valid) in TOKEN_MAP.iter().enumerate() {
+        // The table has 256 entries, so the index always fits in a `u8`.
+        let i = i as u8;
+        let is_alpha = (b'a' <= i && i <= b'z') || (b'A' <= i && i <= b'Z');
+        let is_digit = b'0' <= i && i <= b'9';
+        let should = is_alpha || is_digit || PUNCTUATION.contains(&i);
+        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
+    }
+}