#![doc(html_root_url = "https://docs.rs/httparse/1.3.3")] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(test, deny(warnings))] #![deny(missing_docs)] //! # httparse //! //! A push library for parsing HTTP/1.x requests and responses. //! //! The focus is on speed and safety. Unsafe code is used to keep parsing fast, //! but unsafety is contained in a submodule, with invariants enforced. The //! parsing internals use an `Iterator` instead of direct indexing, while //! skipping bounds checks. //! //! With Rust 1.27.0 or later, support for SIMD is enabled automatically. //! If building an executable to be run on multiple platforms, and thus //! not passing `target_feature` or `target_cpu` flags to the compiler, //! runtime detection can still detect SSE4.2 or AVX2 support to provide //! massive wins. //! //! If compiling for a specific target, remembering to include //! `-C target_cpu=native` allows the detection to become compile time checks, //! making it *even* faster. #[cfg(feature = "std")] extern crate std as core; use core::{fmt, result, str, slice}; use iter::Bytes; mod iter; #[macro_use] mod macros; mod simd; #[inline] fn shrink(slice: &mut &mut [T], len: usize) { debug_assert!(slice.len() >= len); let ptr = slice.as_mut_ptr(); *slice = unsafe { slice::from_raw_parts_mut(ptr, len) }; } /// Determines if byte is a token char. /// /// > ```notrust /// > token = 1*tchar /// > /// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" /// > / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" /// > / DIGIT / ALPHA /// > ; any VCHAR, except delimiters /// > ``` #[inline] fn is_token(b: u8) -> bool { b > 0x1F && b < 0x7F } // ASCII codes to accept URI string. // i.e. A-Z a-z 0-9 !#$%&'*+-._();:@=,/?[]~^ // TODO: Make a stricter checking for URI string? static URI_MAP: [bool; 256] = byte_map![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // \0 \n 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // commands 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // \w ! 
" # $ % & ' ( ) * + , - . / 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // @ A B C D E F G H I J K L M N O 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // P Q R S T U V W X Y Z [ \ ] ^ _ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // ` a b c d e f g h i j k l m n o 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // p q r s t u v w x y z { | } ~ del // ====== Extended ASCII (aka. obs-text) ====== 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; #[inline] fn is_uri_token(b: u8) -> bool { URI_MAP[b as usize] } static HEADER_NAME_MAP: [bool; 256] = byte_map![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; #[inline] fn is_header_name_token(b: u8) -> bool { HEADER_NAME_MAP[b as usize] } static HEADER_VALUE_MAP: [bool; 256] = byte_map![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; #[inline] fn is_header_value_token(b: u8) -> bool { HEADER_VALUE_MAP[b as usize] } /// An error in parsing. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Error { /// Invalid byte in header name. HeaderName, /// Invalid byte in header value. HeaderValue, /// Invalid byte in new line. NewLine, /// Invalid byte in Response status. Status, /// Invalid byte where token is required. Token, /// Parsed more headers than provided buffer can contain. TooManyHeaders, /// Invalid byte in HTTP version. Version, } impl Error { #[inline] fn description_str(&self) -> &'static str { match *self { Error::HeaderName => "invalid header name", Error::HeaderValue => "invalid header value", Error::NewLine => "invalid new line", Error::Status => "invalid response status", Error::Token => "invalid token", Error::TooManyHeaders => "too many headers", Error::Version => "invalid HTTP version", } } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.description_str()) } } #[cfg(feature = "std")] impl std::error::Error for Error { fn description(&self) -> &str { self.description_str() } } /// An error in parsing a chunk size. // Note: Move this into the error enum once v2.0 is released. 
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidChunkSize;

impl fmt::Display for InvalidChunkSize {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("invalid chunk size")
    }
}

// Mirrors the `std::error::Error` impl of `Error`, so that both error types
// of this crate can be used wherever a standard error is expected.
#[cfg(feature = "std")]
impl std::error::Error for InvalidChunkSize {
    fn description(&self) -> &str {
        "invalid chunk size"
    }
}

/// A Result of any parsing action.
///
/// If the input is invalid, an `Error` will be returned. Note that incomplete
/// data is not considered invalid, and so will not return an error, but rather
/// a `Ok(Status::Partial)`.
pub type Result<T> = result::Result<Status<T>, Error>;

/// The result of a successful parse pass.
///
/// `Complete` is used when the buffer contained the complete value.
/// `Partial` is used when parsing did not reach the end of the expected value,
/// but no invalid data was found.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Status<T> {
    /// The completed result.
    Complete(T),
    /// A partial result.
    Partial
}

impl<T> Status<T> {
    /// Convenience method to check if status is complete.
    #[inline]
    pub fn is_complete(&self) -> bool {
        match *self {
            Status::Complete(..) => true,
            Status::Partial => false
        }
    }

    /// Convenience method to check if status is partial.
    #[inline]
    pub fn is_partial(&self) -> bool {
        match *self {
            Status::Complete(..) => false,
            Status::Partial => true
        }
    }

    /// Convenience method to unwrap a Complete value. Panics if the status is
    /// `Partial`.
    #[inline]
    pub fn unwrap(self) -> T {
        match self {
            Status::Complete(t) => t,
            Status::Partial => panic!("Tried to unwrap Status::Partial")
        }
    }
}

/// A parsed Request.
///
/// The optional values will be `None` if a parse was not complete, and did not
/// parse the associated property. This allows you to inspect the parts that
/// could be parsed, before reading more, in case you wish to exit early.
/// /// # Example /// /// ```no_run /// let buf = b"GET /404 HTTP/1.1\r\nHost:"; /// let mut headers = [httparse::EMPTY_HEADER; 16]; /// let mut req = httparse::Request::new(&mut headers); /// let res = req.parse(buf).unwrap(); /// if res.is_partial() { /// match req.path { /// Some(ref path) => { /// // check router for path. /// // /404 doesn't exist? we could stop parsing /// }, /// None => { /// // must read more and parse again /// } /// } /// } /// ``` #[derive(Debug, PartialEq)] pub struct Request<'headers, 'buf: 'headers> { /// The request method, such as `GET`. pub method: Option<&'buf str>, /// The request path, such as `/about-us`. pub path: Option<&'buf str>, /// The request version, such as `HTTP/1.1`. pub version: Option, /// The request headers. pub headers: &'headers mut [Header<'buf>] } impl<'h, 'b> Request<'h, 'b> { /// Creates a new Request, using a slice of headers you allocate. #[inline] pub fn new(headers: &'h mut [Header<'b>]) -> Request<'h, 'b> { Request { method: None, path: None, version: None, headers: headers, } } /// Try to parse a buffer of bytes into the Request. pub fn parse(&mut self, buf: &'b [u8]) -> Result { let orig_len = buf.len(); let mut bytes = Bytes::new(buf); complete!(skip_empty_lines(&mut bytes)); self.method = Some(complete!(parse_token(&mut bytes))); self.path = Some(complete!(parse_uri(&mut bytes))); self.version = Some(complete!(parse_version(&mut bytes))); newline!(bytes); let len = orig_len - bytes.len(); let headers_len = complete!(parse_headers_iter(&mut self.headers, &mut bytes)); Ok(Status::Complete(len + headers_len)) } } #[inline] fn skip_empty_lines(bytes: &mut Bytes) -> Result<()> { loop { let b = bytes.peek(); match b { Some(b'\r') => { // there's `\r`, so it's safe to bump 1 pos unsafe { bytes.bump() }; expect!(bytes.next() == b'\n' => Err(Error::NewLine)); }, Some(b'\n') => { // there's `\n`, so it's safe to bump 1 pos unsafe { bytes.bump(); } }, Some(..) 
=> { bytes.slice(); return Ok(Status::Complete(())); }, None => return Ok(Status::Partial) } } } /// A parsed Response. /// /// See `Request` docs for explanation of optional values. #[derive(Debug, PartialEq)] pub struct Response<'headers, 'buf: 'headers> { /// The response version, such as `HTTP/1.1`. pub version: Option, /// The response code, such as `200`. pub code: Option, /// The response reason-phrase, such as `OK`. pub reason: Option<&'buf str>, /// The response headers. pub headers: &'headers mut [Header<'buf>] } impl<'h, 'b> Response<'h, 'b> { /// Creates a new `Response` using a slice of `Header`s you have allocated. #[inline] pub fn new(headers: &'h mut [Header<'b>]) -> Response<'h, 'b> { Response { version: None, code: None, reason: None, headers: headers, } } /// Try to parse a buffer of bytes into this `Response`. pub fn parse(&mut self, buf: &'b [u8]) -> Result { let orig_len = buf.len(); let mut bytes = Bytes::new(buf); complete!(skip_empty_lines(&mut bytes)); self.version = Some(complete!(parse_version(&mut bytes))); space!(bytes or Error::Version); self.code = Some(complete!(parse_code(&mut bytes))); // RFC7230 says there must be 'SP' and then reason-phrase, but admits // its only for legacy reasons. With the reason-phrase completely // optional (and preferred to be omitted) in HTTP2, we'll just // handle any response that doesn't include a reason-phrase, because // it's more lenient, and we don't care anyways. // // So, a SP means parse a reason-phrase. // A newline means go to headers. // Anything else we'll say is a malformed status. 
match next!(bytes) { b' ' => { bytes.slice(); self.reason = Some(complete!(parse_reason(&mut bytes))); }, b'\r' => { expect!(bytes.next() == b'\n' => Err(Error::Status)); bytes.slice(); self.reason = Some(""); }, b'\n' => self.reason = Some(""), _ => return Err(Error::Status), } let len = orig_len - bytes.len(); let headers_len = complete!(parse_headers_iter(&mut self.headers, &mut bytes)); Ok(Status::Complete(len + headers_len)) } } /// Represents a parsed header. #[derive(Copy, Clone, PartialEq, Debug)] pub struct Header<'a> { /// The name portion of a header. /// /// A header name must be valid ASCII-US, so it's safe to store as a `&str`. pub name: &'a str, /// The value portion of a header. /// /// While headers **should** be ASCII-US, the specification allows for /// values that may not be, and so the value is stored as bytes. pub value: &'a [u8], } /// An empty header, useful for constructing a `Header` array to pass in for /// parsing. /// /// # Example /// /// ``` /// let headers = [httparse::EMPTY_HEADER; 64]; /// ``` pub const EMPTY_HEADER: Header<'static> = Header { name: "", value: b"" }; #[inline] fn parse_version(bytes: &mut Bytes) -> Result { if let Some(mut eight) = bytes.next_8() { expect!(eight._0() => b'H' |? Err(Error::Version)); expect!(eight._1() => b'T' |? Err(Error::Version)); expect!(eight._2() => b'T' |? Err(Error::Version)); expect!(eight._3() => b'P' |? Err(Error::Version)); expect!(eight._4() => b'/' |? Err(Error::Version)); expect!(eight._5() => b'1' |? Err(Error::Version)); expect!(eight._6() => b'.' |? Err(Error::Version)); let v = match eight._7() { b'0' => 0, b'1' => 1, _ => return Err(Error::Version) }; return Ok(Status::Complete(v)) } // else (but not in `else` because of borrow checker) // If there aren't at least 8 bytes, we still want to detect early // if this is a valid version or not. If it is, we'll return Partial. 
expect!(bytes.next() == b'H' => Err(Error::Version)); expect!(bytes.next() == b'T' => Err(Error::Version)); expect!(bytes.next() == b'T' => Err(Error::Version)); expect!(bytes.next() == b'P' => Err(Error::Version)); expect!(bytes.next() == b'/' => Err(Error::Version)); expect!(bytes.next() == b'1' => Err(Error::Version)); expect!(bytes.next() == b'.' => Err(Error::Version)); Ok(Status::Partial) } /// From [RFC 7230](https://tools.ietf.org/html/rfc7230): /// /// > ```notrust /// > reason-phrase = *( HTAB / SP / VCHAR / obs-text ) /// > HTAB = %x09 ; horizontal tab /// > VCHAR = %x21-7E ; visible (printing) characters /// > obs-text = %x80-FF /// > ``` /// /// > A.2. Changes from RFC 2616 /// > /// > Non-US-ASCII content in header fields and the reason phrase /// > has been obsoleted and made opaque (the TEXT rule was removed). /// /// Note that the following implementation deliberately rejects the obsoleted (non-US-ASCII) text range. /// /// The fully compliant parser should probably just return the reason-phrase as an opaque &[u8] data /// and leave interpretation to user or specialized helpers (akin to .display() in std::path::Path) #[inline] fn parse_reason<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { loop { let b = next!(bytes); if b == b'\r' { expect!(bytes.next() == b'\n' => Err(Error::Status)); return Ok(Status::Complete(unsafe { // all bytes up till `i` must have been HTAB / SP / VCHAR str::from_utf8_unchecked(bytes.slice_skip(2)) })); } else if b == b'\n' { return Ok(Status::Complete(unsafe { // all bytes up till `i` must have been HTAB / SP / VCHAR str::from_utf8_unchecked(bytes.slice_skip(1)) })); } else if !((b >= 0x20 && b <= 0x7E) || b == b'\t') { return Err(Error::Status); } } } #[inline] fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { loop { let b = next!(bytes); if b == b' ' { return Ok(Status::Complete(unsafe { // all bytes up till `i` must have been `is_token`. 
str::from_utf8_unchecked(bytes.slice_skip(1)) })); } else if !is_token(b) { return Err(Error::Token); } } } #[inline] fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { simd::match_uri_vectored(bytes); loop { let b = next!(bytes); if b == b' ' { return Ok(Status::Complete(unsafe { // all bytes up till `i` must have been `is_token`. str::from_utf8_unchecked(bytes.slice_skip(1)) })); } else if !is_uri_token(b) { return Err(Error::Token); } } } #[inline] fn parse_code(bytes: &mut Bytes) -> Result { let hundreds = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status)); let tens = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status)); let ones = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status)); Ok(Status::Complete((hundreds - b'0') as u16 * 100 + (tens - b'0') as u16 * 10 + (ones - b'0') as u16)) } /// Parse a buffer of bytes as headers. /// /// The return value, if complete and successful, includes the index of the /// buffer that parsing stopped at, and a sliced reference to the parsed /// headers. The length of the slice will be equal to the number of properly /// parsed headers. 
/// /// # Example /// /// ``` /// let buf = b"Host: foo.bar\nAccept: */*\n\nblah blah"; /// let mut headers = [httparse::EMPTY_HEADER; 4]; /// assert_eq!(httparse::parse_headers(buf, &mut headers), /// Ok(httparse::Status::Complete((27, &[ /// httparse::Header { name: "Host", value: b"foo.bar" }, /// httparse::Header { name: "Accept", value: b"*/*" } /// ][..])))); /// ``` pub fn parse_headers<'b: 'h, 'h>(src: &'b [u8], mut dst: &'h mut [Header<'b>]) -> Result<(usize, &'h [Header<'b>])> { let mut iter = Bytes::new(src); let pos = complete!(parse_headers_iter(&mut dst, &mut iter)); Ok(Status::Complete((pos, dst))) } #[inline] fn parse_headers_iter<'a, 'b>(headers: &mut &mut [Header<'a>], bytes: &'b mut Bytes<'a>) -> Result { let mut num_headers: usize = 0; let mut count: usize = 0; let mut result = Err(Error::TooManyHeaders); { let mut iter = headers.iter_mut(); 'headers: loop { // a newline here means the head is over! let b = next!(bytes); if b == b'\r' { expect!(bytes.next() == b'\n' => Err(Error::NewLine)); result = Ok(Status::Complete(count + bytes.pos())); break; } else if b == b'\n' { result = Ok(Status::Complete(count + bytes.pos())); break; } else if !is_header_name_token(b) { return Err(Error::HeaderName); } let header = match iter.next() { Some(header) => header, None => break 'headers }; num_headers += 1; // parse header name until colon 'name: loop { let b = next!(bytes); if b == b':' { count += bytes.pos(); header.name = unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) }; break 'name; } else if !is_header_name_token(b) { return Err(Error::HeaderName); } } let mut b; 'value: loop { // eat white space between colon and value 'whitespace: loop { b = next!(bytes); if b == b' ' || b == b'\t' { count += bytes.pos(); bytes.slice(); continue 'whitespace; } else { if !is_header_value_token(b) { break 'value; } break 'whitespace; } } // parse value till EOL simd::match_header_value_vectored(bytes); macro_rules! 
check { ($bytes:ident, $i:ident) => ({ b = $bytes.$i(); if !is_header_value_token(b) { break 'value; } }); ($bytes:ident) => ({ check!($bytes, _0); check!($bytes, _1); check!($bytes, _2); check!($bytes, _3); check!($bytes, _4); check!($bytes, _5); check!($bytes, _6); check!($bytes, _7); }) } while let Some(mut bytes8) = bytes.next_8() { check!(bytes8); } loop { b = next!(bytes); if !is_header_value_token(b) { break 'value; } } } //found_ctl let value_slice : &[u8] = if b == b'\r' { expect!(bytes.next() == b'\n' => Err(Error::HeaderValue)); count += bytes.pos(); // having just check that `\r\n` exists, it's safe to skip those 2 bytes unsafe { bytes.slice_skip(2) } } else if b == b'\n' { count += bytes.pos(); // having just check that `\r\n` exists, it's safe to skip 1 byte unsafe { bytes.slice_skip(1) } } else { return Err(Error::HeaderValue); }; // trim trailing whitespace in the header if let Some(last_visible) = value_slice.iter().rposition(|b| *b != b' ' && *b != b'\t' ) { // There is at least one non-whitespace character. header.value = &value_slice[0..last_visible+1]; } else { // There is no non-whitespace character. This can only happen when value_slice is // empty. header.value = value_slice; } } } // drop iter shrink(headers, num_headers); result } /// Parse a buffer of bytes as a chunk size. /// /// The return value, if complete and successful, includes the index of the /// buffer that parsing stopped at, and the size of the following chunk. /// /// # Example /// /// ``` /// let buf = b"4\r\nRust\r\n0\r\n\r\n"; /// assert_eq!(httparse::parse_chunk_size(buf), /// Ok(httparse::Status::Complete((3, 4)))); /// ``` pub fn parse_chunk_size(buf: &[u8]) -> result::Result, InvalidChunkSize> { const RADIX: u64 = 16; let mut bytes = Bytes::new(buf); let mut size = 0; let mut in_chunk_size = true; let mut in_ext = false; let mut count = 0; loop { let b = next!(bytes); match b { b'0' ... 
b'9' if in_chunk_size => { if count > 15 { return Err(InvalidChunkSize); } count += 1; size *= RADIX; size += (b - b'0') as u64; }, b'a' ... b'f' if in_chunk_size => { if count > 15 { return Err(InvalidChunkSize); } count += 1; size *= RADIX; size += (b + 10 - b'a') as u64; } b'A' ... b'F' if in_chunk_size => { if count > 15 { return Err(InvalidChunkSize); } count += 1; size *= RADIX; size += (b + 10 - b'A') as u64; } b'\r' => { match next!(bytes) { b'\n' => break, _ => return Err(InvalidChunkSize), } } // If we weren't in the extension yet, the ";" signals its start b';' if !in_ext => { in_ext = true; in_chunk_size = false; } // "Linear white space" is ignored between the chunk size and the // extension separator token (";") due to the "implied *LWS rule". b'\t' | b' ' if !in_ext & !in_chunk_size => {} // LWS can follow the chunk size, but no more digits can come b'\t' | b' ' if in_chunk_size => in_chunk_size = false, // We allow any arbitrary octet once we are in the extension, since // they all get ignored anyway. According to the HTTP spec, valid // extensions would have a more strict syntax: // (token ["=" (token | quoted-string)]) // but we gain nothing by rejecting an otherwise valid chunk size. _ if in_ext => {} // Finally, if we aren't in the extension and we're reading any // other octet, the chunk size line is invalid! _ => return Err(InvalidChunkSize), } } Ok(Status::Complete((bytes.pos(), size))) } #[cfg(test)] mod tests { use super::{Request, Response, Status, EMPTY_HEADER, shrink, parse_chunk_size}; const NUM_OF_HEADERS: usize = 4; #[test] fn test_shrink() { let mut arr = [EMPTY_HEADER; 16]; { let slice = &mut &mut arr[..]; assert_eq!(slice.len(), 16); shrink(slice, 4); assert_eq!(slice.len(), 4); } assert_eq!(arr.len(), 16); } macro_rules! req { ($name:ident, $buf:expr, |$arg:ident| $body:expr) => ( req! 
{$name, $buf, Ok(Status::Complete($buf.len())), |$arg| $body } ); ($name:ident, $buf:expr, $len:expr, |$arg:ident| $body:expr) => ( #[test] fn $name() { let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS]; let mut req = Request::new(&mut headers[..]); let status = req.parse($buf.as_ref()); assert_eq!(status, $len); closure(req); fn closure($arg: Request) { $body } } ) } req! { test_request_simple, b"GET / HTTP/1.1\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 0); } } req! { test_request_simple_with_query_params, b"GET /thing?data=a HTTP/1.1\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/thing?data=a"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 0); } } req! { test_request_simple_with_whatwg_query_params, b"GET /thing?data=a^ HTTP/1.1\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/thing?data=a^"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 0); } } req! { test_request_headers, b"GET / HTTP/1.1\r\nHost: foo.com\r\nCookie: \r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 2); assert_eq!(req.headers[0].name, "Host"); assert_eq!(req.headers[0].value, b"foo.com"); assert_eq!(req.headers[1].name, "Cookie"); assert_eq!(req.headers[1].value, b""); } } req! { test_request_headers_optional_whitespace, b"GET / HTTP/1.1\r\nHost: \tfoo.com\t \r\nCookie: \t \r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 2); assert_eq!(req.headers[0].name, "Host"); assert_eq!(req.headers[0].value, b"foo.com"); assert_eq!(req.headers[1].name, "Cookie"); assert_eq!(req.headers[1].value, b""); } } req! 
{ // test the scalar parsing test_request_header_value_htab_short, b"GET / HTTP/1.1\r\nUser-Agent: some\tagent\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 1); assert_eq!(req.headers[0].name, "User-Agent"); assert_eq!(req.headers[0].value, b"some\tagent"); } } req! { // test the sse42 parsing test_request_header_value_htab_med, b"GET / HTTP/1.1\r\nUser-Agent: 1234567890some\tagent\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 1); assert_eq!(req.headers[0].name, "User-Agent"); assert_eq!(req.headers[0].value, b"1234567890some\tagent"); } } req! { // test the avx2 parsing test_request_header_value_htab_long, b"GET / HTTP/1.1\r\nUser-Agent: 1234567890some\t1234567890agent1234567890\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 1); assert_eq!(req.headers[0].name, "User-Agent"); assert_eq!(req.headers[0].value, &b"1234567890some\t1234567890agent1234567890"[..]); } } req! { test_request_headers_max, b"GET / HTTP/1.1\r\nA: A\r\nB: B\r\nC: C\r\nD: D\r\n\r\n", |req| { assert_eq!(req.headers.len(), NUM_OF_HEADERS); } } req! { test_request_multibyte, b"GET / HTTP/1.1\r\nHost: foo.com\r\nUser-Agent: \xe3\x81\xb2\xe3/1.0\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers[0].name, "Host"); assert_eq!(req.headers[0].value, b"foo.com"); assert_eq!(req.headers[1].name, "User-Agent"); assert_eq!(req.headers[1].value, b"\xe3\x81\xb2\xe3/1.0"); } } req! { test_request_partial, b"GET / HTTP/1.1\r\n\r", Ok(Status::Partial), |_req| {} } req! { test_request_partial_version, b"GET / HTTP/1.", Ok(Status::Partial), |_req| {} } req! 
{ test_request_newlines, b"GET / HTTP/1.1\nHost: foo.bar\n\n", |_r| {} } req! { test_request_empty_lines_prefix, b"\r\n\r\nGET / HTTP/1.1\r\n\r\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 0); } } req! { test_request_empty_lines_prefix_lf_only, b"\n\nGET / HTTP/1.1\n\n", |req| { assert_eq!(req.method.unwrap(), "GET"); assert_eq!(req.path.unwrap(), "/"); assert_eq!(req.version.unwrap(), 1); assert_eq!(req.headers.len(), 0); } } req! { test_request_with_invalid_token_delimiter, b"GET\n/ HTTP/1.1\r\nHost: foo.bar\r\n\r\n", Err(::Error::Token), |_r| {} } req! { test_request_with_invalid_but_short_version, b"GET / HTTP/1!", Err(::Error::Version), |_r| {} } macro_rules! res { ($name:ident, $buf:expr, |$arg:ident| $body:expr) => ( res! {$name, $buf, Ok(Status::Complete($buf.len())), |$arg| $body } ); ($name:ident, $buf:expr, $len:expr, |$arg:ident| $body:expr) => ( #[test] fn $name() { let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS]; let mut res = Response::new(&mut headers[..]); let status = res.parse($buf.as_ref()); assert_eq!(status, $len); closure(res); fn closure($arg: Response) { $body } } ) } res! { test_response_simple, b"HTTP/1.1 200 OK\r\n\r\n", |res| { assert_eq!(res.version.unwrap(), 1); assert_eq!(res.code.unwrap(), 200); assert_eq!(res.reason.unwrap(), "OK"); } } res! { test_response_newlines, b"HTTP/1.0 403 Forbidden\nServer: foo.bar\n\n", |_r| {} } res! { test_response_reason_missing, b"HTTP/1.1 200 \r\n\r\n", |res| { assert_eq!(res.version.unwrap(), 1); assert_eq!(res.code.unwrap(), 200); assert_eq!(res.reason.unwrap(), ""); } } res! { test_response_reason_missing_no_space, b"HTTP/1.1 200\r\n\r\n", |res| { assert_eq!(res.version.unwrap(), 1); assert_eq!(res.code.unwrap(), 200); assert_eq!(res.reason.unwrap(), ""); } } res! 
{ test_response_reason_missing_no_space_with_headers, b"HTTP/1.1 200\r\nFoo: bar\r\n\r\n", |res| { assert_eq!(res.version.unwrap(), 1); assert_eq!(res.code.unwrap(), 200); assert_eq!(res.reason.unwrap(), ""); assert_eq!(res.headers.len(), 1); assert_eq!(res.headers[0].name, "Foo"); assert_eq!(res.headers[0].value, b"bar"); } } res! { test_response_reason_with_space_and_tab, b"HTTP/1.1 101 Switching Protocols\t\r\n\r\n", |res| { assert_eq!(res.version.unwrap(), 1); assert_eq!(res.code.unwrap(), 101); assert_eq!(res.reason.unwrap(), "Switching Protocols\t"); } } static RESPONSE_REASON_WITH_OBS_TEXT_BYTE: &'static [u8] = b"HTTP/1.1 200 X\xFFZ\r\n\r\n"; res! { test_response_reason_with_obsolete_text_byte, RESPONSE_REASON_WITH_OBS_TEXT_BYTE, Err(::Error::Status), |_res| {} } res! { test_response_reason_with_nul_byte, b"HTTP/1.1 200 \x00\r\n\r\n", Err(::Error::Status), |_res| {} } res! { test_response_version_missing_space, b"HTTP/1.1", Ok(Status::Partial), |_res| {} } res! { test_response_code_missing_space, b"HTTP/1.1 200", Ok(Status::Partial), |_res| {} } res! 
{ test_response_empty_lines_prefix_lf_only, b"\n\nHTTP/1.1 200 OK\n\n", |_res| {} } #[test] fn test_chunk_size() { assert_eq!(parse_chunk_size(b"0\r\n"), Ok(Status::Complete((3, 0)))); assert_eq!(parse_chunk_size(b"12\r\nchunk"), Ok(Status::Complete((4, 18)))); assert_eq!(parse_chunk_size(b"3086d\r\n"), Ok(Status::Complete((7, 198765)))); assert_eq!(parse_chunk_size(b"3735AB1;foo bar*\r\n"), Ok(Status::Complete((18, 57891505)))); assert_eq!(parse_chunk_size(b"3735ab1 ; baz \r\n"), Ok(Status::Complete((16, 57891505)))); assert_eq!(parse_chunk_size(b"77a65\r"), Ok(Status::Partial)); assert_eq!(parse_chunk_size(b"ab"), Ok(Status::Partial)); assert_eq!(parse_chunk_size(b"567f8a\rfoo"), Err(::InvalidChunkSize)); assert_eq!(parse_chunk_size(b"567f8a\rfoo"), Err(::InvalidChunkSize)); assert_eq!(parse_chunk_size(b"567xf8a\r\n"), Err(::InvalidChunkSize)); assert_eq!(parse_chunk_size(b"ffffffffffffffff\r\n"), Ok(Status::Complete((18, ::core::u64::MAX)))); assert_eq!(parse_chunk_size(b"1ffffffffffffffff\r\n"), Err(::InvalidChunkSize)); assert_eq!(parse_chunk_size(b"Affffffffffffffff\r\n"), Err(::InvalidChunkSize)); assert_eq!(parse_chunk_size(b"fffffffffffffffff\r\n"), Err(::InvalidChunkSize)); } #[cfg(feature = "std")] #[test] fn test_std_error() { use super::Error; use std::error::Error as StdError; let err = Error::HeaderName; assert_eq!(err.to_string(), err.description()); } }