diff options
Diffstat (limited to 'third_party/rust/httparse/src')
-rw-r--r-- | third_party/rust/httparse/src/iter.rs | 156 | ||||
-rw-r--r-- | third_party/rust/httparse/src/lib.rs | 1138 | ||||
-rw-r--r-- | third_party/rust/httparse/src/macros.rs | 59 | ||||
-rw-r--r-- | third_party/rust/httparse/src/simd/avx2.rs | 116 | ||||
-rw-r--r-- | third_party/rust/httparse/src/simd/fallback.rs | 8 | ||||
-rw-r--r-- | third_party/rust/httparse/src/simd/mod.rs | 238 | ||||
-rw-r--r-- | third_party/rust/httparse/src/simd/sse42.rs | 84 |
7 files changed, 1799 insertions, 0 deletions
diff --git a/third_party/rust/httparse/src/iter.rs b/third_party/rust/httparse/src/iter.rs new file mode 100644 index 0000000000..ca2767a1df --- /dev/null +++ b/third_party/rust/httparse/src/iter.rs @@ -0,0 +1,156 @@ +use core::slice; + +pub struct Bytes<'a> { + slice: &'a [u8], + pos: usize +} + +impl<'a> Bytes<'a> { + #[inline] + pub fn new(slice: &'a [u8]) -> Bytes<'a> { + Bytes { + slice: slice, + pos: 0 + } + } + + #[inline] + pub fn pos(&self) -> usize { + self.pos + } + + #[inline] + pub fn peek(&self) -> Option<u8> { + self.slice.get(self.pos).cloned() + } + + #[inline] + pub unsafe fn bump(&mut self) { + debug_assert!(self.pos + 1 <= self.slice.len(), "overflow"); + self.pos += 1; + } + + #[allow(unused)] + #[inline] + pub unsafe fn advance(&mut self, n: usize) { + debug_assert!(self.pos + n <= self.slice.len(), "overflow"); + self.pos += n; + } + + #[inline] + pub fn len(&self) -> usize { + self.slice.len() + } + + #[inline] + pub fn slice(&mut self) -> &'a [u8] { + // not moving position at all, so it's safe + unsafe { + self.slice_skip(0) + } + } + + #[inline] + pub unsafe fn slice_skip(&mut self, skip: usize) -> &'a [u8] { + debug_assert!(self.pos >= skip); + let head_pos = self.pos - skip; + let ptr = self.slice.as_ptr(); + let head = slice::from_raw_parts(ptr, head_pos); + let tail = slice::from_raw_parts(ptr.offset(self.pos as isize), self.slice.len() - self.pos); + self.pos = 0; + self.slice = tail; + head + } + + #[inline] + pub fn next_8<'b>(&'b mut self) -> Option<Bytes8<'b, 'a>> { + if self.slice.len() > self.pos + 8 { + Some(Bytes8::new(self)) + } else { + None + } + } +} + +impl<'a> AsRef<[u8]> for Bytes<'a> { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.slice[self.pos..] 
+ } +} + +impl<'a> Iterator for Bytes<'a> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option<u8> { + if self.slice.len() > self.pos { + let b = unsafe { *self.slice.get_unchecked(self.pos) }; + self.pos += 1; + Some(b) + } else { + None + } + } +} + +pub struct Bytes8<'a, 'b: 'a> { + bytes: &'a mut Bytes<'b>, + #[cfg(debug_assertions)] + pos: usize +} + +macro_rules! bytes8_methods { + ($f:ident, $pos:expr) => { + #[inline] + pub fn $f(&mut self) -> u8 { + self.assert_pos($pos); + let b = unsafe { *self.bytes.slice.get_unchecked(self.bytes.pos) }; + self.bytes.pos += 1; + b + } + }; + () => { + bytes8_methods!(_0, 0); + bytes8_methods!(_1, 1); + bytes8_methods!(_2, 2); + bytes8_methods!(_3, 3); + bytes8_methods!(_4, 4); + bytes8_methods!(_5, 5); + bytes8_methods!(_6, 6); + bytes8_methods!(_7, 7); + } +} + +impl<'a, 'b: 'a> Bytes8<'a, 'b> { + bytes8_methods! {} + + #[cfg(not(debug_assertions))] + #[inline] + fn new(bytes: &'a mut Bytes<'b>) -> Bytes8<'a, 'b> { + Bytes8 { + bytes: bytes, + } + } + + #[cfg(debug_assertions)] + #[inline] + fn new(bytes: &'a mut Bytes<'b>) -> Bytes8<'a, 'b> { + Bytes8 { + bytes: bytes, + pos: 0, + } + } + + #[cfg(not(debug_assertions))] + #[inline] + fn assert_pos(&mut self, _pos: usize) { + } + + #[cfg(debug_assertions)] + #[inline] + fn assert_pos(&mut self, pos: usize) { + assert!(self.pos == pos); + self.pos += 1; + } +} diff --git a/third_party/rust/httparse/src/lib.rs b/third_party/rust/httparse/src/lib.rs new file mode 100644 index 0000000000..99e9e09953 --- /dev/null +++ b/third_party/rust/httparse/src/lib.rs @@ -0,0 +1,1138 @@ +#![doc(html_root_url = "https://docs.rs/httparse/1.3.3")] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(test, deny(warnings))] +#![deny(missing_docs)] + +//! # httparse +//! +//! A push library for parsing HTTP/1.x requests and responses. +//! +//! The focus is on speed and safety. Unsafe code is used to keep parsing fast, +//! 
//! but unsafety is contained in a submodule, with invariants enforced. The
//! parsing internals use an `Iterator` instead of direct indexing, while
//! skipping bounds checks.
//!
//! With Rust 1.27.0 or later, support for SIMD is enabled automatically.
//! If building an executable to be run on multiple platforms, and thus
//! not passing `target_feature` or `target_cpu` flags to the compiler,
//! runtime detection can still detect SSE4.2 or AVX2 support to provide
//! massive wins.
//!
//! If compiling for a specific target, remembering to include
//! `-C target_cpu=native` allows the detection to become compile time checks,
//! making it *even* faster.

// With the `std` feature enabled, `std` is linked under the name `core`, so
// the `core::` paths used below resolve in both configurations.
#[cfg(feature = "std")]
extern crate std as core;

use core::{fmt, result, str, slice};

use iter::Bytes;

mod iter;
#[macro_use] mod macros;
mod simd;

/// Shortens `*slice` in place so that it refers only to its first `len`
/// elements.
///
/// `len` must not exceed the current length. That precondition is only
/// checked by a `debug_assert!`, so release builds rely on the caller.
#[inline]
fn shrink<T>(slice: &mut &mut [T], len: usize) {
    debug_assert!(slice.len() >= len);
    let ptr = slice.as_mut_ptr();
    // The slice is rebuilt from the same start pointer with the shorter
    // length; this stays in bounds as long as the assertion above holds.
    *slice = unsafe { slice::from_raw_parts_mut(ptr, len) };
}

/// Determines if byte is a token char.
///
/// > ```notrust
/// > token = 1*tchar
/// >
/// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
/// >       / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
/// >       / DIGIT / ALPHA
/// >       ; any VCHAR, except delimiters
/// > ```
///
/// Note that the check below is broader than the grammar quoted above: it
/// accepts every byte in `0x20..=0x7E`, which includes SP and the delimiter
/// characters.
#[inline]
fn is_token(b: u8) -> bool {
    b > 0x1F && b < 0x7F
}

// Bytes accepted inside a request URI, indexed by byte value.
//
// As written, the table accepts every byte in 0x21..=0x7E except `"`, `<`,
// `>` and `\`; SP, control bytes, DEL and everything from 0x80 up are rejected.
// NOTE(review): `byte_map!` is defined in macros.rs (not visible here);
// presumably it turns each 0/1 into `false`/`true` -- confirm.
// TODO: Make a stricter checking for URI string?
static URI_MAP: [bool; 256] = byte_map![
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//  \0                         \t \n
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//  control bytes
    0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//  sp !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
//  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
//  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//  `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
//  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  del
//   ====== Extended ASCII (aka. obs-text) ======
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

/// Looks `b` up in `URI_MAP`.
#[inline]
fn is_uri_token(b: u8) -> bool {
    URI_MAP[b as usize]
}

// Bytes accepted inside a header name, indexed by byte value.
//
// The set entries are the digits, the ASCII letters and
// ! # $ % & ' * + - . ^ _ ` | ~ ; every other byte is rejected.
static HEADER_NAME_MAP: [bool; 256] = byte_map![
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

/// Looks `b` up in `HEADER_NAME_MAP`.
#[inline]
fn is_header_name_token(b: u8) -> bool {
    HEADER_NAME_MAP[b as usize]
}

// Bytes accepted inside a header value, indexed by byte value.
//
// HTAB (0x09), 0x20..=0x7E and 0x80..=0xFF are accepted; the remaining
// control bytes (including CR and LF) and DEL (0x7F) are rejected.
static HEADER_VALUE_MAP: [bool; 256] = byte_map![
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
];


/// Looks `b` up in `HEADER_VALUE_MAP`.
#[inline]
fn is_header_value_token(b: u8) -> bool {
    HEADER_VALUE_MAP[b as usize]
}

/// An error in parsing.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Error {
    /// Invalid byte in header name.
    HeaderName,
    /// Invalid byte in header value.
    HeaderValue,
    /// Invalid byte in new line.
    NewLine,
    /// Invalid byte in Response status.
    Status,
    /// Invalid byte where token is required.
    Token,
    /// Parsed more headers than provided buffer can contain.
    TooManyHeaders,
    /// Invalid byte in HTTP version.
    Version,
}

impl Error {
    /// Returns the fixed, human-readable message for this error. It is shared
    /// by the `Display` impl and, with the `std` feature, by
    /// `std::error::Error::description`.
    #[inline]
    fn description_str(&self) -> &'static str {
        match *self {
            Error::HeaderName => "invalid header name",
            Error::HeaderValue => "invalid header value",
            Error::NewLine => "invalid new line",
            Error::Status => "invalid response status",
            Error::Token => "invalid token",
            Error::TooManyHeaders => "too many headers",
            Error::Version => "invalid HTTP version",
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.description_str())
    }
}

// Only available with the `std` feature, because the trait lives in `std`.
#[cfg(feature = "std")]
impl std::error::Error for Error {
    fn description(&self) -> &str {
        self.description_str()
    }
}

/// An error in parsing a chunk size.
+// Note: Move this into the error enum once v2.0 is released. +#[derive(Debug, PartialEq, Eq)] +pub struct InvalidChunkSize; + +impl fmt::Display for InvalidChunkSize { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("invalid chunk size") + } +} + +/// A Result of any parsing action. +/// +/// If the input is invalid, an `Error` will be returned. Note that incomplete +/// data is not considered invalid, and so will not return an error, but rather +/// a `Ok(Status::Partial)`. +pub type Result<T> = result::Result<Status<T>, Error>; + +/// The result of a successful parse pass. +/// +/// `Complete` is used when the buffer contained the complete value. +/// `Partial` is used when parsing did not reach the end of the expected value, +/// but no invalid data was found. +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Status<T> { + /// The completed result. + Complete(T), + /// A partial result. + Partial +} + +impl<T> Status<T> { + /// Convenience method to check if status is complete. + #[inline] + pub fn is_complete(&self) -> bool { + match *self { + Status::Complete(..) => true, + Status::Partial => false + } + } + + /// Convenience method to check if status is partial. + #[inline] + pub fn is_partial(&self) -> bool { + match *self { + Status::Complete(..) => false, + Status::Partial => true + } + } + + /// Convenience method to unwrap a Complete value. Panics if the status is + /// `Partial`. + #[inline] + pub fn unwrap(self) -> T { + match self { + Status::Complete(t) => t, + Status::Partial => panic!("Tried to unwrap Status::Partial") + } + } +} + +/// A parsed Request. +/// +/// The optional values will be `None` if a parse was not complete, and did not +/// parse the associated property. This allows you to inspect the parts that +/// could be parsed, before reading more, in case you wish to exit early. 
+/// +/// # Example +/// +/// ```no_run +/// let buf = b"GET /404 HTTP/1.1\r\nHost:"; +/// let mut headers = [httparse::EMPTY_HEADER; 16]; +/// let mut req = httparse::Request::new(&mut headers); +/// let res = req.parse(buf).unwrap(); +/// if res.is_partial() { +/// match req.path { +/// Some(ref path) => { +/// // check router for path. +/// // /404 doesn't exist? we could stop parsing +/// }, +/// None => { +/// // must read more and parse again +/// } +/// } +/// } +/// ``` +#[derive(Debug, PartialEq)] +pub struct Request<'headers, 'buf: 'headers> { + /// The request method, such as `GET`. + pub method: Option<&'buf str>, + /// The request path, such as `/about-us`. + pub path: Option<&'buf str>, + /// The request version, such as `HTTP/1.1`. + pub version: Option<u8>, + /// The request headers. + pub headers: &'headers mut [Header<'buf>] +} + +impl<'h, 'b> Request<'h, 'b> { + /// Creates a new Request, using a slice of headers you allocate. + #[inline] + pub fn new(headers: &'h mut [Header<'b>]) -> Request<'h, 'b> { + Request { + method: None, + path: None, + version: None, + headers: headers, + } + } + + /// Try to parse a buffer of bytes into the Request. 
+ pub fn parse(&mut self, buf: &'b [u8]) -> Result<usize> { + let orig_len = buf.len(); + let mut bytes = Bytes::new(buf); + complete!(skip_empty_lines(&mut bytes)); + self.method = Some(complete!(parse_token(&mut bytes))); + self.path = Some(complete!(parse_uri(&mut bytes))); + self.version = Some(complete!(parse_version(&mut bytes))); + newline!(bytes); + + let len = orig_len - bytes.len(); + let headers_len = complete!(parse_headers_iter(&mut self.headers, &mut bytes)); + + Ok(Status::Complete(len + headers_len)) + } +} + +#[inline] +fn skip_empty_lines(bytes: &mut Bytes) -> Result<()> { + loop { + let b = bytes.peek(); + match b { + Some(b'\r') => { + // there's `\r`, so it's safe to bump 1 pos + unsafe { bytes.bump() }; + expect!(bytes.next() == b'\n' => Err(Error::NewLine)); + }, + Some(b'\n') => { + // there's `\n`, so it's safe to bump 1 pos + unsafe { bytes.bump(); } + }, + Some(..) => { + bytes.slice(); + return Ok(Status::Complete(())); + }, + None => return Ok(Status::Partial) + } + } +} + +/// A parsed Response. +/// +/// See `Request` docs for explanation of optional values. +#[derive(Debug, PartialEq)] +pub struct Response<'headers, 'buf: 'headers> { + /// The response version, such as `HTTP/1.1`. + pub version: Option<u8>, + /// The response code, such as `200`. + pub code: Option<u16>, + /// The response reason-phrase, such as `OK`. + pub reason: Option<&'buf str>, + /// The response headers. + pub headers: &'headers mut [Header<'buf>] +} + +impl<'h, 'b> Response<'h, 'b> { + /// Creates a new `Response` using a slice of `Header`s you have allocated. + #[inline] + pub fn new(headers: &'h mut [Header<'b>]) -> Response<'h, 'b> { + Response { + version: None, + code: None, + reason: None, + headers: headers, + } + } + + /// Try to parse a buffer of bytes into this `Response`. 
+ pub fn parse(&mut self, buf: &'b [u8]) -> Result<usize> { + let orig_len = buf.len(); + let mut bytes = Bytes::new(buf); + + complete!(skip_empty_lines(&mut bytes)); + self.version = Some(complete!(parse_version(&mut bytes))); + space!(bytes or Error::Version); + self.code = Some(complete!(parse_code(&mut bytes))); + + // RFC7230 says there must be 'SP' and then reason-phrase, but admits + // its only for legacy reasons. With the reason-phrase completely + // optional (and preferred to be omitted) in HTTP2, we'll just + // handle any response that doesn't include a reason-phrase, because + // it's more lenient, and we don't care anyways. + // + // So, a SP means parse a reason-phrase. + // A newline means go to headers. + // Anything else we'll say is a malformed status. + match next!(bytes) { + b' ' => { + bytes.slice(); + self.reason = Some(complete!(parse_reason(&mut bytes))); + }, + b'\r' => { + expect!(bytes.next() == b'\n' => Err(Error::Status)); + bytes.slice(); + self.reason = Some(""); + }, + b'\n' => self.reason = Some(""), + _ => return Err(Error::Status), + } + + + let len = orig_len - bytes.len(); + let headers_len = complete!(parse_headers_iter(&mut self.headers, &mut bytes)); + Ok(Status::Complete(len + headers_len)) + } +} + +/// Represents a parsed header. +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct Header<'a> { + /// The name portion of a header. + /// + /// A header name must be valid ASCII-US, so it's safe to store as a `&str`. + pub name: &'a str, + /// The value portion of a header. + /// + /// While headers **should** be ASCII-US, the specification allows for + /// values that may not be, and so the value is stored as bytes. + pub value: &'a [u8], +} + +/// An empty header, useful for constructing a `Header` array to pass in for +/// parsing. 
///
/// # Example
///
/// ```
/// let headers = [httparse::EMPTY_HEADER; 64];
/// ```
pub const EMPTY_HEADER: Header<'static> = Header { name: "", value: b"" };

/// Parses an `HTTP/1.x` version string at the cursor.
///
/// On the fast path the result is `Status::Complete(0)` for `HTTP/1.0` and
/// `Status::Complete(1)` for `HTTP/1.1`; any other byte in those eight
/// positions is `Error::Version`.
///
/// The fast path is taken only when `Bytes::next_8` hands out a window, which
/// per iter.rs requires more than eight unread bytes. Otherwise the seven
/// bytes of `HTTP/1.` are checked one at a time and, if all are present and
/// match, `Status::Partial` is returned without looking at the minor version.
///
/// NOTE(review): `expect!` is defined in macros.rs (not visible here). Judging
/// by the tests in this file it returns `Ok(Status::Partial)` when the input
/// runs out and the given error on a mismatch -- confirm.
#[inline]
fn parse_version(bytes: &mut Bytes) -> Result<u8> {
    if let Some(mut eight) = bytes.next_8() {
        expect!(eight._0() => b'H' |? Err(Error::Version));
        expect!(eight._1() => b'T' |? Err(Error::Version));
        expect!(eight._2() => b'T' |? Err(Error::Version));
        expect!(eight._3() => b'P' |? Err(Error::Version));
        expect!(eight._4() => b'/' |? Err(Error::Version));
        expect!(eight._5() => b'1' |? Err(Error::Version));
        expect!(eight._6() => b'.' |? Err(Error::Version));
        // Only minor versions 0 and 1 are recognised.
        let v = match eight._7() {
            b'0' => 0,
            b'1' => 1,
            _ => return Err(Error::Version)
        };
        return Ok(Status::Complete(v))
    }

    // else (but not in `else` because of borrow checker)

    // If the 8-byte window was not available, we still want to detect early
    // if this is a valid version or not. If it is, we'll return Partial.
    expect!(bytes.next() == b'H' => Err(Error::Version));
    expect!(bytes.next() == b'T' => Err(Error::Version));
    expect!(bytes.next() == b'T' => Err(Error::Version));
    expect!(bytes.next() == b'P' => Err(Error::Version));
    expect!(bytes.next() == b'/' => Err(Error::Version));
    expect!(bytes.next() == b'1' => Err(Error::Version));
    expect!(bytes.next() == b'.' => Err(Error::Version));
    Ok(Status::Partial)
}

/// From [RFC 7230](https://tools.ietf.org/html/rfc7230):
///
/// > ```notrust
/// > reason-phrase  = *( HTAB / SP / VCHAR / obs-text )
/// > HTAB           = %x09        ; horizontal tab
/// > VCHAR          = %x21-7E     ; visible (printing) characters
/// > obs-text       = %x80-FF
/// > ```
///
/// > A.2.  Changes from RFC 2616
/// >
/// > Non-US-ASCII content in header fields and the reason phrase
/// > has been obsoleted and made opaque (the TEXT rule was removed).
///
/// Note that the following implementation deliberately rejects the obsoleted (non-US-ASCII) text range.
///
/// The fully compliant parser should probably just return the reason-phrase as an opaque &[u8] data
/// and leave interpretation to user or specialized helpers (akin to .display() in std::path::Path)
#[inline]
fn parse_reason<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
    loop {
        let b = next!(bytes);
        if b == b'\r' {
            expect!(bytes.next() == b'\n' => Err(Error::Status));
            return Ok(Status::Complete(unsafe {
                // Every byte accepted by this loop was SP, a VCHAR or HTAB
                // (see the range check below), i.e. plain ASCII. The trailing
                // `\r\n` is excluded by skipping 2 bytes.
                // NOTE(review): this relies on the caller having reset the
                // cursor (`bytes.slice()`) right before the reason-phrase.
                str::from_utf8_unchecked(bytes.slice_skip(2))
            }));
        } else if b == b'\n' {
            return Ok(Status::Complete(unsafe {
                // Same reasoning as above; only the single `\n` is excluded.
                str::from_utf8_unchecked(bytes.slice_skip(1))
            }));
        } else if !((b >= 0x20 && b <= 0x7E) || b == b'\t') {
            // Control bytes, DEL and obs-text (0x80 and up) are rejected.
            return Err(Error::Status);
        }
    }
}

/// Reads bytes up to the next SP and returns them, without the SP, as a `&str`.
///
/// A byte that fails `is_token` before the SP is reached is `Error::Token`.
#[inline]
fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
    loop {
        let b = next!(bytes);
        if b == b' ' {
            return Ok(Status::Complete(unsafe {
                // Every byte accepted by this loop passed `is_token`, so it
                // is ASCII. The terminating SP is excluded by skipping 1 byte.
                str::from_utf8_unchecked(bytes.slice_skip(1))
            }));
        } else if !is_token(b) {
            return Err(Error::Token);
        }
    }
}

/// Reads the request target up to the next SP and returns it, without the SP,
/// as a `&str`.
///
/// A byte that fails `is_uri_token` before the SP is reached is
/// `Error::Token`.
#[inline]
fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
    // NOTE(review): defined in the `simd` module (not visible here);
    // presumably it advances the cursor over a leading run of valid URI
    // bytes -- confirm.
    simd::match_uri_vectored(bytes);

    loop {
        let b = next!(bytes);
        if b == b' ' {
            return Ok(Status::Complete(unsafe {
                // Every byte accepted by this loop passed `is_uri_token`, so
                // it is ASCII. The terminating SP is excluded by skipping
                // 1 byte.
                str::from_utf8_unchecked(bytes.slice_skip(1))
            }));
        } else if !is_uri_token(b) {
            return Err(Error::Token);
        }
    }
}


/// Parses exactly three ASCII digits into a status code.
///
/// A byte outside `0`-`9` in any of the three positions is `Error::Status`.
#[inline]
fn parse_code(bytes: &mut Bytes) -> Result<u16> {
    let hundreds = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));
    let tens = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));
    let ones = expect!(bytes.next() == b'0'...b'9' => Err(Error::Status));

    // Widen each digit to u16 before scaling.
    Ok(Status::Complete((hundreds - b'0') as u16 * 100 +
                        (tens - b'0') as u16 * 10 +
                        (ones - b'0') as u16))
}

/// Parse a buffer of bytes as headers.
///
/// The return value, if complete and successful, includes the index of the
/// buffer that parsing stopped at, and a sliced reference to the parsed
/// headers. The length of the slice will be equal to the number of properly
/// parsed headers.
///
/// # Example
///
/// ```
/// let buf = b"Host: foo.bar\nAccept: */*\n\nblah blah";
/// let mut headers = [httparse::EMPTY_HEADER; 4];
/// assert_eq!(httparse::parse_headers(buf, &mut headers),
///            Ok(httparse::Status::Complete((27, &[
///                httparse::Header { name: "Host", value: b"foo.bar" },
///                httparse::Header { name: "Accept", value: b"*/*" }
///            ][..]))));
/// ```
pub fn parse_headers<'b: 'h, 'h>(src: &'b [u8], mut dst: &'h mut [Header<'b>])
    -> Result<(usize, &'h [Header<'b>])> {
    let mut iter = Bytes::new(src);
    // `dst` is shrunk in place to the headers that were filled in.
    let pos = complete!(parse_headers_iter(&mut dst, &mut iter));
    Ok(Status::Complete((pos, dst)))
}


/// Parses header fields from `bytes` into `headers` until the empty line that
/// ends the head.
///
/// On success the result is `Status::Complete(n)`, where `n` counts the bytes
/// consumed since this function was entered plus whatever was already in
/// front of the cursor at that point. If another header starts after every
/// slot of `headers` has been used, the result is
/// `Err(Error::TooManyHeaders)`. On both of those paths `headers` is shrunk
/// to the number of slots that were filled; the early `return`s below leave
/// it untouched.
///
/// NOTE(review): `next!` and `expect!` come from macros.rs (not visible
/// here); they appear to return early from this function when the input runs
/// out or a byte does not match -- confirm.
#[inline]
fn parse_headers_iter<'a, 'b>(headers: &mut &mut [Header<'a>], bytes: &'b mut Bytes<'a>)
    -> Result<usize> {
    let mut num_headers: usize = 0;
    // Bytes already dropped from the cursor's window. `Bytes::pos` restarts
    // at 0 on every `slice`/`slice_skip`, so it is added here just before
    // each of those calls.
    let mut count: usize = 0;
    // Stays an error unless the terminating empty line is reached.
    let mut result = Err(Error::TooManyHeaders);

    {
        let mut iter = headers.iter_mut();

        'headers: loop {
            // a newline here means the head is over!
            let b = next!(bytes);
            if b == b'\r' {
                expect!(bytes.next() == b'\n' => Err(Error::NewLine));
                result = Ok(Status::Complete(count + bytes.pos()));
                break;
            } else if b == b'\n' {
                result = Ok(Status::Complete(count + bytes.pos()));
                break;
            } else if !is_header_name_token(b) {
                return Err(Error::HeaderName);
            }

            // A header is starting; claim the next free slot, or stop with
            // `result` still set to `TooManyHeaders` when none is left.
            let header = match iter.next() {
                Some(header) => header,
                None => break 'headers
            };

            num_headers += 1;
            // parse header name until colon
            'name: loop {
                let b = next!(bytes);
                if b == b':' {
                    count += bytes.pos();
                    // Every name byte passed `is_header_name_token`, so the
                    // name is ASCII; the colon is excluded by skipping 1 byte.
                    header.name = unsafe {
                        str::from_utf8_unchecked(bytes.slice_skip(1))
                    };
                    break 'name;
                } else if !is_header_name_token(b) {
                    return Err(Error::HeaderName);
                }
            }

            // Holds the byte that ended the value once `'value` is left.
            let mut b;

            'value: loop {

                // eat white space between colon and value
                'whitespace: loop {
                    b = next!(bytes);
                    if b == b' ' || b == b'\t' {
                        count += bytes.pos();
                        // Drop the whitespace so it is not part of the value.
                        bytes.slice();
                        continue 'whitespace;
                    } else {
                        if !is_header_value_token(b) {
                            break 'value;
                        }
                        break 'whitespace;
                    }
                }

                // parse value till EOL

                // NOTE(review): defined in the `simd` module (not visible
                // here); presumably it advances over a run of valid value
                // bytes -- confirm.
                simd::match_header_value_vectored(bytes);

                // Checks value bytes eight at a time through the `Bytes8`
                // accessors, leaving `'value` at the first rejected byte.
                macro_rules! check {
                    ($bytes:ident, $i:ident) => ({
                        b = $bytes.$i();
                        if !is_header_value_token(b) {
                            break 'value;
                        }
                    });
                    ($bytes:ident) => ({
                        check!($bytes, _0);
                        check!($bytes, _1);
                        check!($bytes, _2);
                        check!($bytes, _3);
                        check!($bytes, _4);
                        check!($bytes, _5);
                        check!($bytes, _6);
                        check!($bytes, _7);
                    })
                }
                while let Some(mut bytes8) = bytes.next_8() {
                    check!(bytes8);
                }
                // Fewer bytes left than `next_8` needs: go one at a time.
                loop {
                    b = next!(bytes);
                    if !is_header_value_token(b) {
                        break 'value;
                    }
                }
            }

            // `b` is the first byte that is not a valid value byte; only a
            // line ending may follow a value.
            let value_slice : &[u8] = if b == b'\r' {
                expect!(bytes.next() == b'\n' => Err(Error::HeaderValue));
                count += bytes.pos();
                // having just checked that `\r\n` exists, it's safe to skip those 2 bytes
                unsafe {
                    bytes.slice_skip(2)
                }
            } else if b == b'\n' {
                count += bytes.pos();
                // having just consumed the `\n`, it's safe to skip that 1 byte
                unsafe {
                    bytes.slice_skip(1)
                }
            } else {
                return Err(Error::HeaderValue);
            };
            // trim trailing whitespace in the header
            if let Some(last_visible) = value_slice.iter().rposition(|b| *b != b' ' && *b != b'\t' ) {
                // There is at least one non-whitespace character.
                header.value = &value_slice[0..last_visible+1];
            } else {
                // There is no non-whitespace character. This can only happen when value_slice is
                // empty.
                header.value = value_slice;
            }
        }
    } // drop iter

    shrink(headers, num_headers);
    result
}

/// Parse a buffer of bytes as a chunk size.
///
/// The return value, if complete and successful, includes the index of the
/// buffer that parsing stopped at, and the size of the following chunk.
+/// +/// # Example +/// +/// ``` +/// let buf = b"4\r\nRust\r\n0\r\n\r\n"; +/// assert_eq!(httparse::parse_chunk_size(buf), +/// Ok(httparse::Status::Complete((3, 4)))); +/// ``` +pub fn parse_chunk_size(buf: &[u8]) + -> result::Result<Status<(usize, u64)>, InvalidChunkSize> { + const RADIX: u64 = 16; + let mut bytes = Bytes::new(buf); + let mut size = 0; + let mut in_chunk_size = true; + let mut in_ext = false; + let mut count = 0; + loop { + let b = next!(bytes); + match b { + b'0' ... b'9' if in_chunk_size => { + if count > 15 { + return Err(InvalidChunkSize); + } + count += 1; + size *= RADIX; + size += (b - b'0') as u64; + }, + b'a' ... b'f' if in_chunk_size => { + if count > 15 { + return Err(InvalidChunkSize); + } + count += 1; + size *= RADIX; + size += (b + 10 - b'a') as u64; + } + b'A' ... b'F' if in_chunk_size => { + if count > 15 { + return Err(InvalidChunkSize); + } + count += 1; + size *= RADIX; + size += (b + 10 - b'A') as u64; + } + b'\r' => { + match next!(bytes) { + b'\n' => break, + _ => return Err(InvalidChunkSize), + } + } + // If we weren't in the extension yet, the ";" signals its start + b';' if !in_ext => { + in_ext = true; + in_chunk_size = false; + } + // "Linear white space" is ignored between the chunk size and the + // extension separator token (";") due to the "implied *LWS rule". + b'\t' | b' ' if !in_ext & !in_chunk_size => {} + // LWS can follow the chunk size, but no more digits can come + b'\t' | b' ' if in_chunk_size => in_chunk_size = false, + // We allow any arbitrary octet once we are in the extension, since + // they all get ignored anyway. According to the HTTP spec, valid + // extensions would have a more strict syntax: + // (token ["=" (token | quoted-string)]) + // but we gain nothing by rejecting an otherwise valid chunk size. + _ if in_ext => {} + // Finally, if we aren't in the extension and we're reading any + // other octet, the chunk size line is invalid! 
+ _ => return Err(InvalidChunkSize), + } + } + Ok(Status::Complete((bytes.pos(), size))) +} + +#[cfg(test)] +mod tests { + use super::{Request, Response, Status, EMPTY_HEADER, shrink, parse_chunk_size}; + + const NUM_OF_HEADERS: usize = 4; + + #[test] + fn test_shrink() { + let mut arr = [EMPTY_HEADER; 16]; + { + let slice = &mut &mut arr[..]; + assert_eq!(slice.len(), 16); + shrink(slice, 4); + assert_eq!(slice.len(), 4); + } + assert_eq!(arr.len(), 16); + } + + macro_rules! req { + ($name:ident, $buf:expr, |$arg:ident| $body:expr) => ( + req! {$name, $buf, Ok(Status::Complete($buf.len())), |$arg| $body } + ); + ($name:ident, $buf:expr, $len:expr, |$arg:ident| $body:expr) => ( + #[test] + fn $name() { + let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS]; + let mut req = Request::new(&mut headers[..]); + let status = req.parse($buf.as_ref()); + assert_eq!(status, $len); + closure(req); + + fn closure($arg: Request) { + $body + } + } + ) + } + + req! { + test_request_simple, + b"GET / HTTP/1.1\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 0); + } + } + + req! { + test_request_simple_with_query_params, + b"GET /thing?data=a HTTP/1.1\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/thing?data=a"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 0); + } + } + + req! { + test_request_simple_with_whatwg_query_params, + b"GET /thing?data=a^ HTTP/1.1\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/thing?data=a^"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 0); + } + } + + req! 
{ + test_request_headers, + b"GET / HTTP/1.1\r\nHost: foo.com\r\nCookie: \r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 2); + assert_eq!(req.headers[0].name, "Host"); + assert_eq!(req.headers[0].value, b"foo.com"); + assert_eq!(req.headers[1].name, "Cookie"); + assert_eq!(req.headers[1].value, b""); + } + } + + req! { + test_request_headers_optional_whitespace, + b"GET / HTTP/1.1\r\nHost: \tfoo.com\t \r\nCookie: \t \r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 2); + assert_eq!(req.headers[0].name, "Host"); + assert_eq!(req.headers[0].value, b"foo.com"); + assert_eq!(req.headers[1].name, "Cookie"); + assert_eq!(req.headers[1].value, b""); + } + } + + req! { + // test the scalar parsing + test_request_header_value_htab_short, + b"GET / HTTP/1.1\r\nUser-Agent: some\tagent\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 1); + assert_eq!(req.headers[0].name, "User-Agent"); + assert_eq!(req.headers[0].value, b"some\tagent"); + } + } + + req! { + // test the sse42 parsing + test_request_header_value_htab_med, + b"GET / HTTP/1.1\r\nUser-Agent: 1234567890some\tagent\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 1); + assert_eq!(req.headers[0].name, "User-Agent"); + assert_eq!(req.headers[0].value, b"1234567890some\tagent"); + } + } + + req! 
{ + // test the avx2 parsing + test_request_header_value_htab_long, + b"GET / HTTP/1.1\r\nUser-Agent: 1234567890some\t1234567890agent1234567890\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 1); + assert_eq!(req.headers[0].name, "User-Agent"); + assert_eq!(req.headers[0].value, &b"1234567890some\t1234567890agent1234567890"[..]); + } + } + + req! { + test_request_headers_max, + b"GET / HTTP/1.1\r\nA: A\r\nB: B\r\nC: C\r\nD: D\r\n\r\n", + |req| { + assert_eq!(req.headers.len(), NUM_OF_HEADERS); + } + } + + req! { + test_request_multibyte, + b"GET / HTTP/1.1\r\nHost: foo.com\r\nUser-Agent: \xe3\x81\xb2\xe3/1.0\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers[0].name, "Host"); + assert_eq!(req.headers[0].value, b"foo.com"); + assert_eq!(req.headers[1].name, "User-Agent"); + assert_eq!(req.headers[1].value, b"\xe3\x81\xb2\xe3/1.0"); + } + } + + + req! { + test_request_partial, + b"GET / HTTP/1.1\r\n\r", Ok(Status::Partial), + |_req| {} + } + + req! { + test_request_partial_version, + b"GET / HTTP/1.", Ok(Status::Partial), + |_req| {} + } + + req! { + test_request_newlines, + b"GET / HTTP/1.1\nHost: foo.bar\n\n", + |_r| {} + } + + req! { + test_request_empty_lines_prefix, + b"\r\n\r\nGET / HTTP/1.1\r\n\r\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 0); + } + } + + req! { + test_request_empty_lines_prefix_lf_only, + b"\n\nGET / HTTP/1.1\n\n", + |req| { + assert_eq!(req.method.unwrap(), "GET"); + assert_eq!(req.path.unwrap(), "/"); + assert_eq!(req.version.unwrap(), 1); + assert_eq!(req.headers.len(), 0); + } + } + + req! 
{ + test_request_with_invalid_token_delimiter, + b"GET\n/ HTTP/1.1\r\nHost: foo.bar\r\n\r\n", + Err(::Error::Token), + |_r| {} + } + + + req! { + test_request_with_invalid_but_short_version, + b"GET / HTTP/1!", + Err(::Error::Version), + |_r| {} + } + + macro_rules! res { + ($name:ident, $buf:expr, |$arg:ident| $body:expr) => ( + res! {$name, $buf, Ok(Status::Complete($buf.len())), |$arg| $body } + ); + ($name:ident, $buf:expr, $len:expr, |$arg:ident| $body:expr) => ( + #[test] + fn $name() { + let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS]; + let mut res = Response::new(&mut headers[..]); + let status = res.parse($buf.as_ref()); + assert_eq!(status, $len); + closure(res); + + fn closure($arg: Response) { + $body + } + } + ) + } + + res! { + test_response_simple, + b"HTTP/1.1 200 OK\r\n\r\n", + |res| { + assert_eq!(res.version.unwrap(), 1); + assert_eq!(res.code.unwrap(), 200); + assert_eq!(res.reason.unwrap(), "OK"); + } + } + + res! { + test_response_newlines, + b"HTTP/1.0 403 Forbidden\nServer: foo.bar\n\n", + |_r| {} + } + + res! { + test_response_reason_missing, + b"HTTP/1.1 200 \r\n\r\n", + |res| { + assert_eq!(res.version.unwrap(), 1); + assert_eq!(res.code.unwrap(), 200); + assert_eq!(res.reason.unwrap(), ""); + } + } + + res! { + test_response_reason_missing_no_space, + b"HTTP/1.1 200\r\n\r\n", + |res| { + assert_eq!(res.version.unwrap(), 1); + assert_eq!(res.code.unwrap(), 200); + assert_eq!(res.reason.unwrap(), ""); + } + } + + res! { + test_response_reason_missing_no_space_with_headers, + b"HTTP/1.1 200\r\nFoo: bar\r\n\r\n", + |res| { + assert_eq!(res.version.unwrap(), 1); + assert_eq!(res.code.unwrap(), 200); + assert_eq!(res.reason.unwrap(), ""); + assert_eq!(res.headers.len(), 1); + assert_eq!(res.headers[0].name, "Foo"); + assert_eq!(res.headers[0].value, b"bar"); + } + } + + res! 
{ + test_response_reason_with_space_and_tab, + b"HTTP/1.1 101 Switching Protocols\t\r\n\r\n", + |res| { + assert_eq!(res.version.unwrap(), 1); + assert_eq!(res.code.unwrap(), 101); + assert_eq!(res.reason.unwrap(), "Switching Protocols\t"); + } + } + + static RESPONSE_REASON_WITH_OBS_TEXT_BYTE: &'static [u8] = b"HTTP/1.1 200 X\xFFZ\r\n\r\n"; + res! { + test_response_reason_with_obsolete_text_byte, + RESPONSE_REASON_WITH_OBS_TEXT_BYTE, + Err(::Error::Status), + |_res| {} + } + + res! { + test_response_reason_with_nul_byte, + b"HTTP/1.1 200 \x00\r\n\r\n", + Err(::Error::Status), + |_res| {} + } + + res! { + test_response_version_missing_space, + b"HTTP/1.1", + Ok(Status::Partial), + |_res| {} + } + + res! { + test_response_code_missing_space, + b"HTTP/1.1 200", + Ok(Status::Partial), + |_res| {} + } + + res! { + test_response_empty_lines_prefix_lf_only, + b"\n\nHTTP/1.1 200 OK\n\n", + |_res| {} + } + + #[test] + fn test_chunk_size() { + assert_eq!(parse_chunk_size(b"0\r\n"), Ok(Status::Complete((3, 0)))); + assert_eq!(parse_chunk_size(b"12\r\nchunk"), Ok(Status::Complete((4, 18)))); + assert_eq!(parse_chunk_size(b"3086d\r\n"), Ok(Status::Complete((7, 198765)))); + assert_eq!(parse_chunk_size(b"3735AB1;foo bar*\r\n"), Ok(Status::Complete((18, 57891505)))); + assert_eq!(parse_chunk_size(b"3735ab1 ; baz \r\n"), Ok(Status::Complete((16, 57891505)))); + assert_eq!(parse_chunk_size(b"77a65\r"), Ok(Status::Partial)); + assert_eq!(parse_chunk_size(b"ab"), Ok(Status::Partial)); + assert_eq!(parse_chunk_size(b"567f8a\rfoo"), Err(::InvalidChunkSize)); + assert_eq!(parse_chunk_size(b"567f8a\rfoo"), Err(::InvalidChunkSize)); + assert_eq!(parse_chunk_size(b"567xf8a\r\n"), Err(::InvalidChunkSize)); + assert_eq!(parse_chunk_size(b"ffffffffffffffff\r\n"), Ok(Status::Complete((18, ::core::u64::MAX)))); + assert_eq!(parse_chunk_size(b"1ffffffffffffffff\r\n"), Err(::InvalidChunkSize)); + assert_eq!(parse_chunk_size(b"Affffffffffffffff\r\n"), Err(::InvalidChunkSize)); + 
assert_eq!(parse_chunk_size(b"fffffffffffffffff\r\n"), Err(::InvalidChunkSize));
+    }
+
+    #[cfg(feature = "std")]
+    #[test]
+    fn test_std_error() {
+        use super::Error;
+        use std::error::Error as StdError;
+        let err = Error::HeaderName;
+        assert_eq!(err.to_string(), err.description());
+    }
+}
diff --git a/third_party/rust/httparse/src/macros.rs b/third_party/rust/httparse/src/macros.rs
new file mode 100644
index 0000000000..c6f4ab63f3
--- /dev/null
+++ b/third_party/rust/httparse/src/macros.rs
@@ -0,0 +1,59 @@
+//! Utility macros
+
+/// Yields the next byte from `$bytes`, or returns `Ok(Status::Partial)` from
+/// the enclosing function when `$bytes.next()` is `None`.
+macro_rules! next {
+    ($bytes:ident) => ({
+        match $bytes.next() {
+            Some(b) => b,
+            None => return Ok(Status::Partial)
+        }
+    })
+}
+
+/// Yields a value when it matches a pattern, otherwise returns `$ret` from the
+/// enclosing function.
+///
+/// * `expect!(bytes.next() == PAT => RET)` reads the byte with `next!` first
+///   (so it can also return `Ok(Status::Partial)`).
+/// * `expect!(EXPR => PAT |? RET)` checks an arbitrary expression.
+macro_rules! expect {
+    ($bytes:ident.next() == $pat:pat => $ret:expr) => {
+        expect!(next!($bytes) => $pat |? $ret)
+    };
+    ($e:expr => $pat:pat |? $ret:expr) => {
+        match $e {
+            v@$pat => v,
+            _ => return $ret
+        }
+    };
+}
+
+/// Unwraps `Status::Complete(v)` out of a `Result<Status<_>, _>` expression.
+/// Errors are propagated with `try!`, and `Status::Partial` makes the
+/// enclosing function return `Ok(Status::Partial)`.
+macro_rules! complete {
+    ($e:expr) => {
+        match try!($e) {
+            Status::Complete(v) => v,
+            Status::Partial => return Ok(Status::Partial)
+        }
+    }
+}
+
+/// Builds an array of `bool`s from a comma-terminated list of integer flags:
+/// each element is `flag != 0`.
+macro_rules! byte_map {
+    ($($flag:expr,)*) => ([
+        $($flag != 0,)*
+    ])
+}
+
+/// Consumes one `b' '` from `$bytes`, returning `Err($err)` from the enclosing
+/// function on any other byte, then calls `$bytes.slice()` and discards its
+/// result (for `Bytes` this drops the already-consumed prefix).
+macro_rules! space {
+    ($bytes:ident or $err:expr) => ({
+        expect!($bytes.next() == b' ' => Err($err));
+        $bytes.slice();
+    })
+}
+
+/// Consumes a line terminator, either `\r\n` or a bare `\n`, then calls
+/// `$bytes.slice()`. Anything else (including `\r` not followed by `\n`)
+/// makes the enclosing function return `Err(Error::NewLine)`.
+macro_rules! newline {
+    ($bytes:ident) => ({
+        match next!($bytes) {
+            b'\r' => {
+                expect!($bytes.next() == b'\n' => Err(Error::NewLine));
+                $bytes.slice();
+            },
+            b'\n' => {
+                $bytes.slice();
+            },
+            _ => return Err(Error::NewLine)
+        }
+    })
+}
diff --git a/third_party/rust/httparse/src/simd/avx2.rs b/third_party/rust/httparse/src/simd/avx2.rs
new file mode 100644
index 0000000000..368c52c2d8
--- /dev/null
+++ b/third_party/rust/httparse/src/simd/avx2.rs
@@ -0,0 +1,116 @@
+use ::iter::Bytes;
+
+/// Outcome of one of the 32-byte batch scanners in this module.
+pub enum Scan {
+    /// Returned when an implementation finds a noteworthy token.
+    Found,
+    /// Returned when an implementation couldn't keep running because the input was too short.
+    TooShort,
+}
+
+
+/// Advances `bytes` over leading URI bytes, 32 at a time.
+///
+/// Returns `Scan::Found` as soon as a 32-byte window contains a byte the
+/// lookup table rejects (`bytes` is then positioned on that byte), or
+/// `Scan::TooShort` once fewer than 32 bytes remain.
+///
+/// # Safety
+///
+/// Calls a `#[target_feature(enable = "avx2")]` function, so the caller must
+/// have verified that the CPU supports AVX2.
+pub unsafe fn parse_uri_batch_32<'a>(bytes: &mut Bytes<'a>) -> Scan {
+    while bytes.as_ref().len() >= 32 {
+        let advance = match_url_char_32_avx(bytes.as_ref());
+        bytes.advance(advance);
+
+        if advance != 32 {
+            return Scan::Found;
+        }
+    }
+    Scan::TooShort
+}
+
+/// Returns how many of the first 32 bytes of `buf` are accepted by the URI
+/// lookup table, i.e. the index of the first rejected byte, or 32 if all
+/// 32 are accepted.
+///
+/// # Safety
+///
+/// `buf` must hold at least 32 bytes (only checked by a `debug_assert!`) and
+/// the CPU must support AVX2.
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2")]
+#[inline]
+#[allow(non_snake_case, overflowing_literals)]
+unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
+    debug_assert!(buf.len() >= 32);
+
+    /*
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    */
+    use core::arch::x86_64::*;
+
+    let ptr = buf.as_ptr();
+
+    // Nibble-indexed bitmap: bit `h` of `URI[l]` is set when the byte
+    // `(h << 4) | l` is accepted. `ARF[h]` is `1 << h` for h in 0..8 and 0
+    // otherwise, so bytes >= 0x80 never match. Both tables are repeated for
+    // the two 128-bit lanes because the byte shuffle works per lane.
+    let LSH: __m256i = _mm256_set1_epi8(0x0f);
+    let URI: __m256i = _mm256_setr_epi8(
+        0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
+        0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
+        0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
+        0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
+    );
+    let ARF: __m256i = _mm256_setr_epi8(
+        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    );
+
+    let data = _mm256_lddqu_si256(ptr as *const _);
+    // Row bitmap selected by each byte's low nibble.
+    let rbms = _mm256_shuffle_epi8(URI, data);
+    // High nibble of each byte (the 16-bit shift leaks bits across bytes,
+    // hence the 0x0f mask).
+    let cols = _mm256_and_si256(LSH, _mm256_srli_epi16(data, 4));
+    let bits = _mm256_and_si256(_mm256_shuffle_epi8(ARF, cols), rbms);
+
+    // `v` flags the rejected bytes; the bits forced on above bit 31 cap the
+    // count at 32 when nothing is rejected.
+    let v = _mm256_cmpeq_epi8(bits, _mm256_setzero_si256());
+    let r = 0xffffffff_00000000 | _mm256_movemask_epi8(v) as u64;
+
+    // `trailing_zeros` instead of `_tzcnt_u64`: the intrinsic requires BMI1,
+    // which this function neither enables nor has detected.
+    r.trailing_zeros() as usize
+}
+
+/// 32-bit x86 placeholder so the module compiles; it must never be called.
+#[cfg(target_arch = "x86")]
+unsafe fn match_url_char_32_avx(_: &[u8]) -> usize {
+    unreachable!("AVX2 detection should be disabled for x86");
+}
+
+/// Advances `bytes` over leading header-value bytes, 32 at a time.
+///
+/// Returns `Scan::Found` as soon as a 32-byte window contains a byte the
+/// vector check rejects (`bytes` is then positioned on that byte), or
+/// `Scan::TooShort` once fewer than 32 bytes remain.
+///
+/// # Safety
+///
+/// Calls a `#[target_feature(enable = "avx2")]` function, so the caller must
+/// have verified that the CPU supports AVX2.
+pub unsafe fn match_header_value_batch_32(bytes: &mut Bytes) -> Scan {
+    while bytes.as_ref().len() >= 32 {
+        let advance = match_header_value_char_32_avx(bytes.as_ref());
+        bytes.advance(advance);
+
+        if advance != 32 {
+            return Scan::Found;
+        }
+    }
+    Scan::TooShort
+}
+
+/// Returns the index of the first of the 32 leading bytes of `buf` that the
+/// vector check rejects, or 32 if none is rejected.
+///
+/// # Safety
+///
+/// `buf` must hold at least 32 bytes (only checked by a `debug_assert!`) and
+/// the CPU must support AVX2.
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2")]
+#[inline]
+#[allow(non_snake_case)]
+unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
+    debug_assert!(buf.len() >= 32);
+
+    /*
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    */
+    use core::arch::x86_64::*;
+
+    let ptr = buf.as_ptr();
+
+    // %x09 %x20-%x7e %x80-%xff
+    // NOTE(review): `_mm256_cmpgt_epi8` compares signed bytes, so 0x80-0xff
+    // are *not* greater than 0x1f and stop the scan here; presumably the
+    // scalar code that runs afterwards decides about them - confirm.
+    let TAB: __m256i = _mm256_set1_epi8(0x09);
+    let DEL: __m256i = _mm256_set1_epi8(0x7f);
+    let LOW: __m256i = _mm256_set1_epi8(0x1f);
+
+    let dat = _mm256_lddqu_si256(ptr as *const _);
+    let low = _mm256_cmpgt_epi8(dat, LOW);
+    let tab = _mm256_cmpeq_epi8(dat, TAB);
+    let del = _mm256_cmpeq_epi8(dat, DEL);
+    // accepted = (byte > 0x1f || byte == TAB) && byte != DEL
+    let bit = _mm256_andnot_si256(del, _mm256_or_si256(low, tab));
+    let rev = _mm256_cmpeq_epi8(bit, _mm256_setzero_si256());
+    // Bits forced on above bit 31 cap the count at 32.
+    let res = 0xffffffff_00000000 | _mm256_movemask_epi8(rev) as u64;
+
+    // `trailing_zeros` instead of `_tzcnt_u64`: the intrinsic requires BMI1,
+    // which this function neither enables nor has detected.
+    res.trailing_zeros() as usize
+}
+
+/// 32-bit x86 placeholder so the module compiles; it must never be called.
+#[cfg(target_arch = "x86")]
+unsafe fn match_header_value_char_32_avx(_: &[u8]) -> usize {
+    unreachable!("AVX2 detection should be disabled for x86");
+}
diff --git a/third_party/rust/httparse/src/simd/fallback.rs b/third_party/rust/httparse/src/simd/fallback.rs
new file mode 100644
index 0000000000..4a79cb9978
--- /dev/null
+++ b/third_party/rust/httparse/src/simd/fallback.rs
@@ -0,0 +1,8 @@
+use ::iter::Bytes;
+
+// Fallbacks that do nothing...
+ +#[inline(always)] +pub fn match_uri_vectored(_: &mut Bytes) {} +#[inline(always)] +pub fn match_header_value_vectored(_: &mut Bytes) {} diff --git a/third_party/rust/httparse/src/simd/mod.rs b/third_party/rust/httparse/src/simd/mod.rs new file mode 100644 index 0000000000..e78034996a --- /dev/null +++ b/third_party/rust/httparse/src/simd/mod.rs @@ -0,0 +1,238 @@ +#[cfg(not(all( + httparse_simd, + any( + target_arch = "x86", + target_arch = "x86_64", + ), +)))] +mod fallback; + +#[cfg(not(all( + httparse_simd, + any( + target_arch = "x86", + target_arch = "x86_64", + ), +)))] +pub use self::fallback::*; + +#[cfg(all( + httparse_simd, + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +mod sse42; + +#[cfg(all( + httparse_simd, + any( + httparse_simd_target_feature_avx2, + not(httparse_simd_target_feature_sse42), + ), + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +mod avx2; + +#[cfg(all( + httparse_simd, + not(any( + httparse_simd_target_feature_sse42, + httparse_simd_target_feature_avx2, + )), + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +mod runtime { + //! Runtime detection of simd features. Used when the build script + //! doesn't notice any target features at build time. + //! + //! While `is_x86_feature_detected!` has it's own caching built-in, + //! at least in 1.27.0, the functions don't inline, leaving using it + //! actually *slower* than just using the scalar fallback. 
+ + use core::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; + + static FEATURE: AtomicUsize = ATOMIC_USIZE_INIT; + + const INIT: usize = 0; + const SSE_42: usize = 1; + const AVX_2: usize = 2; + const AVX_2_AND_SSE_42: usize = 3; + const NONE: usize = ::core::usize::MAX; + + fn detect() -> usize { + let feat = FEATURE.load(Ordering::Relaxed); + if feat == INIT { + if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") { + if is_x86_feature_detected!("sse4.2") { + FEATURE.store(AVX_2_AND_SSE_42, Ordering::Relaxed); + return AVX_2_AND_SSE_42; + } else { + FEATURE.store(AVX_2, Ordering::Relaxed); + return AVX_2; + } + } else if is_x86_feature_detected!("sse4.2") { + FEATURE.store(SSE_42, Ordering::Relaxed); + return SSE_42; + } else { + FEATURE.store(NONE, Ordering::Relaxed); + } + } + feat + } + + pub fn match_uri_vectored(bytes: &mut ::Bytes) { + unsafe { + match detect() { + SSE_42 => super::sse42::parse_uri_batch_16(bytes), + AVX_2 => { super::avx2::parse_uri_batch_32(bytes); }, + AVX_2_AND_SSE_42 => { + if let super::avx2::Scan::Found = super::avx2::parse_uri_batch_32(bytes) { + return; + } + super::sse42::parse_uri_batch_16(bytes) + }, + _ => () + } + } + + // else do nothing + } + + pub fn match_header_value_vectored(bytes: &mut ::Bytes) { + unsafe { + match detect() { + SSE_42 => super::sse42::match_header_value_batch_16(bytes), + AVX_2 => { super::avx2::match_header_value_batch_32(bytes); }, + AVX_2_AND_SSE_42 => { + if let super::avx2::Scan::Found = super::avx2::match_header_value_batch_32(bytes) { + return; + } + super::sse42::match_header_value_batch_16(bytes) + }, + _ => () + } + } + + // else do nothing + } +} + +#[cfg(all( + httparse_simd, + not(any( + httparse_simd_target_feature_sse42, + httparse_simd_target_feature_avx2, + )), + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +pub use self::runtime::*; + +#[cfg(all( + httparse_simd, + httparse_simd_target_feature_sse42, + 
not(httparse_simd_target_feature_avx2), + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +mod sse42_compile_time { + pub fn match_uri_vectored(bytes: &mut ::Bytes) { + if is_x86_feature_detected!("sse4.2") { + unsafe { + super::sse42::parse_uri_batch_16(bytes); + } + } + + // else do nothing + } + + pub fn match_header_value_vectored(bytes: &mut ::Bytes) { + if is_x86_feature_detected!("sse4.2") { + unsafe { + super::sse42::match_header_value_batch_16(bytes); + } + } + + // else do nothing + } +} + +#[cfg(all( + httparse_simd, + httparse_simd_target_feature_sse42, + not(httparse_simd_target_feature_avx2), + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +pub use self::sse42_compile_time::*; + +#[cfg(all( + httparse_simd, + httparse_simd_target_feature_avx2, + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +mod avx2_compile_time { + pub fn match_uri_vectored(bytes: &mut ::Bytes) { + // do both, since avx2 only works when bytes.len() >= 32 + if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") { + unsafe { + super::avx2::parse_uri_batch_32(bytes); + } + + } + if is_x86_feature_detected!("sse4.2") { + unsafe { + super::sse42::parse_uri_batch_16(bytes); + } + } + + // else do nothing + } + + pub fn match_header_value_vectored(bytes: &mut ::Bytes) { + // do both, since avx2 only works when bytes.len() >= 32 + if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") { + let scanned = unsafe { + super::avx2::match_header_value_batch_32(bytes) + }; + + if let super::avx2::Scan::Found = scanned { + return; + } + } + if is_x86_feature_detected!("sse4.2") { + unsafe { + super::sse42::match_header_value_batch_16(bytes); + } + } + + // else do nothing + } +} + +#[cfg(all( + httparse_simd, + httparse_simd_target_feature_avx2, + any( + target_arch = "x86", + target_arch = "x86_64", + ), +))] +pub use self::avx2_compile_time::*; diff --git a/third_party/rust/httparse/src/simd/sse42.rs 
b/third_party/rust/httparse/src/simd/sse42.rs
new file mode 100644
index 0000000000..1770ba9aeb
--- /dev/null
+++ b/third_party/rust/httparse/src/simd/sse42.rs
@@ -0,0 +1,84 @@
+use ::iter::Bytes;
+
+/// Advances `bytes` over leading URI bytes, 16 at a time.
+///
+/// Stops when a 16-byte window contains a byte the lookup table rejects
+/// (`bytes` is then positioned on that byte) or when fewer than 16 bytes
+/// remain.
+///
+/// # Safety
+///
+/// Calls a `#[target_feature(enable = "sse4.2")]` function, so the caller
+/// must have verified that the CPU supports SSE4.2.
+pub unsafe fn parse_uri_batch_16<'a>(bytes: &mut Bytes<'a>) {
+    while bytes.as_ref().len() >= 16 {
+        let advance = match_url_char_16_sse(bytes.as_ref());
+        bytes.advance(advance);
+
+        if advance != 16 {
+            break;
+        }
+    }
+}
+
+/// Returns how many of the first 16 bytes of `buf` are accepted by the URI
+/// lookup table, i.e. the index of the first rejected byte, or 16 if all
+/// 16 are accepted.
+///
+/// # Safety
+///
+/// `buf` must hold at least 16 bytes (only checked by a `debug_assert!`) and
+/// the CPU must support SSE4.2.
+#[target_feature(enable = "sse4.2")]
+#[allow(non_snake_case, overflowing_literals)]
+unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize {
+    debug_assert!(buf.len() >= 16);
+
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use core::arch::x86_64::*;
+
+    let ptr = buf.as_ptr();
+
+    // Nibble-indexed bitmap: bit `h` of `URI[l]` is set when the byte
+    // `(h << 4) | l` is accepted. `ARF[h]` is `1 << h` for h in 0..8 and 0
+    // otherwise, so bytes >= 0x80 never match.
+    let LSH: __m128i = _mm_set1_epi8(0x0f);
+    let URI: __m128i = _mm_setr_epi8(
+        0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
+        0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
+    );
+    let ARF: __m128i = _mm_setr_epi8(
+        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    );
+
+    let data = _mm_lddqu_si128(ptr as *const _);
+    // Row bitmap selected by each byte's low nibble.
+    let rbms = _mm_shuffle_epi8(URI, data);
+    // High nibble of each byte (the 16-bit shift leaks bits across bytes,
+    // hence the 0x0f mask).
+    let cols = _mm_and_si128(LSH, _mm_srli_epi16(data, 4));
+    let bits = _mm_and_si128(_mm_shuffle_epi8(ARF, cols), rbms);
+
+    // `v` flags the rejected bytes; the bits forced on above bit 15 cap the
+    // count at 16 when nothing is rejected.
+    let v = _mm_cmpeq_epi8(bits, _mm_setzero_si128());
+    let r = 0xffff_0000 | _mm_movemask_epi8(v) as u32;
+
+    // `trailing_zeros` instead of `_tzcnt_u32`: the intrinsic requires BMI1,
+    // which SSE4.2 support does not imply.
+    r.trailing_zeros() as usize
+}
+
+/// Advances `bytes` over leading header-value bytes, 16 at a time.
+///
+/// Stops when a 16-byte window contains a byte the vector check rejects
+/// (`bytes` is then positioned on that byte) or when fewer than 16 bytes
+/// remain.
+///
+/// # Safety
+///
+/// Calls a `#[target_feature(enable = "sse4.2")]` function, so the caller
+/// must have verified that the CPU supports SSE4.2.
+pub unsafe fn match_header_value_batch_16(bytes: &mut Bytes) {
+    while bytes.as_ref().len() >= 16 {
+        let advance = match_header_value_char_16_sse(bytes.as_ref());
+        bytes.advance(advance);
+
+        if advance != 16 {
+            break;
+        }
+    }
+}
+
+/// Returns the index of the first of the 16 leading bytes of `buf` that the
+/// vector check rejects, or 16 if none is rejected.
+///
+/// # Safety
+///
+/// `buf` must hold at least 16 bytes (only checked by a `debug_assert!`) and
+/// the CPU must support SSE4.2.
+#[target_feature(enable = "sse4.2")]
+#[allow(non_snake_case)]
+unsafe fn match_header_value_char_16_sse(buf: &[u8]) -> usize {
+    debug_assert!(buf.len() >= 16);
+
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use core::arch::x86_64::*;
+
+    let ptr = buf.as_ptr();
+
+    // %x09 %x20-%x7e %x80-%xff
+    // NOTE(review): `_mm_cmpgt_epi8` compares signed bytes, so 0x80-0xff are
+    // *not* greater than 0x1f and stop the scan here; presumably the scalar
+    // code that runs afterwards decides about them - confirm.
+    let TAB: __m128i = _mm_set1_epi8(0x09);
+    let DEL: __m128i = _mm_set1_epi8(0x7f);
+    let LOW: __m128i = _mm_set1_epi8(0x1f);
+
+    let dat = _mm_lddqu_si128(ptr as *const _);
+    let low = _mm_cmpgt_epi8(dat, LOW);
+    let tab = _mm_cmpeq_epi8(dat, TAB);
+    let del = _mm_cmpeq_epi8(dat, DEL);
+    // accepted = (byte > 0x1f || byte == TAB) && byte != DEL
+    let bit = _mm_andnot_si128(del, _mm_or_si128(low, tab));
+    let rev = _mm_cmpeq_epi8(bit, _mm_setzero_si128());
+    // Bits forced on above bit 15 cap the count at 16.
+    let res = 0xffff_0000 | _mm_movemask_epi8(rev) as u32;
+
+    // `trailing_zeros` instead of `_tzcnt_u32`: the intrinsic requires BMI1,
+    // which SSE4.2 support does not imply.
+    res.trailing_zeros() as usize
+}
|