From 64d98f8ee037282c35007b64c2649055c56af1db Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 14:19:03 +0200
Subject: Merging upstream version 1.68.2+dfsg1.

Signed-off-by: Daniel Baumann
---
 vendor/time/src/parsing/combinator/mod.rs         | 192 ++++++++++++++++++++++
 vendor/time/src/parsing/combinator/rfc/iso8601.rs | 173 +++++++++++++++++++
 vendor/time/src/parsing/combinator/rfc/mod.rs     |  10 ++
 vendor/time/src/parsing/combinator/rfc/rfc2234.rs |  13 ++
 vendor/time/src/parsing/combinator/rfc/rfc2822.rs | 115 +++++++++++++
 5 files changed, 503 insertions(+)
 create mode 100644 vendor/time/src/parsing/combinator/mod.rs
 create mode 100644 vendor/time/src/parsing/combinator/rfc/iso8601.rs
 create mode 100644 vendor/time/src/parsing/combinator/rfc/mod.rs
 create mode 100644 vendor/time/src/parsing/combinator/rfc/rfc2234.rs
 create mode 100644 vendor/time/src/parsing/combinator/rfc/rfc2822.rs

(limited to 'vendor/time/src/parsing/combinator')

diff --git a/vendor/time/src/parsing/combinator/mod.rs b/vendor/time/src/parsing/combinator/mod.rs
new file mode 100644
index 000000000..3b4bc7a81
--- /dev/null
+++ b/vendor/time/src/parsing/combinator/mod.rs
@@ -0,0 +1,192 @@
+//! Implementations of the low-level parser combinators.
+
+pub(crate) mod rfc;
+
+use crate::format_description::modifier::Padding;
+use crate::parsing::shim::{Integer, IntegerParseBytes};
+use crate::parsing::ParsedItem;
+
+/// Parse a "+" or "-" sign. Returns the ASCII byte representing the sign, if present.
+pub(crate) const fn sign(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
+    match input {
+        [sign @ (b'-' | b'+'), remaining @ ..] => Some(ParsedItem(remaining, *sign)),
+        _ => None,
+    }
+}
+
+/// Consume the first matching item, returning its associated value.
+pub(crate) fn first_match<'a, T>(
+    options: impl IntoIterator<Item = (&'a [u8], T)>,
+    case_sensitive: bool,
+) -> impl FnMut(&'a [u8]) -> Option<ParsedItem<'a, T>> {
+    let mut options = options.into_iter();
+    move |input| {
+        options.find_map(|(expected, t)| {
+            if case_sensitive {
+                Some(ParsedItem(input.strip_prefix(expected)?, t))
+            } else {
+                let n = expected.len();
+                if n <= input.len() {
+                    let (head, tail) = input.split_at(n);
+                    if head.eq_ignore_ascii_case(expected) {
+                        return Some(ParsedItem(tail, t));
+                    }
+                }
+                None
+            }
+        })
+    }
+}
+
+/// Consume zero or more instances of the provided parser. The parser must return the unit value.
+pub(crate) fn zero_or_more<'a, P: Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>>>(
+    parser: P,
+) -> impl FnMut(&'a [u8]) -> ParsedItem<'a, ()> {
+    move |mut input| {
+        while let Some(remaining) = parser(input) {
+            input = remaining.into_inner();
+        }
+        ParsedItem(input, ())
+    }
+}
+
+/// Consume one of or more instances of the provided parser. The parser must produce the unit value.
+pub(crate) fn one_or_more<'a, P: Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>>>(
+    parser: P,
+) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>> {
+    move |mut input| {
+        input = parser(input)?.into_inner();
+        while let Some(remaining) = parser(input) {
+            input = remaining.into_inner();
+        }
+        Some(ParsedItem(input, ()))
+    }
+}
+
+/// Consume between `n` and `m` instances of the provided parser.
+pub(crate) fn n_to_m<
+    'a,
+    const N: u8,
+    const M: u8,
+    T,
+    P: Fn(&'a [u8]) -> Option<ParsedItem<'a, T>>,
+>(
+    parser: P,
+) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, &'a [u8]>> {
+    debug_assert!(M >= N);
+    move |mut input| {
+        // We need to keep this to determine the total length eventually consumed.
+        let orig_input = input;
+
+        // Mandatory
+        for _ in 0..N {
+            input = parser(input)?.0;
+        }
+
+        // Optional
+        for _ in N..M {
+            match parser(input) {
+                Some(parsed) => input = parsed.0,
+                None => break,
+            }
+        }
+
+        Some(ParsedItem(
+            input,
+            &orig_input[..(orig_input.len() - input.len())],
+        ))
+    }
+}
+
+/// Consume between `n` and `m` digits, returning the numerical value.
+pub(crate) fn n_to_m_digits<const N: u8, const M: u8, T: Integer>(
+    input: &[u8],
+) -> Option<ParsedItem<'_, T>> {
+    debug_assert!(M >= N);
+    n_to_m::<N, M, _, _>(any_digit)(input)?.flat_map(|value| value.parse_bytes())
+}
+
+/// Consume exactly `n` digits, returning the numerical value.
+pub(crate) fn exactly_n_digits<const N: u8, T: Integer>(input: &[u8]) -> Option<ParsedItem<'_, T>> {
+    n_to_m_digits::<N, N, _>(input)
+}
+
+/// Consume exactly `n` digits, returning the numerical value.
+pub(crate) fn exactly_n_digits_padded<'a, const N: u8, T: Integer>(
+    padding: Padding,
+) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>> {
+    n_to_m_digits_padded::<N, N, _>(padding)
+}
+
+/// Consume between `n` and `m` digits, returning the numerical value.
+pub(crate) fn n_to_m_digits_padded<'a, const N: u8, const M: u8, T: Integer>(
+    padding: Padding,
+) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>> {
+    debug_assert!(M >= N);
+    move |mut input| match padding {
+        Padding::None => n_to_m_digits::<1, M, _>(input),
+        Padding::Space => {
+            debug_assert!(N > 0);
+
+            let mut orig_input = input;
+            for _ in 0..(N - 1) {
+                match ascii_char::<b' '>(input) {
+                    Some(parsed) => input = parsed.0,
+                    None => break,
+                }
+            }
+            let pad_width = (orig_input.len() - input.len()) as u8;
+
+            orig_input = input;
+            for _ in 0..(N - pad_width) {
+                input = any_digit(input)?.0;
+            }
+            for _ in N..M {
+                match any_digit(input) {
+                    Some(parsed) => input = parsed.0,
+                    None => break,
+                }
+            }
+
+            ParsedItem(input, &orig_input[..(orig_input.len() - input.len())])
+                .flat_map(|value| value.parse_bytes())
+        }
+        Padding::Zero => n_to_m_digits::<N, M, _>(input),
+    }
+}
+
+/// Consume exactly one digit.
+pub(crate) const fn any_digit(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
+    match input {
+        [c, remaining @ ..] if c.is_ascii_digit() => Some(ParsedItem(remaining, *c)),
+        _ => None,
+    }
+}
+
+/// Consume exactly one of the provided ASCII characters.
+pub(crate) fn ascii_char<const CHAR: u8>(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    debug_assert!(CHAR.is_ascii_graphic() || CHAR.is_ascii_whitespace());
+    match input {
+        [c, remaining @ ..] if *c == CHAR => Some(ParsedItem(remaining, ())),
+        _ => None,
+    }
+}
+
+/// Consume exactly one of the provided ASCII characters, case-insensitive.
+pub(crate) fn ascii_char_ignore_case<const CHAR: u8>(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    debug_assert!(CHAR.is_ascii_graphic() || CHAR.is_ascii_whitespace());
+    match input {
+        [c, remaining @ ..] if c.eq_ignore_ascii_case(&CHAR) => Some(ParsedItem(remaining, ())),
+        _ => None,
+    }
+}
+
+/// Optionally consume an input with a given parser.
+pub(crate) fn opt<'a, T>(
+    parser: impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>>,
+) -> impl Fn(&'a [u8]) -> ParsedItem<'a, Option<T>> {
+    move |input| match parser(input) {
+        Some(value) => value.map(Some),
+        None => ParsedItem(input, None),
+    }
+}
diff --git a/vendor/time/src/parsing/combinator/rfc/iso8601.rs b/vendor/time/src/parsing/combinator/rfc/iso8601.rs
new file mode 100644
index 000000000..613a9057f
--- /dev/null
+++ b/vendor/time/src/parsing/combinator/rfc/iso8601.rs
@@ -0,0 +1,173 @@
+//! Rules defined in [ISO 8601].
+//!
+//! [ISO 8601]: https://www.iso.org/iso-8601-date-and-time-format.html
+
+use core::num::{NonZeroU16, NonZeroU8};
+
+use crate::parsing::combinator::{any_digit, ascii_char, exactly_n_digits, first_match, sign};
+use crate::parsing::ParsedItem;
+use crate::{Month, Weekday};
+
+/// What kind of format is being parsed. This is used to ensure each part of the format (date, time,
+/// offset) is the same kind.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum ExtendedKind {
+    /// The basic format.
+    Basic,
+    /// The extended format.
+    Extended,
+    /// ¯\_(ツ)_/¯
+    Unknown,
+}
+
+impl ExtendedKind {
+    /// Is it possible that the format is extended?
+    pub(crate) const fn maybe_extended(self) -> bool {
+        matches!(self, Self::Extended | Self::Unknown)
+    }
+
+    /// Is the format known for certain to be extended?
+    pub(crate) const fn is_extended(self) -> bool {
+        matches!(self, Self::Extended)
+    }
+
+    /// If the kind is `Unknown`, make it `Basic`. Otherwise, do nothing. Returns `Some` if and only
+    /// if the kind is now `Basic`.
+    pub(crate) fn coerce_basic(&mut self) -> Option<()> {
+        match self {
+            Self::Basic => Some(()),
+            Self::Extended => None,
+            Self::Unknown => {
+                *self = Self::Basic;
+                Some(())
+            }
+        }
+    }
+
+    /// If the kind is `Unknown`, make it `Extended`. Otherwise, do nothing. Returns `Some` if and
+    /// only if the kind is now `Extended`.
+    pub(crate) fn coerce_extended(&mut self) -> Option<()> {
+        match self {
+            Self::Basic => None,
+            Self::Extended => Some(()),
+            Self::Unknown => {
+                *self = Self::Extended;
+                Some(())
+            }
+        }
+    }
+}
+
+/// Parse a possibly expanded year.
+pub(crate) fn year(input: &[u8]) -> Option<ParsedItem<'_, i32>> {
+    Some(match sign(input) {
+        Some(ParsedItem(input, sign)) => exactly_n_digits::<6, u32>(input)?.map(|val| {
+            let val = val as i32;
+            if sign == b'-' { -val } else { val }
+        }),
+        None => exactly_n_digits::<4, u32>(input)?.map(|val| val as _),
+    })
+}
+
+/// Parse a month.
+pub(crate) fn month(input: &[u8]) -> Option<ParsedItem<'_, Month>> {
+    first_match(
+        [
+            (b"01".as_slice(), Month::January),
+            (b"02".as_slice(), Month::February),
+            (b"03".as_slice(), Month::March),
+            (b"04".as_slice(), Month::April),
+            (b"05".as_slice(), Month::May),
+            (b"06".as_slice(), Month::June),
+            (b"07".as_slice(), Month::July),
+            (b"08".as_slice(), Month::August),
+            (b"09".as_slice(), Month::September),
+            (b"10".as_slice(), Month::October),
+            (b"11".as_slice(), Month::November),
+            (b"12".as_slice(), Month::December),
+        ],
+        true,
+    )(input)
+}
+
+/// Parse a week number.
+pub(crate) fn week(input: &[u8]) -> Option<ParsedItem<'_, NonZeroU8>> {
+    exactly_n_digits::<2, _>(input)
+}
+
+/// Parse a day of the month.
+pub(crate) fn day(input: &[u8]) -> Option<ParsedItem<'_, NonZeroU8>> {
+    exactly_n_digits::<2, _>(input)
+}
+
+/// Parse a day of the week.
+pub(crate) fn dayk(input: &[u8]) -> Option<ParsedItem<'_, Weekday>> {
+    first_match(
+        [
+            (b"1".as_slice(), Weekday::Monday),
+            (b"2".as_slice(), Weekday::Tuesday),
+            (b"3".as_slice(), Weekday::Wednesday),
+            (b"4".as_slice(), Weekday::Thursday),
+            (b"5".as_slice(), Weekday::Friday),
+            (b"6".as_slice(), Weekday::Saturday),
+            (b"7".as_slice(), Weekday::Sunday),
+        ],
+        true,
+    )(input)
+}
+
+/// Parse a day of the year.
+pub(crate) fn dayo(input: &[u8]) -> Option<ParsedItem<'_, NonZeroU16>> {
+    exactly_n_digits::<3, _>(input)
+}
+
+/// Parse the hour.
+pub(crate) fn hour(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
+    exactly_n_digits::<2, _>(input)
+}
+
+/// Parse the minute.
+pub(crate) fn min(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
+    exactly_n_digits::<2, _>(input)
+}
+
+/// Parse a floating point number as its integer and optional fractional parts.
+///
+/// The number must have two digits before the decimal point. If a decimal point is present, at
+/// least one digit must follow.
+///
+/// The return type is a tuple of the integer part and optional fraction part.
+pub(crate) fn float(input: &[u8]) -> Option<ParsedItem<'_, (u8, Option<f64>)>> {
+    // Two digits before the decimal.
+    let ParsedItem(input, integer_part) = match input {
+        [
+            first_digit @ b'0'..=b'9',
+            second_digit @ b'0'..=b'9',
+            input @ ..,
+        ] => ParsedItem(input, (first_digit - b'0') * 10 + (second_digit - b'0')),
+        _ => return None,
+    };
+
+    if let Some(ParsedItem(input, ())) = decimal_sign(input) {
+        // Mandatory post-decimal digit.
+        let ParsedItem(mut input, mut fractional_part) =
+            any_digit(input)?.map(|digit| ((digit - b'0') as f64) / 10.);
+
+        let mut divisor = 10.;
+        // Any number of subsequent digits.
+        while let Some(ParsedItem(new_input, digit)) = any_digit(input) {
+            input = new_input;
+            divisor *= 10.;
+            fractional_part += (digit - b'0') as f64 / divisor;
+        }
+
+        Some(ParsedItem(input, (integer_part, Some(fractional_part))))
+    } else {
+        Some(ParsedItem(input, (integer_part, None)))
+    }
+}
+
+/// Parse a "decimal sign", which is either a comma or a period.
+fn decimal_sign(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    ascii_char::<b'.'>(input).or_else(|| ascii_char::<b','>(input))
+}
diff --git a/vendor/time/src/parsing/combinator/rfc/mod.rs b/vendor/time/src/parsing/combinator/rfc/mod.rs
new file mode 100644
index 000000000..2974a4d5c
--- /dev/null
+++ b/vendor/time/src/parsing/combinator/rfc/mod.rs
@@ -0,0 +1,10 @@
+//! Combinators for rules as defined in a standard.
+//!
+//! When applicable, these rules have been converted strictly following the ABNF syntax as specified
+//! in [RFC 2234].
+//!
+//! [RFC 2234]: https://datatracker.ietf.org/doc/html/rfc2234
+
+pub(crate) mod iso8601;
+pub(crate) mod rfc2234;
+pub(crate) mod rfc2822;
diff --git a/vendor/time/src/parsing/combinator/rfc/rfc2234.rs b/vendor/time/src/parsing/combinator/rfc/rfc2234.rs
new file mode 100644
index 000000000..675344435
--- /dev/null
+++ b/vendor/time/src/parsing/combinator/rfc/rfc2234.rs
@@ -0,0 +1,13 @@
+//! Rules defined in [RFC 2234].
+//!
+//! [RFC 2234]: https://datatracker.ietf.org/doc/html/rfc2234
+
+use crate::parsing::ParsedItem;
+
+/// Consume exactly one space or tab.
+pub(crate) const fn wsp(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    match input {
+        [b' ' | b'\t', rest @ ..] => Some(ParsedItem(rest, ())),
+        _ => None,
+    }
+}
diff --git a/vendor/time/src/parsing/combinator/rfc/rfc2822.rs b/vendor/time/src/parsing/combinator/rfc/rfc2822.rs
new file mode 100644
index 000000000..8410de06e
--- /dev/null
+++ b/vendor/time/src/parsing/combinator/rfc/rfc2822.rs
@@ -0,0 +1,115 @@
+//! Rules defined in [RFC 2822].
+//!
+//! [RFC 2822]: https://datatracker.ietf.org/doc/html/rfc2822
+
+use crate::parsing::combinator::rfc::rfc2234::wsp;
+use crate::parsing::combinator::{ascii_char, one_or_more, zero_or_more};
+use crate::parsing::ParsedItem;
+
+/// Consume the `fws` rule.
+// The full rule is equivalent to /\r\n[ \t]+|[ \t]+(?:\r\n[ \t]+)*/
+pub(crate) fn fws(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    if let [b'\r', b'\n', rest @ ..] = input {
+        one_or_more(wsp)(rest)
+    } else {
+        input = one_or_more(wsp)(input)?.into_inner();
+        while let [b'\r', b'\n', rest @ ..] = input {
+            input = one_or_more(wsp)(rest)?.into_inner();
+        }
+        Some(ParsedItem(input, ()))
+    }
+}
+
+/// Consume the `cfws` rule.
+// The full rule is equivalent to any combination of `fws` and `comment` so long as it is not empty.
+pub(crate) fn cfws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    one_or_more(|input| fws(input).or_else(|| comment(input)))(input)
+}
+
+/// Consume the `comment` rule.
+fn comment(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    input = ascii_char::<b'('>(input)?.into_inner();
+    input = zero_or_more(fws)(input).into_inner();
+    while let Some(rest) = ccontent(input) {
+        input = rest.into_inner();
+        input = zero_or_more(fws)(input).into_inner();
+    }
+    input = ascii_char::<b')'>(input)?.into_inner();
+
+    Some(ParsedItem(input, ()))
+}
+
+/// Consume the `ccontent` rule.
+fn ccontent(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    ctext(input)
+        .or_else(|| quoted_pair(input))
+        .or_else(|| comment(input))
+}
+
+/// Consume the `ctext` rule.
+#[allow(clippy::unnecessary_lazy_evaluations)] // rust-lang/rust-clippy#8522
+fn ctext(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    no_ws_ctl(input).or_else(|| match input {
+        [33..=39 | 42..=91 | 93..=126, rest @ ..] => Some(ParsedItem(rest, ())),
+        _ => None,
+    })
+}
+
+/// Consume the `quoted_pair` rule.
+fn quoted_pair(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    input = ascii_char::<b'\\'>(input)?.into_inner();
+
+    let old_input_len = input.len();
+
+    input = text(input).into_inner();
+
+    // If nothing is parsed, this means we hit the `obs-text` rule and nothing matched. This is
+    // technically a success, but we should still check the `obs-qp` rule to ensure we consume
+    // everything possible.
+    if input.len() == old_input_len {
+        match input {
+            [0..=127, rest @ ..] => Some(ParsedItem(rest, ())),
+            _ => Some(ParsedItem(input, ())),
+        }
+    } else {
+        Some(ParsedItem(input, ()))
+    }
+}
+
+/// Consume the `no_ws_ctl` rule.
+const fn no_ws_ctl(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
+    match input {
+        [1..=8 | 11..=12 | 14..=31 | 127, rest @ ..] => Some(ParsedItem(rest, ())),
+        _ => None,
+    }
+}
+
+/// Consume the `text` rule.
+fn text<'a>(input: &'a [u8]) -> ParsedItem<'a, ()> {
+    let new_text = |input: &'a [u8]| match input {
+        [1..=9 | 11..=12 | 14..=127, rest @ ..] => Some(ParsedItem(rest, ())),
+        _ => None,
+    };
+
+    let obs_char = |input: &'a [u8]| match input {
+        // This is technically allowed, but consuming this would mean the rest of the string is
+        // eagerly consumed without consideration for where the comment actually ends.
+        [b')', ..] => None,
+        [0..=9 | 11..=12 | 14..=127, rest @ ..] => Some(rest),
+        _ => None,
+    };
+
+    let obs_text = |mut input| {
+        input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
+        input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
+        while let Some(rest) = obs_char(input) {
+            input = rest;
+            input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
+            input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
+        }
+
+        ParsedItem(input, ())
+    };
+
+    new_text(input).unwrap_or_else(|| obs_text(input))
+}
-- 
cgit v1.2.3