/// Find and parse sign and get remaining bytes. #[inline] fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) { match bytes.get(0) { Some(&b'+') => (true, &bytes[1..]), Some(&b'-') => (false, &bytes[1..]), _ => (true, bytes), } } // Convert u8 to digit. #[inline] fn to_digit(c: u8) -> Option { (c as char).to_digit(10) } // Add digit from exponent. #[inline] fn add_digit_i32(value: i32, digit: u32) -> Option { return value.checked_mul(10)?.checked_add(digit as i32); } // Subtract digit from exponent. #[inline] fn sub_digit_i32(value: i32, digit: u32) -> Option { return value.checked_mul(10)?.checked_sub(digit as i32); } // Convert character to digit. #[inline] fn is_digit(c: u8) -> bool { to_digit(c).is_some() } // Split buffer at index. #[inline] fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) { (&digits[..index], &digits[index..]) } /// Consume until a an invalid digit is found. /// /// - `digits` - Slice containing 0 or more digits. #[inline] fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) { // Consume all digits. let mut index = 0; while index < digits.len() && is_digit(digits[index]) { index += 1; } split_at_index(digits, index) } // Trim leading 0s. #[inline] fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] { let count = bytes.iter().take_while(|&&si| si == b'0').count(); &bytes[count..] } // Trim trailing 0s. #[inline] fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] { let count = bytes.iter().rev().take_while(|&&si| si == b'0').count(); let index = bytes.len() - count; &bytes[..index] } // PARSERS // ------- /// Parse the exponent of the float. /// /// * `exponent` - Slice containing the exponent digits. /// * `is_positive` - If the exponent sign is positive. fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 { // Parse the sign bit or current data. let mut value: i32 = 0; match is_positive { true => { for c in exponent { value = match add_digit_i32(value, to_digit(*c).unwrap()) { Some(v) => v, None => return i32::max_value(), }; } }, false => { for c in exponent { value = match sub_digit_i32(value, to_digit(*c).unwrap()) { Some(v) => v, None => return i32::min_value(), }; } }, } value } pub fn case_insensitive_starts_with<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool where Iter1: Iterator, Iter2: Iterator, { // We use a faster optimization here for ASCII letters, which NaN // and infinite strings **must** be. [A-Z] is 0x41-0x5A, while // [a-z] is 0x61-0x7A. Therefore, the xor must be 0 or 32 if they // are case-insensitive equal, but only if at least 1 of the inputs // is an ASCII letter. loop { let yi = y.next(); if yi.is_none() { return true; } let yi = *yi.unwrap(); let is_not_equal = x.next().map_or(true, |&xi| { let xor = xi ^ yi; xor != 0 && xor != 0x20 }); if is_not_equal { return false; } } } /// Parse float from input bytes, returning the float and the remaining bytes. /// /// * `bytes` - Array of bytes leading with float-data. pub fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8]) where F: minimal_lexical::Float, { let start = bytes; // Parse the sign. let (is_positive, bytes) = parse_sign(bytes); // Check NaN, Inf, Infinity if case_insensitive_starts_with(bytes.iter(), b"NaN".iter()) { let mut float = F::from_bits(F::EXPONENT_MASK | (F::HIDDEN_BIT_MASK >> 1)); if !is_positive { float = -float; } return (float, &bytes[3..]); } else if case_insensitive_starts_with(bytes.iter(), b"Infinity".iter()) { let mut float = F::from_bits(F::EXPONENT_MASK); if !is_positive { float = -float; } return (float, &bytes[8..]); } else if case_insensitive_starts_with(bytes.iter(), b"inf".iter()) { let mut float = F::from_bits(F::EXPONENT_MASK); if !is_positive { float = -float; } return (float, &bytes[3..]); } // Extract and parse the float components: // 1. Integer // 2. Fraction // 3. Exponent let (integer_slc, bytes) = consume_digits(bytes); let (fraction_slc, bytes) = match bytes.first() { Some(&b'.') => consume_digits(&bytes[1..]), _ => (&bytes[..0], bytes), }; let (exponent, bytes) = match bytes.first() { Some(&b'e') | Some(&b'E') => { // Extract and parse the exponent. let (is_positive, bytes) = parse_sign(&bytes[1..]); let (exponent, bytes) = consume_digits(bytes); (parse_exponent(exponent, is_positive), bytes) }, _ => (0, bytes), }; if bytes.len() == start.len() { return (F::from_u64(0), bytes); } // Note: You may want to check and validate the float data here: // 1). Many floats require integer or fraction digits, if a fraction // is present. // 2). All floats require either integer or fraction digits. // 3). Some floats do not allow a '+' sign before the significant digits. // 4). Many floats require exponent digits after the exponent symbol. // 5). Some floats do not allow a '+' sign before the exponent. // We now need to trim leading and trailing 0s from the integer // and fraction, respectively. This is required to make the // fast and moderate paths more efficient, and for the slow // path. let integer_slc = ltrim_zero(integer_slc); let fraction_slc = rtrim_zero(fraction_slc); // Create the float and return our data. let mut float: F = minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent); if !is_positive { float = -float; } (float, bytes) } macro_rules! b { ($x:literal) => { $x.as_bytes() }; } #[test] fn f32_test() { assert_eq!( (184467440000000000000.0, b!("\x00\x00006")), parse_float::(b"000184467440737095516150\x00\x00006") ); } #[test] fn f64_test() { assert_eq!( (184467440737095500000.0, b!("\x00\x00006")), parse_float::(b"000184467440737095516150\x00\x00006") ); }