// Copyright Mozilla Foundation. See the COPYRIGHT // file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. macro_rules! decoder_function { ($preamble:block, $loop_preable:block, $eof:block, $body:block, $slf:ident, $src_consumed:ident, $dest:ident, $source:ident, $b:ident, $destination_handle:ident, $unread_handle:ident, $destination_check:ident, $name:ident, $code_unit:ty, $dest_struct:ident) => ( pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool) -> (DecoderResult, usize, usize) { let mut $source = ByteSource::new(src); let mut $dest = $dest_struct::new(dst); loop { // TODO: remove this loop { // Start non-boilerplate $preamble // End non-boilerplate } loop { { $loop_preable } match $source.check_available() { Space::Full($src_consumed) => { if last { // Start non-boilerplate $eof // End non-boilerplate } return (DecoderResult::InputEmpty, $src_consumed, $dest.written()); } Space::Available(source_handle) => { match $dest.$destination_check() { Space::Full(dst_written) => { return (DecoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available($destination_handle) => { let ($b, $unread_handle) = source_handle.read(); // Start non-boilerplate $body // End non-boilerplate } } } } } } }); } macro_rules! 
decoder_functions { ( $preamble:block, $loop_preable:block, $eof:block, $body:block, $slf:ident, $src_consumed:ident, $dest:ident, $source:ident, $b:ident, $destination_handle:ident, $unread_handle:ident, $destination_check:ident ) => { decoder_function!( $preamble, $loop_preable, $eof, $body, $slf, $src_consumed, $dest, $source, $b, $destination_handle, $unread_handle, $destination_check, decode_to_utf8_raw, u8, Utf8Destination ); decoder_function!( $preamble, $loop_preable, $eof, $body, $slf, $src_consumed, $dest, $source, $b, $destination_handle, $unread_handle, $destination_check, decode_to_utf16_raw, u16, Utf16Destination ); }; } macro_rules! ascii_compatible_two_byte_decoder_function { ($lead:block, $trail:block, $slf:ident, $non_ascii:ident, $byte:ident, $lead_minus_offset:ident, $unread_handle_trail:ident, $source:ident, $handle:ident, $outermost:tt, $copy_ascii:ident, $destination_check:ident, $name:ident, $code_unit:ty, $dest_struct:ident, $ascii_punctuation:expr) => ( pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool) -> (DecoderResult, usize, usize) { let mut $source = ByteSource::new(src); let mut dest_prolog = $dest_struct::new(dst); let dest = match $slf.lead { Some(lead) => { let $lead_minus_offset = lead; $slf.lead = None; // Since we don't have `goto` we could use to jump into the trail // handling part of the main loop, we need to repeat trail handling // here. 
match $source.check_available() { Space::Full(src_consumed_prolog) => { if last { return (DecoderResult::Malformed(1, 0), src_consumed_prolog, dest_prolog.written()); } return (DecoderResult::InputEmpty, src_consumed_prolog, dest_prolog.written()); } Space::Available(source_handle_prolog) => { match dest_prolog.$destination_check() { Space::Full(dst_written_prolog) => { return (DecoderResult::OutputFull, source_handle_prolog.consumed(), dst_written_prolog); } Space::Available($handle) => { let ($byte, $unread_handle_trail) = source_handle_prolog.read(); // Start non-boilerplate $trail // End non-boilerplate } } } } }, None => { &mut dest_prolog } }; $outermost: loop { match dest.$copy_ascii(&mut $source) { CopyAsciiResult::Stop(ret) => return ret, CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => { 'middle: loop { let dest_again = { let $lead_minus_offset = { // Start non-boilerplate $lead // End non-boilerplate }; match $source.check_available() { Space::Full(src_consumed_trail) => { if last { return (DecoderResult::Malformed(1, 0), src_consumed_trail, $handle.written()); } $slf.lead = Some($lead_minus_offset); return (DecoderResult::InputEmpty, src_consumed_trail, $handle.written()); } Space::Available(source_handle_trail) => { let ($byte, $unread_handle_trail) = source_handle_trail.read(); // Start non-boilerplate $trail // End non-boilerplate } } }; match $source.check_available() { Space::Full(src_consumed) => { return (DecoderResult::InputEmpty, src_consumed, dest_again.written()); } Space::Available(source_handle) => { match dest_again.$destination_check() { Space::Full(dst_written) => { return (DecoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available(mut destination_handle) => { let (mut b, unread_handle) = source_handle.read(); let source_again = unread_handle.commit(); 'innermost: loop { if b > 127 { $non_ascii = b; $handle = destination_handle; continue 'middle; } // Testing on Haswell says that we should write the // 
byte unconditionally instead of trying to unread it // to make it part of the next SIMD stride. let dest_again_again = destination_handle.write_ascii(b); if $ascii_punctuation && b < 60 { // We've got punctuation match source_again.check_available() { Space::Full(src_consumed_again) => { return (DecoderResult::InputEmpty, src_consumed_again, dest_again_again.written()); } Space::Available(source_handle_again) => { match dest_again_again.$destination_check() { Space::Full(dst_written_again) => { return (DecoderResult::OutputFull, source_handle_again.consumed(), dst_written_again); } Space::Available(destination_handle_again) => { { let (b_again, _unread_handle_again) = source_handle_again.read(); b = b_again; destination_handle = destination_handle_again; continue 'innermost; } } } } } } // We've got markup or ASCII text continue $outermost; } } } } } } } } } }); } macro_rules! ascii_compatible_two_byte_decoder_functions { ( $lead:block, $trail:block, $slf:ident, $non_ascii:ident, $byte:ident, $lead_minus_offset:ident, $unread_handle_trail:ident, $source:ident, $handle:ident, $outermost:tt, $copy_ascii:ident, $destination_check:ident, $ascii_punctuation:expr ) => { ascii_compatible_two_byte_decoder_function!( $lead, $trail, $slf, $non_ascii, $byte, $lead_minus_offset, $unread_handle_trail, $source, $handle, $outermost, $copy_ascii, $destination_check, decode_to_utf8_raw, u8, Utf8Destination, $ascii_punctuation ); ascii_compatible_two_byte_decoder_function!( $lead, $trail, $slf, $non_ascii, $byte, $lead_minus_offset, $unread_handle_trail, $source, $handle, $outermost, $copy_ascii, $destination_check, decode_to_utf16_raw, u16, Utf16Destination, $ascii_punctuation ); }; } macro_rules! 
gb18030_decoder_function {
    // Generates one `pub fn $name(&mut self, src, dst, last)` decoder method
    // for sequences of one, two or four bytes. Returns
    // `(DecoderResult, bytes consumed, code units written)`.
    //
    // The generated function has three phases:
    // 1. If `$slf.pending_ascii` holds a byte, write it out first.
    // 2. While `$slf.pending` is not `Gb18030Pending::None`, resume the
    //    sequence that was cut off at the end of the previous buffer, one
    //    byte per iteration.
    // 3. The main loop: bulk-copy ASCII, then handle each non-ASCII lead.
    //
    // `$first_body` .. `$fourth_body` are caller-supplied blocks for the
    // respective byte positions; the identifiers are passed in so that those
    // blocks can name the locals introduced here.
    ($first_body:block,
     $second_body:block,
     $third_body:block,
     $fourth_body:block,
     $slf:ident,
     $non_ascii:ident,
     $first_minus_offset:ident,
     $second:ident,
     $second_minus_offset:ident,
     $unread_handle_second:ident,
     $third:ident,
     $third_minus_offset:ident,
     $unread_handle_third:ident,
     $fourth:ident,
     $fourth_minus_offset:ident,
     $unread_handle_fourth:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest = $dest_struct::new(dst);
        {
            // Phase 1: flush a pending ASCII byte, if any. Nothing has been
            // read or written yet, hence the `0, 0` on `OutputFull`.
            if let Some(ascii) = $slf.pending_ascii {
                match dest.check_space_bmp() {
                    Space::Full(_) => {
                        return (DecoderResult::OutputFull, 0, 0);
                    }
                    Space::Available(pending_ascii_handle) => {
                        $slf.pending_ascii = None;
                        pending_ascii_handle.write_ascii(ascii);
                    }
                }
            }
        }
        // Phase 2: resume a sequence left incomplete by the previous call.
        // `$slf.pending` is only modified after a byte has been read (or at
        // end of stream), so `InputEmpty`/`OutputFull` returns keep the state.
        while !$slf.pending.is_none() {
            match $source.check_available() {
                Space::Full(src_consumed) => {
                    if last {
                        // Start non-boilerplate
                        let count = $slf.pending.count();
                        $slf.pending = Gb18030Pending::None;
                        return (DecoderResult::Malformed(count as u8, 0),
                                src_consumed,
                                dest.written());
                        // End non-boilerplate
                    }
                    return (DecoderResult::InputEmpty, src_consumed, dest.written());
                }
                Space::Available(source_handle) => {
                    match dest.check_space_astral() {
                        Space::Full(dst_written) => {
                            return (DecoderResult::OutputFull,
                                    source_handle.consumed(),
                                    dst_written);
                        }
                        Space::Available($handle) => {
                            let (byte, unread_handle) = source_handle.read();
                            match $slf.pending {
                                Gb18030Pending::One($first_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $second = byte;
                                    let $unread_handle_second = unread_handle;
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        $slf.pending = Gb18030Pending::Two($first_minus_offset,
                                                                           $second_minus_offset);
                                        $handle.commit()
                                    }
                                }
                                Gb18030Pending::Two($first_minus_offset, $second_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $third = byte;
                                    let $unread_handle_third = unread_handle;
                                    let $third_minus_offset = {
                                        // Start non-boilerplate
                                        $third_body
                                        // End non-boilerplate
                                    };
                                    $slf.pending = Gb18030Pending::Three($first_minus_offset,
                                                                         $second_minus_offset,
                                                                         $third_minus_offset);
                                    $handle.commit()
                                }
                                Gb18030Pending::Three($first_minus_offset,
                                                      $second_minus_offset,
                                                      $third_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $fourth = byte;
                                    let $unread_handle_fourth = unread_handle;
                                    // Start non-boilerplate
                                    $fourth_body
                                    // End non-boilerplate
                                }
                                Gb18030Pending::None => unreachable!("Checked in loop condition"),
                            };
                        }
                    }
                }
            }
        }
        // Phase 3: main decode loop.
        $outermost: loop {
            match dest.copy_ascii_from_check_space_astral(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    'middle: loop {
                        let dest_again = {
                            let $first_minus_offset = {
                                // Start non-boilerplate
                                $first_body
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    // Remember the first byte for the next call.
                                    $slf.pending = Gb18030Pending::One($first_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($second, $unread_handle_second) = source_handle_trail.read();
                                    // Start non-boilerplate
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        match $unread_handle_second.commit().check_available() {
                                            Space::Full(src_consumed_third) => {
                                                if last {
                                                    return (DecoderResult::Malformed(2, 0),
                                                            src_consumed_third,
                                                            $handle.written());
                                                }
                                                // Remember the first two bytes.
                                                $slf.pending = Gb18030Pending::Two($first_minus_offset,
                                                                                   $second_minus_offset);
                                                return (DecoderResult::InputEmpty,
                                                        src_consumed_third,
                                                        $handle.written());
                                            }
                                            Space::Available(source_handle_third) => {
                                                let ($third, $unread_handle_third) = source_handle_third.read();
                                                let $third_minus_offset = {
                                                    // Start non-boilerplate
                                                    $third_body
                                                    // End non-boilerplate
                                                };
                                                match $unread_handle_third.commit()
                                                                          .check_available() {
                                                    Space::Full(src_consumed_fourth) => {
                                                        if last {
                                                            return (DecoderResult::Malformed(3, 0),
                                                                    src_consumed_fourth,
                                                                    $handle.written());
                                                        }
                                                        // Remember the first three bytes.
                                                        $slf.pending = Gb18030Pending::Three($first_minus_offset, $second_minus_offset, $third_minus_offset);
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_fourth,
                                                                $handle.written());
                                                    }
                                                    Space::Available(source_handle_fourth) => {
                                                        let ($fourth, $unread_handle_fourth) = source_handle_fourth.read();
                                                        // Start non-boilerplate
                                                        $fourth_body
                                                        // End non-boilerplate
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    // End non-boilerplate
                                }
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.check_space_astral() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(destination_handle) => {
                                        let (b, _) = source_handle.read();
                                        // This `loop` never iterates twice: both
                                        // paths leave it via a labelled `continue`.
                                        loop {
                                            if b > 127 {
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            destination_handle.write_ascii(b);
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}

macro_rules! gb18030_decoder_functions {
    // Instantiates `gb18030_decoder_function!` twice with the same
    // arguments: as `decode_to_utf8_raw` (`u8`, `Utf8Destination`) and as
    // `decode_to_utf16_raw` (`u16`, `Utf16Destination`).
    (
        $first_body:block,
        $second_body:block,
        $third_body:block,
        $fourth_body:block,
        $slf:ident,
        $non_ascii:ident,
        $first_minus_offset:ident,
        $second:ident,
        $second_minus_offset:ident,
        $unread_handle_second:ident,
        $third:ident,
        $third_minus_offset:ident,
        $unread_handle_third:ident,
        $fourth:ident,
        $fourth_minus_offset:ident,
        $unread_handle_fourth:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt
    ) => {
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf8_raw,
            u8,
            Utf8Destination
        );
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf16_raw,
            u16,
            Utf16Destination
        );
    };
}

macro_rules!
euc_jp_decoder_function { ($jis0802_trail_body:block, $jis0812_lead_body:block, $jis0812_trail_body:block, $half_width_katakana_body:block, $slf:ident, $non_ascii:ident, $jis0208_lead_minus_offset:ident, $byte:ident, $unread_handle_trail:ident, $jis0212_lead_minus_offset:ident, $lead:ident, $unread_handle_jis0212:ident, $source:ident, $handle:ident, $name:ident, $code_unit:ty, $dest_struct:ident) => ( #[cfg_attr(feature = "cargo-clippy", allow(never_loop))] pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool) -> (DecoderResult, usize, usize) { let mut $source = ByteSource::new(src); let mut dest = $dest_struct::new(dst); while !$slf.pending.is_none() { match $source.check_available() { Space::Full(src_consumed) => { if last { // Start non-boilerplate let count = $slf.pending.count(); $slf.pending = EucJpPending::None; return (DecoderResult::Malformed(count as u8, 0), src_consumed, dest.written()); // End non-boilerplate } return (DecoderResult::InputEmpty, src_consumed, dest.written()); } Space::Available(source_handle) => { match dest.check_space_bmp() { Space::Full(dst_written) => { return (DecoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available($handle) => { let ($byte, $unread_handle_trail) = source_handle.read(); match $slf.pending { EucJpPending::Jis0208Lead($jis0208_lead_minus_offset) => { $slf.pending = EucJpPending::None; // Start non-boilerplate $jis0802_trail_body // End non-boilerplate } EucJpPending::Jis0212Shift => { $slf.pending = EucJpPending::None; let $lead = $byte; let $unread_handle_jis0212 = $unread_handle_trail; let $jis0212_lead_minus_offset = { // Start non-boilerplate $jis0812_lead_body // End non-boilerplate }; $slf.pending = EucJpPending::Jis0212Lead($jis0212_lead_minus_offset); $handle.commit() } EucJpPending::Jis0212Lead($jis0212_lead_minus_offset) => { $slf.pending = EucJpPending::None; // Start non-boilerplate $jis0812_trail_body // End non-boilerplate } EucJpPending::HalfWidthKatakana 
=> { $slf.pending = EucJpPending::None; // Start non-boilerplate $half_width_katakana_body // End non-boilerplate } EucJpPending::None => unreachable!("Checked in loop condition"), }; } } } } } 'outermost: loop { match dest.copy_ascii_from_check_space_bmp(&mut $source) { CopyAsciiResult::Stop(ret) => return ret, CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => { 'middle: loop { let dest_again = { // If lead is between 0xA1 and 0xFE, inclusive, // subtract 0xA1. Else if lead is 0x8E, handle the // next byte as half-width Katakana. Else if lead is // 0x8F, expect JIS 0212. let $jis0208_lead_minus_offset = $non_ascii.wrapping_sub(0xA1); if $jis0208_lead_minus_offset <= (0xFE - 0xA1) { // JIS 0208 match $source.check_available() { Space::Full(src_consumed_trail) => { if last { return (DecoderResult::Malformed(1, 0), src_consumed_trail, $handle.written()); } $slf.pending = EucJpPending::Jis0208Lead($jis0208_lead_minus_offset); return (DecoderResult::InputEmpty, src_consumed_trail, $handle.written()); } Space::Available(source_handle_trail) => { let ($byte, $unread_handle_trail) = source_handle_trail.read(); // Start non-boilerplate $jis0802_trail_body // End non-boilerplate } } } else if $non_ascii == 0x8F { match $source.check_available() { Space::Full(src_consumed_jis0212) => { if last { return (DecoderResult::Malformed(1, 0), src_consumed_jis0212, $handle.written()); } $slf.pending = EucJpPending::Jis0212Shift; return (DecoderResult::InputEmpty, src_consumed_jis0212, $handle.written()); } Space::Available(source_handle_jis0212) => { let ($lead, $unread_handle_jis0212) = source_handle_jis0212.read(); let $jis0212_lead_minus_offset = { // Start non-boilerplate $jis0812_lead_body // End non-boilerplate }; match $unread_handle_jis0212.commit().check_available() { Space::Full(src_consumed_trail) => { if last { return (DecoderResult::Malformed(2, 0), src_consumed_trail, $handle.written()); } $slf.pending = EucJpPending::Jis0212Lead($jis0212_lead_minus_offset); 
return (DecoderResult::InputEmpty, src_consumed_trail, $handle.written()); } Space::Available(source_handle_trail) => { let ($byte, $unread_handle_trail) = source_handle_trail.read(); // Start non-boilerplate $jis0812_trail_body // End non-boilerplate } } } } } else if $non_ascii == 0x8E { match $source.check_available() { Space::Full(src_consumed_trail) => { if last { return (DecoderResult::Malformed(1, 0), src_consumed_trail, $handle.written()); } $slf.pending = EucJpPending::HalfWidthKatakana; return (DecoderResult::InputEmpty, src_consumed_trail, $handle.written()); } Space::Available(source_handle_trail) => { let ($byte, $unread_handle_trail) = source_handle_trail.read(); // Start non-boilerplate $half_width_katakana_body // End non-boilerplate } } } else { return (DecoderResult::Malformed(1, 0), $source.consumed(), $handle.written()); } }; match $source.check_available() { Space::Full(src_consumed) => { return (DecoderResult::InputEmpty, src_consumed, dest_again.written()); } Space::Available(source_handle) => { match dest_again.check_space_bmp() { Space::Full(dst_written) => { return (DecoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available(destination_handle) => { let (b, _) = source_handle.read(); loop { if b > 127 { $non_ascii = b; $handle = destination_handle; continue 'middle; } // Testing on Haswell says that we should write the // byte unconditionally instead of trying to unread it // to make it part of the next SIMD stride. destination_handle.write_ascii(b); // We've got markup or ASCII text continue 'outermost; } } } } } } } } } }); } macro_rules! 
euc_jp_decoder_functions { ( $jis0802_trail_body:block, $jis0812_lead_body:block, $jis0812_trail_body:block, $half_width_katakana_body:block, $slf:ident, $non_ascii:ident, $jis0208_lead_minus_offset:ident, $byte:ident, $unread_handle_trail:ident, $jis0212_lead_minus_offset:ident, $lead:ident, $unread_handle_jis0212:ident, $source:ident, $handle:ident ) => { euc_jp_decoder_function!( $jis0802_trail_body, $jis0812_lead_body, $jis0812_trail_body, $half_width_katakana_body, $slf, $non_ascii, $jis0208_lead_minus_offset, $byte, $unread_handle_trail, $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212, $source, $handle, decode_to_utf8_raw, u8, Utf8Destination ); euc_jp_decoder_function!( $jis0802_trail_body, $jis0812_lead_body, $jis0812_trail_body, $half_width_katakana_body, $slf, $non_ascii, $jis0208_lead_minus_offset, $byte, $unread_handle_trail, $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212, $source, $handle, decode_to_utf16_raw, u16, Utf16Destination ); }; } macro_rules! encoder_function { ($eof:block, $body:block, $slf:ident, $src_consumed:ident, $source:ident, $dest:ident, $c:ident, $destination_handle:ident, $unread_handle:ident, $destination_check:ident, $name:ident, $input:ty, $source_struct:ident) => ( pub fn $name(&mut $slf, src: &$input, dst: &mut [u8], last: bool) -> (EncoderResult, usize, usize) { let mut $source = $source_struct::new(src); let mut $dest = ByteDestination::new(dst); loop { match $source.check_available() { Space::Full($src_consumed) => { if last { // Start non-boilerplate $eof // End non-boilerplate } return (EncoderResult::InputEmpty, $src_consumed, $dest.written()); } Space::Available(source_handle) => { match $dest.$destination_check() { Space::Full(dst_written) => { return (EncoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available($destination_handle) => { let ($c, $unread_handle) = source_handle.read(); // Start non-boilerplate $body // End non-boilerplate } } } } } }); } macro_rules! 
encoder_functions { ( $eof:block, $body:block, $slf:ident, $src_consumed:ident, $source:ident, $dest:ident, $c:ident, $destination_handle:ident, $unread_handle:ident, $destination_check:ident ) => { encoder_function!( $eof, $body, $slf, $src_consumed, $source, $dest, $c, $destination_handle, $unread_handle, $destination_check, encode_from_utf8_raw, str, Utf8Source ); encoder_function!( $eof, $body, $slf, $src_consumed, $source, $dest, $c, $destination_handle, $unread_handle, $destination_check, encode_from_utf16_raw, [u16], Utf16Source ); }; } macro_rules! ascii_compatible_encoder_function { ($bmp_body:block, $astral_body:block, $bmp:ident, $astral:ident, $slf:ident, $source:ident, $handle:ident, $copy_ascii:ident, $destination_check:ident, $name:ident, $input:ty, $source_struct:ident, $ascii_punctuation:expr) => ( pub fn $name(&mut $slf, src: &$input, dst: &mut [u8], _last: bool) -> (EncoderResult, usize, usize) { let mut $source = $source_struct::new(src); let mut dest = ByteDestination::new(dst); 'outermost: loop { match $source.$copy_ascii(&mut dest) { CopyAsciiResult::Stop(ret) => return ret, CopyAsciiResult::GoOn((mut non_ascii, mut $handle)) => { 'middle: loop { let dest_again = match non_ascii { NonAscii::BmpExclAscii($bmp) => { // Start non-boilerplate $bmp_body // End non-boilerplate } NonAscii::Astral($astral) => { // Start non-boilerplate $astral_body // End non-boilerplate } }; match $source.check_available() { Space::Full(src_consumed) => { return (EncoderResult::InputEmpty, src_consumed, dest_again.written()); } Space::Available(source_handle) => { match dest_again.$destination_check() { Space::Full(dst_written) => { return (EncoderResult::OutputFull, source_handle.consumed(), dst_written); } Space::Available(mut destination_handle) => { let (mut c, unread_handle) = source_handle.read_enum(); let source_again = unread_handle.commit(); 'innermost: loop { let ascii = match c { Unicode::NonAscii(non_ascii_again) => { non_ascii = non_ascii_again; $handle 
= destination_handle; continue 'middle; } Unicode::Ascii(a) => a, }; // Testing on Haswell says that we should write the // byte unconditionally instead of trying to unread it // to make it part of the next SIMD stride. let dest_again_again = destination_handle.write_one(ascii); if $ascii_punctuation && ascii < 60 { // We've got punctuation match source_again.check_available() { Space::Full(src_consumed_again) => { return (EncoderResult::InputEmpty, src_consumed_again, dest_again_again.written()); } Space::Available(source_handle_again) => { match dest_again_again.$destination_check() { Space::Full(dst_written_again) => { return (EncoderResult::OutputFull, source_handle_again.consumed(), dst_written_again); } Space::Available(destination_handle_again) => { { let (c_again, _unread_handle_again) = source_handle_again.read_enum(); c = c_again; destination_handle = destination_handle_again; continue 'innermost; } } } } } } // We've got markup or ASCII text continue 'outermost; } } } } } } } } } }); } macro_rules! ascii_compatible_encoder_functions { ( $bmp_body:block, $astral_body:block, $bmp:ident, $astral:ident, $slf:ident, $source:ident, $handle:ident, $copy_ascii:ident, $destination_check:ident, $ascii_punctuation:expr ) => { ascii_compatible_encoder_function!( $bmp_body, $astral_body, $bmp, $astral, $slf, $source, $handle, $copy_ascii, $destination_check, encode_from_utf8_raw, str, Utf8Source, $ascii_punctuation ); ascii_compatible_encoder_function!( $bmp_body, $astral_body, $bmp, $astral, $slf, $source, $handle, $copy_ascii, $destination_check, encode_from_utf16_raw, [u16], Utf16Source, $ascii_punctuation ); }; } macro_rules! 
ascii_compatible_bmp_encoder_function { ( $bmp_body:block, $bmp:ident, $slf:ident, $source:ident, $handle:ident, $copy_ascii:ident, $destination_check:ident, $name:ident, $input:ty, $source_struct:ident, $ascii_punctuation:expr ) => { ascii_compatible_encoder_function!( $bmp_body, { return ( EncoderResult::Unmappable(astral), $source.consumed(), $handle.written(), ); }, $bmp, astral, $slf, $source, $handle, $copy_ascii, $destination_check, $name, $input, $source_struct, $ascii_punctuation ); }; } macro_rules! ascii_compatible_bmp_encoder_functions { ( $bmp_body:block, $bmp:ident, $slf:ident, $source:ident, $handle:ident, $copy_ascii:ident, $destination_check:ident, $ascii_punctuation:expr ) => { ascii_compatible_encoder_functions!( $bmp_body, { return ( EncoderResult::Unmappable(astral), $source.consumed(), $handle.written(), ); }, $bmp, astral, $slf, $source, $handle, $copy_ascii, $destination_check, $ascii_punctuation ); }; } macro_rules! public_decode_function{ ($(#[$meta:meta])*, $decode_to_utf:ident, $decode_to_utf_raw:ident, $decode_to_utf_checking_end:ident, $decode_to_utf_after_one_potential_bom_byte:ident, $decode_to_utf_after_two_potential_bom_bytes:ident, $decode_to_utf_checking_end_with_offset:ident, $code_unit:ty) => ( $(#[$meta])* pub fn $decode_to_utf(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool) -> (DecoderResult, usize, usize) { let mut offset = 0usize; loop { match self.life_cycle { // The common case. (Post-sniffing.) DecoderLifeCycle::Converting => { return self.$decode_to_utf_checking_end(src, dst, last); } // The rest is all BOM sniffing! 
DecoderLifeCycle::AtStart => { debug_assert_eq!(offset, 0usize); if src.is_empty() { return (DecoderResult::InputEmpty, 0, 0); } match src[0] { 0xEFu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf8First; offset += 1; continue; } 0xFEu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst; offset += 1; continue; } 0xFFu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst; offset += 1; continue; } _ => { self.life_cycle = DecoderLifeCycle::Converting; continue; } } } DecoderLifeCycle::AtUtf8Start => { debug_assert_eq!(offset, 0usize); if src.is_empty() { return (DecoderResult::InputEmpty, 0, 0); } match src[0] { 0xEFu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf8First; offset += 1; continue; } _ => { self.life_cycle = DecoderLifeCycle::Converting; continue; } } } DecoderLifeCycle::AtUtf16BeStart => { debug_assert_eq!(offset, 0usize); if src.is_empty() { return (DecoderResult::InputEmpty, 0, 0); } match src[0] { 0xFEu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst; offset += 1; continue; } _ => { self.life_cycle = DecoderLifeCycle::Converting; continue; } } } DecoderLifeCycle::AtUtf16LeStart => { debug_assert_eq!(offset, 0usize); if src.is_empty() { return (DecoderResult::InputEmpty, 0, 0); } match src[0] { 0xFFu8 => { self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst; offset += 1; continue; } _ => { self.life_cycle = DecoderLifeCycle::Converting; continue; } } } DecoderLifeCycle::SeenUtf8First => { if offset >= src.len() { if last { return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xEFu8); } return (DecoderResult::InputEmpty, offset, 0); } if src[offset] == 0xBBu8 { self.life_cycle = DecoderLifeCycle::SeenUtf8Second; offset += 1; continue; } return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xEFu8); } DecoderLifeCycle::SeenUtf8Second => { if offset >= src.len() { if last { return self.$decode_to_utf_after_two_potential_bom_bytes(src, dst, last, offset); } return 
(DecoderResult::InputEmpty, offset, 0); } if src[offset] == 0xBFu8 { self.life_cycle = DecoderLifeCycle::Converting; offset += 1; if self.encoding != UTF_8 { self.encoding = UTF_8; self.variant = UTF_8.new_variant_decoder(); } return self.$decode_to_utf_checking_end_with_offset(src, dst, last, offset); } return self.$decode_to_utf_after_two_potential_bom_bytes(src, dst, last, offset); } DecoderLifeCycle::SeenUtf16BeFirst => { if offset >= src.len() { if last { return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xFEu8); } return (DecoderResult::InputEmpty, offset, 0); } if src[offset] == 0xFFu8 { self.life_cycle = DecoderLifeCycle::Converting; offset += 1; if self.encoding != UTF_16BE { self.encoding = UTF_16BE; self.variant = UTF_16BE.new_variant_decoder(); } return self.$decode_to_utf_checking_end_with_offset(src, dst, last, offset); } return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xFEu8); } DecoderLifeCycle::SeenUtf16LeFirst => { if offset >= src.len() { if last { return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xFFu8); } return (DecoderResult::InputEmpty, offset, 0); } if src[offset] == 0xFEu8 { self.life_cycle = DecoderLifeCycle::Converting; offset += 1; if self.encoding != UTF_16LE { self.encoding = UTF_16LE; self.variant = UTF_16LE.new_variant_decoder(); } return self.$decode_to_utf_checking_end_with_offset(src, dst, last, offset); } return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, offset, 0xFFu8); } DecoderLifeCycle::ConvertingWithPendingBB => { debug_assert_eq!(offset, 0usize); return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, 0usize, 0xBBu8); } DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."), } } } fn $decode_to_utf_after_one_potential_bom_byte(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool, offset: usize, first_byte: u8) -> (DecoderResult, usize, usize) { self.life_cycle = 
DecoderLifeCycle::Converting; if offset == 0usize { // First byte was seen previously. let first = [first_byte]; let mut out_read = 0usize; let (mut first_result, _, mut first_written) = self.variant .$decode_to_utf_raw(&first[..], dst, false); match first_result { DecoderResult::InputEmpty => { let (result, read, written) = self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last); first_result = result; out_read = read; // Overwrite, don't add! first_written += written; } DecoderResult::Malformed(_, _) => { // Wasn't read from `src`!, leave out_read to 0 } DecoderResult::OutputFull => { panic!("Output buffer must have been too small."); } } return (first_result, out_read, first_written); } debug_assert_eq!(offset, 1usize); // The first byte is in `src`, so no need to push it separately. self.$decode_to_utf_checking_end(src, dst, last) } fn $decode_to_utf_after_two_potential_bom_bytes(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool, offset: usize) -> (DecoderResult, usize, usize) { self.life_cycle = DecoderLifeCycle::Converting; if offset == 0usize { // The first two bytes are not in the current buffer.. let ef_bb = [0xEFu8, 0xBBu8]; let (mut first_result, mut first_read, mut first_written) = self.variant .$decode_to_utf_raw(&ef_bb[..], dst, false); match first_result { DecoderResult::InputEmpty => { let (result, read, written) = self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last); first_result = result; first_read = read; // Overwrite, don't add! first_written += written; } DecoderResult::Malformed(_, _) => { if first_read == 1usize { // The first byte was malformed. We need to handle // the second one, which isn't in `src`, later. self.life_cycle = DecoderLifeCycle::ConvertingWithPendingBB; } first_read = 0usize; // Wasn't read from `src`! 
} DecoderResult::OutputFull => { panic!("Output buffer must have been too small."); } } return (first_result, first_read, first_written); } if offset == 1usize { // The first byte isn't in the current buffer but the second one // is. return self.$decode_to_utf_after_one_potential_bom_byte(src, dst, last, 0usize, 0xEFu8); } debug_assert_eq!(offset, 2usize); // The first two bytes are in `src`, so no need to push them separately. self.$decode_to_utf_checking_end(src, dst, last) } /// Calls `$decode_to_utf_checking_end` with `offset` bytes omitted from /// the start of `src` but adjusting the return values to show those bytes /// as having been consumed. fn $decode_to_utf_checking_end_with_offset(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool, offset: usize) -> (DecoderResult, usize, usize) { debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting); let (result, read, written) = self.$decode_to_utf_checking_end(&src[offset..], dst, last); (result, read + offset, written) } /// Calls through to the delegate and adjusts life cycle iff `last` is /// `true` and result is `DecoderResult::InputEmpty`. fn $decode_to_utf_checking_end(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool) -> (DecoderResult, usize, usize) { debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting); let (result, read, written) = self.variant .$decode_to_utf_raw(src, dst, last); if last { if let DecoderResult::InputEmpty = result { self.life_cycle = DecoderLifeCycle::Finished; } } (result, read, written) }); }