// Copyright Mozilla Foundation. See the COPYRIGHT // file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use super::*; use crate::handles::*; use crate::variant::*; cfg_if! { if #[cfg(feature = "simd-accel")] { use simd_funcs::*; use packed_simd::u16x8; #[inline(always)] fn shift_upper(unpacked: u16x8) -> u16x8 { let highest_ascii = u16x8::splat(0x7F); unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } } else { } } pub struct UserDefinedDecoder; impl UserDefinedDecoder { pub fn new() -> VariantDecoder { VariantDecoder::UserDefined(UserDefinedDecoder) } pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option { Some(byte_length) } pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option { byte_length.checked_mul(3) } pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option { byte_length.checked_mul(3) } decoder_function!( {}, {}, {}, { if b < 0x80 { // ASCII run not optimized, because binary data expected destination_handle.write_ascii(b); continue; } destination_handle.write_upper_bmp(u16::from(b) + 0xF700); continue; }, self, src_consumed, dest, source, b, destination_handle, _unread_handle, check_space_bmp, decode_to_utf8_raw, u8, Utf8Destination ); #[cfg(not(feature = "simd-accel"))] pub fn decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize) { let (pending, length) = if dst.len() < src.len() { (DecoderResult::OutputFull, dst.len()) } else { (DecoderResult::InputEmpty, src.len()) }; let src_trim = &src[..length]; let dst_trim = &mut dst[..length]; src_trim .iter() .zip(dst_trim.iter_mut()) .for_each(|(from, to)| { *to = { let unit = *from; if unit < 0x80 { u16::from(unit) } else { u16::from(unit) + 0xF700 } } }); (pending, length, length) } #[cfg(feature = "simd-accel")] pub fn decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize) { let (pending, length) = if dst.len() < src.len() { (DecoderResult::OutputFull, dst.len()) } else { (DecoderResult::InputEmpty, src.len()) }; // Not bothering with alignment let tail_start = length & !0xF; let simd_iterations = length >> 4; let src_ptr = src.as_ptr(); let dst_ptr = dst.as_mut_ptr(); for i in 0..simd_iterations { let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) }; let (first, second) = simd_unpack(input); unsafe { store8_unaligned(dst_ptr.add(i * 16), shift_upper(first)); store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second)); } } let src_tail = &src[tail_start..length]; let dst_tail = &mut dst[tail_start..length]; src_tail .iter() .zip(dst_tail.iter_mut()) .for_each(|(from, to)| { *to = { let unit = *from; if unit < 0x80 { u16::from(unit) } else { u16::from(unit) + 0xF700 } } }); (pending, length, length) } } pub struct UserDefinedEncoder; impl UserDefinedEncoder { pub fn new(encoding: &'static Encoding) -> Encoder { Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder)) } pub fn max_buffer_length_from_utf16_without_replacement( &self, u16_length: usize, ) -> Option { Some(u16_length) } pub fn max_buffer_length_from_utf8_without_replacement( &self, byte_length: usize, ) -> Option { Some(byte_length) } encoder_functions!( {}, { if c <= '\u{7F}' { // TODO optimize ASCII run destination_handle.write_one(c as u8); continue; } if c < '\u{F780}' || c > '\u{F7FF}' { return ( EncoderResult::Unmappable(c), unread_handle.consumed(), destination_handle.written(), ); } destination_handle.write_one((u32::from(c) - 0xF700) as u8); continue; }, self, src_consumed, source, dest, c, destination_handle, unread_handle, check_space_one ); } // Any copyright to the test code below this comment is dedicated to the // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ #[cfg(all(test, feature = "alloc"))] mod tests { use super::super::testing::*; use super::super::*; fn decode_x_user_defined(bytes: &[u8], expect: &str) { decode(X_USER_DEFINED, bytes, expect); } fn encode_x_user_defined(string: &str, expect: &[u8]) { encode(X_USER_DEFINED, string, expect); } #[test] fn test_x_user_defined_decode() { // Empty decode_x_user_defined(b"", ""); // ASCII decode_x_user_defined(b"\x61\x62", "\u{0061}\u{0062}"); decode_x_user_defined(b"\x80\xFF", "\u{F780}\u{F7FF}"); decode_x_user_defined(b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62", "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}"); } #[test] fn test_x_user_defined_encode() { // Empty encode_x_user_defined("", b""); // ASCII encode_x_user_defined("\u{0061}\u{0062}", b"\x61\x62"); encode_x_user_defined("\u{F780}\u{F7FF}", b"\x80\xFF"); encode_x_user_defined("\u{F77F}\u{F800}", b""); } #[test] fn test_x_user_defined_from_two_low_surrogates() { let expectation = b"��"; let mut output = [0u8; 40]; let mut encoder = X_USER_DEFINED.new_encoder(); let (result, read, written, had_errors) = encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true); assert_eq!(result, CoderResult::InputEmpty); assert_eq!(read, 2); assert_eq!(written, expectation.len()); assert!(had_errors); assert_eq!(&output[..written], expectation); } }