// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT // file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. // Adapted from third_party/rust/encoding_rs/src/lib.rs, so the // "top-level directory" in the above notice refers to // third_party/rust/encoding_rs/. extern crate encoding_rs; extern crate nserror; extern crate nsstring; extern crate xmldecl; use encoding_rs::*; use nserror::*; use nsstring::*; use std::slice; /// Takes `Option`, the destination string and a value /// to return on failure and tries to start a bulk write of the /// destination string with the capacity given by the `usize` /// wrapped in the first argument. Returns the bulk write /// handle. macro_rules! try_start_bulk_write { ($needed:expr, $dst:ident, $ret:expr) => {{ let needed = match $needed { Some(needed) => needed, None => { return $ret; } }; match unsafe { $dst.bulk_write(needed, 0, false) } { Err(_) => { return $ret; } Ok(handle) => handle, } }}; } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring( encoding: *mut *const Encoding, src: *const u8, src_len: usize, dst: *mut nsAString, ) -> nsresult { let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst); *encoding = enc as *const Encoding; rv } pub fn decode_to_nsstring( encoding: &'static Encoding, src: &[u8], dst: &mut nsAString, ) -> (nsresult, &'static Encoding) { if let Some((enc, bom_length)) = Encoding::for_bom(src) { return ( decode_to_nsstring_without_bom_handling(enc, &src[bom_length..], dst), enc, ); } ( decode_to_nsstring_without_bom_handling(encoding, src, dst), encoding, ) } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_with_bom_removal( encoding: *const Encoding, src: *const u8, src_len: usize, dst: *mut nsAString, ) -> nsresult { decode_to_nsstring_with_bom_removal(&*encoding, slice::from_raw_parts(src, src_len), &mut *dst) } pub fn decode_to_nsstring_with_bom_removal( encoding: &'static Encoding, src: &[u8], dst: &mut nsAString, ) -> nsresult { let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") { &src[3..] } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE")) || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF")) { &src[2..] } else { src }; decode_to_nsstring_without_bom_handling(encoding, without_bom, dst) } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling( encoding: *const Encoding, src: *const u8, src_len: usize, dst: *mut nsAString, ) -> nsresult { decode_to_nsstring_without_bom_handling( &*encoding, slice::from_raw_parts(src, src_len), &mut *dst, ) } pub fn decode_to_nsstring_without_bom_handling( encoding: &'static Encoding, src: &[u8], dst: &mut nsAString, ) -> nsresult { let mut decoder = encoding.new_decoder_without_bom_handling(); let mut handle = try_start_bulk_write!( decoder.max_utf16_buffer_length(src.len()), dst, NS_ERROR_OUT_OF_MEMORY ); let (result, read, written, had_errors) = decoder.decode_to_utf16(src, handle.as_mut_slice(), true); debug_assert_eq!(result, CoderResult::InputEmpty); debug_assert_eq!(read, src.len()); debug_assert!(written <= handle.as_mut_slice().len()); let _ = handle.finish(written, true); if had_errors { return NS_OK_HAD_REPLACEMENTS; } NS_OK } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling_and_without_replacement( encoding: *const Encoding, src: *const u8, src_len: usize, dst: *mut nsAString, ) -> nsresult { decode_to_nsstring_without_bom_handling_and_without_replacement( &*encoding, slice::from_raw_parts(src, src_len), &mut *dst, ) } pub fn decode_to_nsstring_without_bom_handling_and_without_replacement( encoding: &'static Encoding, src: &[u8], dst: &mut nsAString, ) -> nsresult { let mut decoder = encoding.new_decoder_without_bom_handling(); let mut handle = try_start_bulk_write!( decoder.max_utf16_buffer_length(src.len()), dst, NS_ERROR_OUT_OF_MEMORY ); let (result, read, written) = decoder.decode_to_utf16_without_replacement(src, handle.as_mut_slice(), true); match result { DecoderResult::InputEmpty => { debug_assert_eq!(read, src.len()); debug_assert!(written <= handle.as_mut_slice().len()); let _ = handle.finish(written, true); NS_OK } DecoderResult::Malformed(_, _) => { // Let handle's drop() run NS_ERROR_UDEC_ILLEGALINPUT } DecoderResult::OutputFull => unreachable!(), } } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_encode_from_utf16( encoding: *mut *const Encoding, src: *const u16, src_len: usize, dst: *mut nsACString, ) -> nsresult { let (rv, enc) = encode_from_utf16(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst); *encoding = enc as *const Encoding; rv } pub fn encode_from_utf16( encoding: &'static Encoding, src: &[u16], dst: &mut nsACString, ) -> (nsresult, &'static Encoding) { let output_encoding = encoding.output_encoding(); let mut encoder = output_encoding.new_encoder(); let mut handle = try_start_bulk_write!( encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len()), dst, (NS_ERROR_OUT_OF_MEMORY, output_encoding) ); let mut total_read = 0; let mut total_written = 0; let mut total_had_errors = false; loop { let (result, read, written, had_errors) = encoder.encode_from_utf16( &src[total_read..], &mut (handle.as_mut_slice())[total_written..], true, ); total_read += read; total_written += written; total_had_errors |= had_errors; match result { CoderResult::InputEmpty => { debug_assert_eq!(total_read, src.len()); debug_assert!(total_written <= handle.as_mut_slice().len()); let _ = handle.finish(total_written, true); if total_had_errors { return (NS_OK_HAD_REPLACEMENTS, output_encoding); } return (NS_OK, output_encoding); } CoderResult::OutputFull => { if let Some(needed) = checked_add( total_written, encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len() - total_read), ) { if unsafe { handle .restart_bulk_write(needed, total_written, false) .is_ok() } { continue; } } return (NS_ERROR_OUT_OF_MEMORY, output_encoding); } } } } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring( encoding: *mut *const Encoding, src: *const nsACString, dst: *mut nsACString, ) -> nsresult { debug_assert_ne!(src as usize, dst as usize); let (rv, enc) = decode_to_nscstring(&**encoding, &*src, &mut *dst); *encoding = enc as *const Encoding; rv } pub fn decode_to_nscstring( encoding: &'static Encoding, src: &nsACString, dst: &mut nsACString, ) -> (nsresult, &'static Encoding) { if let Some((enc, bom_length)) = Encoding::for_bom(src) { return ( decode_from_slice_to_nscstring_without_bom_handling(enc, &src[bom_length..], dst, 0), enc, ); } ( decode_to_nscstring_without_bom_handling(encoding, src, dst), encoding, ) } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_with_bom_removal( encoding: *const Encoding, src: *const nsACString, dst: *mut nsACString, ) -> nsresult { debug_assert_ne!(src as usize, dst as usize); decode_to_nscstring_with_bom_removal(&*encoding, &*src, &mut *dst) } pub fn decode_to_nscstring_with_bom_removal( encoding: &'static Encoding, src: &nsACString, dst: &mut nsACString, ) -> nsresult { let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") { &src[3..] } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE")) || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF")) { &src[2..] } else { return decode_to_nscstring_without_bom_handling(encoding, src, dst); }; decode_from_slice_to_nscstring_without_bom_handling(encoding, without_bom, dst, 0) } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling( encoding: *const Encoding, src: *const nsACString, dst: *mut nsACString, ) -> nsresult { debug_assert_ne!(src as usize, dst as usize); decode_to_nscstring_without_bom_handling(&*encoding, &*src, &mut *dst) } pub fn decode_to_nscstring_without_bom_handling( encoding: &'static Encoding, src: &nsACString, dst: &mut nsACString, ) -> nsresult { let bytes = &src[..]; let valid_up_to = if encoding == UTF_8 { Encoding::utf8_valid_up_to(bytes) } else if encoding.is_ascii_compatible() { Encoding::ascii_valid_up_to(bytes) } else if encoding == ISO_2022_JP { Encoding::iso_2022_jp_ascii_valid_up_to(bytes) } else { return decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, 0); }; if valid_up_to == bytes.len() { if dst.fallible_assign(src).is_err() { return NS_ERROR_OUT_OF_MEMORY; } return NS_OK; } decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, valid_up_to) } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_from_slice_to_nscstring_without_bom_handling( encoding: *const Encoding, src: *const u8, src_len: usize, dst: *mut nsACString, already_validated: usize, ) -> nsresult { decode_from_slice_to_nscstring_without_bom_handling( &*encoding, slice::from_raw_parts(src, src_len), &mut *dst, already_validated, ) } fn decode_from_slice_to_nscstring_without_bom_handling( encoding: &'static Encoding, src: &[u8], dst: &mut nsACString, already_validated: usize, ) -> nsresult { let bytes = src; let mut decoder = encoding.new_decoder_without_bom_handling(); let mut handle = try_start_bulk_write!(Some(src.len()), dst, NS_ERROR_OUT_OF_MEMORY); if already_validated != 0 { (handle.as_mut_slice())[..already_validated].copy_from_slice(&bytes[..already_validated]); } let mut total_read = already_validated; let mut total_written = already_validated; let mut total_had_errors = false; loop { let (result, read, written, had_errors) = decoder.decode_to_utf8( &bytes[total_read..], &mut (handle.as_mut_slice())[total_written..], true, ); total_read += read; total_written += written; total_had_errors |= had_errors; match result { CoderResult::InputEmpty => { debug_assert_eq!(total_read, bytes.len()); let _ = handle.finish(total_written, true); if total_had_errors { return NS_OK_HAD_REPLACEMENTS; } return NS_OK; } CoderResult::OutputFull => { // Allocate for the worst case. That is, we should come // here at most once per invocation of this method. if let Some(needed) = checked_add( total_written, decoder.max_utf8_buffer_length(bytes.len() - total_read), ) { if unsafe { handle .restart_bulk_write(needed, total_written, false) .is_ok() } { continue; } } return NS_ERROR_OUT_OF_MEMORY; } } } } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling_and_without_replacement( encoding: *const Encoding, src: *const nsACString, dst: *mut nsACString, ) -> nsresult { decode_to_nscstring_without_bom_handling_and_without_replacement(&*encoding, &*src, &mut *dst) } pub fn decode_to_nscstring_without_bom_handling_and_without_replacement( encoding: &'static Encoding, src: &nsACString, dst: &mut nsACString, ) -> nsresult { let bytes = &src[..]; if encoding == UTF_8 { let valid_up_to = Encoding::utf8_valid_up_to(bytes); if valid_up_to == bytes.len() { if dst.fallible_assign(src).is_err() { return NS_ERROR_OUT_OF_MEMORY; } return NS_OK; } return NS_ERROR_UDEC_ILLEGALINPUT; } let valid_up_to = if encoding.is_ascii_compatible() { Encoding::ascii_valid_up_to(bytes) } else if encoding == ISO_2022_JP { Encoding::iso_2022_jp_ascii_valid_up_to(bytes) } else { 0 }; if valid_up_to == bytes.len() { if dst.fallible_assign(src).is_err() { return NS_ERROR_OUT_OF_MEMORY; } return NS_OK; } let mut decoder = encoding.new_decoder_without_bom_handling(); let mut handle = try_start_bulk_write!( checked_add( valid_up_to, decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to) ), dst, NS_ERROR_OUT_OF_MEMORY ); let (result, read, written) = { let dest = handle.as_mut_slice(); dest[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]); decoder.decode_to_utf8_without_replacement( &src[valid_up_to..], &mut dest[valid_up_to..], true, ) }; match result { DecoderResult::InputEmpty => { debug_assert_eq!(valid_up_to + read, src.len()); debug_assert!(valid_up_to + written <= handle.as_mut_slice().len()); let _ = handle.finish(valid_up_to + written, true); NS_OK } DecoderResult::Malformed(_, _) => { // let handle's drop() run NS_ERROR_UDEC_ILLEGALINPUT } DecoderResult::OutputFull => unreachable!(), } } #[no_mangle] pub unsafe extern "C" fn mozilla_encoding_encode_from_nscstring( encoding: *mut *const Encoding, src: *const nsACString, dst: *mut nsACString, ) -> nsresult { let (rv, enc) = encode_from_nscstring(&**encoding, &*src, &mut *dst); *encoding = enc as *const Encoding; rv } pub fn encode_from_nscstring( encoding: &'static Encoding, src: &nsACString, dst: &mut nsACString, ) -> (nsresult, &'static Encoding) { let output_encoding = encoding.output_encoding(); let bytes = &src[..]; if output_encoding == UTF_8 { let valid_up_to = Encoding::utf8_valid_up_to(bytes); if valid_up_to == bytes.len() { if dst.fallible_assign(src).is_err() { return (NS_ERROR_OUT_OF_MEMORY, output_encoding); } return (NS_OK, output_encoding); } return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding); } let valid_up_to = if output_encoding == ISO_2022_JP { Encoding::iso_2022_jp_ascii_valid_up_to(bytes) } else { debug_assert!(output_encoding.is_ascii_compatible()); Encoding::ascii_valid_up_to(bytes) }; if valid_up_to == bytes.len() { if dst.fallible_assign(src).is_err() { return (NS_ERROR_OUT_OF_MEMORY, output_encoding); } return (NS_OK, output_encoding); } // Encoder requires valid UTF-8. Using std instead of encoding_rs // to avoid unsafe blocks. let trail = if let Ok(trail) = ::std::str::from_utf8(&bytes[valid_up_to..]) { trail } else { return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding); }; let mut encoder = output_encoding.new_encoder(); let mut handle = try_start_bulk_write!( checked_add( valid_up_to, encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len()) ), dst, (NS_ERROR_OUT_OF_MEMORY, output_encoding) ); if valid_up_to != 0 { // to_mut() shouldn't fail right after setting length. (handle.as_mut_slice())[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]); } // `total_read` tracks `trail` only but `total_written` tracks the overall situation! // This asymmetry is here, because trail is materialized as `str` without resorting // to unsafe code here. let mut total_read = 0; let mut total_written = valid_up_to; let mut total_had_errors = false; loop { let (result, read, written, had_errors) = encoder.encode_from_utf8( &trail[total_read..], &mut (handle.as_mut_slice())[total_written..], true, ); total_read += read; total_written += written; total_had_errors |= had_errors; match result { CoderResult::InputEmpty => { debug_assert_eq!(valid_up_to + total_read, src.len()); debug_assert!(total_written <= handle.as_mut_slice().len()); let _ = handle.finish(total_written, true); if total_had_errors { return (NS_OK_HAD_REPLACEMENTS, output_encoding); } return (NS_OK, output_encoding); } CoderResult::OutputFull => { if let Some(needed) = checked_add( total_written, encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len() - total_read), ) { if unsafe { handle .restart_bulk_write(needed, total_written, false) .is_ok() } { continue; } } return (NS_ERROR_OUT_OF_MEMORY, output_encoding); } } } } #[inline(always)] fn checked_add(num: usize, opt: Option) -> Option { if let Some(n) = opt { n.checked_add(num) } else { None } } // Declared in nsHtml5StreamParser.cpp #[no_mangle] pub unsafe extern "C" fn xmldecl_parse( buf: *const u8, buf_len: usize, ) -> *const encoding_rs::Encoding { if let Some(encoding) = xmldecl::parse(std::slice::from_raw_parts(buf, buf_len)) { encoding } else { std::ptr::null() } }