Adding upstream version 110.0.1.upstream/110.0.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
commit: 43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree: 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/encoding_c_mem/src
parent: Initial commit. (diff)
download: firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
1 files changed, 825 insertions, 0 deletions
diff --git a/third_party/rust/encoding_c_mem/src/lib.rs b/third_party/rust/encoding_c_mem/src/lib.rs
new file mode 100644
index 0000000000..e5f31c1be0
--- /dev/null
+++ b/third_party/rust/encoding_c_mem/src/lib.rs
@@ -0,0 +1,825 @@
+// Copyright Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! FFI bindings for `encoding_rs::mem`.
+//!
+//! _Note:_ "Latin1" in this module refers to the Unicode range from U+0000 to
+//! U+00FF, inclusive, and does not refer to the windows-1252 range. This
+//! in-memory encoding is sometimes used as a storage optimization of text
+//! when UTF-16 indexing and length semantics are exposed.
+
+use encoding_rs::mem::Latin1Bidi;
+
+/// Checks whether the buffer is all-ASCII.
+///
+/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
+/// is not guaranteed to fail fast.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_ascii(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_ascii(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing
+/// only ASCII characters).
+///
+/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
+/// is not guaranteed to fail fast.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_basic_latin(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_basic_latin(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether the buffer is valid UTF-8 representing only code points
+/// less than or equal to U+00FF.
+///
+/// Fails fast. (I.e. returns before having read the whole buffer if UTF-8
+/// invalidity or code points above U+00FF are discovered.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf8_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_utf8_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether the valid UTF-8 buffer represents only code points less
+/// than or equal to U+00FF.
+///
+/// Fails fast. (I.e. returns before having read the whole buffer if code
+/// points above U+00FF are discovered.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block,
+/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `len`
+/// does not contain valid UTF-8. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_str_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_str_latin1(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+/// Checks whether the UTF-16 buffer represents only code points less than
+/// or equal to U+00FF.
+///
+/// May read the entire buffer even if it isn't all-Latin1. (I.e. the function
+/// is not guaranteed to fail fast.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf16_latin1(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_utf16_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether a potentially-invalid UTF-8 buffer contains code points
+/// that trigger right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
+///
+/// Returns `true` if the input is invalid UTF-8 or the input contains an
+/// RTL character. Returns `false` if the input is valid UTF-8 and contains
+/// no RTL characters.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf8_bidi(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_utf8_bidi(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether a valid UTF-8 buffer contains code points that trigger
+/// right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block,
+/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `len`
+/// does not contain valid UTF-8. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_str_bidi(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_str_bidi(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+/// Checks whether a UTF-16 buffer contains code points that trigger
+/// right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
+///
+/// Returns `true` if the input contains an RTL character or an unpaired
+/// high surrogate that could be the high half of an RTL character.
+/// Returns `false` if the input contains neither RTL characters nor
+/// unpaired high surrogates that could be higher halves of RTL characters.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf16_bidi(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_utf16_bidi(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether a scalar value triggers right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
+///
+/// # Undefined behavior
+///
+/// Undefined behavior ensues if `c` is not a valid Unicode Scalar Value.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_char_bidi(c: char) -> bool {
+    encoding_rs::mem::is_char_bidi(c)
+}
+
+/// Checks whether a UTF-16 code unit triggers right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
+///
+/// Since supplementary-plane right-to-left blocks are identifiable from the
+/// high surrogate without examining the low surrogate, this function returns
+/// `true` for such high surrogates, making the function suitable for handling
+/// supplementary-plane text without decoding surrogate pairs to scalar
+/// values. Obviously, such high surrogates are then reported as right-to-left
+/// even if actually unpaired.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf16_code_unit_bidi(u: u16) -> bool {
+    encoding_rs::mem::is_utf16_code_unit_bidi(u)
+}
+
+/// Checks whether a potentially invalid UTF-8 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_check_utf8_for_latin1_and_bidi(
+    buffer: *const u8,
+    len: usize,
+) -> Latin1Bidi {
+    encoding_rs::mem::check_utf8_for_latin1_and_bidi(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Checks whether a valid UTF-8 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block,
+/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `len`
+/// does not contain valid UTF-8. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_check_str_for_latin1_and_bidi(
+    buffer: *const u8,
+    len: usize,
+) -> Latin1Bidi {
+    encoding_rs::mem::check_str_for_latin1_and_bidi(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+/// Checks whether a potentially invalid UTF-16 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_check_utf16_for_latin1_and_bidi(
+    buffer: *const u16,
+    len: usize,
+) -> Latin1Bidi {
+    encoding_rs::mem::check_utf16_for_latin1_and_bidi(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced
+/// with the REPLACEMENT CHARACTER.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`) _plus one_.
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// Converts valid UTF-8 to valid UTF-16.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL`, if the two memory blocks overlap, or if the
+/// buffer designated by `src` and `src_len` does not contain valid UTF-8. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_str_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_str_to_utf16(
+        ::std::str::from_utf8_unchecked(::std::slice::from_raw_parts(src, src_len)),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// Converts potentially-invalid UTF-8 to valid UTF-16, signaling on error.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of `u16`s written or `SIZE_MAX` if the input was invalid.
+///
+/// When the input was invalid, some output may have been written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16_without_replacement(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_utf16_without_replacement(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    ).unwrap_or(::std::usize::MAX)
+}
+
+/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
+/// with the REPLACEMENT CHARACTER with potentially insufficient output
+/// space.
+///
+/// On entry, `*src_len` and `*dst_len` hold the buffer lengths. On return, they
+/// hold the number of code units read and the number of bytes written.
+///
+/// Guarantees that the bytes in the destination beyond the number of
+/// bytes claimed as written by the value stored into `*dst_len`
+/// are left unmodified.
+///
+/// Not all code units are read if there isn't enough output space.
+///
+/// Note that this method isn't designed for general streamability but for
+/// not allocating memory for the worst case up front. Specifically,
+/// if the input starts with or ends with an unpaired surrogate, those are
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// Matches the semantics of `TextEncoder.encodeInto()` from the
+/// Encoding Standard.
+///
+/// # Safety
+///
+/// If you want to convert into a `&mut str`, use
+/// `convert_utf16_to_str_partial()` instead of using this function
+/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `*src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `*dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `*src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `*dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8_partial(
+    src: *const u16,
+    src_len: *mut usize,
+    dst: *mut u8,
+    dst_len: *mut usize,
+) {
+    let (read, written) = encoding_rs::mem::convert_utf16_to_utf8_partial(
+        ::std::slice::from_raw_parts(src, *src_len),
+        ::std::slice::from_raw_parts_mut(dst, *dst_len),
+    );
+    *src_len = read;
+    *dst_len = written;
+}
+
+/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
+/// with the REPLACEMENT CHARACTER.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`) times three.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Safety
+///
+/// If you want to convert into a `&mut str`, use `convert_utf16_to_str()`
+/// instead of using this function together with the `unsafe` method
+/// `as_bytes_mut()` on `&mut str`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf16_to_utf8(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// Converts bytes whose unsigned value is interpreted as a Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-16.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// The number of `u16`s written equals the length of the source buffer.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_latin1_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+/// Converts bytes whose unsigned value is interpreted as a Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 with potentially insufficient
+/// output space.
+///
+/// On entry, `*src_len` and `*dst_len` hold the buffer lengths. On return, they
+/// hold the number of bytes read and the number of bytes written.
+///
+/// If the output isn't large enough, not all input is consumed.
+///
+/// # Safety
+///
+/// If you want to convert into a `&mut str`, use
+/// `convert_latin1_to_str_partial()` instead of using this function
+/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `*src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `*dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `*src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `*dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf8_partial(
+    src: *const u8,
+    src_len: *mut usize,
+    dst: *mut u8,
+    dst_len: *mut usize,
+) {
+    let (read, written) = encoding_rs::mem::convert_latin1_to_utf8_partial(
+        ::std::slice::from_raw_parts(src, *src_len),
+        ::std::slice::from_raw_parts_mut(dst, *dst_len),
+    );
+    *src_len = read;
+    *dst_len = written;
+}
+
+/// Converts bytes whose unsigned value is interpreted as a Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`) times two.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Safety
+///
+/// Note that this function may write garbage beyond the number of bytes
+/// indicated by the return value, so using a `&mut str` interpreted as
+/// `&mut [u8]` as the destination is not safe. If you want to convert into
+/// a `&mut str`, use `convert_latin1_to_str()` instead of this function.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf8(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_latin1_to_utf8(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// If the input is valid UTF-8 representing only Unicode code points from
+/// U+0000 to U+00FF, inclusive, converts the input into output that
+/// represents the value of each code point as the unsigned byte value of
+/// each output byte.
+///
+/// If the input does not fulfill the condition stated above, this function
+/// panics if debug assertions are enabled (and fuzzing isn't) and otherwise
+/// does something that is memory-safe without any promises about any
+/// properties of the output. In particular, callers shouldn't assume the
+/// output to be the same across crate versions or CPU architectures and
+/// should not assume that non-ASCII input can't map to ASCII output.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// Panics if debug assertions are enabled (and not fuzzing) and the input
+/// is not in the range U+0000 to U+00FF, inclusive.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_latin1_lossy(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// If the input is valid UTF-16 representing only Unicode code points from
+/// U+0000 to U+00FF, inclusive, converts the input into output that
+/// represents the value of each code point as the unsigned byte value of
+/// each output byte.
+///
+/// If the input does not fulfill the condition stated above, does something
+/// that is memory-safe without any promises about any properties of the
+/// output and will probably assert in debug builds in future versions.
+/// In particular, callers shouldn't assume the output to be the same across
+/// crate versions or CPU architectures and should not assume that non-ASCII
+/// input can't map to ASCII output.
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// The number of bytes written equals the length of the source buffer.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// (Probably also in future versions if debug assertions are enabled (and not
+/// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_utf16_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+/// Returns the index of the first unpaired surrogate or, if the input is
+/// valid UTF-16 in its entirety, the length of the input.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_utf16_valid_up_to(buffer: *const u16, len: usize) -> usize {
+    encoding_rs::mem::utf16_valid_up_to(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Returns the index of the first byte that starts an invalid byte
+/// sequence or a non-Latin1 byte sequence, or the length of the
+/// string if there are neither.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_utf8_latin1_up_to(buffer: *const u8, len: usize) -> usize {
+    encoding_rs::mem::utf8_latin1_up_to(::std::slice::from_raw_parts(buffer, len))
+}
+
+/// Returns the index of the first byte that starts a non-Latin1 byte
+/// sequence, or the length of the string if there are none.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block,
+/// if `buffer` is `NULL`, or if the memory block does not contain valid UTF-8.
+/// (If `len` is `0`, `buffer` may be bogus but still has to be
+/// non-`NULL`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_str_latin1_up_to(buffer: *const u8, len: usize) -> usize {
+    encoding_rs::mem::str_latin1_up_to(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+/// Replaces unpaired surrogates in the input with the REPLACEMENT CHARACTER.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `len` don't designate a valid memory block
+/// or if `buffer` is `NULL`. (If `len` is `0`, `buffer` may be bogus but
+/// still has to be non-`NULL` and aligned.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_ensure_utf16_validity(buffer: *mut u16, len: usize) {
+    encoding_rs::mem::ensure_utf16_validity(::std::slice::from_raw_parts_mut(buffer, len));
+}
+
+/// Copies ASCII from source to destination up to the first non-ASCII byte
+/// (or the end of the input if it is ASCII in its entirety).
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_copy_ascii_to_ascii(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::copy_ascii_to_ascii(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// Copies ASCII from source to destination, zero-extending it to UTF-16, up to
+/// the first non-ASCII byte (or the end of the input if it is ASCII in its
+/// entirety).
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_copy_ascii_to_basic_latin(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::copy_ascii_to_basic_latin(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+/// Copies Basic Latin from source to destination, narrowing it to ASCII, up to
+/// the first non-Basic Latin code unit (or the end of the input if it is
+/// Basic Latin in its entirety).
+///
+/// The length of the destination buffer (`dst_len`) must be at least the length
+/// of the source buffer (`src_len`).
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
+/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
+/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
+/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
+/// aligned. Likewise for `dst` and `dst_len`.)
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_copy_basic_latin_to_ascii(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::copy_basic_latin_to_ascii(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
commit	43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree	620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/encoding_c_mem/src
parent	Initial commit. (diff)
download	firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip