3 files changed, 2214 insertions, 0 deletions
diff --git a/third_party/rust/encoding_c/include/encoding_rs.h b/third_party/rust/encoding_c/include/encoding_rs.h
new file mode 100644
index 0000000000..39231b7a0f
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs.h
@@ -0,0 +1,692 @@
+// Copyright Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using encoding_c/build.rs.
+
+#ifndef cheddar_generated_encoding_rs_h
+#define cheddar_generated_encoding_rs_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "encoding_rs_statics.h"
+
+/// Implements the
+/// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
+/// algorithm.
+///
+/// If, after ASCII-lowercasing and removing leading and trailing
+/// whitespace, the argument matches a label defined in the Encoding
+/// Standard, `const ENCODING_RS_ENCODING*` representing the corresponding
+/// encoding is returned. If there is no match, `NULL` is returned.
+///
+/// This is the right function to use if the action upon the method returning
+/// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`) instead.
+/// When the action upon the method returning `NULL` is not to proceed with
+/// a fallback but to refuse processing, `encoding_for_label_no_replacement()`
+/// is more appropriate.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_label(uint8_t const* label,
+                                               size_t label_len);
+
+/// This function behaves the same as `encoding_for_label()`, except when
+/// `encoding_for_label()` would return `REPLACEMENT_ENCODING`, this method
+/// returns `NULL` instead.
+///
+/// This method is useful in scenarios where a fatal error is required
+/// upon invalid label, because in those cases the caller typically wishes
+/// to treat the labels that map to the replacement encoding as fatal
+/// errors, too.
+///
+/// It is not OK to use this function when the action upon the method returning
+/// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`). In
+/// such a case, the `encoding_for_label()` function should be used instead
+/// in order to avoid unsafe fallback for labels that `encoding_for_label()`
+/// maps to `REPLACEMENT_ENCODING`.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_label_no_replacement(
+    uint8_t const* label, size_t label_len);
+
+/// Performs non-incremental BOM sniffing.
+///
+/// The argument must either be a buffer representing the entire input
+/// stream (non-streaming case) or a buffer representing at least the first
+/// three bytes of the input stream (streaming case).
+///
+/// Returns `UTF_8_ENCODING`, `UTF_16LE_ENCODING` or `UTF_16BE_ENCODING` if the
+/// argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM or `NULL`
+/// otherwise. Upon return, `*buffer_len` is the length of the BOM (zero if
+/// there is no BOM).
+///
+/// `buffer` must be non-`NULL` even if `*buffer_len` is zero. When
+/// `*buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `*buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_bom(uint8_t const* buffer,
+                                             size_t* buffer_len);
+
+/// Writes the name of the given `ENCODING_RS_ENCODING` to a caller-supplied
+/// buffer as ASCII and returns the number of bytes / ASCII characters written.
+///
+/// The output is not null-terminated.
+///
+/// The caller _MUST_ ensure that `name_out` points to a buffer whose length
+/// is at least `ENCODING_NAME_MAX_LENGTH` bytes.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL` or if `name_out` doesn't point to
+/// a valid block of memory whose length is at least
+/// `ENCODING_NAME_MAX_LENGTH` bytes.
+size_t encoding_name(ENCODING_RS_ENCODING const* encoding, uint8_t* name_out);
+
+/// Checks whether the _output encoding_ of this encoding can encode every
+/// Unicode scalar. (Only true if the output encoding is UTF-8.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoding_can_encode_everything(ENCODING_RS_ENCODING const* encoding);
+
+/// Checks whether the bytes 0x00...0x7F map exclusively to the characters
+/// U+0000...U+007F and vice versa.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoding_is_ascii_compatible(ENCODING_RS_ENCODING const* encoding);
+
+/// Checks whether this encoding maps one byte to one Basic Multilingual
+/// Plane code point (i.e. byte length equals decoded UTF-16 length) and
+/// vice versa (for mappable characters).
+///
+/// `true` iff this encoding is on the list of [Legacy single-byte
+/// encodings](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings)
+/// in the spec or x-user-defined.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoding_is_single_byte(ENCODING_RS_ENCODING const* encoding);
+
+/// Returns the _output encoding_ of this encoding. This is UTF-8 for
+/// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* encoding_output_encoding(
+    ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// on the heap with BOM sniffing enabled and returns a pointer to the
+/// newly-allocated `ENCODING_RS_DECODER`.
+///
+/// BOM sniffing may cause the returned decoder to morph into a decoder
+/// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller
+/// _MUST_ deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// on the heap with BOM removal and returns a pointer to the newly-allocated
+/// `ENCODING_RS_DECODER`.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller
+/// _MUST_ deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder_with_bom_removal(
+    ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// on the heap with BOM handling disabled and returns a pointer to the
+/// newly-allocated `ENCODING_RS_DECODER`.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal()` instead of this function to cause
+/// the BOM to be removed.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller
+/// _MUST_ deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder_without_bom_handling(
+    ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// into memory provided by the caller with BOM sniffing enabled. (In practice,
+/// the target should likely be a pointer previously returned by
+/// `encoding_new_decoder()`.)
+///
+/// Note: If the caller has already performed BOM sniffing but has
+/// not removed the BOM, the caller should still use this function in
+/// order to cause the BOM to be ignored.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_into(ENCODING_RS_ENCODING const* encoding,
+                               ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// into memory provided by the caller with BOM removal.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller
+/// _MUST_ deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_with_bom_removal_into(
+    ENCODING_RS_ENCODING const* encoding, ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING`
+/// into memory provided by the caller with BOM handling disabled.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal_into()` instead of this function to
+/// cause the BOM to be removed.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_without_bom_handling_into(
+    ENCODING_RS_ENCODING const* encoding, ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_ENCODER` for the given `ENCODING_RS_ENCODING`
+/// on the heap and returns a pointer to the newly-allocated
+/// `ENCODING_RS_ENCODER`. (Exception: if the `ENCODING_RS_ENCODING` is
+/// `replacement`, a new `ENCODING_RS_ENCODER` for UTF-8 is instantiated (and
+/// that `ENCODING_RS_ENCODER` reports `UTF_8` as its `ENCODING_RS_ENCODING`).)
+///
+/// Once the allocated `ENCODING_RS_ENCODER` is no longer needed, the caller
+/// _MUST_ deallocate it by passing the pointer returned by this function to
+/// `encoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODER* encoding_new_encoder(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_ENCODER` for the given `ENCODING_RS_ENCODING`
+/// into memory provided by the caller. (In practice, the target should likely
+/// be a pointer previously returned by `encoding_new_encoder()`.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_encoder_into(ENCODING_RS_ENCODING const* encoding,
+                               ENCODING_RS_ENCODER* encoder);
+
+/// Validates UTF-8.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// UTF-8 or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+/// Validates ASCII.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// ASCII or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+/// Validates ISO-2022-JP ASCII-state data.
+///
+/// Returns the index of the first byte that makes the input not representable
+/// in the ASCII state of ISO-2022-JP or `buffer_len` if `buffer` is entirely
+/// representable in the ASCII state of ISO-2022-JP.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_iso_2022_jp_ascii_valid_up_to(uint8_t const* buffer,
+                                              size_t buffer_len);
+
+/// Deallocates a `ENCODING_RS_DECODER` previously allocated by
+/// `encoding_new_decoder()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+void decoder_free(ENCODING_RS_DECODER* decoder);
+
+/// The `ENCODING_RS_ENCODING` this `ENCODING_RS_DECODER` is for.
+///
+/// BOM sniffing can change the return value of this method during the life
+/// of the decoder.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* decoder_encoding(
+    ENCODING_RS_DECODER const* decoder);
+
+/// Query the worst-case UTF-8 output size _with replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding with
+/// errors handled by outputting a REPLACEMENT CHARACTER for each malformed
+/// sequence or `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf8_buffer_length(ENCODING_RS_DECODER const* decoder,
+                                      size_t byte_length);
+
+/// Query the worst-case UTF-8 output size _without replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding without
+/// replacement error handling or `SIZE_MAX` if `size_t` would overflow.
+///
+/// Note that this value may be too small for the `_with_replacement` case.
+/// Use `decoder_max_utf8_buffer_length()` for that case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf8_buffer_length_without_replacement(
+    ENCODING_RS_DECODER const* decoder, size_t byte_length);
+
+/// Incrementally decode a byte stream into UTF-8 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf8(ENCODING_RS_DECODER* decoder,
+                                uint8_t const* src, size_t* src_len,
+                                uint8_t* dst, size_t* dst_len, bool last,
+                                bool* had_replacements);
+
+/// Incrementally decode a byte stream into UTF-8 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf8_without_replacement(
+    ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len,
+    uint8_t* dst, size_t* dst_len, bool last);
+
+/// Query the worst-case UTF-16 output size (with or without replacement).
+///
+/// Returns the size of the output buffer in UTF-16 code units (`char16_t`)
+/// that will not overflow given the current state of the decoder and
+/// `u16_length` additional input bytes (the parameter counts bytes despite
+/// its name) or `SIZE_MAX` if `size_t` would overflow.
+///
+/// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
+/// return value of this method applies also in the
+/// `_without_replacement` case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf16_buffer_length(ENCODING_RS_DECODER const* decoder,
+                                       size_t u16_length);
+
+/// Incrementally decode a byte stream into UTF-16 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf16(ENCODING_RS_DECODER* decoder,
+                                 uint8_t const* src, size_t* src_len,
+                                 char16_t* dst, size_t* dst_len, bool last,
+                                 bool* had_replacements);
+
+/// Incrementally decode a byte stream into UTF-16 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf16_without_replacement(
+    ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len,
+    char16_t* dst, size_t* dst_len, bool last);
+
+/// Checks for compatibility with storing Unicode scalar values as unsigned
+/// bytes taking into account the state of the decoder.
+///
+/// Returns `SIZE_MAX` if the decoder is not in a neutral state, including waiting
+/// for the BOM, or if the encoding is never Latin1-byte-compatible.
+///
+/// Otherwise returns the index of the first byte whose unsigned value doesn't
+/// directly correspond to the decoded Unicode scalar value, or the length
+/// of the input if all bytes in the input decode directly to scalar values
+/// corresponding to the unsigned byte values.
+///
+/// Does not change the state of the decoder.
+///
+/// Do not use this unless you are supporting SpiderMonkey/V8-style string
+/// storage optimizations.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t decoder_latin1_byte_compatible_up_to(ENCODING_RS_DECODER const* decoder,
+                                            uint8_t const* buffer,
+                                            size_t buffer_len);
+
+/// Deallocates an `ENCODING_RS_ENCODER` previously allocated by
+/// `encoding_new_encoder()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+void encoder_free(ENCODING_RS_ENCODER* encoder);
+
+/// The `ENCODING_RS_ENCODING` this `ENCODING_RS_ENCODER` is for.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* encoder_encoding(
+    ENCODING_RS_ENCODER const* encoder);
+
+/// Returns `true` if this is an ISO-2022-JP encoder that's not in the
+/// ASCII state and `false` otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoder_has_pending_state(ENCODING_RS_ENCODER const* encoder);
+
+/// Query the worst-case output size when encoding from UTF-8 with
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` number of
+/// additional input code units if there are no unmappable characters in
+/// the input or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf8_if_no_unmappables(
+    ENCODING_RS_ENCODER const* encoder, size_t byte_length);
+
+/// Query the worst-case output size when encoding from UTF-8 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` number of
+/// additional input code units or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf8_without_replacement(
+    ENCODING_RS_ENCODER const* encoder, size_t byte_length);
+
+/// Incrementally encode into byte stream from UTF-8 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf8(ENCODING_RS_ENCODER* encoder,
+                                  uint8_t const* src, size_t* src_len,
+                                  uint8_t* dst, size_t* dst_len, bool last,
+                                  bool* had_replacements);
+
+/// Incrementally encode into byte stream from UTF-8 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf8_without_replacement(
+    ENCODING_RS_ENCODER* encoder, uint8_t const* src, size_t* src_len,
+    uint8_t* dst, size_t* dst_len, bool last);
+
+/// Query the worst-case output size when encoding from UTF-16 with
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` number of
+/// additional input code units if there are no unmappable characters in
+/// the input or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf16_if_no_unmappables(
+    ENCODING_RS_ENCODER const* encoder, size_t u16_length);
+
+/// Query the worst-case output size when encoding from UTF-16 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` number of
+/// additional input code units or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf16_without_replacement(
+    ENCODING_RS_ENCODER const* encoder, size_t u16_length);
+
+/// Incrementally encode into byte stream from UTF-16 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf16(ENCODING_RS_ENCODER* encoder,
+                                   char16_t const* src, size_t* src_len,
+                                   uint8_t* dst, size_t* dst_len, bool last,
+                                   bool* had_replacements);
+
+/// Incrementally encode into byte stream from UTF-16 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf16_without_replacement(
+    ENCODING_RS_ENCODER* encoder, char16_t const* src, size_t* src_len,
+    uint8_t* dst, size_t* dst_len, bool last);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/rust/encoding_c/include/encoding_rs_cpp.h b/third_party/rust/encoding_c/include/encoding_rs_cpp.h
new file mode 100644
index 0000000000..4ec5181ee9
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs_cpp.h
@@ -0,0 +1,1351 @@
+// Copyright Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#pragma once
+
+#ifndef encoding_rs_cpp_h_
+#define encoding_rs_cpp_h_
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <vector>
+#include "gsl/gsl"
+
+namespace encoding_rs {  // forward declarations used by the macros below
+class Encoding;
+class Decoder;
+class Encoder;
+}  // namespace encoding_rs
+
+#define ENCODING_RS_ENCODING encoding_rs::Encoding
+#define ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR \
+  gsl::not_null<const encoding_rs::Encoding*>
+#define ENCODING_RS_ENCODER encoding_rs::Encoder
+#define ENCODING_RS_DECODER encoding_rs::Decoder
+
+#include "encoding_rs.h"
+
+namespace encoding_rs {
+
+/**
+ * A converter that decodes a byte stream into Unicode according to a
+ * character encoding in a streaming (incremental) manner.
+ *
+ * The various `decode_*` methods take an input buffer (`src`) and an output
+ * buffer `dst` both of which are caller-allocated. There are variants for
+ * both UTF-8 and UTF-16 output buffers.
+ *
+ * A `decode_*` method decodes bytes from `src` into Unicode characters stored
+ * into `dst` until one of the following three things happens:
+ *
+ * 1. A malformed byte sequence is encountered (`*_without_replacement`
+ *    variants only).
+ *
+ * 2. The output buffer has been filled so near capacity that the decoder
+ *    cannot be sure that processing an additional byte of input wouldn't
+ *    cause so much output that the output buffer would overflow.
+ *
+ * 3. All the input bytes have been processed.
+ *
+ * The `decode_*` method then returns a tuple of a status indicating which one
+ * of the three reasons to return happened, how many input bytes were read,
+ * how many output code units (`uint8_t` when decoding into UTF-8 and `char16_t`
+ * when decoding to UTF-16) were written, and in the case of the
+ * variants performing replacement, a boolean indicating whether an error was
+ * replaced with the REPLACEMENT CHARACTER during the call.
+ *
+ * The number of bytes "written" is what's logically written. Garbage may be
+ * written in the output buffer beyond the point logically written to.
+ *
+ * In the case of the `*_without_replacement` variants, the status is a
+ * `uint32_t` whose possible values are packed info about a malformed byte
+ * sequence, `OUTPUT_FULL` and `INPUT_EMPTY` (corresponding to the three cases
+ * listed above).
+ *
+ * Packed info about malformed sequences has the following format:
+ * The lowest 8 bits, which can have the decimal value 0, 1, 2 or 3,
+ * indicate the number of bytes that were consumed after the malformed
+ * sequence, and the next-lowest 8 bits, when shifted right by 8, indicate
+ * the length of the malformed byte sequence (possible decimal values 1, 2,
+ * 3 or 4). The maximum possible sum of the two is 6.
+ *
+ * In the case of methods whose name does not end with
+ * `*_without_replacement`, malformed sequences are automatically replaced
+ * with the REPLACEMENT CHARACTER and errors do not cause the methods to
+ * return early.
+ *
+ * When decoding to UTF-8, the output buffer must have at least 4 bytes of
+ * space. When decoding to UTF-16, the output buffer must have at least two
+ * UTF-16 code units (`char16_t`) of space.
+ *
+ * When decoding to UTF-8 without replacement, the methods are guaranteed
+ * not to return indicating that more output space is needed if the length
+ * of the output buffer is at least the length returned by
+ * `max_utf8_buffer_length_without_replacement()`. When decoding to UTF-8
+ * with replacement, the length of the output buffer that guarantees the
+ * methods not to return indicating that more output space is needed is given
+ * by `max_utf8_buffer_length()`. When decoding to UTF-16 with
+ * or without replacement, the length of the output buffer that guarantees
+ * the methods not to return indicating that more output space is needed is
+ * given by `max_utf16_buffer_length()`.
+ *
+ * The output written into `dst` is guaranteed to be valid UTF-8 or UTF-16,
+ * and the output after each `decode_*` call is guaranteed to consist of
+ * complete characters. (I.e. the code unit sequence for the last character is
+ * guaranteed not to be split across output buffers.)
+ *
+ * The boolean argument `last` indicates that the end of the stream is reached
+ * when all the bytes in `src` have been consumed.
+ *
+ * A `Decoder` object can be used to incrementally decode a byte stream.
+ *
+ * During the processing of a single stream, the caller must call `decode_*`
+ * zero or more times with `last` set to `false` and then call `decode_*` at
+ * least once with `last` set to `true`. If `decode_*` returns `INPUT_EMPTY`,
+ * the processing of the stream has ended. Otherwise, the caller must call
+ * `decode_*` again with `last` set to `true` (or treat a malformed result,
+ * i.e. neither `INPUT_EMPTY` nor `OUTPUT_FULL`, as a fatal error).
+ *
+ * Once the stream has ended, the `Decoder` object must not be used anymore.
+ * That is, you need to create another one to process another stream.
+ *
+ * When the decoder returns `OUTPUT_FULL` or the decoder returns a malformed
+ * result and the caller does not wish to treat it as a fatal error, the input
+ * buffer `src` may not have been completely consumed. In that case, the caller
+ * must pass the unconsumed contents of `src` to `decode_*` again upon the next
+ * call.
+ *
+ * # Infinite loops
+ *
+ * When converting with a fixed-size output buffer whose size is too small to
+ * accommodate one character of output, an infinite loop ensues. When
+ * converting with a fixed-size output buffer, it generally makes sense to
+ * make the buffer fairly large (e.g. couple of kilobytes).
+ */
+class Decoder final {
+ public:
+  ~Decoder() {}
+  static inline void operator delete(void* decoder) {
+    decoder_free(static_cast<Decoder*>(decoder));  // release via the C API
+  }
+
+  /**
+   * The `Encoding` this `Decoder` is for.
+   *
+   * BOM sniffing can change the return value of this method during the life
+   * of the decoder.
+   */
+  inline gsl::not_null<const Encoding*> encoding() const {
+    return gsl::not_null<const Encoding*>(decoder_encoding(this));
+  }
+
+  /**
+   * Query the worst-case UTF-8 output size _with replacement_.
+   *
+   * Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes when decoding with
+   * errors handled by outputting a REPLACEMENT CHARACTER for each malformed
+   * sequence or `std::optional` without value if `size_t` would overflow.
+   */
+  inline std::optional<size_t> max_utf8_buffer_length(
+      size_t byte_length) const {
+    size_t val = decoder_max_utf8_buffer_length(this, byte_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Query the worst-case UTF-8 output size _without replacement_.
+   *
+   * Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes when decoding without
+   * replacement error handling or `std::optional` without value if `size_t`
+   * would overflow.
+   *
+   * Note that this value may be too small for the `_with_replacement` case.
+   * Use `max_utf8_buffer_length()` for that case.
+   */
+  inline std::optional<size_t> max_utf8_buffer_length_without_replacement(
+      size_t byte_length) const {
+    size_t val =
+        decoder_max_utf8_buffer_length_without_replacement(this, byte_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-8 with malformed sequences
+   * replaced with the REPLACEMENT CHARACTER.
+   *
+   * Returns (status, bytes read, code units written, had replacements); see
+   * the class documentation for the `decode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf8(
+      gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    bool had_replacements = false;  // out-param; never returned uninitialized
+    uint32_t result =
+        decoder_decode_to_utf8(this, null_to_bogus<const uint8_t>(src.data()),
+                               &src_read, null_to_bogus<uint8_t>(dst.data()),
+                               &dst_written, last, &had_replacements);
+    return {result, src_read, dst_written, had_replacements};
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-8 _without replacement_.
+   *
+   * Returns (status, bytes read, code units written); see the class
+   * documentation for the `decode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  decode_to_utf8_without_replacement(gsl::span<const uint8_t> src,
+                                     gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = decoder_decode_to_utf8_without_replacement(
+        this, null_to_bogus<const uint8_t>(src.data()), &src_read,
+        null_to_bogus<uint8_t>(dst.data()), &dst_written, last);
+    return {result, src_read, dst_written};
+  }
+
+  /**
+   * Query the worst-case UTF-16 output size (with or without replacement).
+   *
+   * Returns the size of the output buffer in UTF-16 code units (`char16_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes or `std::optional`
+   * without value if `size_t` would overflow.
+   *
+   * Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
+   * return value of this method applies also in the
+   * `_without_replacement` case.
+   */
+  inline std::optional<size_t> max_utf16_buffer_length(
+      size_t byte_length) const {
+    size_t val = decoder_max_utf16_buffer_length(this, byte_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-16 with malformed sequences
+   * replaced with the REPLACEMENT CHARACTER.
+   *
+   * Returns (status, bytes read, code units written, had replacements); see
+   * the class documentation for the `decode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf16(
+      gsl::span<const uint8_t> src, gsl::span<char16_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    bool had_replacements = false;  // out-param; never returned uninitialized
+    uint32_t result =
+        decoder_decode_to_utf16(this, null_to_bogus<const uint8_t>(src.data()),
+                                &src_read, null_to_bogus<char16_t>(dst.data()),
+                                &dst_written, last, &had_replacements);
+    return {result, src_read, dst_written, had_replacements};
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-16 _without replacement_.
+   *
+   * Returns (status, bytes read, code units written); see the class
+   * documentation for the `decode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  decode_to_utf16_without_replacement(gsl::span<const uint8_t> src,
+                                      gsl::span<char16_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = decoder_decode_to_utf16_without_replacement(
+        this, null_to_bogus<const uint8_t>(src.data()), &src_read,
+        null_to_bogus<char16_t>(dst.data()), &dst_written, last);
+    return {result, src_read, dst_written};
+  }
+
+  /**
+   * Checks for compatibility with storing Unicode scalar values as unsigned
+   * bytes taking into account the state of the decoder.
+   *
+   * Returns `std::nullopt` if the decoder is not in a neutral state, including
+   * waiting for the BOM, or if the encoding is never Latin1-byte-compatible.
+   *
+   * Otherwise returns the index of the first byte whose unsigned value doesn't
+   * directly correspond to the decoded Unicode scalar value, or the length
+   * of the input if all bytes in the input decode directly to scalar values
+   * corresponding to the unsigned byte values.
+   *
+   * Does not change the state of the decoder.
+   *
+   * Do not use this unless you are supporting SpiderMonkey/V8-style string
+   * storage optimizations.
+   */
+  inline std::optional<size_t> latin1_byte_compatible_up_to(
+      gsl::span<const uint8_t> buffer) const {
+    size_t val = decoder_latin1_byte_compatible_up_to(
+        this, null_to_bogus<const uint8_t>(buffer.data()),
+        static_cast<size_t>(buffer.size()));
+    if (val == SIZE_MAX) {  // sentinel for the `std::nullopt` cases above
+      return std::nullopt;
+    }
+    return val;
+  }
+
+ private:
+  /**
+   * Replaces `nullptr` with a bogus pointer suitable for use as part of a
+   * zero-length Rust slice.
+   */
+  template <class T>
+  static inline T* null_to_bogus(T* ptr) {
+    return ptr ? ptr : reinterpret_cast<T*>(alignof(T));  // non-null, aligned
+  }
+
+  Decoder() = delete;  // not constructible or copyable from C++
+  Decoder(const Decoder&) = delete;
+  Decoder& operator=(const Decoder&) = delete;
+};
+
+/**
+ * A converter that encodes a Unicode stream into bytes according to a
+ * character encoding in a streaming (incremental) manner.
+ *
+ * The various `encode_*` methods take an input buffer (`src`) and an output
+ * buffer `dst` both of which are caller-allocated. There are variants for
+ * both UTF-8 and UTF-16 input buffers.
+ *
+ * An `encode_*` method encodes characters from `src` into bytes stored
+ * into `dst` until one of the following three things happens:
+ *
+ * 1. An unmappable character is encountered (`*_without_replacement` variants
+ *    only).
+ *
+ * 2. The output buffer has been filled so near capacity that the encoder
+ *    cannot be sure that processing an additional character of input wouldn't
+ *    cause so much output that the output buffer would overflow.
+ *
+ * 3. All the input characters have been processed.
+ *
+ * The `encode_*` method then returns a tuple of a status indicating which one
+ * of the three reasons to return happened, how many input code units (`uint8_t`
+ * when encoding from UTF-8 and `char16_t` when encoding from UTF-16) were read,
+ * how many output bytes were written, and in the case of the variants that
+ * perform replacement, a boolean indicating whether an unmappable
+ * character was replaced with a numeric character reference during the call.
+ *
+ * The number of bytes "written" is what's logically written. Garbage may be
+ * written in the output buffer beyond the point logically written to.
+ *
+ * In the case of the methods whose name ends with
+ * `*_without_replacement`, the status is a `uint32_t` whose possible values
+ * are an unmappable code point, `OUTPUT_FULL` and `INPUT_EMPTY` (corresponding
+ * to the three cases listed above).
+ *
+ * In the case of methods whose name does not end with
+ * `*_without_replacement`, unmappable characters are automatically replaced
+ * with the corresponding numeric character references and unmappable
+ * characters do not cause the methods to return early.
+ *
+ * When encoding from UTF-8 without replacement, the methods are guaranteed
+ * not to return indicating that more output space is needed if the length
+ * of the output buffer is at least the length returned by
+ * `max_buffer_length_from_utf8_without_replacement()`. When encoding from
+ * UTF-8 with replacement, the length of the output buffer that guarantees the
+ * methods not to return indicating that more output space is needed in the
+ * absence of unmappable characters is given by
+ * `max_buffer_length_from_utf8_if_no_unmappables()`. When encoding from
+ * UTF-16 without replacement, the methods are guaranteed not to return
+ * indicating that more output space is needed if the length of the output
+ * buffer is at least the length returned by
+ * `max_buffer_length_from_utf16_without_replacement()`. When encoding
+ * from UTF-16 with replacement, the length of the output buffer that
+ * guarantees the methods not to return indicating that more output space is
+ * needed in the absence of unmappable characters is given by
+ * `max_buffer_length_from_utf16_if_no_unmappables()`.
+ * When encoding with replacement, applications are not expected to size the
+ * buffer for the worst case ahead of time but to resize the buffer if there
+ * are unmappable characters. This is why max length queries are only available
+ * for the case where there are no unmappable characters.
+ *
+ * When encoding from UTF-8, each `src` buffer _must_ be valid UTF-8. When
+ * encoding from UTF-16, unpaired surrogates in the input are treated as U+FFFD
+ * REPLACEMENT CHARACTERS. Therefore, in order for astral characters not to
+ * turn into a pair of REPLACEMENT CHARACTERS, the caller must ensure that
+ * surrogate pairs are not split across input buffer boundaries.
+ *
+ * After an `encode_*` call returns, the output produced so far, taken as a
+ * whole from the start of the stream, is guaranteed to consist of a valid
+ * byte sequence in the target encoding. (I.e. the code unit sequence for a
+ * character is guaranteed not to be split across output buffers. However, due
+ * to the stateful nature of ISO-2022-JP, the stream needs to be considered
+ * from the start for it to be valid. For other encodings, the validity holds
+ * on a per-output buffer basis.)
+ *
+ * The boolean argument `last` indicates that the end of the stream is reached
+ * when all the characters in `src` have been consumed. This argument is needed
+ * for ISO-2022-JP and is ignored for other encodings.
+ *
+ * An `Encoder` object can be used to incrementally encode a byte stream.
+ *
+ * During the processing of a single stream, the caller must call `encode_*`
+ * zero or more times with `last` set to `false` and then call `encode_*` at
+ * least once with `last` set to `true`. If `encode_*` returns `INPUT_EMPTY`,
+ * the processing of the stream has ended. Otherwise, the caller must call
+ * `encode_*` again with `last` set to `true` (or treat an unmappable result,
+ * i.e. neither `INPUT_EMPTY` nor `OUTPUT_FULL`, as a fatal error).
+ *
+ * Once the stream has ended, the `Encoder` object must not be used anymore.
+ * That is, you need to create another one to process another stream.
+ *
+ * When the encoder returns `OUTPUT_FULL` or the encoder returns an unmappable
+ * result and the caller does not wish to treat it as a fatal error, the input
+ * buffer `src` may not have been completely consumed. In that case, the caller
+ * must pass the unconsumed contents of `src` to `encode_*` again upon the next
+ * call.
+ *
+ * # Infinite loops
+ *
+ * When converting with a fixed-size output buffer whose size is too small to
+ * accommodate one character of output, an infinite loop ensues. When
+ * converting with a fixed-size output buffer, it generally makes sense to
+ * make the buffer fairly large (e.g. couple of kilobytes).
+ */
+class Encoder final {
+ public:
+  ~Encoder() {}
+
+  static inline void operator delete(void* encoder) {
+    encoder_free(static_cast<Encoder*>(encoder));  // release via the C API
+  }
+
+  /**
+   * The `Encoding` this `Encoder` is for.
+   */
+  inline gsl::not_null<const Encoding*> encoding() const {
+    return gsl::not_null<const Encoding*>(encoder_encoding(this));
+  }
+
+  /**
+   * Returns `true` if this is an ISO-2022-JP encoder that's not in the
+   * ASCII state and `false` otherwise.
+   */
+  inline bool has_pending_state() const {
+    return encoder_has_pending_state(this);
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-8 with
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `byte_length` number of
+   * additional input code units if there are no unmappable characters in
+   * the input or `std::nullopt` if `size_t` would overflow.
+   */
+  inline std::optional<size_t> max_buffer_length_from_utf8_if_no_unmappables(
+      size_t byte_length) const {
+    size_t val = encoder_max_buffer_length_from_utf8_if_no_unmappables(
+        this, byte_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-8 without
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `byte_length` number of
+   * additional input code units or `std::nullopt` if `size_t` would overflow.
+   */
+  inline std::optional<size_t> max_buffer_length_from_utf8_without_replacement(
+      size_t byte_length) const {
+    size_t val = encoder_max_buffer_length_from_utf8_without_replacement(
+        this, byte_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-8 with unmappable
+   * characters replaced with HTML (decimal) numeric character references.
+   *
+   * Returns (status, code units read, bytes written, had replacements); see
+   * the class documentation for the `encode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf8(
+      std::string_view src, gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    bool had_replacements = false;  // out-param; never returned uninitialized
+    uint32_t result = encoder_encode_from_utf8(
+        this,
+        null_to_bogus<const uint8_t>(
+            reinterpret_cast<const uint8_t*>(src.data())),
+        &src_read, null_to_bogus<uint8_t>(dst.data()), &dst_written, last,
+        &had_replacements);
+    return {result, src_read, dst_written, had_replacements};
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-8 _without replacement_.
+   *
+   * Returns (status, code units read, bytes written); see the class
+   * documentation for the `encode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  encode_from_utf8_without_replacement(std::string_view src,
+                                       gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = encoder_encode_from_utf8_without_replacement(
+        this,
+        null_to_bogus<const uint8_t>(
+            reinterpret_cast<const uint8_t*>(src.data())),
+        &src_read, null_to_bogus<uint8_t>(dst.data()), &dst_written, last);
+    return {result, src_read, dst_written};
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-16 with
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `u16_length` number of
+   * additional input code units if there are no unmappable characters in
+   * the input or `std::nullopt` if `size_t` would overflow.
+   */
+  inline std::optional<size_t> max_buffer_length_from_utf16_if_no_unmappables(
+      size_t u16_length) const {
+    size_t val = encoder_max_buffer_length_from_utf16_if_no_unmappables(
+        this, u16_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-16 without
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `u16_length` number of
+   * additional input code units or `std::nullopt` if `size_t` would overflow.
+   */
+  inline std::optional<size_t> max_buffer_length_from_utf16_without_replacement(
+      size_t u16_length) const {
+    size_t val = encoder_max_buffer_length_from_utf16_without_replacement(
+        this, u16_length);
+    if (val == SIZE_MAX) {  // the C API reports overflow as `SIZE_MAX`
+      return std::nullopt;
+    }
+    return val;
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-16 with unmappable
+   * characters replaced with HTML (decimal) numeric character references.
+   *
+   * Returns (status, code units read, bytes written, had replacements); see
+   * the class documentation for the `encode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf16(
+      std::u16string_view src, gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    bool had_replacements = false;  // out-param; never returned uninitialized
+    uint32_t result = encoder_encode_from_utf16(
+        this, null_to_bogus<const char16_t>(src.data()), &src_read,
+        null_to_bogus<uint8_t>(dst.data()), &dst_written, last,
+        &had_replacements);
+    return {result, src_read, dst_written, had_replacements};
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-16 _without replacement_.
+   *
+   * Returns (status, code units read, bytes written); see the class
+   * documentation for the `encode_*` methods collectively.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  encode_from_utf16_without_replacement(std::u16string_view src,
+                                        gsl::span<uint8_t> dst, bool last) {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = encoder_encode_from_utf16_without_replacement(
+        this, null_to_bogus<const char16_t>(src.data()), &src_read,
+        null_to_bogus<uint8_t>(dst.data()), &dst_written, last);
+    return {result, src_read, dst_written};
+  }
+
+ private:
+  /**
+   * Replaces `nullptr` with a bogus pointer suitable for use as part of a
+   * zero-length Rust slice.
+   */
+  template <class T>
+  static inline T* null_to_bogus(T* ptr) {
+    return ptr ? ptr : reinterpret_cast<T*>(alignof(T));  // non-null, aligned
+  }
+
+  Encoder() = delete;  // not constructible or copyable from C++
+  Encoder(const Encoder&) = delete;
+  Encoder& operator=(const Encoder&) = delete;
+};
+
+/**
+ * An encoding as defined in the Encoding Standard
+ * (https://encoding.spec.whatwg.org/).
+ *
+ * An _encoding_ defines a mapping from a byte sequence to a Unicode code point
+ * sequence and, in most cases, vice versa. Each encoding has a name, an output
+ * encoding, and one or more labels.
+ *
+ * _Labels_ are ASCII-case-insensitive strings that are used to identify an
+ * encoding in formats and protocols. The _name_ of the encoding is the
+ * preferred label in the case appropriate for returning from the
+ * `characterSet` property of the `Document` DOM interface, except for
+ * the replacement encoding whose name is not one of its labels.
+ *
+ * The _output encoding_ is the encoding used for form submission and URL
+ * parsing on Web pages in the encoding. This is UTF-8 for the replacement,
+ * UTF-16LE and UTF-16BE encodings and the encoding itself for other
+ * encodings.
+ *
+ * # Streaming vs. Non-Streaming
+ *
+ * When you have the entire input in a single buffer, you can use the
+ * methods `decode()`, `decode_with_bom_removal()`,
+ * `decode_without_bom_handling()`,
+ * `decode_without_bom_handling_and_without_replacement()` and
+ * `encode()`. Unlike the rest of the API, these methods perform heap
+ * allocations. You should use the `Decoder` and `Encoder` objects when your
+ * input is split into multiple buffers or when you want to control the
+ * allocation of the output buffers.
+ *
+ * # Instances
+ *
+ * All instances of `Encoding` are statically allocated and have the process's
+ * lifetime. There is precisely one unique `Encoding` instance for each
+ * encoding defined in the Encoding Standard.
+ *
+ * To obtain a reference to a particular encoding whose identity you know at
+ * compile time, use a `static` that refers to the encoding. There is a `static`
+ * for each encoding. The `static`s are named in all caps with hyphens
+ * replaced with underscores and with `_ENCODING` appended to the
+ * name. For example, if you know at compile time that you will want to
+ * decode using the UTF-8 encoding, use the `UTF_8_ENCODING` `static`.
+ *
+ * If you don't know what encoding you need at compile time and need to
+ * dynamically get an encoding by label, use `Encoding::for_label()`.
+ *
+ * Instances of `Encoding` can be compared with `==`.
+ */
+class Encoding final {
+ public:
+  /**
+   * Looks up an encoding by label, implementing the _get an encoding_
+   * algorithm (https://encoding.spec.whatwg.org/#concept-encoding-get).
+   *
+   * The label is ASCII-lowercased and stripped of leading and trailing
+   * whitespace before matching. When it then matches a label defined in
+   * the Encoding Standard, the `const Encoding*` for the corresponding
+   * encoding is returned; when nothing matches, `nullptr` is returned.
+   *
+   * Prefer this method when a `nullptr` result will be handled by
+   * substituting a fallback encoding (e.g. `WINDOWS_1252_ENCODING`).
+   * When a `nullptr` result should instead cause processing to be
+   * refused, `for_label_no_replacement()` is the more appropriate choice.
+   *
+   * A null `label.data()` is passed through `null_to_bogus()` first.
+   */
+  static inline const Encoding* for_label(gsl::cstring_span<> label) {
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(label.data());
+    const auto length = label.length();
+    return encoding_for_label(null_to_bogus<const uint8_t>(bytes), length);
+  }
+
+  /**
+   * Like `for_label()`, but returns `nullptr` in the cases where
+   * `for_label()` would return `REPLACEMENT_ENCODING`.
+   *
+   * Use this method when an invalid label must result in a fatal error:
+   * in such scenarios the caller typically wants labels that map to the
+   * replacement encoding to be treated as fatal errors as well.
+   *
+   * Do not use this method when a `nullptr` result is handled by
+   * substituting a fallback encoding (e.g. `WINDOWS_1252_ENCODING`).
+   * In that case call `for_label()` instead, so that labels which
+   * `for_label()` maps to `REPLACEMENT_ENCODING` do not end up on an
+   * unsafe fallback.
+   *
+   * A null `label.data()` is passed through `null_to_bogus()` first.
+   */
+  static inline const Encoding* for_label_no_replacement(
+      gsl::cstring_span<> label) {
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(label.data());
+    const auto length = label.length();
+    return encoding_for_label_no_replacement(
+        null_to_bogus<const uint8_t>(bytes), length);
+  }
+
+  /**
+   * Performs non-incremental BOM sniffing.
+   *
+   * The argument must either be a buffer representing the entire input
+   * stream (non-streaming case) or a buffer representing at least the first
+   * three bytes of the input stream (streaming case).
+   *
+   * Returns a `std::optional` wrapping `make_tuple(UTF_8_ENCODING, 3)`,
+   * `make_tuple(UTF_16LE_ENCODING, 2)` or `make_tuple(UTF_16BE_ENCODING, 2)`
+   * if the argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM or
+   * `std::nullopt` otherwise.
+   */
+  static inline std::optional<
+      std::tuple<gsl::not_null<const Encoding*>, size_t>>
+  for_bom(gsl::span<const uint8_t> buffer) {
+    size_t len = buffer.size();  // in: buffer length; out: BOM length
+    const Encoding* encoding =
+        encoding_for_bom(null_to_bogus(buffer.data()), &len);
+    if (encoding) {
+      return std::make_tuple(gsl::not_null<const Encoding*>(encoding), len);
+    }
+    return std::nullopt;
+  }
+
+  /**
+   * Returns the name of this encoding as a `std::string`.
+   *
+   * The name can be returned unmodified from the DOM
+   * `document.characterSet` property.
+   */
+  inline std::string name() const {
+    // Size for the longest name, fill in place, then trim to actual length.
+    std::string buffer(ENCODING_NAME_MAX_LENGTH, '\0');
+    uint8_t* out = reinterpret_cast<uint8_t*>(buffer.data());
+    buffer.resize(encoding_name(this, out));
+    return buffer;
+  }
+
+  /**
+   * Tells whether every Unicode code point is encodable in the _output
+   * encoding_ of this encoding, which is the case only if that is UTF-8.
+   */
+  inline bool can_encode_everything() const {
+    return ::encoding_can_encode_everything(this);
+  }
+
+  /**
+   * Checks whether, in this encoding, the bytes 0x00...0x7F map exclusively
+   * to the characters U+0000...U+007F and vice versa.
+   */
+  inline bool is_ascii_compatible() const {
+    return encoding_is_ascii_compatible(this);
+  }
+
+  /**
+   * Checks whether this encoding maps one byte to one Basic Multilingual
+   * Plane code point (i.e. byte length equals decoded UTF-16 length) and
+   * vice versa (for mappable characters).
+   *
+   * Returns `true` iff this encoding is on the list of Legacy single-byte
+   * encodings (https://encoding.spec.whatwg.org/#legacy-single-byte-encodings)
+   * in the spec or is x-user-defined.
+   */
+  inline bool is_single_byte() const { return encoding_is_single_byte(this); }
+
+  /**
+   * Returns the (never null) _output encoding_ of this encoding: UTF-8 for
+   * UTF-16BE, UTF-16LE and replacement, and the encoding itself otherwise.
+   */
+  inline gsl::not_null<const Encoding*> output_encoding() const {
+    return gsl::not_null<const Encoding*>(encoding_output_encoding(this));
+  }
+
+  /**
+   * Decode complete input to `std::string` _with BOM sniffing_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _decode_ (https://encoding.spec.whatwg.org/#decode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to BOM sniffing).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_decoder()`
+   * when decoding segmented input.
+   */
+  inline std::tuple<std::string, gsl::not_null<const Encoding*>, bool> decode(
+      gsl::span<const uint8_t> bytes) const {
+    // Start from this encoding; a recognized BOM overrides the choice and
+    // is skipped before the payload is decoded.
+    const Encoding* actual = this;
+    const auto sniffed = Encoding::for_bom(bytes);
+    if (sniffed.has_value()) {
+      const auto& bom = sniffed.value();
+      actual = std::get<0>(bom);
+      bytes = bytes.subspan(std::get<1>(bom));
+    }
+    auto [text, replaced] = actual->decode_without_bom_handling(bytes);
+    return {text, gsl::not_null<const Encoding*>(actual), replaced};
+  }
+
+  /**
+   * Decode complete input to `std::string` _with BOM removal_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is one buffer (its end marks the end of the stream). Only
+   * the BOM of this encoding itself (UTF-8, UTF-16LE/BE) is removed.
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode) spec concept.
+   *
+   * The second item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_with_bom_removal()` when decoding segmented input.
+   */
+  inline std::tuple<std::string, bool> decode_with_bom_removal(
+      gsl::span<const uint8_t> bytes) const {
+    // Compare byte values directly: a span made from a string literal also
+    // covers the terminating NUL and so never equals a 2- or 3-byte prefix.
+    const auto starts_with = [&bytes](uint8_t first, uint8_t second) {
+      return bytes.size() >= 2 && bytes[0] == first && bytes[1] == second;
+    };
+    if (this == UTF_8_ENCODING) {
+      if (starts_with(0xEF, 0xBB) && bytes.size() >= 3 && bytes[2] == 0xBF) {
+        bytes = bytes.subspan(3);
+      }
+    } else if ((this == UTF_16LE_ENCODING && starts_with(0xFF, 0xFE)) ||
+               (this == UTF_16BE_ENCODING && starts_with(0xFE, 0xFF))) {
+      bytes = bytes.subspan(2);
+    }
+    return decode_without_bom_handling(bytes);
+  }
+
+  /**
+   * Decode complete input to `std::string` _without BOM handling_ and
+   * with malformed sequences replaced with the REPLACEMENT CHARACTER when
+   * the entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::tuple<std::string, bool> decode_without_bom_handling(
+      gsl::span<const uint8_t> bytes) const {
+    const auto dec = new_decoder_without_bom_handling();
+    const auto capacity = dec->max_utf8_buffer_length(bytes.size());
+    if (!capacity) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Buffer sized by the decoder's estimate; trimmed after decoding.
+    std::string out(capacity.value(), '\0');
+    auto* const dst = reinterpret_cast<uint8_t*>(&out[0]);
+    const auto [status, consumed, produced, replaced] =
+        dec->decode_to_utf8(bytes, gsl::make_span(dst, out.size()), true);
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    assert(status == INPUT_EMPTY);
+    out.resize(produced);
+    return {out, replaced};
+  }
+
+  /**
+   * Decode complete input to `std::string` _without BOM handling_ and
+   * _with malformed sequences treated as fatal_ when the entire input is
+   * available as a single buffer (i.e. the end of the buffer marks the end
+   * of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM or fail_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
+   * spec concept.
+   *
+   * Returns `std::nullopt` if a malformed sequence was encountered and the result
+   * of the decode as `std::optional<std::string>` otherwise.
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::optional<std::string>
+  decode_without_bom_handling_and_without_replacement(
+      gsl::span<const uint8_t> bytes) const {
+    const auto dec = new_decoder_without_bom_handling();
+    const auto capacity =
+        dec->max_utf8_buffer_length_without_replacement(bytes.size());
+    if (!capacity) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Buffer sized by the decoder's estimate; trimmed after decoding.
+    std::string out(capacity.value(), '\0');
+    auto* const dst = reinterpret_cast<uint8_t*>(&out[0]);
+    const auto [status, consumed, produced] =
+        dec->decode_to_utf8_without_replacement(
+            bytes, gsl::make_span(dst, out.size()), true);
+    assert(status != OUTPUT_FULL);
+    if (status != INPUT_EMPTY) {
+      // Input was not fully consumed: report failure instead of text.
+      return std::nullopt;
+    }
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    out.resize(produced);
+    return out;
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _with BOM sniffing_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _decode_ (https://encoding.spec.whatwg.org/#decode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to BOM sniffing).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_decoder()`
+   * when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, gsl::not_null<const Encoding*>, bool>
+  decode16(gsl::span<const uint8_t> bytes) const {
+    // Start from this encoding; a recognized BOM overrides the choice and
+    // is skipped before the payload is decoded.
+    const Encoding* actual = this;
+    const auto sniffed = Encoding::for_bom(bytes);
+    if (sniffed.has_value()) {
+      const auto& bom = sniffed.value();
+      actual = std::get<0>(bom);
+      bytes = bytes.subspan(std::get<1>(bom));
+    }
+    auto [text, replaced] = actual->decode16_without_bom_handling(bytes);
+    return {text, gsl::not_null<const Encoding*>(actual), replaced};
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _with BOM removal_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is one buffer (its end marks the end of the stream). Only
+   * the BOM of this encoding itself (UTF-8, UTF-16LE/BE) is removed.
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode) spec concept.
+   *
+   * The second item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_with_bom_removal()` when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, bool> decode16_with_bom_removal(
+      gsl::span<const uint8_t> bytes) const {
+    // Compare byte values directly: a span made from a string literal also
+    // covers the terminating NUL and so never equals a 2- or 3-byte prefix.
+    const auto starts_with = [&bytes](uint8_t first, uint8_t second) {
+      return bytes.size() >= 2 && bytes[0] == first && bytes[1] == second;
+    };
+    if (this == UTF_8_ENCODING) {
+      if (starts_with(0xEF, 0xBB) && bytes.size() >= 3 && bytes[2] == 0xBF) {
+        bytes = bytes.subspan(3);
+      }
+    } else if ((this == UTF_16LE_ENCODING && starts_with(0xFF, 0xFE)) ||
+               (this == UTF_16BE_ENCODING && starts_with(0xFE, 0xFF))) {
+      bytes = bytes.subspan(2);
+    }
+    return decode16_without_bom_handling(bytes);
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _without BOM handling_ and
+   * with malformed sequences replaced with the REPLACEMENT CHARACTER when
+   * the entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, bool> decode16_without_bom_handling(
+      gsl::span<const uint8_t> bytes) const {
+    const auto dec = new_decoder_without_bom_handling();
+    const auto capacity = dec->max_utf16_buffer_length(bytes.size());
+    if (!capacity) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    std::u16string out(capacity.value(), '\0');
+    const auto [status, consumed, produced, replaced] =
+        dec->decode_to_utf16(bytes, gsl::make_span(&out[0], out.size()), true);
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    assert(status == INPUT_EMPTY);
+    out.resize(produced);
+    return {out, replaced};
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _without BOM handling_ and
+   * _with malformed sequences treated as fatal_ when the entire input is
+   * available as a single buffer (i.e. the end of the buffer marks the end
+   * of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM or fail_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
+   * spec concept.
+   *
+   * Returns `std::nullopt` if a malformed sequence was encountered and the result
+   * of the decode as `std::optional<std::u16string>` otherwise.
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::optional<std::u16string>
+  decode16_without_bom_handling_and_without_replacement(
+      gsl::span<const uint8_t> bytes) const {
+    const auto dec = new_decoder_without_bom_handling();
+    const auto capacity = dec->max_utf16_buffer_length(bytes.size());
+    if (!capacity) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    std::u16string out(capacity.value(), '\0');
+    const auto [status, consumed, produced] =
+        dec->decode_to_utf16_without_replacement(
+            bytes, gsl::make_span(&out[0], out.size()), true);
+    assert(status != OUTPUT_FULL);
+    if (status != INPUT_EMPTY) {
+      return std::nullopt;  // input not fully consumed: report failure
+    }
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    out.resize(produced);
+    return out;
+  }
+
+  /**
+   * Encode complete UTF-8 input to `std::vector<uint8_t>` with unmappable
+   * characters replaced with decimal numeric character references when the
+   * entire input is available as a single buffer (i.e. the end of the buffer
+   * marks the end of the stream). If the output encoding is UTF-8, the
+   * result is a direct copy of the input bytes.
+   *
+   * This method implements the (non-streaming version of) the
+   * _encode_ (https://encoding.spec.whatwg.org/#encode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to some encodings
+   * having UTF-8 as their output encoding). The third item indicates whether
+   * there were unmappable characters (that were replaced with HTML numeric
+   * character references). Throws `std::overflow_error` if the required
+   * buffer size cannot be computed.
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_encoder()`
+   * when encoding segmented output.
+   */
+  inline std::tuple<std::vector<uint8_t>, gsl::not_null<const Encoding*>, bool>
+  encode(std::string_view string) const {
+    auto output_enc = output_encoding();
+    if (output_enc == UTF_8_ENCODING) {
+      // Already UTF-8: byte-for-byte copy (range ctor is fine for empty input).
+      std::vector<uint8_t> vec(string.begin(), string.end());
+      return {vec, output_enc, false};
+    }
+    auto encoder = output_enc->new_encoder();
+    auto needed =
+        encoder->max_buffer_length_from_utf8_if_no_unmappables(string.size());
+    if (!needed) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    std::vector<uint8_t> vec(needed.value());
+    bool total_had_errors = false;
+    size_t total_read = 0;
+    size_t total_written = 0;
+    for (;;) {
+      const auto [result, read, written, had_errors] =
+          encoder->encode_from_utf8(string.substr(total_read),
+                                    gsl::make_span(vec).subspan(total_written),
+                                    true);
+      total_read += read;
+      total_written += written;
+      total_had_errors |= had_errors;
+      if (result == INPUT_EMPTY) {
+        assert(total_read == static_cast<size_t>(string.size()));
+        assert(total_written <= static_cast<size_t>(vec.size()));
+        vec.resize(total_written);
+        return {vec, output_enc, total_had_errors};
+      }
+      auto more = encoder->max_buffer_length_from_utf8_if_no_unmappables(
+          string.size() - total_read);
+      if (!more) {
+        throw std::overflow_error("Overflow in buffer size computation.");
+      }
+      vec.resize(total_written + more.value());
+    }
+  }
+
+  /**
+   * Encode complete input to `std::vector<uint8_t>` with unmappable characters
+   * replaced with decimal numeric character references when the entire input
+   * is available as a single buffer (i.e. the end of the buffer marks the
+   * end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _encode_ (https://encoding.spec.whatwg.org/#encode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to some encodings
+   * having UTF-8 as their output encoding).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * unmappable characters (that were replaced with HTML numeric character
+   * references).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_encoder()`
+   * when encoding segmented output.
+   */
+  inline std::tuple<std::vector<uint8_t>, gsl::not_null<const Encoding*>, bool>
+  encode(std::u16string_view string) const {
+    const auto target = output_encoding();
+    const auto enc = target->new_encoder();
+    const auto initial =
+        enc->max_buffer_length_from_utf16_if_no_unmappables(string.size());
+    if (!initial) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    std::vector<uint8_t> out(initial.value());
+    bool any_unmappable = false;
+    size_t consumed = 0;
+    size_t produced = 0;
+    while (true) {
+      // Encode whatever input is left into the unused tail of `out`.
+      const auto [status, read, written, had_errors] = enc->encode_from_utf16(
+          string.substr(consumed), gsl::make_span(out).subspan(produced), true);
+      consumed += read;
+      produced += written;
+      any_unmappable |= had_errors;
+      if (status == INPUT_EMPTY) {
+        break;
+      }
+      // Not finished yet: enlarge `out` by the estimate for the remainder.
+      const auto extra = enc->max_buffer_length_from_utf16_if_no_unmappables(
+          string.size() - consumed);
+      if (!extra) {
+        throw std::overflow_error("Overflow in buffer size computation.");
+      }
+      out.resize(produced + extra.value());
+    }
+    assert(consumed == static_cast<size_t>(string.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    out.resize(produced);
+    return {out, gsl::not_null<const Encoding*>(target), any_unmappable};
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM sniffing enabled.
+   * The decoder is returned in a `std::unique_ptr`, i.e. owned by the caller.
+   * BOM sniffing may cause the returned decoder to morph into a decoder
+   * for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+   */
+  inline std::unique_ptr<Decoder> new_decoder() const {
+    return std::unique_ptr<Decoder>(encoding_new_decoder(this));
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM sniffing enabled
+   * into memory occupied by a previously-instantiated decoder.
+   *
+   * BOM sniffing may cause the new decoder (written into `decoder`) to morph
+   * into a decoder for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+   */
+  inline void new_decoder_into(Decoder& decoder) const {
+    encoding_new_decoder_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM removal.
+   *
+   * If the input starts with bytes that are the BOM for this encoding,
+   * those bytes are removed. However, the decoder never morphs into a
+   * decoder for another encoding: A BOM for another encoding is treated as
+   * (potentially malformed) input to the decoding algorithm for this
+   * encoding.
+   */
+  inline std::unique_ptr<Decoder> new_decoder_with_bom_removal() const {
+    auto* fresh = encoding_new_decoder_with_bom_removal(this);
+    return std::unique_ptr<Decoder>(fresh);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM removal
+   * into memory occupied by a previously-instantiated decoder (`decoder`).
+   *
+   * If the input starts with bytes that are the BOM for this encoding,
+   * those bytes are removed. However, the decoder never morphs into a
+   * decoder for another encoding: A BOM for another encoding is treated as
+   * (potentially malformed) input to the decoding algorithm for this
+   * encoding.
+   */
+  inline void new_decoder_with_bom_removal_into(Decoder& decoder) const {
+    encoding_new_decoder_with_bom_removal_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM handling disabled.
+   *
+   * If the input starts with bytes that look like a BOM, those bytes are
+   * not treated as a BOM. (Hence, the decoder never morphs into a decoder
+   * for another encoding.)
+   *
+   * _Note:_ If the caller has performed BOM sniffing on its own but has not
+   * removed the BOM, the caller should use `new_decoder_with_bom_removal()`
+   * instead of this method to cause the BOM to be removed.
+   */
+  inline std::unique_ptr<Decoder> new_decoder_without_bom_handling() const {
+    auto* fresh = encoding_new_decoder_without_bom_handling(this);
+    return std::unique_ptr<Decoder>(fresh);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM handling disabled
+   * into memory occupied by a previously-instantiated decoder (`decoder`).
+   *
+   * If the input starts with bytes that look like a BOM, those bytes are
+   * not treated as a BOM. (Hence, the decoder never morphs into a decoder
+   * for another encoding.)
+   *
+   * _Note:_ If the caller has performed BOM sniffing on its own but has not
+   * removed the BOM, the caller should use
+   * `new_decoder_with_bom_removal_into()` instead of this method to cause
+   * the BOM to be removed.
+   */
+  inline void new_decoder_without_bom_handling_into(Decoder& decoder) const {
+    encoding_new_decoder_without_bom_handling_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a caller-owned encoder for this encoding's output encoding.
+   */
+  inline std::unique_ptr<Encoder> new_encoder() const {
+    return std::unique_ptr<Encoder>(encoding_new_encoder(this));
+  }
+
+  /**
+   * Instantiates a new encoder for the output encoding of this encoding
+   * into memory occupied by a previously-instantiated encoder (`encoder`).
+   */
+  inline void new_encoder_into(Encoder& encoder) const {
+    encoding_new_encoder_into(this, &encoder);
+  }
+
+  /**
+   * Validates UTF-8.
+   *
+   * Returns the index of the first byte that makes the input malformed as
+   * UTF-8 or the length of the input if the input is entirely valid.
+   */
+  static inline size_t utf8_valid_up_to(gsl::span<const uint8_t> buffer) {
+    const uint8_t* start = null_to_bogus<const uint8_t>(buffer.data());
+    return encoding_utf8_valid_up_to(start, buffer.size());
+  }
+
+  /**
+   * Validates ASCII.
+   *
+   * Returns the index of the first byte that makes the input malformed as
+   * ASCII or the length of the input if the input is entirely valid.
+   */
+  static inline size_t ascii_valid_up_to(gsl::span<const uint8_t> buffer) {
+    const uint8_t* start = null_to_bogus<const uint8_t>(buffer.data());
+    return encoding_ascii_valid_up_to(start, buffer.size());
+  }
+
+  /**
+   * Validates ISO-2022-JP ASCII-state data.
+   *
+   * Returns the index of the first byte that makes the input not
+   * representable in the ASCII state of ISO-2022-JP or the length of the
+   * input if the input is entirely representable in the ASCII state of
+   * ISO-2022-JP.
+   */
+  static inline size_t iso_2022_jp_ascii_valid_up_to(
+      gsl::span<const uint8_t> buffer) {
+    const uint8_t* start = null_to_bogus<const uint8_t>(buffer.data());
+    return encoding_iso_2022_jp_ascii_valid_up_to(start, buffer.size());
+  }
+
+ private:
+  /**
+   * Replaces `nullptr` with a bogus, non-null pointer suitable for use as
+   * part of a zero-length Rust slice; other pointers pass through unchanged.
+   */
+  template <class T>
+  static inline T* null_to_bogus(T* ptr) {
+    return (ptr != nullptr) ? ptr : reinterpret_cast<T*>(alignof(T));
+  }
+
+  Encoding() = delete;  // objects cannot be made, copied or destroyed in C++
+  Encoding(const Encoding&) = delete;
+  Encoding& operator=(const Encoding&) = delete;
+  ~Encoding() = delete;
+};
+
+};  // namespace encoding_rs
+
+#endif  // encoding_rs_cpp_h_
diff --git a/third_party/rust/encoding_c/include/encoding_rs_statics.h b/third_party/rust/encoding_c/include/encoding_rs_statics.h
new file mode 100644
index 0000000000..c3e84d586e
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs_statics.h
@@ -0,0 +1,171 @@
+// Copyright Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+// This file is not meant to be included directly. Instead, encoding_rs.h
+// includes this file.
+
+#ifndef encoding_rs_statics_h_
+#define encoding_rs_statics_h_
+
+#ifndef ENCODING_RS_ENCODING
+#define ENCODING_RS_ENCODING Encoding
+#ifndef __cplusplus
+typedef struct Encoding_ Encoding;
+#endif
+#endif
+
+#ifndef ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR
+#define ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ENCODING_RS_ENCODING*
+#endif
+
+#ifndef ENCODING_RS_ENCODER
+#define ENCODING_RS_ENCODER Encoder
+#ifndef __cplusplus
+typedef struct Encoder_ Encoder;
+#endif
+#endif
+
+#ifndef ENCODING_RS_DECODER
+#define ENCODING_RS_DECODER Decoder
+#ifndef __cplusplus
+typedef struct Decoder_ Decoder;
+#endif
+#endif
+
+#define INPUT_EMPTY 0
+
+#define OUTPUT_FULL 0xFFFFFFFF
+
+// Length of the longest encoding name, "x-mac-cyrillic".
+#define ENCODING_NAME_MAX_LENGTH 14
+
+/// The Big5 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const BIG5_ENCODING;
+
+/// The EUC-JP encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const EUC_JP_ENCODING;
+
+/// The EUC-KR encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const EUC_KR_ENCODING;
+
+/// The GBK encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const GBK_ENCODING;
+
+/// The IBM866 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const IBM866_ENCODING;
+
+/// The ISO-2022-JP encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_2022_JP_ENCODING;
+
+/// The ISO-8859-10 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_10_ENCODING;
+
+/// The ISO-8859-13 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_13_ENCODING;
+
+/// The ISO-8859-14 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_14_ENCODING;
+
+/// The ISO-8859-15 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_15_ENCODING;
+
+/// The ISO-8859-16 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_16_ENCODING;
+
+/// The ISO-8859-2 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_2_ENCODING;
+
+/// The ISO-8859-3 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_3_ENCODING;
+
+/// The ISO-8859-4 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_4_ENCODING;
+
+/// The ISO-8859-5 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_5_ENCODING;
+
+/// The ISO-8859-6 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_6_ENCODING;
+
+/// The ISO-8859-7 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_7_ENCODING;
+
+/// The ISO-8859-8 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_8_ENCODING;
+
+/// The ISO-8859-8-I encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const ISO_8859_8_I_ENCODING;
+
+/// The KOI8-R encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const KOI8_R_ENCODING;
+
+/// The KOI8-U encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const KOI8_U_ENCODING;
+
+/// The Shift_JIS encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const SHIFT_JIS_ENCODING;
+
+/// The UTF-16BE encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const UTF_16BE_ENCODING;
+
+/// The UTF-16LE encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const UTF_16LE_ENCODING;
+
+/// The UTF-8 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const UTF_8_ENCODING;
+
+/// The gb18030 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const GB18030_ENCODING;
+
+/// The macintosh encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const MACINTOSH_ENCODING;
+
+/// The replacement encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const REPLACEMENT_ENCODING;
+
+/// The windows-1250 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1250_ENCODING;
+
+/// The windows-1251 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1251_ENCODING;
+
+/// The windows-1252 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1252_ENCODING;
+
+/// The windows-1253 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1253_ENCODING;
+
+/// The windows-1254 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1254_ENCODING;
+
+/// The windows-1255 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1255_ENCODING;
+
+/// The windows-1256 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1256_ENCODING;
+
+/// The windows-1257 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1257_ENCODING;
+
+/// The windows-1258 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_1258_ENCODING;
+
+/// The windows-874 encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const WINDOWS_874_ENCODING;
+
+/// The x-mac-cyrillic encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const X_MAC_CYRILLIC_ENCODING;
+
+/// The x-user-defined encoding.
+extern ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR const X_USER_DEFINED_ENCODING;
+
+#endif // encoding_rs_statics_h_