diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:26:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:26:03 +0000 |
commit | 9918693037dce8aa4bb6f08741b6812923486c18 (patch) | |
tree | 21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /vendor/fst/src/raw | |
parent | Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff) | |
download | rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip |
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/fst/src/raw')
-rw-r--r-- | vendor/fst/src/raw/build.rs | 474 | ||||
-rw-r--r-- | vendor/fst/src/raw/common_inputs.rs | 289 | ||||
-rw-r--r-- | vendor/fst/src/raw/counting_writer.rs | 70 | ||||
-rw-r--r-- | vendor/fst/src/raw/crc32.rs | 59 | ||||
-rw-r--r-- | vendor/fst/src/raw/crc32_table.rs | 2 | ||||
-rw-r--r-- | vendor/fst/src/raw/error.rs | 171 | ||||
-rw-r--r-- | vendor/fst/src/raw/mod.rs | 1382 | ||||
-rw-r--r-- | vendor/fst/src/raw/node.rs | 1012 | ||||
-rw-r--r-- | vendor/fst/src/raw/ops.rs | 643 | ||||
-rw-r--r-- | vendor/fst/src/raw/registry.rs | 264 | ||||
-rw-r--r-- | vendor/fst/src/raw/registry_minimal.rs | 53 | ||||
-rw-r--r-- | vendor/fst/src/raw/tests.rs | 589 |
12 files changed, 5008 insertions, 0 deletions
diff --git a/vendor/fst/src/raw/build.rs b/vendor/fst/src/raw/build.rs new file mode 100644 index 000000000..e93626b27 --- /dev/null +++ b/vendor/fst/src/raw/build.rs @@ -0,0 +1,474 @@ +use std::io; + +use crate::bytes; +use crate::error::Result; +use crate::raw::counting_writer::CountingWriter; +use crate::raw::error::Error; +use crate::raw::registry::{Registry, RegistryEntry}; +use crate::raw::{ + CompiledAddr, Fst, FstType, Output, Transition, EMPTY_ADDRESS, + NONE_ADDRESS, VERSION, +}; +// use raw::registry_minimal::{Registry, RegistryEntry}; +use crate::stream::{IntoStreamer, Streamer}; + +/// A builder for creating a finite state transducer. +/// +/// This is not your average everyday builder. It has two important qualities +/// that make it a bit unique from what you might expect: +/// +/// 1. All keys must be added in lexicographic order. Adding a key out of order +/// will result in an error. Additionally, adding a duplicate key with an +/// output value will also result in an error. That is, once a key is +/// associated with a value, that association can never be modified or +/// deleted. +/// 2. The representation of an fst is streamed to *any* `io::Write` as it is +/// built. For an in memory representation, this can be a `Vec<u8>`. +/// +/// Point (2) is especially important because it means that an fst can be +/// constructed *without storing the entire fst in memory*. Namely, since it +/// works with any `io::Write`, it can be streamed directly to a file. +/// +/// With that said, the builder does use memory, but **memory usage is bounded +/// to a constant size**. The amount of memory used trades off with the +/// compression ratio. Currently, the implementation hard codes this trade off +/// which can result in about 5-20MB of heap usage during construction. (N.B. +/// Guaranteeing a maximal compression ratio requires memory proportional to +/// the size of the fst, which defeats some of the benefit of streaming +/// it to disk. 
In practice, a small bounded amount of memory achieves +/// close-to-minimal compression ratios.) +/// +/// The algorithmic complexity of fst construction is `O(n)` where `n` is the +/// number of elements added to the fst. +pub struct Builder<W> { + /// The FST raw data is written directly to `wtr`. + /// + /// No internal buffering is done. + wtr: CountingWriter<W>, + /// The stack of unfinished nodes. + /// + /// An unfinished node is a node that could potentially have a new + /// transition added to it when a new word is added to the dictionary. + unfinished: UnfinishedNodes, + /// A map of finished nodes. + /// + /// A finished node is one that has been compiled and written to `wtr`. + /// After this point, the node is considered immutable and will never + /// change. + registry: Registry, + /// The last word added. + /// + /// This is used to enforce the invariant that words are added in sorted + /// order. + last: Option<Vec<u8>>, + /// The address of the last compiled node. + /// + /// This is used to optimize states with one transition that point + /// to the previously compiled node. (The previously compiled node in + /// this case actually corresponds to the next state for the transition, + /// since states are compiled in reverse.) + last_addr: CompiledAddr, + /// The number of keys added. + len: usize, +} + +#[derive(Debug)] +struct UnfinishedNodes { + stack: Vec<BuilderNodeUnfinished>, +} + +#[derive(Debug)] +struct BuilderNodeUnfinished { + node: BuilderNode, + last: Option<LastTransition>, +} + +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct BuilderNode { + pub is_final: bool, + pub final_output: Output, + pub trans: Vec<Transition>, +} + +#[derive(Debug)] +struct LastTransition { + inp: u8, + out: Output, +} + +impl Builder<Vec<u8>> { + /// Create a builder that builds an fst in memory. 
+ #[inline] + pub fn memory() -> Builder<Vec<u8>> { + Builder::new(Vec::with_capacity(10 * (1 << 10))).unwrap() + } + + /// Finishes construction of the FST and returns it. + #[inline] + pub fn into_fst(self) -> Fst<Vec<u8>> { + self.into_inner().and_then(Fst::new).unwrap() + } +} + +impl<W: io::Write> Builder<W> { + /// Create a builder that builds an fst by writing it to `wtr` in a + /// streaming fashion. + pub fn new(wtr: W) -> Result<Builder<W>> { + Builder::new_type(wtr, 0) + } + + /// The same as `new`, except it sets the type of the fst to the type + /// given. + pub fn new_type(wtr: W, ty: FstType) -> Result<Builder<W>> { + let mut wtr = CountingWriter::new(wtr); + // Don't allow any nodes to have address 0-7. We use these to encode + // the API version. We also use addresses `0` and `1` as special + // sentinel values, so they should never correspond to a real node. + bytes::io_write_u64_le(VERSION, &mut wtr)?; + // Similarly for 8-15 for the fst type. + bytes::io_write_u64_le(ty, &mut wtr)?; + Ok(Builder { + wtr, + unfinished: UnfinishedNodes::new(), + registry: Registry::new(10_000, 2), + last: None, + last_addr: NONE_ADDRESS, + len: 0, + }) + } + + /// Adds a byte string to this FST with a zero output value. + pub fn add<B>(&mut self, bs: B) -> Result<()> + where + B: AsRef<[u8]>, + { + self.check_last_key(bs.as_ref(), false)?; + self.insert_output(bs, None) + } + + /// Insert a new key-value pair into the fst. + /// + /// Keys must be convertible to byte strings. Values must be a `u64`, which + /// is a restriction of the current implementation of finite state + /// transducers. (Values may one day be expanded to other types.) + /// + /// If a key is inserted that is less than or equal to any previous key + /// added, then an error is returned. Similarly, if there was a problem + /// writing to the underlying writer, an error is returned. 
+ pub fn insert<B>(&mut self, bs: B, val: u64) -> Result<()> + where + B: AsRef<[u8]>, + { + self.check_last_key(bs.as_ref(), true)?; + self.insert_output(bs, Some(Output::new(val))) + } + + /// Calls insert on each item in the iterator. + /// + /// If an error occurred while adding an element, processing is stopped + /// and the error is returned. + /// + /// If a key is inserted that is less than or equal to any previous key + /// added, then an error is returned. Similarly, if there was a problem + /// writing to the underlying writer, an error is returned. + pub fn extend_iter<T, I>(&mut self, iter: I) -> Result<()> + where + T: AsRef<[u8]>, + I: IntoIterator<Item = (T, Output)>, + { + for (key, out) in iter { + self.insert(key, out.value())?; + } + Ok(()) + } + + /// Calls insert on each item in the stream. + /// + /// Note that unlike `extend_iter`, this is not generic on the items in + /// the stream. + /// + /// If a key is inserted that is less than or equal to any previous key + /// added, then an error is returned. Similarly, if there was a problem + /// writing to the underlying writer, an error is returned. + pub fn extend_stream<'f, I, S>(&mut self, stream: I) -> Result<()> + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + let mut stream = stream.into_stream(); + while let Some((key, out)) = stream.next() { + self.insert(key, out.value())?; + } + Ok(()) + } + + /// Finishes the construction of the fst and flushes the underlying + /// writer. After completion, the data written to `W` may be read using + /// one of `Fst`'s constructor methods. + pub fn finish(self) -> Result<()> { + self.into_inner()?; + Ok(()) + } + + /// Just like `finish`, except it returns the underlying writer after + /// flushing it. 
+ pub fn into_inner(mut self) -> Result<W> { + self.compile_from(0)?; + let root_node = self.unfinished.pop_root(); + let root_addr = self.compile(&root_node)?; + bytes::io_write_u64_le(self.len as u64, &mut self.wtr)?; + bytes::io_write_u64_le(root_addr as u64, &mut self.wtr)?; + + let sum = self.wtr.masked_checksum(); + let mut wtr = self.wtr.into_inner(); + bytes::io_write_u32_le(sum, &mut wtr)?; + wtr.flush()?; + Ok(wtr) + } + + fn insert_output<B>(&mut self, bs: B, out: Option<Output>) -> Result<()> + where + B: AsRef<[u8]>, + { + let bs = bs.as_ref(); + if bs.is_empty() { + self.len = 1; // must be first key, so length is always 1 + self.unfinished.set_root_output(out.unwrap_or(Output::zero())); + return Ok(()); + } + let (prefix_len, out) = if let Some(out) = out { + self.unfinished.find_common_prefix_and_set_output(bs, out) + } else { + (self.unfinished.find_common_prefix(bs), Output::zero()) + }; + if prefix_len == bs.len() { + // If the prefix found consumes the entire set of bytes, then + // the prefix *equals* the bytes given. This means it is a + // duplicate value with no output. So we can give up here. + // + // If the below assert fails, then that means we let a duplicate + // value through even when inserting outputs. 
+ assert!(out.is_zero()); + return Ok(()); + } + self.len += 1; + self.compile_from(prefix_len)?; + self.unfinished.add_suffix(&bs[prefix_len..], out); + Ok(()) + } + + fn compile_from(&mut self, istate: usize) -> Result<()> { + let mut addr = NONE_ADDRESS; + while istate + 1 < self.unfinished.len() { + let node = if addr == NONE_ADDRESS { + self.unfinished.pop_empty() + } else { + self.unfinished.pop_freeze(addr) + }; + addr = self.compile(&node)?; + assert!(addr != NONE_ADDRESS); + } + self.unfinished.top_last_freeze(addr); + Ok(()) + } + + fn compile(&mut self, node: &BuilderNode) -> Result<CompiledAddr> { + if node.is_final + && node.trans.is_empty() + && node.final_output.is_zero() + { + return Ok(EMPTY_ADDRESS); + } + let entry = self.registry.entry(&node); + if let RegistryEntry::Found(ref addr) = entry { + return Ok(*addr); + } + let start_addr = self.wtr.count() as CompiledAddr; + node.compile_to(&mut self.wtr, self.last_addr, start_addr)?; + self.last_addr = self.wtr.count() as CompiledAddr - 1; + if let RegistryEntry::NotFound(cell) = entry { + cell.insert(self.last_addr); + } + Ok(self.last_addr) + } + + fn check_last_key(&mut self, bs: &[u8], check_dupe: bool) -> Result<()> { + if let Some(ref mut last) = self.last { + if check_dupe && bs == &**last { + return Err(Error::DuplicateKey { got: bs.to_vec() }.into()); + } + if bs < &**last { + return Err(Error::OutOfOrder { + previous: last.to_vec(), + got: bs.to_vec(), + } + .into()); + } + last.clear(); + for &b in bs { + last.push(b); + } + } else { + self.last = Some(bs.to_vec()); + } + Ok(()) + } + + /// Gets a reference to the underlying writer. 
+ pub fn get_ref(&self) -> &W { + self.wtr.get_ref() + } + + /// Returns the number of bytes written to the underlying writer + pub fn bytes_written(&self) -> u64 { + self.wtr.count() + } +} + +impl UnfinishedNodes { + fn new() -> UnfinishedNodes { + let mut unfinished = UnfinishedNodes { stack: Vec::with_capacity(64) }; + unfinished.push_empty(false); + unfinished + } + + fn len(&self) -> usize { + self.stack.len() + } + + fn push_empty(&mut self, is_final: bool) { + self.stack.push(BuilderNodeUnfinished { + node: BuilderNode { is_final, ..BuilderNode::default() }, + last: None, + }); + } + + fn pop_root(&mut self) -> BuilderNode { + assert!(self.stack.len() == 1); + assert!(self.stack[0].last.is_none()); + self.stack.pop().unwrap().node + } + + fn pop_freeze(&mut self, addr: CompiledAddr) -> BuilderNode { + let mut unfinished = self.stack.pop().unwrap(); + unfinished.last_compiled(addr); + unfinished.node + } + + fn pop_empty(&mut self) -> BuilderNode { + let unfinished = self.stack.pop().unwrap(); + assert!(unfinished.last.is_none()); + unfinished.node + } + + fn set_root_output(&mut self, out: Output) { + self.stack[0].node.is_final = true; + self.stack[0].node.final_output = out; + } + + fn top_last_freeze(&mut self, addr: CompiledAddr) { + let last = self.stack.len().checked_sub(1).unwrap(); + self.stack[last].last_compiled(addr); + } + + fn add_suffix(&mut self, bs: &[u8], out: Output) { + if bs.is_empty() { + return; + } + let last = self.stack.len().checked_sub(1).unwrap(); + assert!(self.stack[last].last.is_none()); + self.stack[last].last = Some(LastTransition { inp: bs[0], out }); + for &b in &bs[1..] 
{ + self.stack.push(BuilderNodeUnfinished { + node: BuilderNode::default(), + last: Some(LastTransition { inp: b, out: Output::zero() }), + }); + } + self.push_empty(true); + } + + fn find_common_prefix(&mut self, bs: &[u8]) -> usize { + bs.iter() + .zip(&self.stack) + .take_while(|&(&b, ref node)| { + node.last.as_ref().map(|t| t.inp == b).unwrap_or(false) + }) + .count() + } + + fn find_common_prefix_and_set_output( + &mut self, + bs: &[u8], + mut out: Output, + ) -> (usize, Output) { + let mut i = 0; + while i < bs.len() { + let add_prefix = match self.stack[i].last.as_mut() { + Some(ref mut t) if t.inp == bs[i] => { + i += 1; + let common_pre = t.out.prefix(out); + let add_prefix = t.out.sub(common_pre); + out = out.sub(common_pre); + t.out = common_pre; + add_prefix + } + _ => break, + }; + if !add_prefix.is_zero() { + self.stack[i].add_output_prefix(add_prefix); + } + } + (i, out) + } +} + +impl BuilderNodeUnfinished { + fn last_compiled(&mut self, addr: CompiledAddr) { + if let Some(trans) = self.last.take() { + self.node.trans.push(Transition { + inp: trans.inp, + out: trans.out, + addr, + }); + } + } + + fn add_output_prefix(&mut self, prefix: Output) { + if self.node.is_final { + self.node.final_output = prefix.cat(self.node.final_output); + } + for t in &mut self.node.trans { + t.out = prefix.cat(t.out); + } + if let Some(ref mut t) = self.last { + t.out = prefix.cat(t.out); + } + } +} + +impl Clone for BuilderNode { + fn clone(&self) -> BuilderNode { + BuilderNode { + is_final: self.is_final, + final_output: self.final_output, + trans: self.trans.clone(), + } + } + + fn clone_from(&mut self, source: &BuilderNode) { + self.is_final = source.is_final; + self.final_output = source.final_output; + self.trans.clear(); + self.trans.extend(source.trans.iter()); + } +} + +impl Default for BuilderNode { + fn default() -> BuilderNode { + BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![], + } + } +} diff --git 
a/vendor/fst/src/raw/common_inputs.rs b/vendor/fst/src/raw/common_inputs.rs new file mode 100644 index 000000000..f7d7d23b1 --- /dev/null +++ b/vendor/fst/src/raw/common_inputs.rs @@ -0,0 +1,289 @@ +pub const COMMON_INPUTS: [u8; 256] = [ + 84, // '\x00' + 85, // '\x01' + 86, // '\x02' + 87, // '\x03' + 88, // '\x04' + 89, // '\x05' + 90, // '\x06' + 91, // '\x07' + 92, // '\x08' + 93, // '\t' + 94, // '\n' + 95, // '\x0b' + 96, // '\x0c' + 97, // '\r' + 98, // '\x0e' + 99, // '\x0f' + 100, // '\x10' + 101, // '\x11' + 102, // '\x12' + 103, // '\x13' + 104, // '\x14' + 105, // '\x15' + 106, // '\x16' + 107, // '\x17' + 108, // '\x18' + 109, // '\x19' + 110, // '\x1a' + 111, // '\x1b' + 112, // '\x1c' + 113, // '\x1d' + 114, // '\x1e' + 115, // '\x1f' + 116, // ' ' + 80, // '!' + 117, // '"' + 118, // '#' + 79, // '$' + 39, // '%' + 30, // '&' + 81, // "'" + 75, // '(' + 74, // ')' + 82, // '*' + 57, // '+' + 66, // ',' + 16, // '-' + 12, // '.' + 2, // '/' + 19, // '0' + 20, // '1' + 21, // '2' + 27, // '3' + 32, // '4' + 29, // '5' + 35, // '6' + 36, // '7' + 37, // '8' + 34, // '9' + 24, // ':' + 73, // ';' + 119, // '<' + 23, // '=' + 120, // '>' + 40, // '?' 
+ 83, // '@' + 44, // 'A' + 48, // 'B' + 42, // 'C' + 43, // 'D' + 49, // 'E' + 46, // 'F' + 62, // 'G' + 61, // 'H' + 47, // 'I' + 69, // 'J' + 68, // 'K' + 58, // 'L' + 56, // 'M' + 55, // 'N' + 59, // 'O' + 51, // 'P' + 72, // 'Q' + 54, // 'R' + 45, // 'S' + 52, // 'T' + 64, // 'U' + 65, // 'V' + 63, // 'W' + 71, // 'X' + 67, // 'Y' + 70, // 'Z' + 77, // '[' + 121, // '\\' + 78, // ']' + 122, // '^' + 31, // '_' + 123, // '`' + 4, // 'a' + 25, // 'b' + 9, // 'c' + 17, // 'd' + 1, // 'e' + 26, // 'f' + 22, // 'g' + 13, // 'h' + 7, // 'i' + 50, // 'j' + 38, // 'k' + 14, // 'l' + 15, // 'm' + 10, // 'n' + 3, // 'o' + 8, // 'p' + 60, // 'q' + 6, // 'r' + 5, // 's' + 0, // 't' + 18, // 'u' + 33, // 'v' + 11, // 'w' + 41, // 'x' + 28, // 'y' + 53, // 'z' + 124, // '{' + 125, // '|' + 126, // '}' + 76, // '~' + 127, // '\x7f' + 128, // '\x80' + 129, // '\x81' + 130, // '\x82' + 131, // '\x83' + 132, // '\x84' + 133, // '\x85' + 134, // '\x86' + 135, // '\x87' + 136, // '\x88' + 137, // '\x89' + 138, // '\x8a' + 139, // '\x8b' + 140, // '\x8c' + 141, // '\x8d' + 142, // '\x8e' + 143, // '\x8f' + 144, // '\x90' + 145, // '\x91' + 146, // '\x92' + 147, // '\x93' + 148, // '\x94' + 149, // '\x95' + 150, // '\x96' + 151, // '\x97' + 152, // '\x98' + 153, // '\x99' + 154, // '\x9a' + 155, // '\x9b' + 156, // '\x9c' + 157, // '\x9d' + 158, // '\x9e' + 159, // '\x9f' + 160, // '\xa0' + 161, // '¡' + 162, // '¢' + 163, // '£' + 164, // '¤' + 165, // '¥' + 166, // '¦' + 167, // '§' + 168, // '¨' + 169, // '©' + 170, // 'ª' + 171, // '«' + 172, // '¬' + 173, // '\xad' + 174, // '®' + 175, // '¯' + 176, // '°' + 177, // '±' + 178, // '²' + 179, // '³' + 180, // '´' + 181, // 'µ' + 182, // '¶' + 183, // '·' + 184, // '¸' + 185, // '¹' + 186, // 'º' + 187, // '»' + 188, // '¼' + 189, // '½' + 190, // '¾' + 191, // '¿' + 192, // 'À' + 193, // 'Á' + 194, // 'Â' + 195, // 'Ã' + 196, // 'Ä' + 197, // 'Å' + 198, // 'Æ' + 199, // 'Ç' + 200, // 'È' + 201, // 'É' + 202, // 'Ê' + 203, // 'Ë' 
+ 204, // 'Ì' + 205, // 'Í' + 206, // 'Î' + 207, // 'Ï' + 208, // 'Ð' + 209, // 'Ñ' + 210, // 'Ò' + 211, // 'Ó' + 212, // 'Ô' + 213, // 'Õ' + 214, // 'Ö' + 215, // '×' + 216, // 'Ø' + 217, // 'Ù' + 218, // 'Ú' + 219, // 'Û' + 220, // 'Ü' + 221, // 'Ý' + 222, // 'Þ' + 223, // 'ß' + 224, // 'à' + 225, // 'á' + 226, // 'â' + 227, // 'ã' + 228, // 'ä' + 229, // 'å' + 230, // 'æ' + 231, // 'ç' + 232, // 'è' + 233, // 'é' + 234, // 'ê' + 235, // 'ë' + 236, // 'ì' + 237, // 'í' + 238, // 'î' + 239, // 'ï' + 240, // 'ð' + 241, // 'ñ' + 242, // 'ò' + 243, // 'ó' + 244, // 'ô' + 245, // 'õ' + 246, // 'ö' + 247, // '÷' + 248, // 'ø' + 249, // 'ù' + 250, // 'ú' + 251, // 'û' + 252, // 'ü' + 253, // 'ý' + 254, // 'þ' + 255, // 'ÿ' +]; + +pub const COMMON_INPUTS_INV: [u8; 256] = [ + b't', b'e', b'/', b'o', b'a', b's', b'r', b'i', b'p', b'c', b'n', b'w', + b'.', b'h', b'l', b'm', b'-', b'd', b'u', b'0', b'1', b'2', b'g', b'=', + b':', b'b', b'f', b'3', b'y', b'5', b'&', b'_', b'4', b'v', b'9', b'6', + b'7', b'8', b'k', b'%', b'?', b'x', b'C', b'D', b'A', b'S', b'F', b'I', + b'B', b'E', b'j', b'P', b'T', b'z', b'R', b'N', b'M', b'+', b'L', b'O', + b'q', b'H', b'G', b'W', b'U', b'V', b',', b'Y', b'K', b'J', b'Z', b'X', + b'Q', b';', b')', b'(', b'~', b'[', b']', b'$', b'!', b'\'', b'*', b'@', + b'\x00', b'\x01', b'\x02', b'\x03', b'\x04', b'\x05', b'\x06', b'\x07', + b'\x08', b'\t', b'\n', b'\x0b', b'\x0c', b'\r', b'\x0e', b'\x0f', b'\x10', + b'\x11', b'\x12', b'\x13', b'\x14', b'\x15', b'\x16', b'\x17', b'\x18', + b'\x19', b'\x1a', b'\x1b', b'\x1c', b'\x1d', b'\x1e', b'\x1f', b' ', b'"', + b'#', b'<', b'>', b'\\', b'^', b'`', b'{', b'|', b'}', b'\x7f', b'\x80', + b'\x81', b'\x82', b'\x83', b'\x84', b'\x85', b'\x86', b'\x87', b'\x88', + b'\x89', b'\x8a', b'\x8b', b'\x8c', b'\x8d', b'\x8e', b'\x8f', b'\x90', + b'\x91', b'\x92', b'\x93', b'\x94', b'\x95', b'\x96', b'\x97', b'\x98', + b'\x99', b'\x9a', b'\x9b', b'\x9c', b'\x9d', b'\x9e', b'\x9f', b'\xa0', + b'\xa1', b'\xa2', b'\xa3', 
b'\xa4', b'\xa5', b'\xa6', b'\xa7', b'\xa8', + b'\xa9', b'\xaa', b'\xab', b'\xac', b'\xad', b'\xae', b'\xaf', b'\xb0', + b'\xb1', b'\xb2', b'\xb3', b'\xb4', b'\xb5', b'\xb6', b'\xb7', b'\xb8', + b'\xb9', b'\xba', b'\xbb', b'\xbc', b'\xbd', b'\xbe', b'\xbf', b'\xc0', + b'\xc1', b'\xc2', b'\xc3', b'\xc4', b'\xc5', b'\xc6', b'\xc7', b'\xc8', + b'\xc9', b'\xca', b'\xcb', b'\xcc', b'\xcd', b'\xce', b'\xcf', b'\xd0', + b'\xd1', b'\xd2', b'\xd3', b'\xd4', b'\xd5', b'\xd6', b'\xd7', b'\xd8', + b'\xd9', b'\xda', b'\xdb', b'\xdc', b'\xdd', b'\xde', b'\xdf', b'\xe0', + b'\xe1', b'\xe2', b'\xe3', b'\xe4', b'\xe5', b'\xe6', b'\xe7', b'\xe8', + b'\xe9', b'\xea', b'\xeb', b'\xec', b'\xed', b'\xee', b'\xef', b'\xf0', + b'\xf1', b'\xf2', b'\xf3', b'\xf4', b'\xf5', b'\xf6', b'\xf7', b'\xf8', + b'\xf9', b'\xfa', b'\xfb', b'\xfc', b'\xfd', b'\xfe', b'\xff', +]; diff --git a/vendor/fst/src/raw/counting_writer.rs b/vendor/fst/src/raw/counting_writer.rs new file mode 100644 index 000000000..70d9315ff --- /dev/null +++ b/vendor/fst/src/raw/counting_writer.rs @@ -0,0 +1,70 @@ +use std::io; + +use crate::raw::crc32::CheckSummer; + +/// Wraps any writer that counts and checksums bytes written. +pub struct CountingWriter<W> { + wtr: W, + cnt: u64, + summer: CheckSummer, +} + +impl<W: io::Write> CountingWriter<W> { + /// Wrap the given writer with a counter. + pub fn new(wtr: W) -> CountingWriter<W> { + CountingWriter { wtr, cnt: 0, summer: CheckSummer::new() } + } + + /// Return the total number of bytes written to the underlying writer. + /// + /// The count returned is the sum of all counts resulting from a call + /// to `write`. + pub fn count(&self) -> u64 { + self.cnt + } + + /// Returns the masked CRC32C checksum of the bytes written so far. + /// + /// This "masked" checksum is the same one used by the Snappy frame format. + /// Masking is supposed to make the checksum robust with respect to data + /// that contains the checksum itself. 
+ pub fn masked_checksum(&self) -> u32 { + self.summer.masked() + } + + /// Unwrap the counting writer and return the inner writer. + pub fn into_inner(self) -> W { + self.wtr + } + + /// Gets a reference to the underlying writer. + pub fn get_ref(&self) -> &W { + &self.wtr + } +} + +impl<W: io::Write> io::Write for CountingWriter<W> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.summer.update(buf); + let n = self.wtr.write(buf)?; + self.cnt += n as u64; + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.wtr.flush() + } +} + +#[cfg(test)] +mod tests { + use super::CountingWriter; + use std::io::Write; + + #[test] + fn counts_bytes() { + let mut wtr = CountingWriter::new(vec![]); + wtr.write_all(b"foobar").unwrap(); + assert_eq!(wtr.count(), 6); + } +} diff --git a/vendor/fst/src/raw/crc32.rs b/vendor/fst/src/raw/crc32.rs new file mode 100644 index 000000000..06b444205 --- /dev/null +++ b/vendor/fst/src/raw/crc32.rs @@ -0,0 +1,59 @@ +use crate::bytes; +use crate::raw::crc32_table::{TABLE, TABLE16}; + +/// Provides a simple API to perform a rolling CRC32C checksum. +#[derive(Clone, Copy, Debug)] +pub struct CheckSummer { + sum: u32, +} + +impl CheckSummer { + /// Create a new checksummer that can compute CRC32C checksums on arbitrary + /// bytes. + pub fn new() -> CheckSummer { + CheckSummer { sum: 0 } + } + + /// Returns the "masked" CRC32 checksum of the data so far using the + /// Castagnoli polynomial. This "masked" checksum is the same one used + /// by the Snappy frame format. Masking is supposed to make the checksum + /// robust with respect to data that contains the checksum itself. + pub fn masked(&self) -> u32 { + let sum = self.sum; + (sum.wrapping_shr(15) | sum.wrapping_shl(17)).wrapping_add(0xA282EAD8) + } + + /// Update the current checksum with the checksum for the given bytes. 
+ pub fn update(&mut self, buf: &[u8]) { + self.sum = crc32c_slice16(self.sum, buf); + } +} + +/// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial. +fn crc32c_slice16(prev: u32, mut buf: &[u8]) -> u32 { + let mut crc: u32 = !prev; + while buf.len() >= 16 { + crc ^= bytes::read_u32_le(buf); + crc = TABLE16[0][buf[15] as usize] + ^ TABLE16[1][buf[14] as usize] + ^ TABLE16[2][buf[13] as usize] + ^ TABLE16[3][buf[12] as usize] + ^ TABLE16[4][buf[11] as usize] + ^ TABLE16[5][buf[10] as usize] + ^ TABLE16[6][buf[9] as usize] + ^ TABLE16[7][buf[8] as usize] + ^ TABLE16[8][buf[7] as usize] + ^ TABLE16[9][buf[6] as usize] + ^ TABLE16[10][buf[5] as usize] + ^ TABLE16[11][buf[4] as usize] + ^ TABLE16[12][(crc >> 24) as u8 as usize] + ^ TABLE16[13][(crc >> 16) as u8 as usize] + ^ TABLE16[14][(crc >> 8) as u8 as usize] + ^ TABLE16[15][(crc) as u8 as usize]; + buf = &buf[16..]; + } + for &b in buf { + crc = TABLE[((crc as u8) ^ b) as usize] ^ (crc >> 8); + } + !crc +} diff --git a/vendor/fst/src/raw/crc32_table.rs b/vendor/fst/src/raw/crc32_table.rs new file mode 100644 index 000000000..7821b5d86 --- /dev/null +++ b/vendor/fst/src/raw/crc32_table.rs @@ -0,0 +1,2 @@ +// Generated by build.rs. +include!(concat!(env!("OUT_DIR"), "/crc32_table.rs")); diff --git a/vendor/fst/src/raw/error.rs b/vendor/fst/src/raw/error.rs new file mode 100644 index 000000000..b28377179 --- /dev/null +++ b/vendor/fst/src/raw/error.rs @@ -0,0 +1,171 @@ +use std::fmt; +use std::str; +use std::string::FromUtf8Error; + +use crate::raw::FstType; + +/// An error that occurred while using a finite state transducer. +/// +/// This enum is non-exhaustive. New variants may be added to it in +/// compatible releases. +pub enum Error { + /// A version mismatch occurred while reading a finite state transducer. + /// + /// This occurs when the API version (of the crate) does not match the + /// version encoded in the finite state transducer. 
+ /// + /// When this error is encountered, there are only two ways to fix it: + /// + /// 1. Change the version of the library to one that is compatible with + /// the given finite state transducer. + /// 2. Rebuild the finite state transducer. + Version { + /// The expected version, which is hard-coded into the current version + /// of this crate. + expected: u64, + /// The version read from the finite state transducer. + got: u64, + }, + /// An unexpected error occurred while reading a finite state transducer. + /// Usually this occurs because the data is corrupted or is not actually + /// a finite state transducer serialized by this library. + Format { + /// The number of bytes given to the FST constructor. + size: usize, + }, + /// An error that is returned if verification of an FST fails because of a + /// checksum mismatch. + ChecksumMismatch { + /// The checksum that was expected. + expected: u32, + /// The checksum that was actually computed. + got: u32, + }, + /// An error that is returned if the caller attempts to verify an FST + /// that does not have a checksum, as is the case for all FSTs generated + /// by this crate before version `0.4`. + ChecksumMissing, + /// A duplicate key was inserted into a finite state transducer, which is + /// not allowed. + DuplicateKey { + /// The duplicate key. + got: Vec<u8>, + }, + /// A key was inserted out of order into a finite state transducer. + /// + /// Keys must always be inserted in lexicographic order. + OutOfOrder { + /// The last key successfully inserted. + previous: Vec<u8>, + /// The key that caused this error to occur. + got: Vec<u8>, + }, + /// A finite state transducer with an unexpected type was found. + /// + /// This is not currently used in this crate, but callers may wish to + /// employ its use for alternative data structures implemented on top of + /// finite state transducers. + WrongType { + /// The expected finite state transducer type. 
+ expected: FstType, + /// The type read from a finite state transducer. + got: FstType, + }, + /// An error that occurred when trying to decode a UTF-8 byte key. + FromUtf8(FromUtf8Error), + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) + #[doc(hidden)] + __Nonexhaustive, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Error::FromUtf8(ref err) => err.fmt(f), + Error::Version { expected, got } => write!( + f, + "\ +Error opening FST: expected API version {}, got API version {}. \ +It looks like the FST you're trying to open is either not an FST file or it \ +was generated with a different version of the 'fst' crate. You'll either need \ +to change the version of the 'fst' crate you're using, or re-generate the +FST.", + expected, got + ), + Error::Format { size } => write!( + f, + "\ +Error opening FST with size {} bytes: An unknown error occurred. This \ +usually means you're trying to read data that isn't actually an encoded FST.", + size + ), + Error::ChecksumMismatch { expected, got } => write!( + f, + "FST verification failed: expected checksum of {} but got {}", + expected, got, + ), + Error::ChecksumMissing => write!( + f, + "FST verification failed: FST does not contain a checksum", + ), + Error::DuplicateKey { ref got } => write!( + f, + "Error inserting duplicate key: '{}'.", + format_bytes(&*got) + ), + Error::OutOfOrder { ref previous, ref got } => write!( + f, + "\ +Error inserting out-of-order key: '{}'. (Previous key was '{}'.) 
Keys must be \ +inserted in lexicographic order.", + format_bytes(&*got), + format_bytes(&*previous) + ), + Error::WrongType { expected, got } => write!( + f, + "\ +Error opening FST: expected type '{}', got type '{}'.", + expected, got + ), + Error::__Nonexhaustive => unreachable!(), + } + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self { + Error::FromUtf8(ref err) => Some(err), + _ => None, + } + } +} + +impl From<FromUtf8Error> for Error { + #[inline] + fn from(err: FromUtf8Error) -> Error { + Error::FromUtf8(err) + } +} + +/// Attempt to convert an arbitrary byte string to a more convenient display +/// form. +/// +/// Essentially, try to decode the bytes as UTF-8 and show that. Failing that, +/// just show the sequence of bytes. +fn format_bytes(bytes: &[u8]) -> String { + match str::from_utf8(bytes) { + Ok(s) => s.to_owned(), + Err(_) => format!("{:?}", bytes), + } +} diff --git a/vendor/fst/src/raw/mod.rs b/vendor/fst/src/raw/mod.rs new file mode 100644 index 000000000..6cc0eeb2c --- /dev/null +++ b/vendor/fst/src/raw/mod.rs @@ -0,0 +1,1382 @@ +/*! +Operations on raw finite state transducers. + +This sub-module exposes the guts of a finite state transducer. Many parts of +it, such as construction and traversal, are mirrored in the `set` and `map` +sub-modules. Other parts of it, such as direct access to nodes and transitions +in the transducer, do not have any analog. + +# Overview of types + +`Fst` is a read only interface to pre-constructed finite state transducers. +`Node` is a read only interface to a single node in a transducer. `Builder` is +used to create new finite state transducers. (Once a transducer is created, it +can never be modified.) `Stream` is a stream of all inputs and outputs in a +transducer. `StreamBuilder` builds range queries. 
`OpBuilder` collects streams +and executes set operations like `union` or `intersection` on them with the +option of specifying a merge strategy for output values. + +Most of the rest of the types are streams from set operations. +*/ +use std::cmp; +use std::fmt; + +use crate::automaton::{AlwaysMatch, Automaton}; +use crate::bytes; +use crate::error::Result; +use crate::stream::{IntoStreamer, Streamer}; + +pub use crate::raw::build::Builder; +pub use crate::raw::error::Error; +pub use crate::raw::node::{Node, Transitions}; +pub use crate::raw::ops::{ + Difference, IndexedValue, Intersection, OpBuilder, SymmetricDifference, + Union, +}; + +mod build; +mod common_inputs; +mod counting_writer; +mod crc32; +mod crc32_table; +mod error; +mod node; +mod ops; +mod registry; +mod registry_minimal; +#[cfg(test)] +mod tests; + +/// The API version of this crate. +/// +/// This version number is written to every finite state transducer created by +/// this crate. When a finite state transducer is read, its version number is +/// checked against this value. +/// +/// Currently, any version mismatch results in an error. Fixing this requires +/// regenerating the finite state transducer or switching to a version of this +/// crate that is compatible with the serialized transducer. This particular +/// behavior may be relaxed in future versions. +pub const VERSION: u64 = 3; + +/// A sentinel value used to indicate an empty final state. +const EMPTY_ADDRESS: CompiledAddr = 0; + +/// A sentinel value used to indicate an invalid state. +/// +/// This is never the address of a node in a serialized transducer. +const NONE_ADDRESS: CompiledAddr = 1; + +/// FstType is a convention used to indicate the type of the underlying +/// transducer. +/// +/// This crate reserves the range 0-255 (inclusive) but currently leaves the +/// meaning of 0-255 unspecified. +pub type FstType = u64; + +/// CompiledAddr is the type used to address nodes in a finite state +/// transducer. 
+/// +/// It is most useful as a pointer to nodes. It can be used in the `Fst::node` +/// method to resolve the pointer. +pub type CompiledAddr = usize; + +/// An acyclic deterministic finite state transducer. +/// +/// # How does it work? +/// +/// The short answer: it's just like a prefix trie, which compresses keys +/// based only on their prefixes, except that a automaton/transducer also +/// compresses suffixes. +/// +/// The longer answer is that keys in an automaton are stored only in the +/// transitions from one state to another. A key can be acquired by tracing +/// a path from the root of the automaton to any match state. The inputs along +/// each transition are concatenated. Once a match state is reached, the +/// concatenation of inputs up until that point corresponds to a single key. +/// +/// But why is it called a transducer instead of an automaton? A finite state +/// transducer is just like a finite state automaton, except that it has output +/// transitions in addition to input transitions. Namely, the value associated +/// with any particular key is determined by summing the outputs along every +/// input transition that leads to the key's corresponding match state. +/// +/// This is best demonstrated with a couple images. First, let's ignore the +/// "transducer" aspect and focus on a plain automaton. +/// +/// Consider that your keys are abbreviations of some of the months in the +/// Gregorian calendar: +/// +/// ```plain +/// jan +/// feb +/// mar +/// may +/// jun +/// jul +/// ``` +/// +/// The corresponding automaton that stores all of these as keys looks like +/// this: +/// +/// ![finite state automaton](https://burntsushi.net/stuff/months-set.png) +/// +/// Notice here how the prefix and suffix of `jan` and `jun` are shared. +/// Similarly, the prefixes of `jun` and `jul` are shared and the prefixes +/// of `mar` and `may` are shared. 
+/// +/// All of the keys from this automaton can be enumerated in lexicographic +/// order by following every transition from each node in lexicographic +/// order. Since it is acyclic, the procedure will terminate. +/// +/// A key can be found by tracing it through the transitions in the automaton. +/// For example, the key `aug` is known not to be in the automaton by only +/// visiting the root state (because there is no `a` transition). For another +/// example, the key `jax` is known not to be in the set only after moving +/// through the transitions for `j` and `a`. Namely, after those transitions +/// are followed, there are no transitions for `x`. +/// +/// Notice here that looking up a key is proportional the length of the key +/// itself. Namely, lookup time is not affected by the number of keys in the +/// automaton! +/// +/// Additionally, notice that the automaton exploits the fact that many keys +/// share common prefixes and suffixes. For example, `jun` and `jul` are +/// represented with no more states than would be required to represent either +/// one on its own. Instead, the only change is a single extra transition. This +/// is a form of compression and is key to how the automatons produced by this +/// crate are so small. +/// +/// Let's move on to finite state transducers. Consider the same set of keys +/// as above, but let's assign their numeric month values: +/// +/// ```plain +/// jan,1 +/// feb,2 +/// mar,3 +/// may,5 +/// jun,6 +/// jul,7 +/// ``` +/// +/// The corresponding transducer looks very similar to the automaton above, +/// except outputs have been added to some of the transitions: +/// +/// ![finite state transducer](https://burntsushi.net/stuff/months-map.png) +/// +/// All of the operations with a transducer are the same as described above +/// for automatons. Additionally, the same compression techniques are used: +/// common prefixes and suffixes in keys are exploited. 
+///
+/// The key difference is that some transitions have been given an output.
+/// As one follows input transitions, one must sum the outputs as they
+/// are seen. (A transition with no output represents the additive identity,
+/// or `0` in this case.) For example, when looking up `feb`, the transition
+/// `f` has output `2`, the transition `e` has output `0`, and the transition
+/// `b` also has output `0`. The sum of these is `2`, which is exactly the
+/// value we associated with `feb`.
+///
+/// For another more interesting example, consider `jul`. The `j` transition
+/// has output `1`, the `u` transition has output `5` and the `l` transition
+/// has output `1`. Summing these together gets us `7`, which is again the
+/// correct value associated with `jul`. Notice that if we instead looked up
+/// the `jun` key, then the `n` transition would be followed instead of the
+/// `l` transition, which has no output. Therefore, the `jun` key equals
+/// `1+5+0=6`.
+///
+/// The trick to transducers is that there exists a unique path through the
+/// transducer for every key, and its outputs are stored appropriately along
+/// this path such that the correct value is returned when they are all summed
+/// together. This process also enables the data that makes up each value to be
+/// shared across many values in the transducer in exactly the same way that
+/// keys are shared. This is yet another form of compression!
+///
+/// # Bonus: a billion strings
+///
+/// The amount of compression one can get from automata can be absolutely
+/// ridiculous. Consider the particular case of storing all billion strings
+/// in the range `0000000001-1000000000`, e.g.,
+///
+/// ```plain
+/// 0000000001
+/// 0000000002
+/// ...
+/// 0000000100
+/// 0000000101
+/// ...
+/// 0999999999
+/// 1000000000
+/// ```
+///
+/// The corresponding automaton looks like this:
+///
+/// ![finite state automaton - one billion strings](https://burntsushi.net/stuff/one-billion.png)
+///
+/// Indeed, the on disk size of this automaton is a mere **251 bytes**.
+///
+/// Of course, this is a bit of a pathological best case, but it does serve
+/// to show how good compression can be in the optimal case.
+///
+/// Also, check out the
+/// [corresponding transducer](https://burntsushi.net/stuff/one-billion-map.svg)
+/// that maps each string to its integer value. It's a bit bigger, but still
+/// only takes up **896 bytes** of space on disk. This demonstrates that
+/// output values are also compressible.
+///
+/// # Does this crate produce minimal transducers?
+///
+/// For any non-trivial sized set of keys, it is unlikely that this crate will
+/// produce a minimal transducer. As far as this author knows, guaranteeing a
+/// minimal transducer requires working memory proportional to the number of
+/// states. This can be quite costly and is anathema to the main design goal of
+/// this crate: provide the ability to work with gigantic sets of strings with
+/// constant memory overhead.
+///
+/// Instead, construction of a finite state transducer uses a cache of
+/// states. More frequently used states are cached and reused, which provides
+/// reasonably good compression ratios. (No comprehensive benchmarks exist to
+/// back up this claim.)
+///
+/// It is possible that this crate may expose a way to guarantee minimal
+/// construction of transducers at the expense of exorbitant memory
+/// requirements.
+///
+/// # Bibliography
+///
+/// I initially got the idea to use finite state transducers to represent
+/// ordered sets/maps from
+/// [Michael
+/// McCandless'](http://blog.mikemccandless.com/2010/12/using-finite-state-transducers-in.html)
+/// work on incorporating transducers in Lucene.
+/// +/// However, my work would also not have been possible without the hard work +/// of many academics, especially +/// [Jan Daciuk](http://galaxy.eti.pg.gda.pl/katedry/kiw/pracownicy/Jan.Daciuk/personal/). +/// +/// * [Incremental construction of minimal acyclic finite-state automata](https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601) +/// (Section 3 provides a decent overview of the algorithm used to construct +/// transducers in this crate, assuming all outputs are `0`.) +/// * [Direct Construction of Minimal Acyclic Subsequential Transducers](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.3698&rep=rep1&type=pdf) +/// (The whole thing. The proof is dense but illuminating. The algorithm at +/// the end is the money shot, namely, it incorporates output values.) +/// * [Experiments with Automata Compression](https://www.researchgate.net/profile/Jiri-Dvorsky/publication/221568039_Word_Random_Access_Compression/links/0c96052c095630d5b3000000/Word-Random-Access-Compression.pdf#page=116), [Smaller Representation of Finite State Automata](https://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf) +/// (various compression techniques for representing states/transitions) +/// * [Jan Daciuk's dissertation](http://www.pg.gda.pl/~jandac/thesis.ps.gz) +/// (excellent for in depth overview) +/// * [Comparison of Construction Algorithms for Minimal, Acyclic, Deterministic, Finite-State Automata from Sets of Strings](https://www.cs.mun.ca/~harold/Courses/Old/CS4750/Diary/q3p2qx4lv71m5vew.pdf) +/// (excellent for surface level overview) +#[derive(Clone)] +pub struct Fst<D> { + meta: Meta, + data: D, +} + +#[derive(Debug, Clone)] +struct Meta { + version: u64, + root_addr: CompiledAddr, + ty: FstType, + len: usize, + /// A checksum is missing when the FST version is <= 2. (Checksums were + /// added in version 3.) 
+ checksum: Option<u32>, +} + +impl Fst<Vec<u8>> { + /// Create a new FST from an iterator of lexicographically ordered byte + /// strings. Every key's value is set to `0`. + /// + /// If the iterator does not yield values in lexicographic order, then an + /// error is returned. + /// + /// Note that this is a convenience function to build an FST in memory. + /// To build an FST that streams to an arbitrary `io::Write`, use + /// `raw::Builder`. + pub fn from_iter_set<K, I>(iter: I) -> Result<Fst<Vec<u8>>> + where + K: AsRef<[u8]>, + I: IntoIterator<Item = K>, + { + let mut builder = Builder::memory(); + for k in iter { + builder.add(k)?; + } + Ok(builder.into_fst()) + } + + /// Create a new FST from an iterator of lexicographically ordered byte + /// strings. The iterator should consist of tuples, where the first element + /// is the byte string and the second element is its corresponding value. + /// + /// If the iterator does not yield unique keys in lexicographic order, then + /// an error is returned. + /// + /// Note that this is a convenience function to build an FST in memory. + /// To build an FST that streams to an arbitrary `io::Write`, use + /// `raw::Builder`. + pub fn from_iter_map<K, I>(iter: I) -> Result<Fst<Vec<u8>>> + where + K: AsRef<[u8]>, + I: IntoIterator<Item = (K, u64)>, + { + let mut builder = Builder::memory(); + for (k, v) in iter { + builder.insert(k, v)?; + } + Ok(builder.into_fst()) + } +} + +impl<D: AsRef<[u8]>> Fst<D> { + /// Creates a transducer from its representation as a raw byte sequence. + /// + /// This operation is intentionally very cheap (no allocations and no + /// copies). In particular, no verification on the integrity of the + /// FST is performed. Callers may opt into integrity checks via the + /// [`Fst::verify`](struct.Fst.html#method.verify) method. + /// + /// The fst must have been written with a compatible finite state + /// transducer builder (`Builder` qualifies). 
If the format is invalid or + /// if there is a mismatch between the API version of this library and the + /// fst, then an error is returned. + #[inline] + pub fn new(data: D) -> Result<Fst<D>> { + let bytes = data.as_ref(); + if bytes.len() < 36 { + return Err(Error::Format { size: bytes.len() }.into()); + } + // The read_u64 unwraps below are OK because they can never fail. + // They can only fail when there is an IO error or if there is an + // unexpected EOF. However, we are reading from a byte slice (no + // IO errors possible) and we've confirmed the byte slice is at least + // N bytes (no unexpected EOF). + let version = bytes::read_u64_le(&bytes); + if version == 0 || version > VERSION { + return Err( + Error::Version { expected: VERSION, got: version }.into() + ); + } + let ty = bytes::read_u64_le(&bytes[8..]); + + let (end, checksum) = if version <= 2 { + (bytes.len(), None) + } else { + let checksum = bytes::read_u32_le(&bytes[bytes.len() - 4..]); + (bytes.len() - 4, Some(checksum)) + }; + let root_addr = { + let last = &bytes[end - 8..]; + u64_to_usize(bytes::read_u64_le(last)) + }; + let len = { + let last2 = &bytes[end - 16..]; + u64_to_usize(bytes::read_u64_le(last2)) + }; + // The root node is always the last node written, so its address should + // be near the end. After the root node is written, we still have to + // write the root *address* and the number of keys in the FST, along + // with the checksum. That's 20 bytes. The extra byte used below (21 + // and not 20) comes from the fact that the root address points to + // the last byte in the root node, rather than the byte immediately + // following the root node. + // + // If this check passes, it is still possible that the FST is invalid + // but probably unlikely. If this check reports a false positive, then + // the program will probably panic. In the worst case, the FST will + // operate but be subtly wrong. 
(This would require the bytes to be in + // a format expected by an FST, which is incredibly unlikely.) + // + // The special check for EMPTY_ADDRESS is needed since an empty FST + // has a root node that is empty and final, which means it has the + // special address `0`. In that case, the FST is the smallest it can + // be: the version, type, root address and number of nodes. That's + // 36 bytes (8 byte u64 each). + // + // And finally, our calculation changes somewhat based on version. + // If the FST version is less than 3, then it does not have a checksum. + let (empty_total, addr_offset) = + if version <= 2 { (32, 17) } else { (36, 21) }; + if (root_addr == EMPTY_ADDRESS && bytes.len() != empty_total) + && root_addr + addr_offset != bytes.len() + { + return Err(Error::Format { size: bytes.len() }.into()); + } + let meta = Meta { version, root_addr, ty, len, checksum }; + Ok(Fst { meta, data }) + } + + /// Retrieves the value associated with a key. + /// + /// If the key does not exist, then `None` is returned. + #[inline] + pub fn get<B: AsRef<[u8]>>(&self, key: B) -> Option<Output> { + self.as_ref().get(key.as_ref()) + } + + /// Returns true if and only if the given key is in this FST. + #[inline] + pub fn contains_key<B: AsRef<[u8]>>(&self, key: B) -> bool { + self.as_ref().contains_key(key.as_ref()) + } + + /// Retrieves the key associated with the given value. + /// + /// This is like `get_key_into`, but will return the key itself without + /// allowing the caller to reuse an allocation. + /// + /// If the given value does not exist, then `None` is returned. + /// + /// The values in this FST are not monotonically increasing when sorted + /// lexicographically by key, then this routine has unspecified behavior. + #[inline] + pub fn get_key(&self, value: u64) -> Option<Vec<u8>> { + let mut key = vec![]; + if self.get_key_into(value, &mut key) { + Some(key) + } else { + None + } + } + + /// Retrieves the key associated with the given value. 
+    ///
+    /// If the given value does not exist, then `false` is returned. In this
+    /// case, the contents of `key` are unspecified.
+    ///
+    /// The given buffer is not cleared before the key is written to it.
+    ///
+    /// If the values in this FST are not monotonically increasing when sorted
+    /// lexicographically by key, then this routine has unspecified behavior.
+    #[inline]
+    pub fn get_key_into(&self, value: u64, key: &mut Vec<u8>) -> bool {
+        self.as_ref().get_key_into(value, key)
+    }
+
+    /// Return a lexicographically ordered stream of all key-value pairs in
+    /// this fst.
+    #[inline]
+    pub fn stream(&self) -> Stream<'_> {
+        StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream()
+    }
+
+    /// Return a builder for range queries.
+    ///
+    /// A range query returns a subset of key-value pairs in this fst in a
+    /// range given in lexicographic order.
+    #[inline]
+    pub fn range(&self) -> StreamBuilder<'_> {
+        StreamBuilder::new(self.as_ref(), AlwaysMatch)
+    }
+
+    /// Executes an automaton on the keys of this FST.
+    #[inline]
+    pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<'_, A> {
+        StreamBuilder::new(self.as_ref(), aut)
+    }
+
+    /// Executes an automaton on the keys of this FST and yields matching
+    /// keys along with the corresponding matching states in the given
+    /// automaton.
+    #[inline]
+    pub fn search_with_state<A: Automaton>(
+        &self,
+        aut: A,
+    ) -> StreamWithStateBuilder<'_, A> {
+        StreamWithStateBuilder::new(self.as_ref(), aut)
+    }
+
+    /// Returns the number of keys in this fst.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.as_ref().len()
+    }
+
+    /// Returns true if and only if this fst has no keys.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.as_ref().is_empty()
+    }
+
+    /// Returns the number of bytes used by this fst.
+    #[inline]
+    pub fn size(&self) -> usize {
+        self.as_ref().size()
+    }
+
+    /// Attempts to verify this FST by computing its checksum.
+ /// + /// This will scan over all of the bytes in the underlying FST, so this + /// may be an expensive operation depending on the size of the FST. + /// + /// This returns an error in two cases: + /// + /// 1. When a checksum does not exist, which is the case for FSTs that were + /// produced by the `fst` crate before version `0.4`. + /// 2. When the checksum in the FST does not match the computed checksum + /// performed by this procedure. + #[inline] + pub fn verify(&self) -> Result<()> { + use crate::raw::crc32::CheckSummer; + + let expected = match self.as_ref().meta.checksum { + None => return Err(Error::ChecksumMissing.into()), + Some(expected) => expected, + }; + let mut summer = CheckSummer::new(); + summer.update(&self.as_bytes()[..self.as_bytes().len() - 4]); + let got = summer.masked(); + if expected == got { + return Ok(()); + } + Err(Error::ChecksumMismatch { expected, got }.into()) + } + + /// Creates a new fst operation with this fst added to it. + /// + /// The `OpBuilder` type can be used to add additional fst streams + /// and perform set operations like union, intersection, difference and + /// symmetric difference on the keys of the fst. These set operations also + /// allow one to specify how conflicting values are merged in the stream. + #[inline] + pub fn op(&self) -> OpBuilder<'_> { + OpBuilder::new().add(self) + } + + /// Returns true if and only if the `self` fst is disjoint with the fst + /// `stream`. + /// + /// `stream` must be a lexicographically ordered sequence of byte strings + /// with associated values. + #[inline] + pub fn is_disjoint<'f, I, S>(&self, stream: I) -> bool + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + self.op().add(stream).intersection().next().is_none() + } + + /// Returns true if and only if the `self` fst is a subset of the fst + /// `stream`. 
+ /// + /// `stream` must be a lexicographically ordered sequence of byte strings + /// with associated values. + #[inline] + pub fn is_subset<'f, I, S>(&self, stream: I) -> bool + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + let mut op = self.op().add(stream).intersection(); + let mut count = 0; + while let Some(_) = op.next() { + count += 1; + } + count == self.len() + } + + /// Returns true if and only if the `self` fst is a superset of the fst + /// `stream`. + /// + /// `stream` must be a lexicographically ordered sequence of byte strings + /// with associated values. + #[inline] + pub fn is_superset<'f, I, S>(&self, stream: I) -> bool + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + let mut op = self.op().add(stream).union(); + let mut count = 0; + while let Some(_) = op.next() { + count += 1; + } + count == self.len() + } + + /// Returns the underlying type of this fst. + /// + /// FstType is a convention used to indicate the type of the underlying + /// transducer. + /// + /// This crate reserves the range 0-255 (inclusive) but currently leaves + /// the meaning of 0-255 unspecified. + #[inline] + pub fn fst_type(&self) -> FstType { + self.as_ref().fst_type() + } + + /// Returns the root node of this fst. + #[inline] + pub fn root(&self) -> Node<'_> { + self.as_ref().root() + } + + /// Returns the node at the given address. + /// + /// Node addresses can be obtained by reading transitions on `Node` values. + #[inline] + pub fn node(&self, addr: CompiledAddr) -> Node<'_> { + self.as_ref().node(addr) + } + + /// Returns a copy of the binary contents of this FST. + #[inline] + pub fn to_vec(&self) -> Vec<u8> { + self.as_ref().to_vec() + } + + /// Returns the binary contents of this FST. 
+ #[inline] + pub fn as_bytes(&self) -> &[u8] { + self.as_ref().as_bytes() + } + + #[inline] + fn as_ref(&self) -> FstRef { + FstRef { meta: &self.meta, data: self.data.as_ref() } + } +} + +impl<D> Fst<D> { + /// Returns the underlying data which constitutes the FST itself. + #[inline] + pub fn into_inner(self) -> D { + self.data + } + + /// Returns a borrow to the underlying data which constitutes the FST itself. + #[inline] + pub fn as_inner(&self) -> &D { + &self.data + } + + /// Maps the underlying data of the fst to another data type. + #[inline] + pub fn map_data<F, T>(self, mut f: F) -> Result<Fst<T>> + where + F: FnMut(D) -> T, + T: AsRef<[u8]>, + { + Fst::new(f(self.into_inner())) + } +} + +impl<'a, 'f, D: AsRef<[u8]>> IntoStreamer<'a> for &'f Fst<D> { + type Item = (&'a [u8], Output); + type Into = Stream<'f>; + + #[inline] + fn into_stream(self) -> Stream<'f> { + StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream() + } +} + +struct FstRef<'f> { + meta: &'f Meta, + data: &'f [u8], +} + +impl<'f> FstRef<'f> { + #[inline] + fn get(&self, key: &[u8]) -> Option<Output> { + let mut node = self.root(); + let mut out = Output::zero(); + for &b in key { + node = match node.find_input(b) { + None => return None, + Some(i) => { + let t = node.transition(i); + out = out.cat(t.out); + self.node(t.addr) + } + } + } + if !node.is_final() { + None + } else { + Some(out.cat(node.final_output())) + } + } + + #[inline] + fn contains_key(&self, key: &[u8]) -> bool { + let mut node = self.root(); + for &b in key { + node = match node.find_input(b) { + None => return false, + Some(i) => self.node(node.transition_addr(i)), + } + } + node.is_final() + } + + #[inline] + fn get_key_into(&self, mut value: u64, key: &mut Vec<u8>) -> bool { + let mut node = self.root(); + while value != 0 || !node.is_final() { + let trans = node + .transitions() + .take_while(|t| t.out.value() <= value) + .last(); + node = match trans { + None => return false, + Some(t) => { + value -= 
t.out.value(); + key.push(t.inp); + self.node(t.addr) + } + }; + } + true + } + + #[inline] + fn len(&self) -> usize { + self.meta.len + } + + #[inline] + fn is_empty(&self) -> bool { + self.meta.len == 0 + } + + #[inline] + fn size(&self) -> usize { + self.as_bytes().len() + } + + #[inline] + fn fst_type(&self) -> FstType { + self.meta.ty + } + + #[inline] + fn root_addr(&self) -> CompiledAddr { + self.meta.root_addr + } + + #[inline] + fn root(&self) -> Node<'f> { + self.node(self.root_addr()) + } + + #[inline] + fn node(&self, addr: CompiledAddr) -> Node<'f> { + Node::new(self.meta.version, addr, self.as_bytes()) + } + + #[inline] + fn to_vec(&self) -> Vec<u8> { + self.as_bytes().to_vec() + } + + #[inline] + fn as_bytes(&self) -> &'f [u8] { + self.data + } + + #[inline] + fn empty_final_output(&self) -> Option<Output> { + let root = self.root(); + if root.is_final() { + Some(root.final_output()) + } else { + None + } + } +} + +/// A builder for constructing range queries on streams. +/// +/// Once all bounds are set, one should call `into_stream` to get a +/// `Stream`. +/// +/// Bounds are not additive. That is, if `ge` is called twice on the same +/// builder, then the second setting wins. +/// +/// The `A` type parameter corresponds to an optional automaton to filter +/// the stream. By default, no filtering is done. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying fst. +pub struct StreamBuilder<'f, A = AlwaysMatch> { + fst: FstRef<'f>, + aut: A, + min: Bound, + max: Bound, +} + +impl<'f, A: Automaton> StreamBuilder<'f, A> { + fn new(fst: FstRef<'f>, aut: A) -> StreamBuilder<'f, A> { + StreamBuilder { + fst, + aut, + min: Bound::Unbounded, + max: Bound::Unbounded, + } + } + + /// Specify a greater-than-or-equal-to bound. + pub fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> { + self.min = Bound::Included(bound.as_ref().to_owned()); + self + } + + /// Specify a greater-than bound. 
+ pub fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> { + self.min = Bound::Excluded(bound.as_ref().to_owned()); + self + } + + /// Specify a less-than-or-equal-to bound. + pub fn le<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> { + self.max = Bound::Included(bound.as_ref().to_owned()); + self + } + + /// Specify a less-than bound. + pub fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> { + self.max = Bound::Excluded(bound.as_ref().to_owned()); + self + } +} + +impl<'a, 'f, A: Automaton> IntoStreamer<'a> for StreamBuilder<'f, A> { + type Item = (&'a [u8], Output); + type Into = Stream<'f, A>; + + fn into_stream(self) -> Stream<'f, A> { + Stream::new(self.fst, self.aut, self.min, self.max) + } +} + +/// A builder for constructing range queries on streams that include automaton +/// states. +/// +/// In general, one should use `StreamBuilder` unless you have a specific need +/// for accessing the states of the underlying automaton that is being used to +/// filter this stream. +/// +/// Once all bounds are set, one should call `into_stream` to get a +/// `Stream`. +/// +/// Bounds are not additive. That is, if `ge` is called twice on the same +/// builder, then the second setting wins. +/// +/// The `A` type parameter corresponds to an optional automaton to filter +/// the stream. By default, no filtering is done. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying fst. +pub struct StreamWithStateBuilder<'f, A = AlwaysMatch> { + fst: FstRef<'f>, + aut: A, + min: Bound, + max: Bound, +} + +impl<'f, A: Automaton> StreamWithStateBuilder<'f, A> { + fn new(fst: FstRef<'f>, aut: A) -> StreamWithStateBuilder<'f, A> { + StreamWithStateBuilder { + fst, + aut, + min: Bound::Unbounded, + max: Bound::Unbounded, + } + } + + /// Specify a greater-than-or-equal-to bound. 
+ pub fn ge<T: AsRef<[u8]>>( + mut self, + bound: T, + ) -> StreamWithStateBuilder<'f, A> { + self.min = Bound::Included(bound.as_ref().to_owned()); + self + } + + /// Specify a greater-than bound. + pub fn gt<T: AsRef<[u8]>>( + mut self, + bound: T, + ) -> StreamWithStateBuilder<'f, A> { + self.min = Bound::Excluded(bound.as_ref().to_owned()); + self + } + + /// Specify a less-than-or-equal-to bound. + pub fn le<T: AsRef<[u8]>>( + mut self, + bound: T, + ) -> StreamWithStateBuilder<'f, A> { + self.max = Bound::Included(bound.as_ref().to_owned()); + self + } + + /// Specify a less-than bound. + pub fn lt<T: AsRef<[u8]>>( + mut self, + bound: T, + ) -> StreamWithStateBuilder<'f, A> { + self.max = Bound::Excluded(bound.as_ref().to_owned()); + self + } +} + +impl<'a, 'f, A: 'a + Automaton> IntoStreamer<'a> + for StreamWithStateBuilder<'f, A> +where + A::State: Clone, +{ + type Item = (&'a [u8], Output, A::State); + type Into = StreamWithState<'f, A>; + + fn into_stream(self) -> StreamWithState<'f, A> { + StreamWithState::new(self.fst, self.aut, self.min, self.max) + } +} + +#[derive(Debug)] +enum Bound { + Included(Vec<u8>), + Excluded(Vec<u8>), + Unbounded, +} + +impl Bound { + #[inline] + fn exceeded_by(&self, inp: &[u8]) -> bool { + match *self { + Bound::Included(ref v) => inp > v, + Bound::Excluded(ref v) => inp >= v, + Bound::Unbounded => false, + } + } + + #[inline] + fn is_empty(&self) -> bool { + match *self { + Bound::Included(ref v) => v.is_empty(), + Bound::Excluded(ref v) => v.is_empty(), + Bound::Unbounded => true, + } + } + + #[inline] + fn is_inclusive(&self) -> bool { + match *self { + Bound::Excluded(_) => false, + _ => true, + } + } +} + +/// A lexicographically ordered stream of key-value pairs from an fst. +/// +/// The `A` type parameter corresponds to an optional automaton to filter +/// the stream. By default, no filtering is done. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying fst. 
+pub struct Stream<'f, A: Automaton = AlwaysMatch>(StreamWithState<'f, A>); + +impl<'f, A: Automaton> Stream<'f, A> { + fn new(fst: FstRef<'f>, aut: A, min: Bound, max: Bound) -> Stream<'f, A> { + Stream(StreamWithState::new(fst, aut, min, max)) + } + + /// Convert this stream into a vector of byte strings and outputs. + /// + /// Note that this creates a new allocation for every key in the stream. + pub fn into_byte_vec(mut self) -> Vec<(Vec<u8>, u64)> { + let mut vs = vec![]; + while let Some((k, v)) = self.next() { + vs.push((k.to_vec(), v.value())); + } + vs + } + + /// Convert this stream into a vector of Unicode strings and outputs. + /// + /// If any key is not valid UTF-8, then iteration on the stream is stopped + /// and a UTF-8 decoding error is returned. + /// + /// Note that this creates a new allocation for every key in the stream. + pub fn into_str_vec(mut self) -> Result<Vec<(String, u64)>> { + let mut vs = vec![]; + while let Some((k, v)) = self.next() { + let k = String::from_utf8(k.to_vec()).map_err(Error::from)?; + vs.push((k, v.value())); + } + Ok(vs) + } + + /// Convert this stream into a vector of byte strings. + /// + /// Note that this creates a new allocation for every key in the stream. + pub fn into_byte_keys(mut self) -> Vec<Vec<u8>> { + let mut vs = vec![]; + while let Some((k, _)) = self.next() { + vs.push(k.to_vec()); + } + vs + } + + /// Convert this stream into a vector of Unicode strings. + /// + /// If any key is not valid UTF-8, then iteration on the stream is stopped + /// and a UTF-8 decoding error is returned. + /// + /// Note that this creates a new allocation for every key in the stream. + pub fn into_str_keys(mut self) -> Result<Vec<String>> { + let mut vs = vec![]; + while let Some((k, _)) = self.next() { + let k = String::from_utf8(k.to_vec()).map_err(Error::from)?; + vs.push(k); + } + Ok(vs) + } + + /// Convert this stream into a vector of outputs. 
+ pub fn into_values(mut self) -> Vec<u64> { + let mut vs = vec![]; + while let Some((_, v)) = self.next() { + vs.push(v.value()); + } + vs + } +} + +impl<'f, 'a, A: Automaton> Streamer<'a> for Stream<'f, A> { + type Item = (&'a [u8], Output); + + fn next(&'a mut self) -> Option<(&'a [u8], Output)> { + self.0.next_with(|_| ()).map(|(key, out, _)| (key, out)) + } +} + +/// A lexicographically ordered stream of key-value-state triples from an fst +/// and an automaton. +/// +/// The key-values are from the underyling FSTP while the states are from the +/// automaton. +/// +/// The `A` type parameter corresponds to an optional automaton to filter +/// the stream. By default, no filtering is done. +/// +/// The `'m` lifetime parameter refers to the lifetime of the underlying map. +pub struct StreamWithState<'f, A = AlwaysMatch> +where + A: Automaton, +{ + fst: FstRef<'f>, + aut: A, + inp: Vec<u8>, + empty_output: Option<Output>, + stack: Vec<StreamState<'f, A::State>>, + end_at: Bound, +} + +#[derive(Clone, Debug)] +struct StreamState<'f, S> { + node: Node<'f>, + trans: usize, + out: Output, + aut_state: S, +} + +impl<'f, A: Automaton> StreamWithState<'f, A> { + fn new( + fst: FstRef<'f>, + aut: A, + min: Bound, + max: Bound, + ) -> StreamWithState<'f, A> { + let mut rdr = StreamWithState { + fst, + aut, + inp: Vec::with_capacity(16), + empty_output: None, + stack: vec![], + end_at: max, + }; + rdr.seek_min(min); + rdr + } + + /// Seeks the underlying stream such that the next key to be read is the + /// smallest key in the underlying fst that satisfies the given minimum + /// bound. + /// + /// This theoretically should be straight-forward, but we need to make + /// sure our stack is correct, which includes accounting for automaton + /// states. 
+ fn seek_min(&mut self, min: Bound) { + if min.is_empty() { + if min.is_inclusive() { + self.empty_output = self.fst.empty_final_output(); + } + self.stack = vec![StreamState { + node: self.fst.root(), + trans: 0, + out: Output::zero(), + aut_state: self.aut.start(), + }]; + return; + } + let (key, inclusive) = match min { + Bound::Excluded(ref min) => (min, false), + Bound::Included(ref min) => (min, true), + Bound::Unbounded => unreachable!(), + }; + // At this point, we need to find the starting location of `min` in + // the FST. However, as we search, we need to maintain a stack of + // reader states so that the reader can pick up where we left off. + // N.B. We do not necessarily need to stop in a final state, unlike + // the one-off `find` method. For the example, the given bound might + // not actually exist in the FST. + let mut node = self.fst.root(); + let mut out = Output::zero(); + let mut aut_state = self.aut.start(); + for &b in key { + match node.find_input(b) { + Some(i) => { + let t = node.transition(i); + let prev_state = aut_state; + aut_state = self.aut.accept(&prev_state, b); + self.inp.push(b); + self.stack.push(StreamState { + node, + trans: i + 1, + out, + aut_state: prev_state, + }); + out = out.cat(t.out); + node = self.fst.node(t.addr); + } + None => { + // This is a little tricky. We're in this case if the + // given bound is not a prefix of any key in the FST. + // Since this is a minimum bound, we need to find the + // first transition in this node that proceeds the current + // input byte. 
+ self.stack.push(StreamState { + node, + trans: node + .transitions() + .position(|t| t.inp > b) + .unwrap_or(node.len()), + out, + aut_state, + }); + return; + } + } + } + if !self.stack.is_empty() { + let last = self.stack.len() - 1; + if inclusive { + self.stack[last].trans -= 1; + self.inp.pop(); + } else { + let node = self.stack[last].node; + let trans = self.stack[last].trans; + self.stack.push(StreamState { + node: self.fst.node(node.transition(trans - 1).addr), + trans: 0, + out, + aut_state, + }); + } + } + } + + fn next_with<T>( + &mut self, + mut map: impl FnMut(&A::State) -> T, + ) -> Option<(&[u8], Output, T)> { + if let Some(out) = self.empty_output.take() { + if self.end_at.exceeded_by(&[]) { + self.stack.clear(); + return None; + } + + let start = self.aut.start(); + if self.aut.is_match(&start) { + return Some((&[], out, map(&start))); + } + } + while let Some(state) = self.stack.pop() { + if state.trans >= state.node.len() + || !self.aut.can_match(&state.aut_state) + { + if state.node.addr() != self.fst.root_addr() { + self.inp.pop().unwrap(); + } + continue; + } + let trans = state.node.transition(state.trans); + let out = state.out.cat(trans.out); + let next_state = self.aut.accept(&state.aut_state, trans.inp); + let t = map(&next_state); + let mut is_match = self.aut.is_match(&next_state); + let next_node = self.fst.node(trans.addr); + self.inp.push(trans.inp); + if next_node.is_final() { + if let Some(eof_state) = self.aut.accept_eof(&next_state) { + is_match = self.aut.is_match(&eof_state); + } + } + self.stack.push(StreamState { trans: state.trans + 1, ..state }); + self.stack.push(StreamState { + node: next_node, + trans: 0, + out, + aut_state: next_state, + }); + if self.end_at.exceeded_by(&self.inp) { + // We are done, forever. 
+ self.stack.clear(); + return None; + } + if next_node.is_final() && is_match { + return Some(( + &self.inp, + out.cat(next_node.final_output()), + t, + )); + } + } + None + } +} + +impl<'a, 'f, A: 'a + Automaton> Streamer<'a> for StreamWithState<'f, A> +where + A::State: Clone, +{ + type Item = (&'a [u8], Output, A::State); + + fn next(&'a mut self) -> Option<(&'a [u8], Output, A::State)> { + self.next_with(|state| state.clone()) + } +} + +/// An output is a value that is associated with a key in a finite state +/// transducer. +/// +/// Note that outputs must satisfy an algebra. Namely, it must have an additive +/// identity and the following binary operations defined: `prefix`, +/// `concatenation` and `subtraction`. `prefix` and `concatenation` are +/// commutative while `subtraction` is not. `subtraction` is only defined on +/// pairs of operands where the first operand is greater than or equal to the +/// second operand. +/// +/// Currently, output values must be `u64`. However, in theory, an output value +/// can be anything that satisfies the above algebra. Future versions of this +/// crate may make outputs generic on this algebra. +#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +pub struct Output(u64); + +impl Output { + /// Create a new output from a `u64`. + #[inline] + pub fn new(v: u64) -> Output { + Output(v) + } + + /// Create a zero output. + #[inline] + pub fn zero() -> Output { + Output(0) + } + + /// Retrieve the value inside this output. + #[inline] + pub fn value(self) -> u64 { + self.0 + } + + /// Returns true if this is a zero output. + #[inline] + pub fn is_zero(self) -> bool { + self.0 == 0 + } + + /// Returns the prefix of this output and `o`. + #[inline] + pub fn prefix(self, o: Output) -> Output { + Output(cmp::min(self.0, o.0)) + } + + /// Returns the concatenation of this output and `o`. 
+ #[inline] + pub fn cat(self, o: Output) -> Output { + Output(self.0 + o.0) + } + + /// Returns the subtraction of `o` from this output. + /// + /// This function panics if `self < o`. + #[inline] + pub fn sub(self, o: Output) -> Output { + Output( + self.0 + .checked_sub(o.0) + .expect("BUG: underflow subtraction not allowed"), + ) + } +} + +/// A transition from one note to another. +#[derive(Copy, Clone, Hash, Eq, PartialEq)] +pub struct Transition { + /// The byte input associated with this transition. + pub inp: u8, + /// The output associated with this transition. + pub out: Output, + /// The address of the node that this transition points to. + pub addr: CompiledAddr, +} + +impl Default for Transition { + #[inline] + fn default() -> Transition { + Transition { inp: 0, out: Output::zero(), addr: NONE_ADDRESS } + } +} + +impl fmt::Debug for Transition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.out.is_zero() { + write!(f, "{} -> {}", self.inp as char, self.addr) + } else { + write!( + f, + "({}, {}) -> {}", + self.inp as char, + self.out.value(), + self.addr + ) + } + } +} + +#[inline] +#[cfg(target_pointer_width = "64")] +fn u64_to_usize(n: u64) -> usize { + n as usize +} + +#[inline] +#[cfg(not(target_pointer_width = "64"))] +fn u64_to_usize(n: u64) -> usize { + if n > std::usize::MAX as u64 { + panic!( + "\ +Cannot convert node address {} to a pointer sized variable. 
If this FST +is very large and was generated on a system with a larger pointer size +than this system, then it is not possible to read this FST on this +system.", + n + ); + } + n as usize +} diff --git a/vendor/fst/src/raw/node.rs b/vendor/fst/src/raw/node.rs new file mode 100644 index 000000000..820b29eac --- /dev/null +++ b/vendor/fst/src/raw/node.rs @@ -0,0 +1,1012 @@ +use std::cmp; +use std::fmt; +use std::io; +use std::ops::Range; + +use crate::bytes; +use crate::raw::build::BuilderNode; +use crate::raw::common_inputs::{COMMON_INPUTS, COMMON_INPUTS_INV}; +use crate::raw::{ + u64_to_usize, CompiledAddr, Output, Transition, EMPTY_ADDRESS, +}; + +/// The threshold (in number of transitions) at which an index is created for +/// a node's transitions. This speeds up lookup time at the expense of FST +/// size. +const TRANS_INDEX_THRESHOLD: usize = 32; + +/// Node represents a single state in a finite state transducer. +/// +/// Nodes are very cheap to construct. Notably, they satisfy the `Copy` trait. +#[derive(Clone, Copy)] +pub struct Node<'f> { + data: &'f [u8], + version: u64, + state: State, + start: CompiledAddr, + end: usize, + is_final: bool, + ntrans: usize, + sizes: PackSizes, + final_output: Output, +} + +impl<'f> fmt::Debug for Node<'f> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "NODE@{}", self.start)?; + writeln!(f, " end_addr: {}", self.end)?; + writeln!(f, " size: {} bytes", self.as_slice().len())?; + writeln!(f, " state: {:?}", self.state)?; + writeln!(f, " is_final: {}", self.is_final())?; + writeln!(f, " final_output: {:?}", self.final_output())?; + writeln!(f, " # transitions: {}", self.len())?; + writeln!(f, " transitions:")?; + for t in self.transitions() { + writeln!(f, " {:?}", t)?; + } + Ok(()) + } +} + +impl<'f> Node<'f> { + /// Creates a new note at the address given. + /// + /// `data` should be a slice to an entire FST. 
+ pub(crate) fn new( + version: u64, + addr: CompiledAddr, + data: &[u8], + ) -> Node<'_> { + let state = State::new(data, addr); + match state { + State::EmptyFinal => Node { + data: &[], + version, + state: State::EmptyFinal, + start: EMPTY_ADDRESS, + end: EMPTY_ADDRESS, + is_final: true, + ntrans: 0, + sizes: PackSizes::new(), + final_output: Output::zero(), + }, + State::OneTransNext(s) => { + let data = &data[..addr + 1]; + Node { + data, + version, + state, + start: addr, + end: s.end_addr(data), + is_final: false, + sizes: PackSizes::new(), + ntrans: 1, + final_output: Output::zero(), + } + } + State::OneTrans(s) => { + let data = &data[..addr + 1]; + let sizes = s.sizes(data); + Node { + data, + version, + state, + start: addr, + end: s.end_addr(data, sizes), + is_final: false, + ntrans: 1, + sizes, + final_output: Output::zero(), + } + } + State::AnyTrans(s) => { + let data = &data[..addr + 1]; + let sizes = s.sizes(data); + let ntrans = s.ntrans(data); + Node { + data, + version, + state, + start: addr, + end: s.end_addr(version, data, sizes, ntrans), + is_final: s.is_final_state(), + ntrans, + sizes, + final_output: s.final_output(version, data, sizes, ntrans), + } + } + } + } + /// Returns an iterator over all transitions in this node in lexicographic + /// order. + #[inline] + pub fn transitions<'n>(&'n self) -> Transitions<'f, 'n> { + Transitions { node: self, range: 0..self.len() } + } + + /// Returns the transition at index `i`. + #[inline(always)] + pub fn transition(&self, i: usize) -> Transition { + // The `inline(always)` annotation on this function appears to + // dramatically speed up FST traversal. In particular, measuring the + // time it takes to run `fst range something-big.fst` shows almost a 2x + // improvement with this single `inline(always)` annotation. 
+ + match self.state { + State::OneTransNext(s) => { + assert!(i == 0); + Transition { + inp: s.input(self), + out: Output::zero(), + addr: s.trans_addr(self), + } + } + State::OneTrans(s) => { + assert!(i == 0); + Transition { + inp: s.input(self), + out: s.output(self), + addr: s.trans_addr(self), + } + } + State::AnyTrans(s) => Transition { + inp: s.input(self, i), + out: s.output(self, i), + addr: s.trans_addr(self, i), + }, + State::EmptyFinal => panic!("out of bounds"), + } + } + + /// Returns the transition address of the `i`th transition. + #[inline] + pub fn transition_addr(&self, i: usize) -> CompiledAddr { + match self.state { + State::OneTransNext(s) => { + assert!(i == 0); + s.trans_addr(self) + } + State::OneTrans(s) => { + assert!(i == 0); + s.trans_addr(self) + } + State::AnyTrans(s) => s.trans_addr(self, i), + State::EmptyFinal => panic!("out of bounds"), + } + } + + /// Finds the `i`th transition corresponding to the given input byte. + /// + /// If no transition for this byte exists, then `None` is returned. + #[inline] + pub fn find_input(&self, b: u8) -> Option<usize> { + match self.state { + State::OneTransNext(s) if s.input(self) == b => Some(0), + State::OneTransNext(_) => None, + State::OneTrans(s) if s.input(self) == b => Some(0), + State::OneTrans(_) => None, + State::AnyTrans(s) => s.find_input(self, b), + State::EmptyFinal => None, + } + } + + /// If this node is final and has a terminal output value, then it is + /// returned. Otherwise, a zero output is returned. + #[inline] + pub fn final_output(&self) -> Output { + self.final_output + } + + /// Returns true if and only if this node corresponds to a final or "match" + /// state in the finite state transducer. + #[inline] + pub fn is_final(&self) -> bool { + self.is_final + } + + /// Returns the number of transitions in this node. + /// + /// The maximum number of transitions is 256. 
+ #[inline] + pub fn len(&self) -> usize { + self.ntrans + } + + /// Returns true if and only if this node has zero transitions. + #[inline] + pub fn is_empty(&self) -> bool { + self.ntrans == 0 + } + + /// Return the address of this node. + #[inline] + pub fn addr(&self) -> CompiledAddr { + self.start + } + + #[doc(hidden)] + #[inline] + pub fn as_slice(&self) -> &[u8] { + &self.data[self.end..] + } + + #[doc(hidden)] + #[inline] + pub fn state(&self) -> &'static str { + match self.state { + State::OneTransNext(_) => "OTN", + State::OneTrans(_) => "OT", + State::AnyTrans(_) => "AT", + State::EmptyFinal => "EF", + } + } + + fn compile<W: io::Write>( + wtr: W, + last_addr: CompiledAddr, + addr: CompiledAddr, + node: &BuilderNode, + ) -> io::Result<()> { + assert!(node.trans.len() <= 256); + if node.trans.is_empty() + && node.is_final + && node.final_output.is_zero() + { + return Ok(()); + } else if node.trans.len() != 1 || node.is_final { + StateAnyTrans::compile(wtr, addr, node) + } else { + if node.trans[0].addr == last_addr && node.trans[0].out.is_zero() { + StateOneTransNext::compile(wtr, addr, node.trans[0].inp) + } else { + StateOneTrans::compile(wtr, addr, node.trans[0]) + } + } + } +} + +impl BuilderNode { + pub fn compile_to<W: io::Write>( + &self, + wtr: W, + last_addr: CompiledAddr, + addr: CompiledAddr, + ) -> io::Result<()> { + Node::compile(wtr, last_addr, addr, self) + } +} + +#[derive(Clone, Copy, Debug)] +enum State { + OneTransNext(StateOneTransNext), + OneTrans(StateOneTrans), + AnyTrans(StateAnyTrans), + EmptyFinal, +} + +// one trans flag (1), next flag (1), common input +#[derive(Clone, Copy, Debug)] +struct StateOneTransNext(u8); +// one trans flag (1), next flag (0), common input +#[derive(Clone, Copy, Debug)] +struct StateOneTrans(u8); +// one trans flag (0), final flag, # transitions +#[derive(Clone, Copy, Debug)] +struct StateAnyTrans(u8); + +impl State { + fn new(data: &[u8], addr: CompiledAddr) -> State { + if addr == EMPTY_ADDRESS { + 
return State::EmptyFinal; + } + let v = data[addr]; + match (v & 0b11_000000) >> 6 { + 0b11 => State::OneTransNext(StateOneTransNext(v)), + 0b10 => State::OneTrans(StateOneTrans(v)), + _ => State::AnyTrans(StateAnyTrans(v)), + } + } +} + +impl StateOneTransNext { + fn compile<W: io::Write>( + mut wtr: W, + _: CompiledAddr, + input: u8, + ) -> io::Result<()> { + let mut state = StateOneTransNext::new(); + state.set_common_input(input); + if state.common_input().is_none() { + wtr.write_all(&[input])?; + } + wtr.write_all(&[state.0])?; + Ok(()) + } + + #[inline] + fn new() -> StateOneTransNext { + StateOneTransNext(0b11_000000) + } + + #[inline] + fn set_common_input(&mut self, input: u8) { + self.0 = (self.0 & 0b11_000000) | common_idx(input, 0b111111); + } + + #[inline] + fn common_input(&self) -> Option<u8> { + common_input(self.0 & 0b00_111111) + } + + #[inline] + fn input_len(&self) -> usize { + if self.common_input().is_none() { + 1 + } else { + 0 + } + } + + #[inline] + fn end_addr(&self, data: &[u8]) -> usize { + data.len() - 1 - self.input_len() + } + + #[inline] + fn input(&self, node: &Node<'_>) -> u8 { + if let Some(inp) = self.common_input() { + inp + } else { + node.data[node.start - 1] + } + } + + #[inline] + fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr { + node.end as CompiledAddr - 1 + } +} + +impl StateOneTrans { + fn compile<W: io::Write>( + mut wtr: W, + addr: CompiledAddr, + trans: Transition, + ) -> io::Result<()> { + let out = trans.out.value(); + let output_pack_size = + if out == 0 { 0 } else { bytes::pack_uint(&mut wtr, out)? 
}; + let trans_pack_size = pack_delta(&mut wtr, addr, trans.addr)?; + + let mut pack_sizes = PackSizes::new(); + pack_sizes.set_output_pack_size(output_pack_size); + pack_sizes.set_transition_pack_size(trans_pack_size); + wtr.write_all(&[pack_sizes.encode()])?; + + let mut state = StateOneTrans::new(); + state.set_common_input(trans.inp); + if state.common_input().is_none() { + wtr.write_all(&[trans.inp])?; + } + wtr.write_all(&[state.0])?; + Ok(()) + } + + #[inline] + fn new() -> StateOneTrans { + StateOneTrans(0b10_000000) + } + + #[inline] + fn set_common_input(&mut self, input: u8) { + self.0 = (self.0 & 0b10_000000) | common_idx(input, 0b111111); + } + + #[inline] + fn common_input(&self) -> Option<u8> { + common_input(self.0 & 0b00_111111) + } + + #[inline] + fn input_len(&self) -> usize { + if self.common_input().is_none() { + 1 + } else { + 0 + } + } + + #[inline] + fn sizes(&self, data: &[u8]) -> PackSizes { + let i = data.len() - 1 - self.input_len() - 1; + PackSizes::decode(data[i]) + } + + #[inline] + fn end_addr(&self, data: &[u8], sizes: PackSizes) -> usize { + data.len() - 1 + - self.input_len() + - 1 // pack size + - sizes.transition_pack_size() + - sizes.output_pack_size() + } + + #[inline] + fn input(&self, node: &Node<'_>) -> u8 { + if let Some(inp) = self.common_input() { + inp + } else { + node.data[node.start - 1] + } + } + + #[inline] + fn output(&self, node: &Node<'_>) -> Output { + let osize = node.sizes.output_pack_size(); + if osize == 0 { + return Output::zero(); + } + let tsize = node.sizes.transition_pack_size(); + let i = node.start + - self.input_len() + - 1 // pack size + - tsize - osize; + Output::new(bytes::unpack_uint(&node.data[i..], osize as u8)) + } + + #[inline] + fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr { + let tsize = node.sizes.transition_pack_size(); + let i = node.start + - self.input_len() + - 1 // pack size + - tsize; + unpack_delta(&node.data[i..], tsize, node.end) + } +} + +impl StateAnyTrans { + fn 
compile<W: io::Write>( + mut wtr: W, + addr: CompiledAddr, + node: &BuilderNode, + ) -> io::Result<()> { + assert!(node.trans.len() <= 256); + + let mut tsize = 0; + let mut osize = bytes::pack_size(node.final_output.value()); + let mut any_outs = !node.final_output.is_zero(); + for t in &node.trans { + tsize = cmp::max(tsize, pack_delta_size(addr, t.addr)); + osize = cmp::max(osize, bytes::pack_size(t.out.value())); + any_outs = any_outs || !t.out.is_zero(); + } + + let mut pack_sizes = PackSizes::new(); + if any_outs { + pack_sizes.set_output_pack_size(osize); + } else { + pack_sizes.set_output_pack_size(0); + } + pack_sizes.set_transition_pack_size(tsize); + + let mut state = StateAnyTrans::new(); + state.set_final_state(node.is_final); + state.set_state_ntrans(node.trans.len() as u8); + + if any_outs { + if node.is_final { + bytes::pack_uint_in( + &mut wtr, + node.final_output.value(), + osize, + )?; + } + for t in node.trans.iter().rev() { + bytes::pack_uint_in(&mut wtr, t.out.value(), osize)?; + } + } + for t in node.trans.iter().rev() { + pack_delta_in(&mut wtr, addr, t.addr, tsize)?; + } + for t in node.trans.iter().rev() { + wtr.write_all(&[t.inp])?; + } + if node.trans.len() > TRANS_INDEX_THRESHOLD { + // A value of 255 indicates that no transition exists for the byte + // at that index. (Except when there are 256 transitions.) Namely, + // any value greater than or equal to the number of transitions in + // this node indicates an absent transition. + let mut index = [255u8; 256]; + for (i, t) in node.trans.iter().enumerate() { + index[t.inp as usize] = i as u8; + } + wtr.write_all(&index)?; + } + + wtr.write_all(&[pack_sizes.encode()])?; + if state.state_ntrans().is_none() { + if node.trans.len() == 256 { + // 256 can't be represented in a u8, so we abuse the fact that + // the # of transitions can never be 1 here, since 1 is always + // encoded in the state byte. 
+ wtr.write_all(&[1])?; + } else { + wtr.write_all(&[node.trans.len() as u8])?; + } + } + wtr.write_all(&[state.0])?; + Ok(()) + } + + #[inline] + fn new() -> StateAnyTrans { + StateAnyTrans(0b00_000000) + } + + #[inline] + fn set_final_state(&mut self, yes: bool) { + if yes { + self.0 |= 0b01_000000; + } + } + + #[inline] + fn is_final_state(&self) -> bool { + self.0 & 0b01_000000 == 0b01_000000 + } + + #[inline] + fn set_state_ntrans(&mut self, n: u8) { + if n <= 0b00_111111 { + self.0 = (self.0 & 0b11_000000) | n; + } + } + + #[inline] + fn state_ntrans(&self) -> Option<u8> { + let n = self.0 & 0b00_111111; + if n == 0 { + None + } else { + Some(n) + } + } + + #[inline] + fn sizes(&self, data: &[u8]) -> PackSizes { + let i = data.len() - 1 - self.ntrans_len() - 1; + PackSizes::decode(data[i]) + } + + #[inline] + fn total_trans_size( + &self, + version: u64, + sizes: PackSizes, + ntrans: usize, + ) -> usize { + let index_size = self.trans_index_size(version, ntrans); + ntrans + (ntrans * sizes.transition_pack_size()) + index_size + } + + #[inline] + fn trans_index_size(&self, version: u64, ntrans: usize) -> usize { + if version >= 2 && ntrans > TRANS_INDEX_THRESHOLD { + 256 + } else { + 0 + } + } + + #[inline] + fn ntrans_len(&self) -> usize { + if self.state_ntrans().is_none() { + 1 + } else { + 0 + } + } + + #[inline] + fn ntrans(&self, data: &[u8]) -> usize { + if let Some(n) = self.state_ntrans() { + n as usize + } else { + let n = data[data.len() - 2] as usize; + if n == 1 { + // "1" is never a normal legal value here, because if there + // is only 1 transition, then it is encoded in the state byte. 
+ 256 + } else { + n + } + } + } + + #[inline] + fn final_output( + &self, + version: u64, + data: &[u8], + sizes: PackSizes, + ntrans: usize, + ) -> Output { + let osize = sizes.output_pack_size(); + if osize == 0 || !self.is_final_state() { + return Output::zero(); + } + let at = data.len() - 1 + - self.ntrans_len() + - 1 // pack size + - self.total_trans_size(version, sizes, ntrans) + - (ntrans * osize) // output values + - osize; // the desired output value + Output::new(bytes::unpack_uint(&data[at..], osize as u8)) + } + + #[inline] + fn end_addr( + &self, + version: u64, + data: &[u8], + sizes: PackSizes, + ntrans: usize, + ) -> usize { + let osize = sizes.output_pack_size(); + let final_osize = if !self.is_final_state() { 0 } else { osize }; + data.len() - 1 + - self.ntrans_len() + - 1 // pack size + - self.total_trans_size(version, sizes, ntrans) + - (ntrans * osize) // output values + - final_osize // final output + } + + #[inline] + fn trans_addr(&self, node: &Node<'_>, i: usize) -> CompiledAddr { + assert!(i < node.ntrans); + let tsize = node.sizes.transition_pack_size(); + let at = node.start + - self.ntrans_len() + - 1 // pack size + - self.trans_index_size(node.version, node.ntrans) + - node.ntrans // inputs + - (i * tsize) // the previous transition addresses + - tsize; // the desired transition address + unpack_delta(&node.data[at..], tsize, node.end) + } + + #[inline] + fn input(&self, node: &Node<'_>, i: usize) -> u8 { + let at = node.start + - self.ntrans_len() + - 1 // pack size + - self.trans_index_size(node.version, node.ntrans) + - i + - 1; // the input byte + node.data[at] + } + + #[inline] + fn find_input(&self, node: &Node<'_>, b: u8) -> Option<usize> { + if node.version >= 2 && node.ntrans > TRANS_INDEX_THRESHOLD { + let start = node.start + - self.ntrans_len() + - 1 // pack size + - self.trans_index_size(node.version, node.ntrans); + let i = node.data[start + b as usize] as usize; + if i >= node.ntrans { + None + } else { + Some(i) + } + 
} else { + let start = node.start + - self.ntrans_len() + - 1 // pack size + - node.ntrans; // inputs + let end = start + node.ntrans; + let inputs = &node.data[start..end]; + inputs.iter().position(|&b2| b == b2).map(|i| node.ntrans - i - 1) + } + } + + #[inline] + fn output(&self, node: &Node<'_>, i: usize) -> Output { + let osize = node.sizes.output_pack_size(); + if osize == 0 { + return Output::zero(); + } + let at = node.start + - self.ntrans_len() + - 1 // pack size + - self.total_trans_size(node.version, node.sizes, node.ntrans) + - (i * osize) // the previous outputs + - osize; // the desired output value + Output::new(bytes::unpack_uint(&node.data[at..], osize as u8)) + } +} + +// high 4 bits is transition address packed size. +// low 4 bits is output value packed size. +// +// `0` is a legal value which means there are no transitions/outputs. +#[derive(Clone, Copy, Debug)] +struct PackSizes(u8); + +impl PackSizes { + #[inline] + fn new() -> PackSizes { + PackSizes(0) + } + + #[inline] + fn decode(v: u8) -> PackSizes { + PackSizes(v) + } + + #[inline] + fn encode(&self) -> u8 { + self.0 + } + + #[inline] + fn set_transition_pack_size(&mut self, size: u8) { + assert!(size <= 8); + self.0 = (self.0 & 0b0000_1111) | (size << 4); + } + + #[inline] + fn transition_pack_size(&self) -> usize { + ((self.0 & 0b1111_0000) >> 4) as usize + } + + #[inline] + fn set_output_pack_size(&mut self, size: u8) { + assert!(size <= 8); + self.0 = (self.0 & 0b1111_0000) | size; + } + + #[inline] + fn output_pack_size(&self) -> usize { + (self.0 & 0b0000_1111) as usize + } +} + +/// An iterator over all transitions in a node. +/// +/// `'f` is the lifetime of the underlying fst and `'n` is the lifetime of +/// the underlying `Node`. 
+pub struct Transitions<'f, 'n> { + node: &'n Node<'f>, + range: Range<usize>, +} + +impl<'f, 'n> Iterator for Transitions<'f, 'n> { + type Item = Transition; + + #[inline] + fn next(&mut self) -> Option<Transition> { + self.range.next().map(|i| self.node.transition(i)) + } +} + +/// common_idx translate a byte to an index in the COMMON_INPUTS_INV array. +/// +/// I wonder if it would be prudent to store this mapping in the FST itself. +/// The advantage of doing so would mean that common inputs would reflect the +/// specific data in the FST. The problem of course is that this table has to +/// be computed up front, which is pretty much at odds with the streaming +/// nature of the builder. +/// +/// Nevertheless, the *caller* may have a priori knowledge that could be +/// supplied to the builder manually, which could then be embedded in the FST. +#[inline] +fn common_idx(input: u8, max: u8) -> u8 { + let val = ((COMMON_INPUTS[input as usize] as u32 + 1) % 256) as u8; + if val > max { + 0 + } else { + val + } +} + +/// common_input translates a common input index stored in a serialized FST +/// to the corresponding byte. 
+#[inline] +fn common_input(idx: u8) -> Option<u8> { + if idx == 0 { + None + } else { + Some(COMMON_INPUTS_INV[(idx - 1) as usize]) + } +} + +#[inline] +fn pack_delta<W: io::Write>( + wtr: W, + node_addr: CompiledAddr, + trans_addr: CompiledAddr, +) -> io::Result<u8> { + let nbytes = pack_delta_size(node_addr, trans_addr); + pack_delta_in(wtr, node_addr, trans_addr, nbytes)?; + Ok(nbytes) +} + +#[inline] +fn pack_delta_in<W: io::Write>( + wtr: W, + node_addr: CompiledAddr, + trans_addr: CompiledAddr, + nbytes: u8, +) -> io::Result<()> { + let delta_addr = if trans_addr == EMPTY_ADDRESS { + EMPTY_ADDRESS + } else { + node_addr - trans_addr + }; + bytes::pack_uint_in(wtr, delta_addr as u64, nbytes) +} + +#[inline] +fn pack_delta_size(node_addr: CompiledAddr, trans_addr: CompiledAddr) -> u8 { + let delta_addr = if trans_addr == EMPTY_ADDRESS { + EMPTY_ADDRESS + } else { + node_addr - trans_addr + }; + bytes::pack_size(delta_addr as u64) +} + +#[inline] +fn unpack_delta( + slice: &[u8], + trans_pack_size: usize, + node_addr: usize, +) -> CompiledAddr { + let delta = bytes::unpack_uint(slice, trans_pack_size as u8); + let delta_addr = u64_to_usize(delta); + if delta_addr == EMPTY_ADDRESS { + EMPTY_ADDRESS + } else { + node_addr - delta_addr + } +} + +#[cfg(test)] +mod tests { + use quickcheck::{quickcheck, TestResult}; + + use crate::raw::build::BuilderNode; + use crate::raw::node::Node; + use crate::raw::{Builder, CompiledAddr, Output, Transition, VERSION}; + use crate::stream::Streamer; + + const NEVER_LAST: CompiledAddr = std::u64::MAX as CompiledAddr; + + #[test] + fn prop_emits_inputs() { + fn p(mut bs: Vec<Vec<u8>>) -> TestResult { + bs.sort(); + bs.dedup(); + + let mut bfst = Builder::memory(); + for word in &bs { + bfst.add(word).unwrap(); + } + let fst = bfst.into_fst(); + let mut rdr = fst.stream(); + let mut words = vec![]; + while let Some(w) = rdr.next() { + words.push(w.0.to_owned()); + } + TestResult::from_bool(bs == words) + } + quickcheck(p as 
fn(Vec<Vec<u8>>) -> TestResult) + } + + fn nodes_equal(compiled: &Node, uncompiled: &BuilderNode) -> bool { + println!("{:?}", compiled); + assert_eq!(compiled.is_final(), uncompiled.is_final); + assert_eq!(compiled.len(), uncompiled.trans.len()); + assert_eq!(compiled.final_output(), uncompiled.final_output); + for (ct, ut) in + compiled.transitions().zip(uncompiled.trans.iter().cloned()) + { + assert_eq!(ct.inp, ut.inp); + assert_eq!(ct.out, ut.out); + assert_eq!(ct.addr, ut.addr); + } + true + } + + fn compile(node: &BuilderNode) -> (CompiledAddr, Vec<u8>) { + let mut buf = vec![0; 24]; + node.compile_to(&mut buf, NEVER_LAST, 24).unwrap(); + (buf.len() as CompiledAddr - 1, buf) + } + + fn roundtrip(bnode: &BuilderNode) -> bool { + let (addr, bytes) = compile(bnode); + let node = Node::new(VERSION, addr, &bytes); + nodes_equal(&node, &bnode) + } + + fn trans(addr: CompiledAddr, inp: u8) -> Transition { + Transition { inp, out: Output::zero(), addr } + } + + #[test] + fn bin_no_trans() { + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![], + }; + let (addr, buf) = compile(&bnode); + let node = Node::new(VERSION, addr, &buf); + assert_eq!(node.as_slice().len(), 3); + roundtrip(&bnode); + } + + #[test] + fn bin_one_trans_common() { + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![trans(20, b'a')], + }; + let (addr, buf) = compile(&bnode); + let node = Node::new(VERSION, addr, &buf); + assert_eq!(node.as_slice().len(), 3); + roundtrip(&bnode); + } + + #[test] + fn bin_one_trans_not_common() { + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![trans(2, b'\xff')], + }; + let (addr, buf) = compile(&bnode); + let node = Node::new(VERSION, addr, &buf); + assert_eq!(node.as_slice().len(), 4); + roundtrip(&bnode); + } + + #[test] + fn bin_many_trans() { + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![ + 
trans(2, b'a'), + trans(3, b'b'), + trans(4, b'c'), + trans(5, b'd'), + trans(6, b'e'), + trans(7, b'f'), + ], + }; + let (addr, buf) = compile(&bnode); + let node = Node::new(VERSION, addr, &buf); + assert_eq!(node.as_slice().len(), 14); + roundtrip(&bnode); + } + + #[test] + fn node_max_trans() { + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: (0..256).map(|i| trans(0, i as u8)).collect(), + }; + let (addr, buf) = compile(&bnode); + let node = Node::new(VERSION, addr, &buf); + assert_eq!(node.transitions().count(), 256); + assert_eq!(node.len(), node.transitions().count()); + roundtrip(&bnode); + } +} diff --git a/vendor/fst/src/raw/ops.rs b/vendor/fst/src/raw/ops.rs new file mode 100644 index 000000000..9baf85540 --- /dev/null +++ b/vendor/fst/src/raw/ops.rs @@ -0,0 +1,643 @@ +use std::cmp; +use std::collections::BinaryHeap; +use std::iter::FromIterator; + +use crate::raw::Output; +use crate::stream::{IntoStreamer, Streamer}; + +/// Permits stream operations to be hetergeneous with respect to streams. +type BoxedStream<'f> = + Box<dyn for<'a> Streamer<'a, Item = (&'a [u8], Output)> + 'f>; + +/// A value indexed by a stream. +/// +/// Indexed values are used to indicate the presence of a key in multiple +/// streams during a set operation. Namely, the index corresponds to the stream +/// (by the order in which it was added to the operation, starting at `0`) +/// and the value corresponds to the value associated with a particular key +/// in that stream. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IndexedValue { + /// The index of the stream that produced this value (starting at `0`). + pub index: usize, + /// The value. + pub value: u64, +} + +/// A builder for collecting fst streams on which to perform set operations +/// on the keys of fsts. +/// +/// Set operations include intersection, union, difference and symmetric +/// difference. 
The result of each set operation is itself a stream that emits +/// pairs of keys and a sequence of each occurrence of that key in the +/// participating streams. This information allows one to perform set +/// operations on fsts and customize how conflicting output values are handled. +/// +/// All set operations work efficiently on an arbitrary number of +/// streams with memory proportional to the number of streams. +/// +/// The algorithmic complexity of all set operations is `O(n1 + n2 + n3 + ...)` +/// where `n1, n2, n3, ...` correspond to the number of elements in each +/// stream. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying set. +pub struct OpBuilder<'f> { + streams: Vec<BoxedStream<'f>>, +} + +impl<'f> OpBuilder<'f> { + /// Create a new set operation builder. + #[inline] + pub fn new() -> OpBuilder<'f> { + OpBuilder { streams: vec![] } + } + + /// Add a stream to this set operation. + /// + /// This is useful for a chaining style pattern, e.g., + /// `builder.add(stream1).add(stream2).union()`. + /// + /// The stream must emit a lexicographically ordered sequence of key-value + /// pairs. + pub fn add<I, S>(mut self, stream: I) -> OpBuilder<'f> + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + self.push(stream); + self + } + + /// Add a stream to this set operation. + /// + /// The stream must emit a lexicographically ordered sequence of key-value + /// pairs. + pub fn push<I, S>(&mut self, stream: I) + where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, + { + self.streams.push(Box::new(stream.into_stream())); + } + + /// Performs a union operation on all streams that have been added. + /// + /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The + /// first element of the tuple is the byte string key. 
The second element + /// of the tuple is a list of all occurrences of that key in participating + /// streams. The `IndexedValue` contains an index and the value associated + /// with that key in that stream. The index uniquely identifies each + /// stream, which is an integer that is auto-incremented when a stream + /// is added to this operation (starting at `0`). + #[inline] + pub fn union(self) -> Union<'f> { + Union { + heap: StreamHeap::new(self.streams), + outs: vec![], + cur_slot: None, + } + } + + /// Performs an intersection operation on all streams that have been added. + /// + /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The + /// first element of the tuple is the byte string key. The second element + /// of the tuple is a list of all occurrences of that key in participating + /// streams. The `IndexedValue` contains an index and the value associated + /// with that key in that stream. The index uniquely identifies each + /// stream, which is an integer that is auto-incremented when a stream + /// is added to this operation (starting at `0`). + #[inline] + pub fn intersection(self) -> Intersection<'f> { + Intersection { + heap: StreamHeap::new(self.streams), + outs: vec![], + cur_slot: None, + } + } + + /// Performs a difference operation with respect to the first stream added. + /// That is, this returns a stream of all elements in the first stream + /// that don't exist in any other stream that has been added. + /// + /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The + /// first element of the tuple is the byte string key. The second element + /// of the tuple is a list of all occurrences of that key in participating + /// streams. The `IndexedValue` contains an index and the value associated + /// with that key in that stream. The index uniquely identifies each + /// stream, which is an integer that is auto-incremented when a stream + /// is added to this operation (starting at `0`). 
+ /// + /// The interface is the same for all the operations, but due to the nature + /// of `difference`, each yielded key contains exactly one `IndexValue` with + /// `index` set to 0. + #[inline] + pub fn difference(mut self) -> Difference<'f> { + let first = self.streams.swap_remove(0); + Difference { + set: first, + key: vec![], + heap: StreamHeap::new(self.streams), + outs: vec![], + } + } + + /// Performs a symmetric difference operation on all of the streams that + /// have been added. + /// + /// When there are only two streams, then the keys returned correspond to + /// keys that are in either stream but *not* in both streams. + /// + /// More generally, for any number of streams, keys that occur in an odd + /// number of streams are returned. + /// + /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The + /// first element of the tuple is the byte string key. The second element + /// of the tuple is a list of all occurrences of that key in participating + /// streams. The `IndexedValue` contains an index and the value associated + /// with that key in that stream. The index uniquely identifies each + /// stream, which is an integer that is auto-incremented when a stream + /// is added to this operation (starting at `0`). 
+ #[inline] + pub fn symmetric_difference(self) -> SymmetricDifference<'f> { + SymmetricDifference { + heap: StreamHeap::new(self.streams), + outs: vec![], + cur_slot: None, + } + } +} + +impl<'f, I, S> Extend<I> for OpBuilder<'f> +where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, +{ + fn extend<T>(&mut self, it: T) + where + T: IntoIterator<Item = I>, + { + for stream in it { + self.push(stream); + } + } +} + +impl<'f, I, S> FromIterator<I> for OpBuilder<'f> +where + I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>, + S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>, +{ + fn from_iter<T>(it: T) -> OpBuilder<'f> + where + T: IntoIterator<Item = I>, + { + let mut op = OpBuilder::new(); + op.extend(it); + op + } +} + +/// A stream of set union over multiple fst streams in lexicographic order. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying map. +pub struct Union<'f> { + heap: StreamHeap<'f>, + outs: Vec<IndexedValue>, + cur_slot: Option<Slot>, +} + +impl<'a, 'f> Streamer<'a> for Union<'f> { + type Item = (&'a [u8], &'a [IndexedValue]); + + fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> { + if let Some(slot) = self.cur_slot.take() { + self.heap.refill(slot); + } + let slot = match self.heap.pop() { + None => return None, + Some(slot) => { + self.cur_slot = Some(slot); + self.cur_slot.as_ref().unwrap() + } + }; + self.outs.clear(); + self.outs.push(slot.indexed_value()); + while let Some(slot2) = self.heap.pop_if_equal(slot.input()) { + self.outs.push(slot2.indexed_value()); + self.heap.refill(slot2); + } + Some((slot.input(), &self.outs)) + } +} + +/// A stream of set intersection over multiple fst streams in lexicographic +/// order. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying fst. 
+pub struct Intersection<'f> {
+    heap: StreamHeap<'f>,
+    outs: Vec<IndexedValue>,
+    cur_slot: Option<Slot>,
+}
+
+impl<'a, 'f> Streamer<'a> for Intersection<'f> {
+    type Item = (&'a [u8], &'a [IndexedValue]);
+
+    fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> {
+        // The slot that produced the previously yielded key was held back
+        // (the caller borrowed its key buffer); return it to the heap now.
+        if let Some(slot) = self.cur_slot.take() {
+            self.heap.refill(slot);
+        }
+        loop {
+            let slot = match self.heap.pop() {
+                None => return None,
+                Some(slot) => slot,
+            };
+            self.outs.clear();
+            self.outs.push(slot.indexed_value());
+            // Drain every other stream currently positioned at the same key,
+            // counting how many streams produced it.
+            let mut popped: usize = 1;
+            while let Some(slot2) = self.heap.pop_if_equal(slot.input()) {
+                self.outs.push(slot2.indexed_value());
+                self.heap.refill(slot2);
+                popped += 1;
+            }
+            if popped < self.heap.num_slots() {
+                // The key was missing from at least one stream, so it is not
+                // in the intersection; advance and keep scanning.
+                self.heap.refill(slot);
+            } else {
+                self.cur_slot = Some(slot);
+                let key = self.cur_slot.as_ref().unwrap().input();
+                return Some((key, &self.outs));
+            }
+        }
+    }
+}
+
+/// A stream of set difference over multiple fst streams in lexicographic
+/// order.
+///
+/// The difference operation is taken with respect to the first stream and the
+/// rest of the streams. i.e., All elements in the first stream that do not
+/// appear in any other streams.
+///
+/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
+pub struct Difference<'f> { + set: BoxedStream<'f>, + key: Vec<u8>, + heap: StreamHeap<'f>, + outs: Vec<IndexedValue>, +} + +impl<'a, 'f> Streamer<'a> for Difference<'f> { + type Item = (&'a [u8], &'a [IndexedValue]); + + fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> { + loop { + match self.set.next() { + None => return None, + Some((key, out)) => { + self.key.clear(); + self.key.extend(key); + self.outs.clear(); + self.outs + .push(IndexedValue { index: 0, value: out.value() }); + } + }; + let mut unique = true; + while let Some(slot) = self.heap.pop_if_le(&self.key) { + if slot.input() == &*self.key { + unique = false; + } + self.heap.refill(slot); + } + if unique { + return Some((&self.key, &self.outs)); + } + } + } +} + +/// A stream of set symmetric difference over multiple fst streams in +/// lexicographic order. +/// +/// The `'f` lifetime parameter refers to the lifetime of the underlying fst. +pub struct SymmetricDifference<'f> { + heap: StreamHeap<'f>, + outs: Vec<IndexedValue>, + cur_slot: Option<Slot>, +} + +impl<'a, 'f> Streamer<'a> for SymmetricDifference<'f> { + type Item = (&'a [u8], &'a [IndexedValue]); + + fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> { + if let Some(slot) = self.cur_slot.take() { + self.heap.refill(slot); + } + loop { + let slot = match self.heap.pop() { + None => return None, + Some(slot) => slot, + }; + self.outs.clear(); + self.outs.push(slot.indexed_value()); + let mut popped: usize = 1; + while let Some(slot2) = self.heap.pop_if_equal(slot.input()) { + self.outs.push(slot2.indexed_value()); + self.heap.refill(slot2); + popped += 1; + } + // This key is in the symmetric difference if and only if it + // appears in an odd number of sets. 
+            if popped % 2 == 0 {
+                self.heap.refill(slot);
+            } else {
+                self.cur_slot = Some(slot);
+                let key = self.cur_slot.as_ref().unwrap().input();
+                return Some((key, &self.outs));
+            }
+        }
+    }
+}
+
+// A collection of streams kept in a binary heap, ordered by each stream's
+// current key so the smallest key is always popped first. (Slot's ordering
+// is reversed below, turning std's max-heap into a min-heap.)
+struct StreamHeap<'f> {
+    rdrs: Vec<BoxedStream<'f>>,
+    heap: BinaryHeap<Slot>,
+}
+
+impl<'f> StreamHeap<'f> {
+    fn new(streams: Vec<BoxedStream<'f>>) -> StreamHeap<'f> {
+        let mut u = StreamHeap { rdrs: streams, heap: BinaryHeap::new() };
+        // Prime the heap with the first item of every stream.
+        for i in 0..u.rdrs.len() {
+            u.refill(Slot::new(i));
+        }
+        u
+    }
+
+    // Remove and return the slot holding the smallest current key, if any.
+    fn pop(&mut self) -> Option<Slot> {
+        self.heap.pop()
+    }
+
+    // True if the next slot to be popped holds exactly `key`.
+    fn peek_is_duplicate(&self, key: &[u8]) -> bool {
+        self.heap.peek().map(|s| s.input() == key).unwrap_or(false)
+    }
+
+    // Pop the next slot only when its key equals `key`.
+    fn pop_if_equal(&mut self, key: &[u8]) -> Option<Slot> {
+        if self.peek_is_duplicate(key) {
+            self.pop()
+        } else {
+            None
+        }
+    }
+
+    // Pop the next slot only when its key is <= `key` (used by Difference).
+    fn pop_if_le(&mut self, key: &[u8]) -> Option<Slot> {
+        if self.heap.peek().map(|s| s.input() <= key).unwrap_or(false) {
+            self.pop()
+        } else {
+            None
+        }
+    }
+
+    // Total number of streams added, including already exhausted ones.
+    fn num_slots(&self) -> usize {
+        self.rdrs.len()
+    }
+
+    // Advance the slot's stream by one item and push the slot back onto the
+    // heap. A slot whose stream is exhausted is simply dropped, removing
+    // that stream from further consideration.
+    fn refill(&mut self, mut slot: Slot) {
+        if let Some((input, output)) = self.rdrs[slot.idx].next() {
+            slot.set_input(input);
+            slot.set_output(output);
+            self.heap.push(slot);
+        }
+    }
+}
+
+// A cursor over one stream: the stream's index plus the key/output it is
+// currently positioned at. The key buffer is reused across refills.
+#[derive(Debug, Eq, PartialEq)]
+struct Slot {
+    idx: usize,
+    input: Vec<u8>,
+    output: Output,
+}
+
+impl Slot {
+    fn new(rdr_idx: usize) -> Slot {
+        Slot {
+            idx: rdr_idx,
+            input: Vec::with_capacity(64),
+            output: Output::zero(),
+        }
+    }
+
+    fn indexed_value(&self) -> IndexedValue {
+        IndexedValue { index: self.idx, value: self.output.value() }
+    }
+
+    fn input(&self) -> &[u8] {
+        &self.input
+    }
+
+    fn set_input(&mut self, input: &[u8]) {
+        self.input.clear();
+        self.input.extend(input);
+    }
+
+    fn set_output(&mut self, output: Output) {
+        self.output = output;
+    }
+}
+
+impl PartialOrd for Slot {
+    // Compare by (key, output) and reverse, so BinaryHeap pops the smallest.
+    fn partial_cmp(&self, other: &Slot) -> Option<cmp::Ordering> {
+        (&self.input, self.output)
+            .partial_cmp(&(&other.input, other.output))
+            .map(|ord|
ord.reverse()) + } +} + +impl Ord for Slot { + fn cmp(&self, other: &Slot) -> cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +#[cfg(test)] +mod tests { + use crate::raw::tests::{fst_map, fst_set}; + use crate::raw::Fst; + use crate::stream::{IntoStreamer, Streamer}; + + use super::OpBuilder; + + fn s(string: &str) -> String { + string.to_owned() + } + + macro_rules! create_set_op { + ($name:ident, $op:ident) => { + fn $name(sets: Vec<Vec<&str>>) -> Vec<String> { + let fsts: Vec<Fst<_>> = + sets.into_iter().map(fst_set).collect(); + let op: OpBuilder = fsts.iter().collect(); + let mut stream = op.$op().into_stream(); + let mut keys = vec![]; + while let Some((key, _)) = stream.next() { + keys.push(String::from_utf8(key.to_vec()).unwrap()); + } + keys + } + }; + } + + macro_rules! create_map_op { + ($name:ident, $op:ident) => { + fn $name(sets: Vec<Vec<(&str, u64)>>) -> Vec<(String, u64)> { + let fsts: Vec<Fst<_>> = + sets.into_iter().map(fst_map).collect(); + let op: OpBuilder = fsts.iter().collect(); + let mut stream = op.$op().into_stream(); + let mut keys = vec![]; + while let Some((key, outs)) = stream.next() { + let merged = outs.iter().fold(0, |a, b| a + b.value); + let s = String::from_utf8(key.to_vec()).unwrap(); + keys.push((s, merged)); + } + keys + } + }; + } + + create_set_op!(fst_union, union); + create_set_op!(fst_intersection, intersection); + create_set_op!(fst_symmetric_difference, symmetric_difference); + create_set_op!(fst_difference, difference); + create_map_op!(fst_union_map, union); + create_map_op!(fst_intersection_map, intersection); + create_map_op!(fst_symmetric_difference_map, symmetric_difference); + create_map_op!(fst_difference_map, difference); + + #[test] + fn union_set() { + let v = fst_union(vec![vec!["a", "b", "c"], vec!["x", "y", "z"]]); + assert_eq!(v, vec!["a", "b", "c", "x", "y", "z"]); + } + + #[test] + fn union_set_dupes() { + let v = fst_union(vec![vec!["aa", "b", "cc"], vec!["b", "cc", "z"]]); + assert_eq!(v, 
vec!["aa", "b", "cc", "z"]); + } + + #[test] + fn union_map() { + let v = fst_union_map(vec![ + vec![("a", 1), ("b", 2), ("c", 3)], + vec![("x", 1), ("y", 2), ("z", 3)], + ]); + assert_eq!( + v, + vec![ + (s("a"), 1), + (s("b"), 2), + (s("c"), 3), + (s("x"), 1), + (s("y"), 2), + (s("z"), 3), + ] + ); + } + + #[test] + fn union_map_dupes() { + let v = fst_union_map(vec![ + vec![("aa", 1), ("b", 2), ("cc", 3)], + vec![("b", 1), ("cc", 2), ("z", 3)], + vec![("b", 1)], + ]); + assert_eq!( + v, + vec![(s("aa"), 1), (s("b"), 4), (s("cc"), 5), (s("z"), 3),] + ); + } + + #[test] + fn intersect_set() { + let v = + fst_intersection(vec![vec!["a", "b", "c"], vec!["x", "y", "z"]]); + assert_eq!(v, Vec::<String>::new()); + } + + #[test] + fn intersect_set_dupes() { + let v = fst_intersection(vec![ + vec!["aa", "b", "cc"], + vec!["b", "cc", "z"], + ]); + assert_eq!(v, vec!["b", "cc"]); + } + + #[test] + fn intersect_map() { + let v = fst_intersection_map(vec![ + vec![("a", 1), ("b", 2), ("c", 3)], + vec![("x", 1), ("y", 2), ("z", 3)], + ]); + assert_eq!(v, Vec::<(String, u64)>::new()); + } + + #[test] + fn intersect_map_dupes() { + let v = fst_intersection_map(vec![ + vec![("aa", 1), ("b", 2), ("cc", 3)], + vec![("b", 1), ("cc", 2), ("z", 3)], + vec![("b", 1)], + ]); + assert_eq!(v, vec![(s("b"), 4)]); + } + + #[test] + fn symmetric_difference() { + let v = fst_symmetric_difference(vec![ + vec!["a", "b", "c"], + vec!["a", "b"], + vec!["a"], + ]); + assert_eq!(v, vec!["a", "c"]); + } + + #[test] + fn symmetric_difference_map() { + let v = fst_symmetric_difference_map(vec![ + vec![("a", 1), ("b", 2), ("c", 3)], + vec![("a", 1), ("b", 2)], + vec![("a", 1)], + ]); + assert_eq!(v, vec![(s("a"), 3), (s("c"), 3)]); + } + + #[test] + fn difference() { + let v = fst_difference(vec![ + vec!["a", "b", "c"], + vec!["a", "b"], + vec!["a"], + ]); + assert_eq!(v, vec!["c"]); + } + + #[test] + fn difference2() { + // Regression test: https://github.com/BurntSushi/fst/issues/19 + let v = 
fst_difference(vec![vec!["a", "c"], vec!["b", "c"]]); + assert_eq!(v, vec!["a"]); + let v = fst_difference(vec![vec!["bar", "foo"], vec!["baz", "foo"]]); + assert_eq!(v, vec!["bar"]); + } + + #[test] + fn difference_map() { + let v = fst_difference_map(vec![ + vec![("a", 1), ("b", 2), ("c", 3)], + vec![("a", 1), ("b", 2)], + vec![("a", 1)], + ]); + assert_eq!(v, vec![(s("c"), 3)]); + } +} diff --git a/vendor/fst/src/raw/registry.rs b/vendor/fst/src/raw/registry.rs new file mode 100644 index 000000000..3b00a5b0c --- /dev/null +++ b/vendor/fst/src/raw/registry.rs @@ -0,0 +1,264 @@ +use crate::raw::build::BuilderNode; +use crate::raw::{CompiledAddr, NONE_ADDRESS}; + +#[derive(Debug)] +pub struct Registry { + table: Vec<RegistryCell>, + table_size: usize, // number of rows + mru_size: usize, // number of columns +} + +#[derive(Debug)] +struct RegistryCache<'a> { + cells: &'a mut [RegistryCell], +} + +#[derive(Clone, Debug)] +pub struct RegistryCell { + addr: CompiledAddr, + node: BuilderNode, +} + +#[derive(Debug)] +pub enum RegistryEntry<'a> { + Found(CompiledAddr), + NotFound(&'a mut RegistryCell), + Rejected, +} + +impl Registry { + pub fn new(table_size: usize, mru_size: usize) -> Registry { + let empty_cell = RegistryCell::none(); + let ncells = table_size.checked_mul(mru_size).unwrap(); + Registry { table: vec![empty_cell; ncells], table_size, mru_size } + } + + pub fn entry<'a>(&'a mut self, node: &BuilderNode) -> RegistryEntry<'a> { + if self.table.is_empty() { + return RegistryEntry::Rejected; + } + let bucket = self.hash(node); + let start = self.mru_size * bucket; + let end = start + self.mru_size; + RegistryCache { cells: &mut self.table[start..end] }.entry(node) + } + + fn hash(&self, node: &BuilderNode) -> usize { + // Basic FNV-1a hash as described: + // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + // + // In unscientific experiments, this provides the same compression + // as `std::hash::SipHasher` but is much much 
faster.
+        const FNV_PRIME: u64 = 1099511628211;
+        let mut h = 14695981039346656037;
+        // Fold in everything that distinguishes one node from another:
+        // finality, final output, and each transition's (input, output, addr).
+        h = (h ^ (node.is_final as u64)).wrapping_mul(FNV_PRIME);
+        h = (h ^ node.final_output.value()).wrapping_mul(FNV_PRIME);
+        for t in &node.trans {
+            h = (h ^ (t.inp as u64)).wrapping_mul(FNV_PRIME);
+            h = (h ^ t.out.value()).wrapping_mul(FNV_PRIME);
+            h = (h ^ (t.addr as u64)).wrapping_mul(FNV_PRIME);
+        }
+        (h as usize) % self.table_size
+    }
+}
+
+impl<'a> RegistryCache<'a> {
+    // Look up `node` in this bucket's MRU row. On a hit the matching cell
+    // is moved to the front; on a miss the least recently used cell is
+    // overwritten with `node` and handed back for the caller to fill in.
+    fn entry(mut self, node: &BuilderNode) -> RegistryEntry<'a> {
+        if self.cells.len() == 1 {
+            // Single-cell row: a direct compare, no LRU bookkeeping.
+            let cell = &mut self.cells[0];
+            if !cell.is_none() && &cell.node == node {
+                RegistryEntry::Found(cell.addr)
+            } else {
+                cell.node.clone_from(node);
+                RegistryEntry::NotFound(cell)
+            }
+        } else if self.cells.len() == 2 {
+            // Two-cell row: hand-unrolled variant of the general LRU case.
+            let cell1 = &mut self.cells[0];
+            if !cell1.is_none() && &cell1.node == node {
+                return RegistryEntry::Found(cell1.addr);
+            }
+
+            let cell2 = &mut self.cells[1];
+            if !cell2.is_none() && &cell2.node == node {
+                let addr = cell2.addr;
+                self.cells.swap(0, 1);
+                return RegistryEntry::Found(addr);
+            }
+
+            self.cells[1].node.clone_from(node);
+            self.cells.swap(0, 1);
+            RegistryEntry::NotFound(&mut self.cells[0])
+        } else {
+            let find = |c: &RegistryCell| !c.is_none() && &c.node == node;
+            if let Some(i) = self.cells.iter().position(find) {
+                let addr = self.cells[i].addr;
+                self.promote(i); // most recently used
+                RegistryEntry::Found(addr)
+            } else {
+                let last = self.cells.len() - 1;
+                self.cells[last].node.clone_from(node); // discard LRU
+                self.promote(last);
+                RegistryEntry::NotFound(&mut self.cells[0])
+            }
+        }
+    }
+
+    // Move cell `i` to the front of the row, shifting cells 0..i right by
+    // one position via adjacent swaps.
+    fn promote(&mut self, mut i: usize) {
+        assert!(i < self.cells.len());
+        while i > 0 {
+            self.cells.swap(i - 1, i);
+            i -= 1;
+        }
+    }
+}
+
+impl RegistryCell {
+    // An empty cell, marked by the NONE_ADDRESS sentinel address.
+    fn none() -> RegistryCell {
+        RegistryCell { addr: NONE_ADDRESS, node: BuilderNode::default() }
+    }
+
+    fn is_none(&self) -> bool {
+        self.addr == NONE_ADDRESS
+    }
+
+    // Record the compiled address for the node stored in this cell.
+    pub fn insert(&mut self, addr: CompiledAddr) {
self.addr = addr; + } +} + +#[cfg(test)] +mod tests { + use super::{Registry, RegistryCache, RegistryCell, RegistryEntry}; + use crate::raw::build::BuilderNode; + use crate::raw::{Output, Transition}; + + fn assert_rejected(entry: RegistryEntry) { + match entry { + RegistryEntry::Rejected => {} + entry => panic!("expected rejected entry, got: {:?}", entry), + } + } + + fn assert_not_found(entry: RegistryEntry) { + match entry { + RegistryEntry::NotFound(_) => {} + entry => panic!("expected nout found entry, got: {:?}", entry), + } + } + + fn assert_insert_and_found(reg: &mut Registry, bnode: &BuilderNode) { + match reg.entry(&bnode) { + RegistryEntry::NotFound(cell) => cell.insert(1234), + entry => panic!("unexpected not found entry, got: {:?}", entry), + } + match reg.entry(&bnode) { + RegistryEntry::Found(addr) => assert_eq!(addr, 1234), + entry => panic!("unexpected found entry, got: {:?}", entry), + } + } + + #[test] + fn empty_is_ok() { + let mut reg = Registry::new(0, 0); + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![], + }; + assert_rejected(reg.entry(&bnode)); + } + + #[test] + fn one_final_is_ok() { + let mut reg = Registry::new(1, 1); + let bnode = BuilderNode { + is_final: true, + final_output: Output::zero(), + trans: vec![], + }; + assert_insert_and_found(&mut reg, &bnode); + } + + #[test] + fn one_with_trans_is_ok() { + let mut reg = Registry::new(1, 1); + let bnode = BuilderNode { + is_final: false, + final_output: Output::zero(), + trans: vec![Transition { + addr: 0, + inp: b'a', + out: Output::zero(), + }], + }; + assert_insert_and_found(&mut reg, &bnode); + assert_not_found( + reg.entry(&BuilderNode { is_final: true, ..bnode.clone() }), + ); + assert_not_found(reg.entry(&BuilderNode { + trans: vec![Transition { + addr: 0, + inp: b'b', + out: Output::zero(), + }], + ..bnode.clone() + })); + assert_not_found(reg.entry(&BuilderNode { + trans: vec![Transition { + addr: 0, + inp: b'a', + out: 
Output::new(1), + }], + ..bnode.clone() + })); + } + + #[test] + fn cache_works() { + let mut reg = Registry::new(1, 1); + + let bnode1 = BuilderNode { is_final: true, ..BuilderNode::default() }; + assert_insert_and_found(&mut reg, &bnode1); + + let bnode2 = + BuilderNode { final_output: Output::new(1), ..bnode1.clone() }; + assert_insert_and_found(&mut reg, &bnode2); + assert_not_found(reg.entry(&bnode1)); + } + + #[test] + fn promote() { + let bn = BuilderNode::default(); + let mut bnodes = vec![ + RegistryCell { addr: 1, node: bn.clone() }, + RegistryCell { addr: 2, node: bn.clone() }, + RegistryCell { addr: 3, node: bn.clone() }, + RegistryCell { addr: 4, node: bn.clone() }, + ]; + let mut cache = RegistryCache { cells: &mut bnodes }; + + cache.promote(0); + assert_eq!(cache.cells[0].addr, 1); + assert_eq!(cache.cells[1].addr, 2); + assert_eq!(cache.cells[2].addr, 3); + assert_eq!(cache.cells[3].addr, 4); + + cache.promote(1); + assert_eq!(cache.cells[0].addr, 2); + assert_eq!(cache.cells[1].addr, 1); + assert_eq!(cache.cells[2].addr, 3); + assert_eq!(cache.cells[3].addr, 4); + + cache.promote(3); + assert_eq!(cache.cells[0].addr, 4); + assert_eq!(cache.cells[1].addr, 2); + assert_eq!(cache.cells[2].addr, 1); + assert_eq!(cache.cells[3].addr, 3); + + cache.promote(2); + assert_eq!(cache.cells[0].addr, 1); + assert_eq!(cache.cells[1].addr, 4); + assert_eq!(cache.cells[2].addr, 2); + assert_eq!(cache.cells[3].addr, 3); + } +} diff --git a/vendor/fst/src/raw/registry_minimal.rs b/vendor/fst/src/raw/registry_minimal.rs new file mode 100644 index 000000000..663b0123a --- /dev/null +++ b/vendor/fst/src/raw/registry_minimal.rs @@ -0,0 +1,53 @@ +// This module is a drop-in but inefficient replacement of the LRU registry. +// In particular, this registry will never forget a node. In other words, if +// this registry is used during construction, then you're guaranteed a minimal +// FST. +// +// This is really only meant to be used for debugging and experiments. 
It is +// a memory/CPU hog. +// +// One "easy" improvement here is to use an FNV hash instead of the super +// expensive SipHasher. + +#![allow(dead_code)] + +use std::collections::hash_map::{Entry, HashMap}; + +use crate::raw::build::BuilderNode; +use crate::raw::CompiledAddr; + +#[derive(Debug)] +pub struct Registry { + table: HashMap<BuilderNode, RegistryCell>, +} + +#[derive(Debug)] +pub enum RegistryEntry<'a> { + Found(CompiledAddr), + NotFound(&'a mut RegistryCell), + Rejected, +} + +#[derive(Clone, Copy, Debug)] +pub struct RegistryCell(CompiledAddr); + +impl Registry { + pub fn new(table_size: usize, _lru_size: usize) -> Registry { + Registry { table: HashMap::with_capacity(table_size) } + } + + pub fn entry<'a>(&'a mut self, bnode: &BuilderNode) -> RegistryEntry<'a> { + match self.table.entry(bnode.clone()) { + Entry::Occupied(v) => RegistryEntry::Found(v.get().0), + Entry::Vacant(v) => { + RegistryEntry::NotFound(v.insert(RegistryCell(0))) + } + } + } +} + +impl RegistryCell { + pub fn insert(&mut self, addr: CompiledAddr) { + self.0 = addr; + } +} diff --git a/vendor/fst/src/raw/tests.rs b/vendor/fst/src/raw/tests.rs new file mode 100644 index 000000000..c9e77a5f7 --- /dev/null +++ b/vendor/fst/src/raw/tests.rs @@ -0,0 +1,589 @@ +use crate::automaton::AlwaysMatch; +use crate::error::Error; +use crate::raw::{self, Bound, Builder, Fst, Output, Stream, VERSION}; +use crate::stream::Streamer; + +const TEXT: &'static str = include_str!("./../../data/words-100000"); + +pub fn fst_set<I, S>(ss: I) -> Fst<Vec<u8>> +where + I: IntoIterator<Item = S>, + S: AsRef<[u8]>, +{ + let mut bfst = Builder::memory(); + let mut ss: Vec<Vec<u8>> = + ss.into_iter().map(|s| s.as_ref().to_vec()).collect(); + ss.sort(); + ss.dedup(); + for s in ss.iter().into_iter() { + bfst.add(s).unwrap(); + } + let fst = bfst.into_fst(); + assert_eq!(fst.len(), ss.len()); + fst +} + +pub fn fst_map<I, S>(ss: I) -> Fst<Vec<u8>> +where + I: IntoIterator<Item = (S, u64)>, + S: AsRef<[u8]>, +{ + 
let mut bfst = Builder::memory(); + let mut ss: Vec<(Vec<u8>, u64)> = + ss.into_iter().map(|(s, o)| (s.as_ref().to_vec(), o)).collect(); + ss.sort(); + ss.dedup(); + for (s, o) in ss.into_iter() { + bfst.insert(s, o).unwrap(); + } + bfst.into_fst() +} + +pub fn fst_inputs<D: AsRef<[u8]>>(fst: &Fst<D>) -> Vec<Vec<u8>> { + let mut words = vec![]; + let mut rdr = fst.stream(); + while let Some((word, _)) = rdr.next() { + words.push(word.to_vec()); + } + words +} + +pub fn fst_inputs_outputs<D: AsRef<[u8]>>( + fst: &Fst<D>, +) -> Vec<(Vec<u8>, u64)> { + let mut words = vec![]; + let mut rdr = fst.stream(); + while let Some((word, out)) = rdr.next() { + words.push((word.to_vec(), out.value())); + } + words +} + +macro_rules! test_set { + ($name:ident, $($s:expr),+) => { + #[test] + fn $name() { + let mut items = vec![$($s),*]; + let fst = fst_set(&items); + let mut rdr = fst.stream(); + items.sort(); + items.dedup(); + for item in &items { + assert_eq!(rdr.next().unwrap().0, item.as_bytes()); + } + assert_eq!(rdr.next(), None); + for item in &items { + assert!(fst.get(item).is_some()); + } + } + } +} + +macro_rules! 
test_set_fail { + ($name:ident, $($s:expr),+) => { + #[test] + #[should_panic] + fn $name() { + let mut bfst = Builder::memory(); + $(bfst.add($s).unwrap();)* + } + } +} + +test_set!(fst_set_only_empty, ""); +test_set!(fst_set_one, "a"); +test_set!(fst_set_dupe_empty, "", ""); +test_set!(fst_set_dupe1, "a", "a"); +test_set!(fst_set_dupe2, "a", "b", "b"); +test_set!(fst_set_two1, "a", "b"); +test_set!(fst_set_two2, "a", "ab"); +test_set!(fst_set_jan, "jam", "jbm", "jcm", "jdm", "jem", "jfm", "jgm"); + +test_set_fail!(fst_set_order1, "b", "a"); +test_set_fail!(fst_set_order2, "a", "b", "c", "a"); + +#[test] +fn fst_set_100000() { + let words: Vec<Vec<u8>> = + TEXT.lines().map(|s| s.as_bytes().to_vec()).collect(); + let fst = fst_set(words.clone()); + assert_eq!(words, fst_inputs(&fst)); + for word in &words { + assert!( + fst.get(word).is_some(), + "failed to find word: {}", + std::str::from_utf8(word).unwrap() + ); + } +} + +macro_rules! test_map { + ($name:ident, $($s:expr, $o:expr),+) => { + #[test] + fn $name() { + let fst = fst_map(vec![$(($s, $o)),*]); + let mut rdr = fst.stream(); + $({ + let (s, o) = rdr.next().unwrap(); + assert_eq!((s, o.value()), ($s.as_bytes(), $o)); + })* + assert_eq!(rdr.next(), None); + $({ + assert_eq!(fst.get($s.as_bytes()), Some(Output::new($o))); + })* + } + } +} + +macro_rules! 
test_map_fail { + ($name:ident, $($s:expr, $o:expr),+) => { + #[test] + #[should_panic] + fn $name() { + let mut bfst = Builder::memory(); + $(bfst.insert($s, $o).unwrap();)* + } + } +} + +test_map!(fst_map_only_empty1, "", 0); +test_map!(fst_map_only_empty2, "", 100); +test_map!(fst_map_only_empty3, "", 9999999999); +test_map!(fst_map_one1, "a", 0); +test_map!(fst_map_one2, "a", 100); +test_map!(fst_map_one3, "a", 999999999); +test_map!(fst_map_two, "a", 1, "b", 2); +test_map!(fst_map_many1, "a", 34786, "ab", 26); +test_map!( + fst_map_many2, + "a", + 34786, + "ab", + 26, + "abc", + 58976, + "abcd", + 25, + "z", + 58, + "zabc", + 6798 +); +test_map!(fst_map_many3, "a", 1, "ab", 0, "abc", 0); + +test_map_fail!(fst_map_dupe_empty, "", 0, "", 0); +test_map_fail!(fst_map_dupe1, "a", 0, "a", 0); +test_map_fail!(fst_map_dupe2, "a", 0, "b", 0, "b", 0); +test_map_fail!(fst_map_order1, "b", 0, "a", 0); +test_map_fail!(fst_map_order2, "a", 0, "b", 0, "c", 0, "a", 0); + +#[test] +fn fst_map_100000_increments() { + let words: Vec<(Vec<u8>, u64)> = TEXT + .lines() + .enumerate() + .map(|(i, s)| (s.as_bytes().to_vec(), i as u64)) + .collect(); + let fst = fst_map(words.clone()); + assert_eq!(words, fst_inputs_outputs(&fst)); + for &(ref word, out) in &words { + assert_eq!(fst.get(word), Some(Output::new(out))); + } +} + +#[test] +fn fst_map_100000_lengths() { + let words: Vec<(Vec<u8>, u64)> = TEXT + .lines() + .map(|s| (s.as_bytes().to_vec(), s.len() as u64)) + .collect(); + let fst = fst_map(words.clone()); + assert_eq!(words, fst_inputs_outputs(&fst)); + for &(ref word, out) in &words { + assert_eq!(fst.get(word), Some(Output::new(out))); + } +} + +#[test] +fn invalid_version() { + match Fst::new(vec![0; 36]) { + Err(Error::Fst(raw::Error::Version { got, .. 
})) => assert_eq!(got, 0), + Err(err) => panic!("expected version error, got {:?}", err), + Ok(_) => panic!("expected version error, got FST"), + } +} + +#[test] +fn invalid_version_crate_too_old() { + let mut buf = vec![0; 36]; + crate::bytes::write_u64_le(VERSION + 1, &mut buf); + match Fst::new(buf) { + Err(Error::Fst(raw::Error::Version { got, .. })) => { + assert_eq!(got, VERSION + 1); + } + Err(err) => panic!("expected version error, got {:?}", err), + Ok(_) => panic!("expected version error, got FST"), + } +} + +#[test] +fn invalid_format() { + match Fst::new(vec![0; 0]) { + Err(Error::Fst(raw::Error::Format { .. })) => {} + Err(err) => panic!("expected format error, got {:?}", err), + Ok(_) => panic!("expected format error, got FST"), + } +} + +#[test] +fn fst_set_zero() { + let fst = fst_set::<_, String>(vec![]); + let mut rdr = fst.stream(); + assert_eq!(rdr.next(), None); +} + +macro_rules! test_range { + ( + $name:ident, + min: $min:expr, + max: $max:expr, + imin: $imin:expr, + imax: $imax:expr, + $($s:expr),* + ) => { + #[test] + fn $name() { + let items: Vec<&'static str> = vec![$($s),*]; + let items: Vec<_> = items + .into_iter() + .enumerate() + .map(|(i, k)| (k, i as u64)) + .collect(); + let fst = fst_map(items.clone()); + let mut rdr = Stream::new(fst.as_ref(), AlwaysMatch, $min, $max); + for i in $imin..$imax { + assert_eq!( + rdr.next().unwrap(), + (items[i].0.as_bytes(), Output::new(items[i].1)), + ); + } + assert_eq!(rdr.next(), None); + } + } +} + +test_range! { + fst_range_empty_1, + min: Bound::Unbounded, max: Bound::Unbounded, + imin: 0, imax: 0, +} + +test_range! { + fst_range_empty_2, + min: Bound::Unbounded, max: Bound::Unbounded, + imin: 0, imax: 1, + "" +} + +test_range! { + fst_range_empty_3, + min: Bound::Included(vec![]), max: Bound::Unbounded, + imin: 0, imax: 1, + "" +} + +test_range! { + fst_range_empty_4, + min: Bound::Excluded(vec![]), max: Bound::Unbounded, + imin: 0, imax: 0, + "" +} + +test_range! 
{ + fst_range_empty_5, + min: Bound::Included(vec![]), max: Bound::Unbounded, + imin: 0, imax: 2, + "", "a" +} + +test_range! { + fst_range_empty_6, + min: Bound::Excluded(vec![]), max: Bound::Unbounded, + imin: 1, imax: 2, + "", "a" +} + +test_range! { + fst_range_empty_7, + min: Bound::Unbounded, max: Bound::Unbounded, + imin: 0, imax: 2, + "", "a" +} + +test_range! { + fst_range_empty_8, + min: Bound::Unbounded, max: Bound::Included(vec![]), + imin: 0, imax: 1, + "" +} + +test_range! { + fst_range_empty_9, + min: Bound::Unbounded, max: Bound::Excluded(vec![]), + imin: 0, imax: 0, + "" +} + +test_range! { + fst_range_empty_10, + min: Bound::Unbounded, max: Bound::Included(vec![]), + imin: 0, imax: 1, + "", "a" +} + +test_range! { + fst_range_empty_11, + min: Bound::Included(vec![]), max: Bound::Included(vec![]), + imin: 0, imax: 1, + "" +} + +test_range! { + fst_range_1, + min: Bound::Included(vec![b'a']), max: Bound::Included(vec![b'z']), + imin: 0, imax: 4, + "a", "b", "y", "z" +} + +test_range! { + fst_range_2, + min: Bound::Excluded(vec![b'a']), max: Bound::Included(vec![b'y']), + imin: 1, imax: 3, + "a", "b", "y", "z" +} + +test_range! { + fst_range_3, + min: Bound::Excluded(vec![b'a']), max: Bound::Excluded(vec![b'y']), + imin: 1, imax: 2, + "a", "b", "y", "z" +} + +test_range! { + fst_range_4, + min: Bound::Unbounded, max: Bound::Unbounded, + imin: 0, imax: 4, + "a", "b", "y", "z" +} + +test_range! { + fst_range_5, + min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded, + imin: 0, imax: 0, + "a", "ab", "abc", "abcd", "abcde" +} + +test_range! { + fst_range_6, + min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded, + imin: 5, imax: 6, + "a", "ab", "abc", "abcd", "abcde", "abe" +} + +test_range! { + fst_range_7, + min: Bound::Excluded(b"abd".to_vec()), max: Bound::Unbounded, + imin: 5, imax: 6, + "a", "ab", "abc", "abcd", "abcde", "abe" +} + +test_range! 
{ + fst_range_8, + min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded, + imin: 5, imax: 6, + "a", "ab", "abc", "abcd", "abcde", "xyz" +} + +test_range! { + fst_range_9, + min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()), + imin: 0, imax: 5, + "a", "ab", "abc", "abcd", "abcde", "abe" +} + +test_range! { + fst_range_10, + min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()), + imin: 0, imax: 6, + "a", "ab", "abc", "abcd", "abcde", "abd" +} + +test_range! { + fst_range_11, + min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()), + imin: 0, imax: 6, + "a", "ab", "abc", "abcd", "abcde", "abd", "abdx" +} + +test_range! { + fst_range_12, + min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()), + imin: 0, imax: 5, + "a", "ab", "abc", "abcd", "abcde", "abe" +} + +test_range! { + fst_range_13, + min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()), + imin: 0, imax: 5, + "a", "ab", "abc", "abcd", "abcde", "abd" +} + +test_range! { + fst_range_14, + min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()), + imin: 0, imax: 5, + "a", "ab", "abc", "abcd", "abcde", "abd", "abdx" +} + +test_range! { + fst_range_15, + min: Bound::Included(vec![b'd']), max: Bound::Included(vec![b'c']), + imin: 0, imax: 0, + "a", "b", "c", "d", "e", "f" +} + +test_range! { + fst_range_16, + min: Bound::Included(vec![b'c']), max: Bound::Included(vec![b'c']), + imin: 2, imax: 3, + "a", "b", "c", "d", "e", "f" +} + +test_range! { + fst_range_17, + min: Bound::Excluded(vec![b'c']), max: Bound::Excluded(vec![b'c']), + imin: 0, imax: 0, + "a", "b", "c", "d", "e", "f" +} + +test_range! { + fst_range_18, + min: Bound::Included(vec![b'c']), max: Bound::Excluded(vec![b'c']), + imin: 0, imax: 0, + "a", "b", "c", "d", "e", "f" +} + +test_range! 
{ + fst_range_19, + min: Bound::Included(vec![b'c']), max: Bound::Excluded(vec![b'd']), + imin: 2, imax: 3, + "a", "b", "c", "d", "e", "f" +} + +#[test] +fn one_vec_multiple_fsts() { + let mut bfst1 = Builder::memory(); + bfst1.add(b"bar").unwrap(); + bfst1.add(b"baz").unwrap(); + let bytes = bfst1.into_inner().unwrap(); + let fst1_len = bytes.len(); + + let mut bfst2 = Builder::new(bytes).unwrap(); + bfst2.add(b"bar").unwrap(); + bfst2.add(b"foo").unwrap(); + + let bytes = bfst2.into_inner().unwrap(); + let slice1 = &bytes[0..fst1_len]; + let slice2 = &bytes[fst1_len..bytes.len()]; + + let fst1 = Fst::new(slice1).unwrap(); + let fst2 = Fst::new(slice2).unwrap(); + + assert_eq!(fst_inputs(&fst1), vec![b"bar".to_vec(), b"baz".to_vec()]); + assert_eq!(fst_inputs(&fst2), vec![b"bar".to_vec(), b"foo".to_vec()]); +} + +#[test] +fn bytes_written() { + let mut bfst1 = Builder::memory(); + bfst1.add(b"bar").unwrap(); + bfst1.add(b"baz").unwrap(); + let counted_len = bfst1.bytes_written(); + let bytes = bfst1.into_inner().unwrap(); + let fst1_len = bytes.len() as u64; + let footer_size = 28; + assert_eq!(counted_len + footer_size, fst1_len); +} + +#[test] +fn get_key_simple() { + let map = fst_map(vec![("abc", 2), ("xyz", 3)]); + assert_eq!(map.get_key(0), None); + assert_eq!(map.get_key(1), None); + assert_eq!(map.get_key(2), Some(b"abc".to_vec())); + assert_eq!(map.get_key(3), Some(b"xyz".to_vec())); + assert_eq!(map.get_key(4), None); +} + +#[test] +fn get_key_words() { + let words: Vec<(Vec<u8>, u64)> = TEXT + .lines() + .enumerate() + .map(|(i, line)| (line.as_bytes().to_vec(), i as u64)) + .collect(); + let map = fst_map(words.clone()); + for (key, value) in words { + assert_eq!(map.get_key(value), Some(key)); + } +} + +#[test] +fn get_key_words_discontiguous() { + let words: Vec<(Vec<u8>, u64)> = TEXT + .lines() + .enumerate() + .map(|(i, line)| (line.as_bytes().to_vec(), i as u64 * 2)) + .collect(); + let map = fst_map(words.clone()); + for (key, value) in words { + 
assert_eq!(map.get_key(value), Some(key)); + } +} + +#[test] +fn verify_ok_nonempty() { + let words: Vec<(Vec<u8>, u64)> = TEXT + .lines() + .enumerate() + .map(|(i, line)| (line.as_bytes().to_vec(), i as u64 * 2)) + .collect(); + let map = fst_map(words.clone()); + assert!(map.verify().is_ok()); +} + +#[test] +fn verify_ok_empty() { + let map = fst_map(Vec::<(&str, u64)>::new()); + assert!(map.verify().is_ok()); +} + +#[test] +fn verify_err() { + let mut b = Builder::memory(); + b.add(b"bar").unwrap(); + b.add(b"baz").unwrap(); + let mut bytes = b.into_inner().unwrap(); + + { + let fst = Fst::new(&bytes).unwrap(); + assert!(fst.verify().is_ok()); + } + + bytes[17] = b'\xFF'; + { + let fst = Fst::new(&bytes).unwrap(); + assert!(fst.verify().is_err()); + } +} |