path: root/vendor/regex-automata/src/dfa/determinize.rs
use alloc::{collections::BTreeMap, vec::Vec};

use crate::{
    dfa::{
        dense::{self, BuildError},
        DEAD,
    },
    nfa::thompson,
    util::{
        self,
        alphabet::{self, ByteSet},
        determinize::{State, StateBuilderEmpty, StateBuilderNFA},
        primitives::{PatternID, StateID},
        search::{Anchored, MatchKind},
        sparse_set::SparseSets,
        start::Start,
    },
};

/// A builder for configuring and running a DFA determinizer.
#[derive(Clone, Debug)]
pub(crate) struct Config {
    match_kind: MatchKind,
    quit: ByteSet,
    dfa_size_limit: Option<usize>,
    determinize_size_limit: Option<usize>,
}

impl Config {
    /// Create a new default config for a determinizer. The determinizer may be
    /// configured before calling `run`.
    pub fn new() -> Config {
        Config {
            match_kind: MatchKind::LeftmostFirst,
            quit: ByteSet::empty(),
            dfa_size_limit: None,
            determinize_size_limit: None,
        }
    }

    /// Run determinization on the given NFA and write the resulting DFA into
    /// the one given. The DFA given should be initialized but otherwise empty.
    /// "Initialized" means that it is setup to handle the NFA's byte classes,
    /// number of patterns and whether to build start states for each pattern.
    pub fn run(
        &self,
        nfa: &thompson::NFA,
        dfa: &mut dense::OwnedDFA,
    ) -> Result<(), BuildError> {
        let dead = State::dead();
        let quit = State::dead();
        let mut cache = StateMap::default();
        // We only insert the dead state here since its representation is
        // identical to the quit state. And we never want anything pointing
        // to the quit state other than specific transitions derived from the
        // determinizer's configured "quit" bytes.
        //
        // We do put the quit state into 'builder_states' below. This ensures
        // that a proper DFA state ID is allocated for it, and that no other
        // DFA state uses the "location after the DEAD state." That is, it
        // is assumed that the quit state is always the state immediately
        // following the DEAD state.
        cache.insert(dead.clone(), DEAD);

        let runner = Runner {
            config: self.clone(),
            nfa,
            dfa,
            builder_states: alloc::vec![dead, quit],
            cache,
            memory_usage_state: 0,
            sparses: SparseSets::new(nfa.states().len()),
            stack: alloc::vec![],
            scratch_state_builder: StateBuilderEmpty::new(),
        };
        runner.run()
    }
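
    // A minimal usage sketch of this configuration, for orientation only. It
    // assumes a compiled `thompson::NFA` and a `dense::OwnedDFA` that has
    // already been initialized for it (byte classes, number of patterns and
    // which start states to build), which is how the dense DFA builder is
    // expected to drive this module:
    //
    //     let mut config = Config::new();
    //     config.match_kind(MatchKind::LeftmostFirst);
    //     config.quit(ByteSet::empty());
    //     config.dfa_size_limit(Some(1 << 20));
    //     config.run(&nfa, &mut dfa)?;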

    /// The match semantics to use for determinization.
    ///
    /// MatchKind::All corresponds to the standard textbook construction.
    /// All possible match states are represented in the DFA.
    /// MatchKind::LeftmostFirst permits greediness and otherwise tries to
    /// simulate the match semantics of backtracking regex engines. Namely,
    /// only a subset of match states are built, and dead states are used to
    /// stop searches with an unanchored prefix.
    ///
    /// The default is MatchKind::LeftmostFirst.
    pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
        self.match_kind = kind;
        self
    }

    /// The set of bytes that will cause the DFA to enter a quit state,
    /// stop searching and return an error. By default, this is empty.
    pub fn quit(&mut self, set: ByteSet) -> &mut Config {
        self.quit = set;
        self
    }

    /// The limit, in bytes of the heap, that the DFA is permitted to use. This
    /// does not include the auxiliary heap storage used by determinization.
    pub fn dfa_size_limit(&mut self, bytes: Option<usize>) -> &mut Config {
        self.dfa_size_limit = bytes;
        self
    }

    /// The limit, in bytes of the heap, that determinization itself is allowed
    /// to use. This does not include the size of the DFA being built.
    pub fn determinize_size_limit(
        &mut self,
        bytes: Option<usize>,
    ) -> &mut Config {
        self.determinize_size_limit = bytes;
        self
    }
}

/// The actual implementation of determinization that converts an NFA to a DFA
/// through powerset construction.
///
/// This determinizer roughly follows the typical powerset construction, where
/// each DFA state is comprised of one or more NFA states. In the worst case,
/// there is one DFA state for every possible combination of NFA states. In
/// practice, this only happens in certain conditions, typically when there are
/// bounded repetitions.
///
/// The main differences between this implementation and typical determinization
/// are that this implementation delays matches by one state and hackily makes
/// look-around work. Comments below attempt to explain this.
///
/// The lifetime variable `'a` refers to the lifetime of the NFA or DFA,
/// whichever is shorter.
#[derive(Debug)]
struct Runner<'a> {
    /// The configuration used to initialize determinization.
    config: Config,
    /// The NFA we're converting into a DFA.
    nfa: &'a thompson::NFA,
    /// The DFA we're building.
    dfa: &'a mut dense::OwnedDFA,
    /// Each DFA state being built is defined as an *ordered* set of NFA
    /// states, along with some meta facts about the ordered set of NFA states.
    ///
    /// This is never empty. The first state is always a dummy state such that
    /// a state id == 0 corresponds to a dead state. The second state is always
    /// the quit state.
    ///
    /// Why do we have states in both a `Vec` and in a cache map below?
    /// Well, they serve two different roles based on access patterns.
    /// `builder_states` is the canonical home of each state, and provides
    /// constant random access by a DFA state's ID. The cache map below, on
    /// the other hand, provides a quick way of searching for identical DFA
    /// states by using the DFA state as a key in the map. Of course, we use
    /// reference counting to avoid actually duplicating the state's data
    /// itself. (Although this has never been benchmarked.) Note that the cache
    /// map does not give us full minimization; it just lets us avoid some very
    /// obvious redundant states.
    ///
    /// Note that the index into this Vec isn't quite the DFA's state ID.
    /// Rather, it's just an index. To get the state ID, you have to multiply
    /// it by the DFA's stride. That's done by self.dfa.to_state_id, and the
    /// inverse is self.dfa.to_index.
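    /// (A purely illustrative example of that mapping: with a stride of 512,
    /// the state at index 3 in this Vec would correspond to DFA state ID
    /// 3 * 512 = 1536.)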
    ///
    /// Moreover, DFA states don't usually retain the IDs assigned to them
    /// by their position in this Vec. After determinization completes,
    /// states are shuffled around to support other optimizations. See the
    /// sibling 'special' module for more details on that. (The reason for
    /// mentioning this is that if you print out the DFA for debugging during
    /// determinization, and then print out the final DFA after it is fully
    /// built, then the state IDs likely won't match up.)
    builder_states: Vec<State>,
    /// A cache of DFA states that already exist and can be easily looked up
    /// via ordered sets of NFA states.
    ///
    /// See `builder_states` docs for why we store states in two different
    /// ways.
    cache: StateMap,
    /// The memory usage, in bytes, used by builder_states and cache. We track
    /// this as new states are added since states use a variable amount of
    /// heap. Tracking this as we add states makes it possible to compute the
    /// total amount of memory used by the determinizer in constant time.
    memory_usage_state: usize,
    /// A pair of sparse sets for tracking ordered sets of NFA state IDs.
    /// These are reused throughout determinization. A bounded sparse set
    /// gives us constant time insertion, membership testing and clearing.
    sparses: SparseSets,
    /// Scratch space for a stack of NFA states to visit, for depth first
    /// visiting without recursion.
    stack: Vec<StateID>,
    /// Scratch space for storing an ordered sequence of NFA states, for
    /// amortizing allocation. This is principally useful for when we avoid
    /// adding a new DFA state since it already exists. In order to detect this
    /// case though, we still need an ordered set of NFA state IDs. So we use
    /// this space to stage that ordered set before we know whether we need to
    /// create a new DFA state or not.
    scratch_state_builder: StateBuilderEmpty,
}

/// A map from states to state identifiers. When using std, we use a standard
/// hashmap, since it's a bit faster for this use case. (Other maps, like
/// ones based on FNV, have not yet been benchmarked.)
///
/// The main purpose of this map is to reuse states where possible. This won't
/// fully minimize the DFA, but it works well in a lot of cases.
#[cfg(feature = "std")]
type StateMap = std::collections::HashMap<State, StateID>;
#[cfg(not(feature = "std"))]
type StateMap = BTreeMap<State, StateID>;

impl<'a> Runner<'a> {
    /// Build the DFA. If there was a problem constructing the DFA (e.g., if
    /// the chosen state identifier representation is too small), then an error
    /// is returned.
    fn run(mut self) -> Result<(), BuildError> {
        if self.nfa.look_set_any().contains_word_unicode()
            && !self.config.quit.contains_range(0x80, 0xFF)
        {
            return Err(BuildError::unsupported_dfa_word_boundary_unicode());
        }

        // A sequence of "representative" bytes drawn from each equivalence
        // class. These representative bytes are fed to the NFA to compute
        // state transitions. This allows us to avoid re-computing state
        // transitions for bytes that are guaranteed to produce identical
        // results. Since computing the representatives needs to do a little
        // work, we do it once here because we'll be iterating over them a lot.
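        // (An illustrative example: for a pattern like `[a-z]+`, the bytes
        // b'a'..=b'z' are all in one equivalence class, so a single
        // representative byte from that class suffices to compute the
        // transition shared by every byte in the class.)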
        let representatives: Vec<alphabet::Unit> =
            self.dfa.byte_classes().representatives(..).collect();
        // The set of all DFA state IDs that still need to have their
        // transitions set. We start by seeding this with all starting states.
        let mut uncompiled = alloc::vec![];
        self.add_all_starts(&mut uncompiled)?;
        while let Some(dfa_id) = uncompiled.pop() {
            for &unit in &representatives {
                if unit.as_u8().map_or(false, |b| self.config.quit.contains(b))
                {
                    continue;
                }
                // In many cases, the state we transition to has already been
                // computed. 'cached_state' will do the minimal amount of work
                // to check this, and if it exists, immediately return an
                // already existing state ID.
                let (next_dfa_id, is_new) = self.cached_state(dfa_id, unit)?;
                self.dfa.set_transition(dfa_id, unit, next_dfa_id);
                // If the state ID we got back is newly created, then we need
                // to compile it, so add it to our uncompiled frontier.
                if is_new {
                    uncompiled.push(next_dfa_id);
                }
            }
        }
        debug!(
            "determinization complete, memory usage: {}, \
             dense DFA size: {}, \
             is reverse? {}",
            self.memory_usage(),
            self.dfa.memory_usage(),
            self.nfa.is_reverse(),
        );

        // A map from DFA state ID to one or more NFA match IDs. Each NFA match
        // ID corresponds to a distinct regex pattern that matches in the state
        // corresponding to the key.
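        // For example (IDs here are purely illustrative), an entry mapping
        // DFA state 6 to `[PatternID(0), PatternID(2)]` means that state 6 is
        // a match state for patterns 0 and 2.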
        let mut matches: BTreeMap<StateID, Vec<PatternID>> = BTreeMap::new();
        self.cache.clear();
        #[cfg(feature = "logging")]
        let mut total_pat_len = 0;
        for (i, state) in self.builder_states.into_iter().enumerate() {
            if let Some(pat_ids) = state.match_pattern_ids() {
                let id = self.dfa.to_state_id(i);
                log! {
                    total_pat_len += pat_ids.len();
                }
                matches.insert(id, pat_ids);
            }
        }
        log! {
            use core::mem::size_of;
            let per_elem = size_of::<StateID>() + size_of::<Vec<PatternID>>();
            let pats = total_pat_len * size_of::<PatternID>();
            let mem = (matches.len() * per_elem) + pats;
            log::debug!("matches map built, memory usage: {}", mem);
        }
        // At this point, we shuffle the "special" states in the final DFA.
        // This permits a DFA's match loop to detect a match condition (among
        // other things) by merely inspecting the current state's identifier,
        // and avoids the need for any additional auxiliary storage.
        self.dfa.shuffle(matches)?;
        Ok(())
    }

    /// Return the identifier for the next DFA state given an existing DFA
    /// state and an input byte. If the next DFA state already exists, then
    /// return its identifier from the cache. Otherwise, build the state, cache
    /// it and return its identifier.
    ///
    /// This routine returns a boolean indicating whether a new state was
    /// built. If a new state is built, then the caller needs to add it to its
    /// frontier of uncompiled DFA states to compute transitions for.
    fn cached_state(
        &mut self,
        dfa_id: StateID,
        unit: alphabet::Unit,
    ) -> Result<(StateID, bool), BuildError> {
        // Compute the set of all reachable NFA states, including epsilons.
        let empty_builder = self.get_state_builder();
        let builder = util::determinize::next(
            self.nfa,
            self.config.match_kind,
            &mut self.sparses,
            &mut self.stack,
            &self.builder_states[self.dfa.to_index(dfa_id)],
            unit,
            empty_builder,
        );
        self.maybe_add_state(builder)
    }

    /// Compute the set of DFA start states and add their identifiers to
    /// 'dfa_state_ids' (no duplicates are added).
    fn add_all_starts(
        &mut self,
        dfa_state_ids: &mut Vec<StateID>,
    ) -> Result<(), BuildError> {
        // These should be the first states added.
        assert!(dfa_state_ids.is_empty());
        // We only want to add (un)anchored starting states that are consistent
        // with our DFA's configuration. Unconditionally adding both (although
        // it is the default) can make DFAs quite a bit bigger.
        if self.dfa.start_kind().has_unanchored() {
            self.add_start_group(Anchored::No, dfa_state_ids)?;
        }
        if self.dfa.start_kind().has_anchored() {
            self.add_start_group(Anchored::Yes, dfa_state_ids)?;
        }
        // I previously had an 'assert' here checking that either
        // 'dfa_state_ids' was non-empty or the NFA had zero patterns. But it
        // turns out this isn't always true. For example, the NFA might have
        // one or more patterns where all such patterns are just 'fail'
        // states. These will ultimately just compile down to DFA dead states,
        // and since the dead state was added earlier, no new DFA states are
        // added. And thus, it is valid and okay for 'dfa_state_ids' to be
        // empty even if the NFA has a non-zero number of patterns.

        // We only need to compute anchored start states for each pattern if it
        // was requested to do so.
        if self.dfa.starts_for_each_pattern() {
            for pid in self.nfa.patterns() {
                self.add_start_group(Anchored::Pattern(pid), dfa_state_ids)?;
            }
        }
        Ok(())
    }

    /// Add a group of start states for the given anchored mode. Any new
    /// DFA states added are pushed on to 'dfa_state_ids'. (No duplicates are
    /// pushed.)
    ///
    /// When 'anchored' is Anchored::No, this will compile a group of
    /// unanchored start states (if the DFA supports unanchored starts). When
    /// it is Anchored::Yes, this will compile a group of anchored start
    /// states, and when it is Anchored::Pattern(pid), this will compile a
    /// group of anchored start states that only match the given pattern.
    ///
    /// This panics if `anchored` corresponds to an invalid pattern ID.
    fn add_start_group(
        &mut self,
        anchored: Anchored,
        dfa_state_ids: &mut Vec<StateID>,
    ) -> Result<(), BuildError> {
        let nfa_start = match anchored {
            Anchored::No => self.nfa.start_unanchored(),
            Anchored::Yes => self.nfa.start_anchored(),
            Anchored::Pattern(pid) => {
                self.nfa.start_pattern(pid).expect("valid pattern ID")
            }
        };

        // When compiling start states, we're careful not to build additional
        // states that aren't necessary. For example, if the NFA has no word
        // boundary assertion, then there's no reason to have distinct start
        // states for 'NonWordByte' and 'WordByte' starting configurations.
        // Instead, the 'WordByte' starting configuration can just point
        // directly to the start state for the 'NonWordByte' config.
        //
        // Note though that we only need to care about assertions in the prefix
        // of an NFA since this only concerns the starting states. (Actually,
        // the most precise thing we could do is look at the prefix
        // assertions of each pattern when 'anchored == Anchored::Pattern',
        // and then only compile extra states if that prefix is non-empty.) But
        // we settle for simplicity here instead of absolute minimalism. It is
        // somewhat rare, after all, for multiple patterns in the same regex to
        // have different prefix look-arounds.
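        //
        // To make the aliasing concrete with an illustrative example: for a
        // pattern like `foo`, whose prefix has neither word boundary nor
        // anchor assertions, all of the starting configurations handled below
        // collapse to a single DFA start state. For something like `\bfoo` or
        // `(?m)^foo`, the prefix look-around forces separate start states to
        // be computed for the configurations it can distinguish (any that
        // turn out identical are still deduplicated by the state cache).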

        let (id, is_new) =
            self.add_one_start(nfa_start, Start::NonWordByte)?;
        self.dfa.set_start_state(anchored, Start::NonWordByte, id);
        if is_new {
            dfa_state_ids.push(id);
        }

        if !self.nfa.look_set_prefix_any().contains_word() {
            self.dfa.set_start_state(anchored, Start::WordByte, id);
        } else {
            let (id, is_new) =
                self.add_one_start(nfa_start, Start::WordByte)?;
            self.dfa.set_start_state(anchored, Start::WordByte, id);
            if is_new {
                dfa_state_ids.push(id);
            }
        }
        if !self.nfa.look_set_prefix_any().contains_anchor() {
            self.dfa.set_start_state(anchored, Start::Text, id);
            self.dfa.set_start_state(anchored, Start::LineLF, id);
            self.dfa.set_start_state(anchored, Start::LineCR, id);
            self.dfa.set_start_state(
                anchored,
                Start::CustomLineTerminator,
                id,
            );
        } else {
            let (id, is_new) = self.add_one_start(nfa_start, Start::Text)?;
            self.dfa.set_start_state(anchored, Start::Text, id);
            if is_new {
                dfa_state_ids.push(id);
            }

            let (id, is_new) = self.add_one_start(nfa_start, Start::LineLF)?;
            self.dfa.set_start_state(anchored, Start::LineLF, id);
            if is_new {
                dfa_state_ids.push(id);
            }

            let (id, is_new) = self.add_one_start(nfa_start, Start::LineCR)?;
            self.dfa.set_start_state(anchored, Start::LineCR, id);
            if is_new {
                dfa_state_ids.push(id);
            }

            let (id, is_new) =
                self.add_one_start(nfa_start, Start::CustomLineTerminator)?;
            self.dfa.set_start_state(
                anchored,
                Start::CustomLineTerminator,
                id,
            );
            if is_new {
                dfa_state_ids.push(id);
            }
        }

        Ok(())
    }

    /// Add a new DFA start state corresponding to the given starting NFA
    /// state, and the starting search configuration. (The starting search
    /// configuration essentially tells us which look-behind assertions are
    /// true for this particular state.)
    ///
    /// The boolean returned indicates whether the state ID returned is a newly
    /// created state, or a previously cached state.
    fn add_one_start(
        &mut self,
        nfa_start: StateID,
        start: Start,
    ) -> Result<(StateID, bool), BuildError> {
        // Compute the look-behind assertions that are true in this starting
        // configuration, and then determine the epsilon closure. While
        // computing the epsilon closure, we only follow conditional epsilon
        // transitions that satisfy the look-behind assertions in 'look_have'.
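        //
        // As an illustrative example: when 'start' is Start::LineLF, the
        // position is known to follow a `\n`, so an assertion like `(?m:^)`
        // is satisfied and its conditional epsilon transition is followed,
        // whereas for Start::WordByte it is not.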
        let mut builder_matches = self.get_state_builder().into_matches();
        util::determinize::set_lookbehind_from_start(
            self.nfa,
            &start,
            &mut builder_matches,
        );
        self.sparses.set1.clear();
        util::determinize::epsilon_closure(
            self.nfa,
            nfa_start,
            builder_matches.look_have(),
            &mut self.stack,
            &mut self.sparses.set1,
        );
        let mut builder = builder_matches.into_nfa();
        util::determinize::add_nfa_states(
            &self.nfa,
            &self.sparses.set1,
            &mut builder,
        );
        self.maybe_add_state(builder)
    }

    /// Adds the given state to the DFA being built depending on whether it
    /// already exists in this determinizer's cache.
    ///
    /// If it does exist, then the memory used by 'state' is put back into the
    /// determinizer and the previously created state's ID is returned. (Along
    /// with 'false', indicating that no new state was added.)
    ///
    /// If it does not exist, then the state is added to the DFA being built
    /// and a fresh ID is allocated (if ID allocation fails, then an error is
    /// returned) and returned. (Along with 'true', indicating that a new state
    /// was added.)
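    ///
    /// As a purely illustrative example: if transitioning out of two
    /// different DFA states happens to produce the same ordered set of NFA
    /// states, the second call finds that set in the cache and reuses the
    /// existing DFA state ID instead of allocating a new state.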
    fn maybe_add_state(
        &mut self,
        builder: StateBuilderNFA,
    ) -> Result<(StateID, bool), BuildError> {
        if let Some(&cached_id) = self.cache.get(builder.as_bytes()) {
            // Since we have a cached state, put the constructed state's
            // memory back into our scratch space, so that it can be reused.
            self.put_state_builder(builder);
            return Ok((cached_id, false));
        }
        self.add_state(builder).map(|sid| (sid, true))
    }

    /// Add the given state to the DFA and make it available in the cache.
    ///
    /// The state initially has no transitions. That is, it transitions to the
    /// dead state for all possible inputs, and transitions to the quit state
    /// for all quit bytes.
    ///
    /// If adding the state would exceed the maximum value for StateID, then an
    /// error is returned.
    fn add_state(
        &mut self,
        builder: StateBuilderNFA,
    ) -> Result<StateID, BuildError> {
        let id = self.dfa.add_empty_state()?;
        if !self.config.quit.is_empty() {
            for b in self.config.quit.iter() {
                self.dfa.set_transition(
                    id,
                    alphabet::Unit::u8(b),
                    self.dfa.quit_id(),
                );
            }
        }
        let state = builder.to_state();
        // States use reference counting internally, so we only need to count
        // their memory usage once.
        self.memory_usage_state += state.memory_usage();
        self.builder_states.push(state.clone());
        self.cache.insert(state, id);
        self.put_state_builder(builder);
        if let Some(limit) = self.config.dfa_size_limit {
            if self.dfa.memory_usage() > limit {
                return Err(BuildError::dfa_exceeded_size_limit(limit));
            }
        }
        if let Some(limit) = self.config.determinize_size_limit {
            if self.memory_usage() > limit {
                return Err(BuildError::determinize_exceeded_size_limit(
                    limit,
                ));
            }
        }
        Ok(id)
    }

    /// Returns a state builder from this determinizer that might have existing
    /// capacity. This helps avoid allocs in cases where a state is built that
    /// turns out to already be cached.
    ///
    /// Callers must put the state builder back with 'put_state_builder',
    /// otherwise the allocation reuse won't work.
    fn get_state_builder(&mut self) -> StateBuilderEmpty {
        core::mem::replace(
            &mut self.scratch_state_builder,
            StateBuilderEmpty::new(),
        )
    }

    /// Puts the given state builder back into this determinizer for reuse.
    ///
    /// Note that building a 'State' from a builder always creates a new
    /// alloc, so callers should always put the builder back.
    fn put_state_builder(&mut self, builder: StateBuilderNFA) {
        let _ = core::mem::replace(
            &mut self.scratch_state_builder,
            builder.clear(),
        );
    }

    /// Return the memory usage, in bytes, of this determinizer at the current
    /// point in time. This does not include memory used by the NFA or the
    /// dense DFA itself.
    fn memory_usage(&self) -> usize {
        use core::mem::size_of;

        self.builder_states.len() * size_of::<State>()
        // Maps likely use more memory than this, but it's probably close.
        + self.cache.len() * (size_of::<State>() + size_of::<StateID>())
        + self.memory_usage_state
        + self.stack.capacity() * size_of::<StateID>()
        + self.scratch_state_builder.capacity()
    }
}