diff options
Diffstat (limited to 'compiler/rustc_lexer/src/cursor.rs')
-rw-r--r-- | compiler/rustc_lexer/src/cursor.rs | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs new file mode 100644 index 000000000..21557a9c8 --- /dev/null +++ b/compiler/rustc_lexer/src/cursor.rs @@ -0,0 +1,93 @@ +use std::str::Chars; + +/// Peekable iterator over a char sequence. +/// +/// Next characters can be peeked via `first` method, +/// and position can be shifted forward via `bump` method. +pub(crate) struct Cursor<'a> { + initial_len: usize, + /// Iterator over chars. Slightly faster than a &str. + chars: Chars<'a>, + #[cfg(debug_assertions)] + prev: char, +} + +pub(crate) const EOF_CHAR: char = '\0'; + +impl<'a> Cursor<'a> { + pub(crate) fn new(input: &'a str) -> Cursor<'a> { + Cursor { + initial_len: input.len(), + chars: input.chars(), + #[cfg(debug_assertions)] + prev: EOF_CHAR, + } + } + + /// Returns the last eaten symbol (or `'\0'` in release builds). + /// (For debug assertions only.) + pub(crate) fn prev(&self) -> char { + #[cfg(debug_assertions)] + { + self.prev + } + + #[cfg(not(debug_assertions))] + { + EOF_CHAR + } + } + + /// Peeks the next symbol from the input stream without consuming it. + /// If requested position doesn't exist, `EOF_CHAR` is returned. + /// However, getting `EOF_CHAR` doesn't always mean actual end of file, + /// it should be checked with `is_eof` method. + pub(crate) fn first(&self) -> char { + // `.next()` optimizes better than `.nth(0)` + self.chars.clone().next().unwrap_or(EOF_CHAR) + } + + /// Peeks the second symbol from the input stream without consuming it. + pub(crate) fn second(&self) -> char { + // `.next()` optimizes better than `.nth(1)` + let mut iter = self.chars.clone(); + iter.next(); + iter.next().unwrap_or(EOF_CHAR) + } + + /// Checks if there is nothing more to consume. + pub(crate) fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + + /// Returns amount of already consumed symbols. + pub(crate) fn len_consumed(&self) -> u32 { + (self.initial_len - self.chars.as_str().len()) as u32 + } + + /// Resets the number of bytes consumed to 0. + pub(crate) fn reset_len_consumed(&mut self) { + self.initial_len = self.chars.as_str().len(); + } + + /// Moves to the next character. + pub(crate) fn bump(&mut self) -> Option<char> { + let c = self.chars.next()?; + + #[cfg(debug_assertions)] + { + self.prev = c; + } + + Some(c) + } + + /// Eats symbols while predicate returns true or until the end of file is reached. + pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + // It was tried making optimized version of this for eg. line comments, but + // LLVM can inline all of this and compile it down to fast iteration over bytes. + while predicate(self.first()) && !self.is_eof() { + self.bump(); + } + } +} |