summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_lexer/src/cursor.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_lexer/src/cursor.rs')
-rw-r--r--compiler/rustc_lexer/src/cursor.rs93
1 files changed, 93 insertions, 0 deletions
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
new file mode 100644
index 000000000..21557a9c8
--- /dev/null
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -0,0 +1,93 @@
+use std::str::Chars;
+
+/// Cursor over the characters of a string slice, with lookahead.
+///
+/// Upcoming characters can be inspected via the `first` and `second` methods,
+/// and the position can be moved forward via the `bump` method.
+pub(crate) struct Cursor<'a> {
+ initial_len: usize, // byte length of the unconsumed input at construction or at the last `reset_len_consumed`
+ /// Iterator over the remaining chars. Slightly faster than a &str.
+ chars: Chars<'a>,
+ #[cfg(debug_assertions)]
+ prev: char, // last char returned by `bump`; this field exists only when debug assertions are enabled
+}
+
+pub(crate) const EOF_CHAR: char = '\0'; // sentinel returned when peeking past the end; `'\0'` may also occur in real input
+
+impl<'a> Cursor<'a> {
+ pub(crate) fn new(input: &'a str) -> Cursor<'a> {
+ Cursor {
+ initial_len: input.len(), // byte length of the whole input; baseline for `len_consumed`
+ chars: input.chars(),
+ #[cfg(debug_assertions)]
+ prev: EOF_CHAR, // nothing has been consumed yet
+ }
+ }
+
+ /// Returns the last character consumed by `bump`, or `EOF_CHAR` if nothing has been consumed yet.
+ /// In builds without debug assertions it always returns `EOF_CHAR`. (For debug assertions only.)
+ pub(crate) fn prev(&self) -> char {
+ #[cfg(debug_assertions)]
+ {
+ self.prev
+ }
+
+ #[cfg(not(debug_assertions))]
+ {
+ EOF_CHAR
+ }
+ }
+
+ /// Returns the next character of the input without consuming it.
+ /// If no input remains, `EOF_CHAR` is returned instead.
+ /// Because `EOF_CHAR` is `'\0'`, which the input itself may contain, getting it
+ /// does not prove that the end was reached; check that with the `is_eof` method.
+ pub(crate) fn first(&self) -> char {
+ // Peek on a clone so `self.chars` is not advanced; `.next()` optimizes better than `.nth(0)`
+ self.chars.clone().next().unwrap_or(EOF_CHAR)
+ }
+
+ /// Returns the character after the next one without consuming anything, or `EOF_CHAR` if there is none.
+ pub(crate) fn second(&self) -> char {
+ // Step a clone past the first char; `.next()` optimizes better than `.nth(1)`
+ let mut iter = self.chars.clone();
+ iter.next();
+ iter.next().unwrap_or(EOF_CHAR)
+ }
+
+ /// Returns `true` when the remaining input is empty, i.e. there is nothing more to consume.
+ pub(crate) fn is_eof(&self) -> bool {
+ self.chars.as_str().is_empty()
+ }
+
+ /// Returns the number of bytes (not chars) consumed since construction or the last `reset_len_consumed`.
+ pub(crate) fn len_consumed(&self) -> u32 {
+ (self.initial_len - self.chars.as_str().len()) as u32 // NOTE(review): `as u32` truncates silently above `u32::MAX` — confirm callers bound the input size
+ }
+
+ /// Resets the number of bytes consumed to 0 by taking the current remaining length as the new baseline.
+ pub(crate) fn reset_len_consumed(&mut self) {
+ self.initial_len = self.chars.as_str().len();
+ }
+
+ /// Consumes the next character and returns it, or returns `None` when no input remains.
+ pub(crate) fn bump(&mut self) -> Option<char> {
+ let c = self.chars.next()?;
+
+ #[cfg(debug_assertions)]
+ {
+ self.prev = c; // remembered so that `prev()` can report it
+ }
+
+ Some(c)
+ }
+
+ /// Consumes characters for as long as `predicate` accepts the next one and the end of input is not reached.
+ pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
+ // An optimized version of this was tried (e.g. for line comments), but LLVM can
+ // inline all of this and compile it down to fast iteration over bytes.
+ while predicate(self.first()) && !self.is_eof() { // `first()` yields `EOF_CHAR` at the end, so `is_eof` stops a predicate that accepts `'\0'`
+ self.bump();
+ }
+ }
+}