1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
//! `comma` parses command-line-style strings. See [`parse_command`] for details.
use std::iter::{Peekable};
use std::str::{Chars};
/// Resolves the character of an escape sequence (the part after the backslash).
///
/// Consumes exactly one character from `chars`. The letters `n`, `r` and `t` are translated to a
/// newline, a carriage return and a tab; every other character is handed back unchanged, which
/// is what lets quotes and backslashes escape themselves.
///
/// Returns `None` when `chars` has no characters left.
fn parse_escape(chars: &mut Peekable<Chars>) -> Option<char> {
    chars.next().map(|escaped| match escaped {
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        other => other,
    })
}
/// Collects the body of a quoted string up to its closing delimiter.
///
/// Reading starts at the current position of `chars`; no opening delimiter is skipped here.
/// Characters are accumulated until one equal to `delim` is met. That closing delimiter is
/// consumed but left out of the result. A backslash passes the following character through
/// `parse_escape`, so an escaped delimiter becomes part of the string instead of ending it.
///
/// Returns `None` if `chars` runs out before the closing delimiter is seen, which includes
/// input that ends directly after a backslash.
fn parse_string(chars: &mut Peekable<Chars>, delim: char) -> Option<String> {
    let mut text = String::new();
    loop {
        // `?` turns "ran out of input before the closing delimiter" into `None`.
        match chars.next()? {
            // The delimiter test comes first, mirroring the order of the checks it replaces.
            ch if ch == delim => return Some(text),
            '\\' => text.push(parse_escape(chars)?),
            ch => text.push(ch),
        }
    }
}
/// Splits a command line into its individual tokens.
///
/// Tokens are separated by runs of one or more whitespace characters. Text enclosed in a pair of
/// single or double quotes is taken verbatim, whitespace included, and is appended to whatever
/// token it touches: `a"b c"d` yields the single token `ab cd`, and `""` yields an empty token.
///
/// A backslash (`\`) escapes the character that follows it, both inside and outside quotes.
/// The sequences `\n`, `\r` and `\t` produce a newline, a carriage return and a tab; any other
/// escaped character stands for itself.
///
/// Returns `None` when a quotation mark has no terminating counterpart or when the input ends
/// right after a backslash. Otherwise returns the tokens in the order they appear in `input`.
pub fn parse_command(input: &str) -> Option<Vec<String>> {
    let mut tokens: Vec<String> = Vec::new();
    // The token currently being assembled; `None` while we are between tokens.
    let mut current: Option<String> = None;
    let mut stream = input.chars().peekable();

    while let Some(ch) = stream.next() {
        if ch.is_whitespace() {
            // Whitespace closes the token in progress, if there is one.
            if let Some(finished) = current.take() {
                tokens.push(finished);
            }
            continue;
        }

        // Any non-whitespace character starts a token, even a quote that opens an empty string.
        let token = current.get_or_insert_with(String::new);
        match ch {
            '\\' => token.push(parse_escape(&mut stream)?),
            '"' | '\'' => token.push_str(&parse_string(&mut stream, ch)?),
            plain => token.push(plain),
        }
    }

    // Input that does not end in whitespace leaves one last token to hand over.
    if let Some(finished) = current {
        tokens.push(finished);
    }
    Some(tokens)
}
#[cfg(test)]
mod tests {
    use crate::parse_command;

    /// Escaped quotes outside a string stay literal and do not group words, while a
    /// double-quoted section keeps its whitespace and its escaped inner quotes.
    #[test]
    fn parsing_works() {
        let tokens =
            parse_command("hello world \\'this is\\' a \"quoted \\\"string\\\"\"").unwrap();
        let expected = ["hello", "world", "'this", "is'", "a", "quoted \"string\""];
        assert_eq!(tokens, expected);
    }

    /// A quote that is never closed makes the whole parse fail.
    #[test]
    fn fail_mismatch() {
        assert!(parse_command("Hello 'world ").is_none());
    }

    /// Multi-byte characters are kept intact and split on whitespace like any other text.
    #[test]
    fn unicode() {
        // The second token is a character from the CJK Unified Ideographs Extension G block.
        // Many fonts have no glyph for it, so it may show up blank or as a placeholder here.
        let tokens = parse_command("ß 𱁬").unwrap();
        let expected = ["ß", "𱁬"];
        assert_eq!(tokens, expected);
    }
}
|