diff options
Diffstat (limited to 'third_party/rust/shlex')
-rw-r--r-- | third_party/rust/shlex/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/shlex/CHANGELOG.md | 17 | ||||
-rw-r--r-- | third_party/rust/shlex/Cargo.toml | 24 | ||||
-rw-r--r-- | third_party/rust/shlex/LICENSE-APACHE | 13 | ||||
-rw-r--r-- | third_party/rust/shlex/LICENSE-MIT | 21 | ||||
-rw-r--r-- | third_party/rust/shlex/README.md | 30 | ||||
-rw-r--r-- | third_party/rust/shlex/src/lib.rs | 249 |
7 files changed, 355 insertions, 0 deletions
diff --git a/third_party/rust/shlex/.cargo-checksum.json b/third_party/rust/shlex/.cargo-checksum.json new file mode 100644 index 0000000000..4d8a364a9a --- /dev/null +++ b/third_party/rust/shlex/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"ae8160bce335d8cb67f0d522402ed7bdb47266ca774d2ba3edc661783c86bbbe","Cargo.toml":"cdc548ec58d7bcee2494dcab1de5996cdfc748622d685e1cf74a50d54edbdf34","LICENSE-APACHE":"553fffcd9b1cb158bc3e9edc35da85ca5c3b3d7d2e61c883ebcfa8a65814b583","LICENSE-MIT":"4455bf75a91154108304cb283e0fea9948c14f13e20d60887cf2552449dea3b1","README.md":"7b378c1f3f7a3c7a8a819a736a43aa6e5d984d11b412224ef25597dd1ae2fac2","src/lib.rs":"1a3880eb7688af89736e52de8deac316698e664b8b1b64f80c346bf79b18f8b8"},"package":"43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"}
\ No newline at end of file diff --git a/third_party/rust/shlex/CHANGELOG.md b/third_party/rust/shlex/CHANGELOG.md new file mode 100644 index 0000000000..50d2e6e0d3 --- /dev/null +++ b/third_party/rust/shlex/CHANGELOG.md @@ -0,0 +1,17 @@ +# 1.1.0 + +* Adds the `std` feature (enabled by default) +* Disabling the `std` feature makes the crate work in `#![no_std]` mode, assuming presence of the `alloc` crate + +# 1.0.0 + +* Adds the `join` convenience function. +* Fixes parsing of `'\\n'` to match the behavior of bash/Zsh/Python `shlex`. The result was previously `\n`, now it is `\\n`. + +# 0.1.1 + +* Adds handling of `#` comments. + +# 0.1.0 + +This is the initial release. diff --git a/third_party/rust/shlex/Cargo.toml b/third_party/rust/shlex/Cargo.toml new file mode 100644 index 0000000000..2741ed866a --- /dev/null +++ b/third_party/rust/shlex/Cargo.toml @@ -0,0 +1,24 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "shlex" +version = "1.1.0" +authors = ["comex <comexk@gmail.com>", "Fenhl <fenhl@fenhl.net>"] +description = "Split a string into shell words, like Python's shlex." 
+categories = ["command-line-interface", "parser-implementations"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/comex/rust-shlex" + +[features] +default = ["std"] +std = [] diff --git a/third_party/rust/shlex/LICENSE-APACHE b/third_party/rust/shlex/LICENSE-APACHE new file mode 100644 index 0000000000..37465048a6 --- /dev/null +++ b/third_party/rust/shlex/LICENSE-APACHE @@ -0,0 +1,13 @@ +Copyright 2015 Nicholas Allegra (comex). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/rust/shlex/LICENSE-MIT b/third_party/rust/shlex/LICENSE-MIT new file mode 100644 index 0000000000..5ec1fe1cd7 --- /dev/null +++ b/third_party/rust/shlex/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Nicholas Allegra (comex). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/third_party/rust/shlex/README.md b/third_party/rust/shlex/README.md new file mode 100644 index 0000000000..6778828bdd --- /dev/null +++ b/third_party/rust/shlex/README.md @@ -0,0 +1,30 @@ + +Same idea as (but implementation not directly based on) the Python shlex +module. However, this implementation does not support any of the Python +module's customization because it makes parsing slower and is fairly useless. +You only get the default settings of shlex.split, which mimic the POSIX shell: +<https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html> + +This implementation also deviates from the Python version in not treating \r +specially, which I believe is more compliant. + +The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate +over the bytes directly as a micro-optimization. + +Disabling the `std` feature (which is enabled by default) will allow the crate +to work in `no_std` environments, where the `alloc` crate, and a global +allocator, are available. + +# LICENSE + +The source code in this repository is Licensed under either of +- Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + https://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or + https://opensource.org/licenses/MIT) + +at your option. 
+ +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall +be dual licensed as above, without any additional terms or conditions. diff --git a/third_party/rust/shlex/src/lib.rs b/third_party/rust/shlex/src/lib.rs new file mode 100644 index 0000000000..31b54bda6f --- /dev/null +++ b/third_party/rust/shlex/src/lib.rs @@ -0,0 +1,249 @@ +// Copyright 2015 Nicholas Allegra (comex). +// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or +// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +//! Same idea as (but implementation not directly based on) the Python shlex module. However, this +//! implementation does not support any of the Python module's customization because it makes +//! parsing slower and is fairly useless. You only get the default settings of shlex.split, which +//! mimic the POSIX shell: +//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html> +//! +//! This implementation also deviates from the Python version in not treating `\r` specially, which +//! I believe is more compliant. +//! +//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes +//! directly as a micro-optimization. +//! +//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in +//! `no_std` environments, where the `alloc` crate, and a global allocator, are available. + +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; +use alloc::vec::Vec; +use alloc::borrow::Cow; +use alloc::string::String; +#[cfg(test)] +use alloc::vec; +#[cfg(test)] +use alloc::borrow::ToOwned; + +/// An iterator that takes an input string and splits it into the words using the same syntax as +/// the POSIX shell. 
///
/// Words are produced lazily through the `Iterator` implementation. `line_no` and `had_error`
/// are updated as input is consumed and may be inspected during or after iteration.
pub struct Shlex<'a> {
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_str`, positioned on line 1 with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been consumed.
    ///
    /// Returns `None` and sets `had_error` when the input ends inside a quotation or directly
    /// after a backslash. The whitespace byte that terminates the word is consumed.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> is a line continuation and contributes nothing
                    Some(b'\n') => {}
                    // any other escaped byte is taken literally
                    Some(ch2) => result.push(ch2),
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                _ => result.push(ch),
            }
            match self.next_char() {
                Some(ch2) => ch = ch2,
                None => break,
            }
        }
        // SAFETY: the input is a `&str`, hence valid UTF-8. Every non-ASCII byte of the word is
        // copied to `result` in its original order; the parser only ever drops ASCII bytes
        // (quotes, backslashes, newlines), stops at ASCII whitespace, or inserts an ASCII
        // backslash directly after having read an ASCII backslash, i.e. at a character
        // boundary. No multi-byte sequence is split, so `result` is valid UTF-8.
        Some(unsafe { String::from_utf8_unchecked(result) })
    }

    /// Appends the contents of a double-quoted section to `result`; the opening `"` has already
    /// been consumed. Returns `Err(())` if the input ends before the closing `"`.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'\\' => {
                    let ch3 = self.next_char().ok_or(())?;
                    match ch3 {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(ch3),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x
                        _ => {
                            result.push(b'\\');
                            result.push(ch3);
                        }
                    }
                }
                b'"' => return Ok(()),
                ch2 => result.push(ch2),
            }
        }
    }

    /// Appends the contents of a single-quoted section to `result`; the opening `'` has already
    /// been consumed. Nothing is special inside single quotes except the closing `'`. Returns
    /// `Err(())` if the input ends before the closing `'`.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'\'' => return Ok(()),
                ch2 => result.push(ch2),
            }
        }
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Returns the next word, or `None` at end of input or on error (see `had_error`).
    fn next(&mut self) -> Option<String> {
        let mut ch = self.next_char()?;
        // skip initial whitespace and `#` comments
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // a comment runs up to and including the next newline
                    while let Some(ch2) = self.next_char() {
                        if ch2 == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}

/// Convenience function that consumes the whole string at once. Returns None if the input was
/// erroneous.
pub fn split(in_str: &str) -> Option<Vec<String>> {
    let mut shl = Shlex::new(in_str);
    let res = shl.by_ref().collect();
    if shl.had_error {
        None
    } else {
        Some(res)
    }
}

/// Returns true if a word containing byte `c` must be wrapped in double quotes by `quote`.
fn needs_quoting(c: u8) -> bool {
    match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%' => true,
        // Braces trigger brace expansion when left bare. The byte 0xA0 is reported
        // (RUSTSEC-2024-0006) to be treated as a word separator by some shells, so any word
        // containing it is quoted as well.
        b'{' | b'}' | 0xA0 => true,
        _ => false,
    }
}

/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// The empty string becomes `""`. A word containing a shell-significant byte is wrapped in
/// double quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped; any other word is returned
/// borrowed and unchanged.
///
/// Nul bytes are passed through unmodified although they cannot be represented in a shell
/// argument; callers handling untrusted input should reject them beforehand.
pub fn quote(in_str: &str) -> Cow<'_, str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }
    if !in_str.bytes().any(needs_quoting) {
        return in_str.into();
    }
    // two surrounding quotes plus the input; escapes may grow it further
    let mut out: Vec<u8> = Vec::with_capacity(in_str.len() + 2);
    out.push(b'"');
    for c in in_str.bytes() {
        match c {
            b'$' | b'`' | b'"' | b'\\' => out.push(b'\\'),
            _ => {}
        }
        out.push(c);
    }
    out.push(b'"');
    // SAFETY: `out` is the valid UTF-8 input with ASCII bytes added only at the very start, at
    // the very end, or immediately before an ASCII byte, so no multi-byte sequence is split.
    unsafe { String::from_utf8_unchecked(out) }.into()
}

/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary. Consecutive words will be separated by a single space.
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
    words
        .into_iter()
        .map(quote)
        .collect::<Vec<_>>()
        .join(" ")
}

#[cfg(test)]
static SPLIT_TEST_ITEMS: &'static [(&'static str, Option<&'static [&'static str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];

#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect()));
    }
}

#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    while let Some(word) = sh.next() {
        if word == "bar" {
            assert_eq!(sh.line_no, 3);
        }
    }
}

#[test]
fn test_quote() {
    assert_eq!(quote("foobar"), "foobar");
    assert_eq!(quote("foo bar"), "\"foo bar\"");
    assert_eq!(quote("\""), "\"\\\"\"");
    assert_eq!(quote(""), "\"\"");
    assert_eq!(quote("{a,b}"), "\"{a,b}\"");
    assert_eq!(quote("a\u{a0}b"), "\"a\u{a0}b\"");
}

#[test]
fn test_join() {
    assert_eq!(join(vec![]), "");
    assert_eq!(join(vec![""]), "\"\"");
    assert_eq!(join(vec!["a", "b"]), "a b");
    assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
}