From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/os_str_bytes/.cargo-checksum.json | 1 + vendor/os_str_bytes/COPYRIGHT | 5 + vendor/os_str_bytes/Cargo.toml | 45 + vendor/os_str_bytes/LICENSE-APACHE | 201 ++++ vendor/os_str_bytes/LICENSE-MIT | 21 + vendor/os_str_bytes/README.md | 88 ++ vendor/os_str_bytes/src/common/mod.rs | 39 + vendor/os_str_bytes/src/common/raw.rs | 38 + vendor/os_str_bytes/src/iter.rs | 113 ++ vendor/os_str_bytes/src/lib.rs | 432 ++++++++ vendor/os_str_bytes/src/pattern.rs | 77 ++ vendor/os_str_bytes/src/raw_str.rs | 1156 ++++++++++++++++++++ vendor/os_str_bytes/src/util.rs | 10 + vendor/os_str_bytes/src/wasm32/mod.rs | 56 + vendor/os_str_bytes/src/wasm32/raw.rs | 39 + vendor/os_str_bytes/src/windows/mod.rs | 152 +++ vendor/os_str_bytes/src/windows/raw.rs | 42 + .../os_str_bytes/src/windows/wtf8/code_points.rs | 117 ++ vendor/os_str_bytes/src/windows/wtf8/convert.rs | 166 +++ vendor/os_str_bytes/src/windows/wtf8/mod.rs | 18 + vendor/os_str_bytes/src/windows/wtf8/string.rs | 63 ++ 21 files changed, 2879 insertions(+) create mode 100644 vendor/os_str_bytes/.cargo-checksum.json create mode 100644 vendor/os_str_bytes/COPYRIGHT create mode 100644 vendor/os_str_bytes/Cargo.toml create mode 100644 vendor/os_str_bytes/LICENSE-APACHE create mode 100644 vendor/os_str_bytes/LICENSE-MIT create mode 100644 vendor/os_str_bytes/README.md create mode 100644 vendor/os_str_bytes/src/common/mod.rs create mode 100644 vendor/os_str_bytes/src/common/raw.rs create mode 100644 vendor/os_str_bytes/src/iter.rs create mode 100644 vendor/os_str_bytes/src/lib.rs create mode 100644 vendor/os_str_bytes/src/pattern.rs create mode 100644 vendor/os_str_bytes/src/raw_str.rs create mode 100644 vendor/os_str_bytes/src/util.rs create mode 100644 vendor/os_str_bytes/src/wasm32/mod.rs create mode 100644 
vendor/os_str_bytes/src/wasm32/raw.rs create mode 100644 vendor/os_str_bytes/src/windows/mod.rs create mode 100644 vendor/os_str_bytes/src/windows/raw.rs create mode 100644 vendor/os_str_bytes/src/windows/wtf8/code_points.rs create mode 100644 vendor/os_str_bytes/src/windows/wtf8/convert.rs create mode 100644 vendor/os_str_bytes/src/windows/wtf8/mod.rs create mode 100644 vendor/os_str_bytes/src/windows/wtf8/string.rs (limited to 'vendor/os_str_bytes') diff --git a/vendor/os_str_bytes/.cargo-checksum.json b/vendor/os_str_bytes/.cargo-checksum.json new file mode 100644 index 000000000..2b0777f89 --- /dev/null +++ b/vendor/os_str_bytes/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"COPYRIGHT":"73ce0227250f175ce8d282493385bcc02a310b6d9954cae39fbb346b54976d13","Cargo.toml":"08c09c086d599280d0f0231e436d5c0fcd9433550595fbb4c915505991bf307f","LICENSE-APACHE":"c71d239df91726fc519c6eb72d318ec65820627232b2f796219e87dcf35d0ab4","LICENSE-MIT":"f360c8dbea5216a085ae3b38e782492912f63dc06eb6e09ac8ce586d8d5a1303","README.md":"3eced3f3c87ee9a609af1e7f940070862fe5894f4624e79729edcb9ad89b905c","src/common/mod.rs":"dd963b95f6040bca293b6dc445a915f0f8099e41be6d174366073e877ba7d380","src/common/raw.rs":"9a845d3ad3348ed39ec9fbf19c29b6c1fe19f3790ab8389a2a333f97bef3df80","src/iter.rs":"331238ca1fe53bd385eb49a637a24c9e68ea81e56f1c075a9beab74e2f064afe","src/lib.rs":"38bbd655739022459130689f60851b6996f1253884e7198d8a1c4eff3a80d0b5","src/pattern.rs":"8914667975f229f3ed43e21ae5c52833d020169801b0bb2c425cb14c1c6eb62e","src/raw_str.rs":"df31dc128e514ca65236d07dd6d29dfd66bd5d95ebcd73543d743993b8c1c830","src/util.rs":"aaf4086b3cfaacad1dc4ad07744f2124d52a9e15892ff93339dfd34375708766","src/wasm32/mod.rs":"64688ef8316ddb5f68a65476245e52be832ac931ada645673dc988f4504f0d48","src/wasm32/raw.rs":"4b7b56931f513812354bec4051aae25f5c3904ba7123e9965a3f984bd7d33190","src/windows/mod.rs":"a2d5a0f0baf4f567c895f615fe579d7f2c3005247390f9260b89fb9cd81d9d05","src/windows/raw.rs":"536b194b4f67f23547f10e711d1f4a843681c70073
f6ca33836d4db25f77c98f","src/windows/wtf8/code_points.rs":"052ccdf6847a82d8b711aa97650514b1fadefbd66f6c1f1abb4910aa7873a69f","src/windows/wtf8/convert.rs":"bf19cb41e065e231f4755c501045bf3488e9b6bf1952bc1afd069b9208da6b39","src/windows/wtf8/mod.rs":"46982a83e9bc6b7f6ade1350de13ffcb9068f1cf35872fc2602665634f675896","src/windows/wtf8/string.rs":"8eaf1b4efac412dbb0d5d297f6d64684350e3a6ff3f6de8956b42b640771723b"},"package":"8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"} \ No newline at end of file diff --git a/vendor/os_str_bytes/COPYRIGHT b/vendor/os_str_bytes/COPYRIGHT new file mode 100644 index 000000000..fb2d62f00 --- /dev/null +++ b/vendor/os_str_bytes/COPYRIGHT @@ -0,0 +1,5 @@ +Copyright (c) 2019 Dylan Iuzzolino + +Licensed under the Apache License, Version 2.0 or the MIT +license , at your option. All files in this project may not be +copied, modified, or distributed except according to those terms. diff --git a/vendor/os_str_bytes/Cargo.toml b/vendor/os_str_bytes/Cargo.toml new file mode 100644 index 000000000..8124213a0 --- /dev/null +++ b/vendor/os_str_bytes/Cargo.toml @@ -0,0 +1,45 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2018" +rust-version = "1.52.0" +name = "os_str_bytes" +version = "6.0.0" +authors = ["dylni"] +exclude = [".*", "/rustfmt.toml", "/tests"] +description = "Utilities for converting between byte sequences and platform-native strings\n" +readme = "README.md" +keywords = ["bytes", "osstr", "osstring", "path", "windows"] +categories = ["command-line-interface", "development-tools::ffi", "encoding", "os", "rust-patterns"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/dylni/os_str_bytes" +[package.metadata.docs.rs] +all-features = true +rustc-args = ["--cfg", "os_str_bytes_docs_rs"] +rustdoc-args = ["--cfg", "os_str_bytes_docs_rs"] +[dependencies.memchr] +version = "2.4" +optional = true + +[dependencies.print_bytes] +version = "0.5" +optional = true + +[dependencies.uniquote] +version = "3.0" +optional = true +[dev-dependencies.getrandom] +version = "0.2" + +[features] +default = ["memchr", "raw_os_str"] +raw_os_str = [] diff --git a/vendor/os_str_bytes/LICENSE-APACHE b/vendor/os_str_bytes/LICENSE-APACHE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/vendor/os_str_bytes/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/os_str_bytes/LICENSE-MIT b/vendor/os_str_bytes/LICENSE-MIT new file mode 100644 index 000000000..b825ac04d --- /dev/null +++ b/vendor/os_str_bytes/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Dylan Iuzzolino + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/os_str_bytes/README.md b/vendor/os_str_bytes/README.md new file mode 100644 index 000000000..c7f875740 --- /dev/null +++ b/vendor/os_str_bytes/README.md @@ -0,0 +1,88 @@ +# OsStr Bytes + +This crate allows interacting with the data stored by [`OsStr`] and +[`OsString`], without resorting to panics or corruption for invalid UTF-8. +Thus, methods can be used that are already defined on [`[u8]`][slice] and +[`Vec`]. + +Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] from +a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which requires +the bytes to be valid in UTF-8. However, since this crate makes conversions +directly between the platform encoding and raw bytes, even some strings invalid +in UTF-8 can be converted. + +[![GitHub Build Status](https://github.com/dylni/os_str_bytes/workflows/build/badge.svg?branch=master)](https://github.com/dylni/os_str_bytes/actions?query=branch%3Amaster) + +## Usage + +Add the following lines to your "Cargo.toml" file: + +```toml +[dependencies] +os_str_bytes = "6.0" +``` + +See the [documentation] for available functionality and examples. + +## Rust version support + +The minimum supported Rust toolchain version depends on the platform: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Target | Target Triple | Minimum Version |
|---|---|---|
| Fortanix | `*-fortanix-*-sgx` | nightly (`sgx_platform`) |
| Unix | Unix | 1.52.0 |
| WASI | `*-wasi` | 1.52.0 |
| WebAssembly | `wasm32-*-unknown` | 1.52.0 |
| Windows | `*-windows-*` | 1.52.0 |
+ +Minor version updates may increase these version requirements. However, the +previous two Rust releases will always be supported. If the minimum Rust +version must not be increased, use a tilde requirement to prevent updating this +crate's minor version: + +```toml +[dependencies] +os_str_bytes = "~6.0" +``` + +## License + +Licensing terms are specified in [COPYRIGHT]. + +Unless you explicitly state otherwise, any contribution submitted for inclusion +in this crate, as defined in [LICENSE-APACHE], shall be licensed according to +[COPYRIGHT], without any additional terms or conditions. + +[COPYRIGHT]: https://github.com/dylni/os_str_bytes/blob/master/COPYRIGHT +[documentation]: https://docs.rs/os_str_bytes +[LICENSE-APACHE]: https://github.com/dylni/os_str_bytes/blob/master/LICENSE-APACHE +[slice]: https://doc.rust-lang.org/std/primitive.slice.html +[`OsStr`]: https://doc.rust-lang.org/std/ffi/struct.OsStr.html +[`OsString`]: https://doc.rust-lang.org/std/ffi/struct.OsString.html +[`Vec`]: https://doc.rust-lang.org/std/vec/struct.Vec.html diff --git a/vendor/os_str_bytes/src/common/mod.rs b/vendor/os_str_bytes/src/common/mod.rs new file mode 100644 index 000000000..e8ce58596 --- /dev/null +++ b/vendor/os_str_bytes/src/common/mod.rs @@ -0,0 +1,39 @@ +use std::borrow::Cow; +use std::convert::Infallible; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::result; + +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +use std::os::fortanix_sgx as os; +#[cfg(any(target_os = "hermit", unix))] +use std::os::unix as os; +#[cfg(target_os = "wasi")] +use std::os::wasi as os; + +use os::ffi::OsStrExt; +use os::ffi::OsStringExt; + +if_raw_str! 
{ + pub(super) mod raw; +} + +pub(super) type EncodingError = Infallible; + +type Result = result::Result; + +pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { + Ok(Cow::Borrowed(OsStrExt::from_bytes(string))) +} + +pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { + Cow::Borrowed(OsStrExt::as_bytes(os_string)) +} + +pub(super) fn os_string_from_vec(string: Vec) -> Result { + Ok(OsStringExt::from_vec(string)) +} + +pub(super) fn os_string_into_vec(os_string: OsString) -> Vec { + OsStringExt::into_vec(os_string) +} diff --git a/vendor/os_str_bytes/src/common/raw.rs b/vendor/os_str_bytes/src/common/raw.rs new file mode 100644 index 000000000..070a62cf3 --- /dev/null +++ b/vendor/os_str_bytes/src/common/raw.rs @@ -0,0 +1,38 @@ +use std::fmt; +use std::fmt::Formatter; + +#[inline(always)] +pub(crate) const fn is_continuation(_: u8) -> bool { + false +} + +#[inline(always)] +pub(crate) fn decode_code_point(_: &[u8]) -> u32 { + unreachable!(); +} + +pub(crate) fn ends_with(string: &[u8], suffix: &[u8]) -> bool { + string.ends_with(suffix) +} + +pub(crate) fn starts_with(string: &[u8], prefix: &[u8]) -> bool { + string.starts_with(prefix) +} + +pub(crate) fn debug(string: &[u8], f: &mut Formatter<'_>) -> fmt::Result { + for byte in string { + write!(f, "\\x{:02X}", byte)?; + } + Ok(()) +} + +#[cfg(feature = "uniquote")] +pub(crate) mod uniquote { + use uniquote::Formatter; + use uniquote::Quote; + use uniquote::Result; + + pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result { + string.escape(f) + } +} diff --git a/vendor/os_str_bytes/src/iter.rs b/vendor/os_str_bytes/src/iter.rs new file mode 100644 index 000000000..5cb7299e4 --- /dev/null +++ b/vendor/os_str_bytes/src/iter.rs @@ -0,0 +1,113 @@ +//! Iterators provided by this crate. 
+ +#![cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] + +use std::fmt; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::iter::FusedIterator; +use std::str; + +use super::pattern::Encoded; +use super::Pattern; +use super::RawOsStr; + +// [memchr::memmem::FindIter] is not currently used, since this struct would +// become self-referential. Additionally, that iterator does not implement +// [DoubleEndedIterator], and its implementation would likely require +// significant changes to implement that trait. +/// The iterator returned by [`RawOsStr::split`]. +pub struct Split<'a, P> +where + P: Pattern, +{ + string: Option<&'a RawOsStr>, + pat: P::__Encoded, +} + +impl<'a, P> Split<'a, P> +where + P: Pattern, +{ + pub(super) fn new(string: &'a RawOsStr, pat: P) -> Self { + let pat = pat.__encode(); + assert!( + !pat.__get().is_empty(), + "cannot split using an empty pattern", + ); + Self { + string: Some(string), + pat, + } + } +} + +macro_rules! impl_next { + ( $self:ident , $split_method:ident , $swap_fn:expr ) => {{ + $self + .string? + .$split_method(&$self.pat) + .map(|substrings| { + let (substring, string) = $swap_fn(substrings); + $self.string = Some(string); + substring + }) + .or_else(|| $self.string.take()) + }}; +} + +impl

<P> DoubleEndedIterator for Split<'_, P> +where + P: Pattern, +{ + fn next_back(&mut self) -> Option<Self::Item> { + impl_next!(self, rsplit_once_raw, |(prefix, suffix)| (suffix, prefix)) + } +} + +impl<'a, P> Iterator for Split<'a, P> +where + P: Pattern, +{ + type Item = &'a RawOsStr; + + #[inline] + fn last(mut self) -> Option<Self::Item> { + self.next_back() + } + + fn next(&mut self) -> Option<Self::Item> { + impl_next!(self, split_once_raw, |x| x) + } +} + +impl

<P> Clone for Split<'_, P> +where + P: Pattern, +{ + #[inline] + fn clone(&self) -> Self { + Self { + string: self.string, + pat: self.pat.clone(), + } + } +} + +impl

<P> Debug for Split<'_, P> +where + P: Pattern, +{ + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("Split") + .field("string", &self.string) + .field( + "pat", + &str::from_utf8(self.pat.__get()).expect("invalid pattern"), + ) + .finish() + } +} + +impl
<P>
FusedIterator for Split<'_, P> where P: Pattern {} diff --git a/vendor/os_str_bytes/src/lib.rs b/vendor/os_str_bytes/src/lib.rs new file mode 100644 index 000000000..9a99059c6 --- /dev/null +++ b/vendor/os_str_bytes/src/lib.rs @@ -0,0 +1,432 @@ +//! This crate allows interacting with the data stored by [`OsStr`] and +//! [`OsString`], without resorting to panics or corruption for invalid UTF-8. +//! Thus, methods can be used that are already defined on [`[u8]`][slice] and +//! [`Vec`]. +//! +//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] +//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which +//! requires the bytes to be valid in UTF-8. However, since this crate makes +//! conversions directly between the platform encoding and raw bytes, even some +//! strings invalid in UTF-8 can be converted. +//! +//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used. +//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are +//! easier to misuse. +//! +//! # Encoding +//! +//! The encoding of bytes returned or accepted by methods of this crate is +//! intentionally left unspecified. It may vary for different platforms, so +//! defining it would run contrary to the goal of generic string handling. +//! However, the following invariants will always be upheld: +//! +//! - The encoding will be compatible with UTF-8. In particular, splitting an +//! encoded byte sequence by a UTF-8–encoded character always produces other +//! valid byte sequences. They can be re-encoded without error using +//! [`OsStrBytes::from_raw_bytes`] and similar methods. +//! +//! - All characters valid in platform strings are representable. [`OsStr`] and +//! [`OsString`] can always be losslessly reconstructed from extracted bytes. +//! +//! Note that the chosen encoding may not match how Rust stores these strings +//! internally, which is undocumented. For instance, the result of calling +//! 
[`OsStr::len`] will not necessarily match the number of bytes this crate +//! uses to represent the same string. +//! +//! Additionally, concatenation may yield unexpected results without a UTF-8 +//! separator. If two platform strings need to be concatenated, the only safe +//! way to do so is using [`OsString::push`]. This limitation also makes it +//! undesirable to use the bytes in interchange. +//! +//! Since this encoding can change between versions and platforms, it should +//! not be used for storage. The standard library provides implementations of +//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be +//! preferred for that use case. +//! +//! # User Input +//! +//! Traits in this crate should ideally not be used to convert byte sequences +//! that did not originate from [`OsStr`] or a related struct. The encoding +//! used by this crate is an implementation detail, so it does not make sense +//! to expose it to users. +//! +//! Crate [bstr] offers some useful alternative methods, such as +//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant +//! for user input. But, they reject some byte sequences used to represent +//! valid platform strings, which would be undesirable for reliable path +//! handling. They are best used only when accepting unknown input. +//! +//! This crate is meant to help when you already have an instance of [`OsStr`] +//! and need to modify the data in a lossless way. +//! +//! # Features +//! +//! These features are optional and can be enabled or disabled in a +//! "Cargo.toml" file. +//! +//! ### Default Features +//! +//! - **memchr** - +//! Changes the implementation to use crate [memchr] for better performance. +//! This feature is useless when "raw\_os\_str" is disabled. +//! +//! For more information, see [`RawOsStr`][memchr complexity]. +//! +//! - **raw\_os\_str** - +//! Enables use of [`RawOsStr`] and [`RawOsString`]. +//! +//! ### Optional Features +//! +//! 
- **print\_bytes** - +//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and +//! [`RawOsString`]. +//! +//! - **uniquote** - +//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and +//! [`RawOsString`]. +//! +//! # Implementation +//! +//! Some methods return [`Cow`] to account for platform differences. However, +//! no guarantee is made that the same variant of that enum will always be +//! returned for the same platform. Whichever can be constructed most +//! efficiently will be returned. +//! +//! All traits are [sealed], meaning that they can only be implemented by this +//! crate. Otherwise, backward compatibility would be more difficult to +//! maintain for new features. +//! +//! # Complexity +//! +//! The time complexities of trait methods will vary based on what +//! functionality is available for the platform. At worst, they will all be +//! linear, but some can take constant time. For example, +//! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for +//! its argument. +//! +//! # Examples +//! +//! ``` +//! # #[cfg(any())] +//! use std::env; +//! use std::fs; +//! # use std::io; +//! +//! use os_str_bytes::OsStrBytes; +//! +//! # mod env { +//! # use std::env; +//! # use std::ffi::OsString; +//! # +//! # pub fn args_os() -> impl Iterator { +//! # let mut file = env::temp_dir(); +//! # file.push("os_str_bytes\u{E9}.txt"); +//! # return vec![OsString::new(), file.into_os_string()].into_iter(); +//! # } +//! # } +//! # +//! for file in env::args_os().skip(1) { +//! if file.to_raw_bytes().first() != Some(&b'-') { +//! let string = "Hello, world!"; +//! fs::write(&file, string)?; +//! assert_eq!(string, fs::read_to_string(file)?); +//! } +//! } +//! # +//! # Ok::<_, io::Error>(()) +//! ``` +//! +//! [bstr]: https://crates.io/crates/bstr +//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str +//! 
[`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string +//! [memchr complexity]: RawOsStr#complexity +//! [memchr]: https://crates.io/crates/memchr +//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed +//! [print\_bytes]: https://crates.io/crates/print_bytes + +// Only require a nightly compiler when building documentation for docs.rs. +// This is a private option that should not be used. +// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407 +// https://github.com/dylni/os_str_bytes/issues/2 +#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))] +// Nightly is also currently required for the SGX platform. +#![cfg_attr( + all(target_vendor = "fortanix", target_env = "sgx"), + feature(sgx_platform) +)] +#![forbid(unsafe_op_in_unsafe_fn)] +#![warn(unused_results)] + +use std::borrow::Cow; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::path::Path; +use std::path::PathBuf; +use std::result; + +macro_rules! if_raw_str { + ( $($item:item)+ ) => { + $( + #[cfg(feature = "raw_os_str")] + $item + )+ + }; +} + +#[cfg_attr( + all(target_arch = "wasm32", target_os = "unknown"), + path = "wasm32/mod.rs" +)] +#[cfg_attr(windows, path = "windows/mod.rs")] +#[cfg_attr( + not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)), + path = "common/mod.rs" +)] +mod imp; + +mod util; + +if_raw_str! { + pub mod iter; + + mod pattern; + pub use pattern::Pattern; + + mod raw_str; + pub use raw_str::RawOsStr; + pub use raw_str::RawOsString; +} + +/// The error that occurs when a byte sequence is not representable in the +/// platform encoding. +/// +/// [`Result::unwrap`] should almost always be called on results containing +/// this error. 
It should be known whether or not byte sequences are properly +/// encoded for the platform, since [the module-level documentation][encoding] +/// discourages using encoded bytes in interchange. Results are returned +/// primarily to make panicking behavior explicit. +/// +/// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`] +/// should be used instead if that needs to be guaranteed. +/// +/// [encoding]: self#encoding +/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +/// [`Result::unwrap`]: ::std::result::Result::unwrap +#[derive(Debug, Eq, PartialEq)] +pub struct EncodingError(imp::EncodingError); + +impl Display for EncodingError { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(formatter) + } +} + +impl Error for EncodingError {} + +type Result = result::Result; + +/// A platform agnostic variant of [`OsStrExt`]. +/// +/// For more information, see [the module-level documentation][module]. +/// +/// [module]: self +/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +pub trait OsStrBytes: private::Sealed + ToOwned { + /// Converts a byte slice into an equivalent platform-native string. + /// + /// Provided byte strings should always be valid for the [unspecified + /// encoding] used by this crate. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsStr; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.to_raw_bytes(); + /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + fn from_raw_bytes<'a, S>(string: S) -> Result> + where + S: Into>; + + /// Converts a platform-native string into an equivalent byte slice. 
+ /// + /// The returned bytes string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// println!("{:?}", os_string.to_raw_bytes()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[must_use] + fn to_raw_bytes(&self) -> Cow<'_, [u8]>; +} + +impl OsStrBytes for OsStr { + #[inline] + fn from_raw_bytes<'a, S>(string: S) -> Result> + where + S: Into>, + { + match string.into() { + Cow::Borrowed(string) => { + imp::os_str_from_bytes(string).map_err(EncodingError) + } + Cow::Owned(string) => { + OsStringBytes::from_raw_vec(string).map(Cow::Owned) + } + } + } + + #[inline] + fn to_raw_bytes(&self) -> Cow<'_, [u8]> { + imp::os_str_to_bytes(self) + } +} + +impl OsStrBytes for Path { + #[inline] + fn from_raw_bytes<'a, S>(string: S) -> Result> + where + S: Into>, + { + OsStr::from_raw_bytes(string).map(|os_string| match os_string { + Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)), + Cow::Owned(os_string) => Cow::Owned(os_string.into()), + }) + } + + #[inline] + fn to_raw_bytes(&self) -> Cow<'_, [u8]> { + self.as_os_str().to_raw_bytes() + } +} + +/// A platform agnostic variant of [`OsStringExt`]. +/// +/// For more information, see [the module-level documentation][module]. +/// +/// [module]: self +/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +pub trait OsStringBytes: private::Sealed + Sized { + /// Converts a byte vector into an equivalent platform-native string. + /// + /// Provided byte strings should always be valid for the [unspecified + /// encoding] used by this crate. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. 
+ /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsString; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.clone().into_raw_vec(); + /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + fn from_raw_vec(string: Vec) -> Result; + + /// Converts a platform-native string into an equivalent byte vector. + /// + /// The returned byte string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// println!("{:?}", os_string.into_raw_vec()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[must_use] + fn into_raw_vec(self) -> Vec; +} + +impl OsStringBytes for OsString { + #[inline] + fn from_raw_vec(string: Vec) -> Result { + imp::os_string_from_vec(string).map_err(EncodingError) + } + + #[inline] + fn into_raw_vec(self) -> Vec { + imp::os_string_into_vec(self) + } +} + +impl OsStringBytes for PathBuf { + #[inline] + fn from_raw_vec(string: Vec) -> Result { + OsString::from_raw_vec(string).map(Into::into) + } + + #[inline] + fn into_raw_vec(self) -> Vec { + self.into_os_string().into_raw_vec() + } +} + +mod private { + use std::ffi::OsStr; + use std::ffi::OsString; + use std::path::Path; + use std::path::PathBuf; + + pub trait Sealed {} + impl Sealed for char {} + impl Sealed for OsStr {} + impl Sealed for OsString {} + impl Sealed for Path {} + impl Sealed for PathBuf {} + impl Sealed for &str {} + impl Sealed for &String {} +} diff --git a/vendor/os_str_bytes/src/pattern.rs b/vendor/os_str_bytes/src/pattern.rs new file mode 100644 index 000000000..fbf005498 --- /dev/null +++ b/vendor/os_str_bytes/src/pattern.rs @@ -0,0 
+1,77 @@ +use super::private; + +pub trait Encoded { + fn __get(&self) -> &[u8]; +} + +#[derive(Clone)] +pub struct EncodedChar { + buffer: [u8; 4], + length: usize, +} + +impl Encoded for EncodedChar { + #[inline] + fn __get(&self) -> &[u8] { + &self.buffer[..self.length] + } +} + +impl Encoded for &str { + #[inline] + fn __get(&self) -> &[u8] { + self.as_bytes() + } +} + +/// Allows a type to be used for searching by [`RawOsStr`] and [`RawOsString`]. +/// +/// This trait is very similar to [`str::pattern::Pattern`], but its methods +/// are private and it is implemented for different types. +/// +/// [`RawOsStr`]: super::RawOsStr +/// [`RawOsString`]: super::RawOsString +/// [`str::pattern::Pattern`]: ::std::str::pattern::Pattern +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +pub trait Pattern: private::Sealed { + #[doc(hidden)] + type __Encoded: Clone + Encoded; + + #[doc(hidden)] + fn __encode(self) -> Self::__Encoded; +} + +impl Pattern for char { + #[doc(hidden)] + type __Encoded = EncodedChar; + + #[doc(hidden)] + fn __encode(self) -> Self::__Encoded { + let mut encoded = EncodedChar { + buffer: [0; 4], + length: 0, + }; + encoded.length = self.encode_utf8(&mut encoded.buffer).len(); + encoded + } +} + +impl Pattern for &str { + #[doc(hidden)] + type __Encoded = Self; + + #[doc(hidden)] + fn __encode(self) -> Self::__Encoded { + self + } +} + +impl<'a> Pattern for &'a String { + #[doc(hidden)] + type __Encoded = <&'a str as Pattern>::__Encoded; + + #[doc(hidden)] + fn __encode(self) -> Self::__Encoded { + (**self).__encode() + } +} diff --git a/vendor/os_str_bytes/src/raw_str.rs b/vendor/os_str_bytes/src/raw_str.rs new file mode 100644 index 000000000..ccec858f4 --- /dev/null +++ b/vendor/os_str_bytes/src/raw_str.rs @@ -0,0 +1,1156 @@ +use std::borrow::Borrow; +use std::borrow::Cow; +use std::borrow::ToOwned; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Debug; +use std::fmt::Display; +use 
std::fmt::Formatter; +use std::mem; +use std::ops::Deref; +use std::ops::Index; +use std::ops::Range; +use std::ops::RangeFrom; +use std::ops::RangeFull; +use std::ops::RangeInclusive; +use std::ops::RangeTo; +use std::ops::RangeToInclusive; +use std::str; + +#[cfg(feature = "memchr")] +use memchr::memmem::find; +#[cfg(feature = "memchr")] +use memchr::memmem::rfind; + +use super::imp::raw; +use super::iter::Split; +use super::pattern::Encoded as EncodedPattern; +use super::OsStrBytes; +use super::OsStringBytes; +use super::Pattern; + +#[cfg(not(feature = "memchr"))] +fn find(string: &[u8], pat: &[u8]) -> Option { + for i in 0..=string.len().checked_sub(pat.len())? { + if string[i..].starts_with(pat) { + return Some(i); + } + } + None +} + +#[cfg(not(feature = "memchr"))] +fn rfind(string: &[u8], pat: &[u8]) -> Option { + for i in (pat.len()..=string.len()).rev() { + if string[..i].ends_with(pat) { + return Some(i - pat.len()); + } + } + None +} + +macro_rules! impl_trim_matches { + ( $self:ident , $pat:expr , $strip_method:ident ) => {{ + let pat = $pat.__encode(); + let pat = pat.__get(); + if pat.is_empty() { + return $self; + } + + let mut string = &$self.0; + while let Some(substring) = string.$strip_method(pat) { + string = substring; + } + Self::from_raw_bytes_unchecked(string) + }}; +} + +macro_rules! impl_split_once_raw { + ( $self:ident , $pat:expr , $find_fn:expr ) => {{ + let pat = $pat.__get(); + + let index = $find_fn(&$self.0, pat)?; + let prefix = &$self.0[..index]; + let suffix = &$self.0[index + pat.len()..]; + Some(( + Self::from_raw_bytes_unchecked(prefix), + Self::from_raw_bytes_unchecked(suffix), + )) + }}; +} + +/// A container for the byte strings converted by [`OsStrBytes`]. +/// +/// This wrapper is intended to prevent violating the invariants of the +/// [unspecified encoding] used by this crate and minimize encoding +/// conversions. 
+/// +/// Although this type is annotated with `#[repr(transparent)]`, the inner +/// representation is not stable. Transmuting between this type and any other +/// causes immediate undefined behavior. +/// +/// # Indices +/// +/// Methods of this struct that accept indices require that the index lie on a +/// UTF-8 boundary. Although it is possible to manipulate platform strings +/// based on other indices, this crate currently does not support them for +/// slicing methods. They would add significant complication to the +/// implementation and are generally not necessary. However, all indices +/// returned by this struct can be used for slicing. +/// +/// On Unix, all indices are permitted, to avoid false positives. However, +/// relying on this implementation detail is discouraged. Platform-specific +/// indices are error-prone. +/// +/// # Complexity +/// +/// All searching methods have worst-case multiplicative time complexity (i.e., +/// `O(self.raw_len() * pat.len())`). Enabling the "memchr" feature allows +/// these methods to instead run in linear time in the worst case (documented +/// for [`memchr::memmem::find`][memchr complexity]). +/// +/// [memchr complexity]: memchr::memmem::find#complexity +/// [unspecified encoding]: super#encoding +#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +#[repr(transparent)] +pub struct RawOsStr([u8]); + +impl RawOsStr { + fn from_raw_bytes_unchecked(string: &[u8]) -> &Self { + // SAFETY: This struct has a layout that makes this operation safe. + unsafe { mem::transmute(string) } + } + + /// Converts a platform-native string into a representation that can be + /// more easily manipulated. + /// + /// This method performs the necessary conversion immediately, so it can be + /// expensive to call. It is recommended to continue using the returned + /// instance as long as possible (instead of the original [`OsStr`]), to + /// avoid repeated conversions. 
+ /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// println!("{:?}", RawOsStr::new(&os_string)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new(string: &OsStr) -> Cow<'_, Self> { + match string.to_raw_bytes() { + Cow::Borrowed(string) => { + Cow::Borrowed(Self::from_raw_bytes_unchecked(string)) + } + Cow::Owned(string) => Cow::Owned(RawOsString(string)), + } + } + + /// Wraps a string, without copying or encoding conversion. + /// + /// This method is much more efficient than [`RawOsStr::new`], since the + /// [encoding] used by this crate is compatible with UTF-8. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let string = "foobar"; + /// let raw = RawOsStr::from_str(string); + /// assert_eq!(string, raw); + /// ``` + /// + /// [encoding]: super#encoding + #[allow(clippy::should_implement_trait)] + #[inline] + #[must_use] + pub fn from_str(string: &str) -> &Self { + Self::from_raw_bytes_unchecked(string.as_bytes()) + } + + /// Returns the byte string stored by this container. + /// + /// The result will match what would be returned by + /// [`OsStrBytes::to_raw_bytes`] for the same string. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// assert_eq!(os_string.to_raw_bytes(), raw.as_raw_bytes()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn as_raw_bytes(&self) -> &[u8] { + &self.0 + } + + /// Equivalent to [`str::contains`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. 
+ /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert!(raw.contains("oo")); + /// assert!(!raw.contains("of")); + /// ``` + #[inline] + #[must_use] + pub fn contains

(&self, pat: P) -> bool + where + P: Pattern, + { + self.find(pat).is_some() + } + + /// Equivalent to [`str::ends_with`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert!(raw.ends_with("bar")); + /// assert!(!raw.ends_with("foo")); + /// ``` + #[inline] + #[must_use] + pub fn ends_with

(&self, pat: P) -> bool + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + self.0.ends_with(pat) + } + + /// Equivalent to [`str::ends_with`] but accepts this type for the pattern. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert!(raw.ends_with_os(RawOsStr::from_str("bar"))); + /// assert!(!raw.ends_with_os(RawOsStr::from_str("foo"))); + /// ``` + #[inline] + #[must_use] + pub fn ends_with_os(&self, pat: &Self) -> bool { + raw::ends_with(&self.0, &pat.0) + } + + /// Equivalent to [`str::find`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!(Some(1), raw.find("o")); + /// assert_eq!(None, raw.find("of")); + /// ``` + #[inline] + #[must_use] + pub fn find

(&self, pat: P) -> Option + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + find(&self.0, pat) + } + + /// Equivalent to [`str::is_empty`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// assert!(RawOsStr::from_str("").is_empty()); + /// assert!(!RawOsStr::from_str("foobar").is_empty()); + /// ``` + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the length of the byte string stored by this container. + /// + /// Only the following assumptions can be made about the result: + /// - The length of any Unicode character is the length of its UTF-8 + /// representation (i.e., [`char::len_utf8`]). + /// - Splitting a string at a UTF-8 boundary will return two strings with + /// lengths that sum to the length of the original string. + /// + /// This method may return a different result than would [`OsStr::len`] + /// when called on same string, since [`OsStr`] uses an unspecified + /// encoding. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// assert_eq!(6, RawOsStr::from_str("foobar").raw_len()); + /// assert_eq!(0, RawOsStr::from_str("").raw_len()); + /// ``` + #[inline] + #[must_use] + pub fn raw_len(&self) -> usize { + self.0.len() + } + + /// Equivalent to [`str::rfind`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!(Some(2), raw.rfind("o")); + /// assert_eq!(None, raw.rfind("of")); + /// ``` + #[inline] + #[must_use] + pub fn rfind

(&self, pat: P) -> Option + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + rfind(&self.0, pat) + } + + pub(super) fn rsplit_once_raw

(&self, pat: &P) -> Option<(&Self, &Self)> + where + P: EncodedPattern, + { + impl_split_once_raw!(self, pat, rfind) + } + + /// Equivalent to [`str::rsplit_once`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!( + /// Some((RawOsStr::from_str("fo"), RawOsStr::from_str("bar"))), + /// raw.rsplit_once("o"), + /// ); + /// assert_eq!(None, raw.rsplit_once("of")); + /// ``` + #[inline] + #[must_use] + pub fn rsplit_once

(&self, pat: P) -> Option<(&Self, &Self)> + where + P: Pattern, + { + self.rsplit_once_raw(&pat.__encode()) + } + + // https://github.com/rust-lang/rust/blob/49c68bd53f90e375bfb3cbba8c1c67a9e0adb9c0/src/libcore/str/mod.rs#L2184-L2221 + #[cold] + #[inline(never)] + #[track_caller] + fn index_boundary_error(&self, index: usize) -> ! { + debug_assert!(raw::is_continuation(self.0[index])); + + let start = self.0[..index] + .iter() + .rposition(|&x| !raw::is_continuation(x)) + .expect("invalid raw bytes"); + let mut end = index + 1; + end += self.0[end..] + .iter() + .position(|&x| !raw::is_continuation(x)) + .unwrap_or_else(|| self.raw_len() - end); + let code_point = raw::decode_code_point(&self.0[start..end]); + panic!( + "byte index {} is not a valid boundary; it is inside U+{:04X} \ + (bytes {}..{})", + index, code_point, start, end, + ); + } + + #[track_caller] + fn check_bound(&self, index: usize) { + if let Some(&byte) = self.0.get(index) { + if raw::is_continuation(byte) { + self.index_boundary_error(index); + } + } + } + + /// Equivalent to [`str::split`], but empty patterns are not accepted. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range or empty. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!(["f", "", "bar"], *raw.split("o").collect::>()); + /// ``` + #[inline] + #[must_use] + pub fn split

(&self, pat: P) -> Split<'_, P> + where + P: Pattern, + { + Split::new(self, pat) + } + + /// Equivalent to [`str::split_at`]. + /// + /// # Panics + /// + /// Panics if the index is not a [valid boundary]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!( + /// ((RawOsStr::from_str("fo"), RawOsStr::from_str("obar"))), + /// raw.split_at(2), + /// ); + /// ``` + /// + /// [valid boundary]: #indices + #[inline] + #[must_use] + pub fn split_at(&self, mid: usize) -> (&Self, &Self) { + self.check_bound(mid); + + let (prefix, suffix) = self.0.split_at(mid); + ( + Self::from_raw_bytes_unchecked(prefix), + Self::from_raw_bytes_unchecked(suffix), + ) + } + + pub(super) fn split_once_raw

(&self, pat: &P) -> Option<(&Self, &Self)> + where + P: EncodedPattern, + { + impl_split_once_raw!(self, pat, find) + } + + /// Equivalent to [`str::split_once`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert_eq!( + /// Some((RawOsStr::from_str("f"), RawOsStr::from_str("obar"))), + /// raw.split_once("o"), + /// ); + /// assert_eq!(None, raw.split_once("of")); + /// ``` + #[inline] + #[must_use] + pub fn split_once

(&self, pat: P) -> Option<(&Self, &Self)> + where + P: Pattern, + { + self.split_once_raw(&pat.__encode()) + } + + /// Equivalent to [`str::starts_with`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert!(raw.starts_with("foo")); + /// assert!(!raw.starts_with("bar")); + /// ``` + #[inline] + #[must_use] + pub fn starts_with

(&self, pat: P) -> bool + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + self.0.starts_with(pat) + } + + /// Equivalent to [`str::starts_with`] but accepts this type for the + /// pattern. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("foobar"); + /// assert!(raw.starts_with_os(RawOsStr::from_str("foo"))); + /// assert!(!raw.starts_with_os(RawOsStr::from_str("bar"))); + /// ``` + #[inline] + #[must_use] + pub fn starts_with_os(&self, pat: &Self) -> bool { + raw::starts_with(&self.0, &pat.0) + } + + /// Equivalent to [`str::strip_prefix`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!( + /// Some(RawOsStr::from_str("11foo1bar111")), + /// raw.strip_prefix("1"), + /// ); + /// assert_eq!(None, raw.strip_prefix("o")); + /// ``` + #[inline] + #[must_use] + pub fn strip_prefix

(&self, pat: P) -> Option<&Self> + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + self.0.strip_prefix(pat).map(Self::from_raw_bytes_unchecked) + } + + /// Equivalent to [`str::strip_suffix`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!( + /// Some(RawOsStr::from_str("111foo1bar11")), + /// raw.strip_suffix("1"), + /// ); + /// assert_eq!(None, raw.strip_suffix("o")); + /// ``` + #[inline] + #[must_use] + pub fn strip_suffix

(&self, pat: P) -> Option<&Self> + where + P: Pattern, + { + let pat = pat.__encode(); + let pat = pat.__get(); + + self.0.strip_suffix(pat).map(Self::from_raw_bytes_unchecked) + } + + /// Converts this representation back to a platform-native string. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// assert_eq!(os_string, raw.to_os_str()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn to_os_str(&self) -> Cow<'_, OsStr> { + OsStr::from_raw_bytes(&self.0).expect("invalid raw bytes") + } + + /// Equivalent to [`OsStr::to_str`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let string = "foobar"; + /// let raw = RawOsStr::from_str(string); + /// assert_eq!(Some(string), raw.to_str()); + /// ``` + #[inline] + #[must_use] + pub fn to_str(&self) -> Option<&str> { + str::from_utf8(&self.0).ok() + } + + /// Converts this string to the best UTF-8 representation possible. + /// + /// Invalid sequences will be replaced with + /// [`char::REPLACEMENT_CHARACTER`]. + /// + /// This method may return a different result than would + /// [`OsStr::to_string_lossy`] when called on same string, since [`OsStr`] + /// uses an unspecified encoding. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// println!("{}", raw.to_str_lossy()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn to_str_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.0) + } + + /// Equivalent to [`str::trim_end_matches`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. 
+ /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!("111foo1bar", raw.trim_end_matches("1")); + /// assert_eq!("111foo1bar111", raw.trim_end_matches("o")); + /// ``` + #[must_use] + pub fn trim_end_matches

(&self, pat: P) -> &Self + where + P: Pattern, + { + impl_trim_matches!(self, pat, strip_suffix) + } + + /// Equivalent to [`str::trim_start_matches`]. + /// + /// # Panics + /// + /// Panics if the pattern is a byte outside of the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!("foo1bar111", raw.trim_start_matches("1")); + /// assert_eq!("111foo1bar111", raw.trim_start_matches("o")); + /// ``` + #[must_use] + pub fn trim_start_matches

(&self, pat: P) -> &Self + where + P: Pattern, + { + impl_trim_matches!(self, pat, strip_prefix) + } +} + +impl AsRef for RawOsStr { + #[inline] + fn as_ref(&self) -> &Self { + self + } +} + +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &RawOsStr { + RawOsStr::from_str(self) + } +} + +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &RawOsStr { + (**self).as_ref() + } +} + +impl Default for &RawOsStr { + #[inline] + fn default() -> Self { + RawOsStr::from_str("") + } +} + +impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> { + #[inline] + fn from(other: &'a RawOsStr) -> Self { + Cow::Borrowed(other) + } +} + +macro_rules! r#impl { + ( + $index_type:ty + $(, $index_var:ident , $first_bound:expr $(, $second_bound:expr)?)? + ) => { + impl Index<$index_type> for RawOsStr { + type Output = Self; + + #[inline] + fn index(&self, idx: $index_type) -> &Self::Output { + $( + let $index_var = &idx; + self.check_bound($first_bound); + $(self.check_bound($second_bound);)? + )? + + Self::from_raw_bytes_unchecked(&self.0[idx]) + } + } + }; +} +r#impl!(Range, x, x.start, x.end); +r#impl!(RangeFrom, x, x.start); +r#impl!(RangeFull); +// [usize::MAX] will always be a valid inclusive end index. +#[rustfmt::skip] +r#impl!(RangeInclusive, x, *x.start(), x.end().wrapping_add(1)); +r#impl!(RangeTo, x, x.end); +r#impl!(RangeToInclusive, x, x.end.wrapping_add(1)); + +impl ToOwned for RawOsStr { + type Owned = RawOsString; + + #[inline] + fn to_owned(&self) -> Self::Owned { + RawOsString(self.0.to_owned()) + } +} + +/// A container for the byte strings converted by [`OsStringBytes`]. +/// +/// For more information, see [`RawOsStr`]. +/// +/// [unspecified encoding]: super#encoding +#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +pub struct RawOsString(Vec); + +impl RawOsString { + /// Converts a platform-native string into a representation that can be + /// more easily manipulated. 
+ /// + /// For more information, see [`RawOsStr::new`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// println!("{:?}", RawOsString::new(os_string)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new(string: OsString) -> Self { + Self(string.into_raw_vec()) + } + + /// Wraps a string, without copying or encoding conversion. + /// + /// This method is much more efficient than [`RawOsString::new`], since the + /// [encoding] used by this crate is compatible with UTF-8. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let raw = RawOsString::from_string(string.clone()); + /// assert_eq!(string, raw); + /// ``` + /// + /// [encoding]: super#encoding + #[inline] + #[must_use] + pub fn from_string(string: String) -> Self { + Self(string.into_bytes()) + } + + /// Converts this representation back to a platform-native string. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string.clone()); + /// assert_eq!(os_string, raw.into_os_string()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn into_os_string(self) -> OsString { + OsString::from_raw_vec(self.0).expect("invalid raw bytes") + } + + /// Returns the byte string stored by this container. + /// + /// The result will match what would be returned by + /// [`OsStringBytes::into_raw_vec`] for the same string. 
+ /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string.clone()); + /// assert_eq!(os_string.into_raw_vec(), raw.into_raw_vec()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn into_raw_vec(self) -> Vec { + self.0 + } + + /// Equivalent to [`OsString::into_string`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let raw = RawOsString::from_string(string.clone()); + /// assert_eq!(Ok(string), raw.into_string()); + /// ``` + #[inline] + pub fn into_string(self) -> Result { + String::from_utf8(self.0).map_err(|x| Self(x.into_bytes())) + } +} + +impl AsRef for RawOsString { + #[inline] + fn as_ref(&self) -> &RawOsStr { + self + } +} + +impl Borrow for RawOsString { + #[inline] + fn borrow(&self) -> &RawOsStr { + self + } +} + +impl Deref for RawOsString { + type Target = RawOsStr; + + #[inline] + fn deref(&self) -> &Self::Target { + RawOsStr::from_raw_bytes_unchecked(&self.0) + } +} + +impl From for RawOsString { + #[inline] + fn from(other: String) -> Self { + Self::from_string(other) + } +} + +impl From for Cow<'_, RawOsStr> { + #[inline] + fn from(other: RawOsString) -> Self { + Cow::Owned(other) + } +} + +macro_rules! 
r#impl { + ( $index_type:ty ) => { + impl Index<$index_type> for RawOsString { + type Output = RawOsStr; + + #[inline] + fn index(&self, idx: $index_type) -> &Self::Output { + &(**self)[idx] + } + } + }; +} +r#impl!(Range); +r#impl!(RangeFrom); +r#impl!(RangeFull); +r#impl!(RangeInclusive); +r#impl!(RangeTo); +r#impl!(RangeToInclusive); + +struct Buffer<'a>(&'a [u8]); + +impl Debug for Buffer<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str("\"")?; + + let mut string = self.0; + let mut invalid_length = 0; + while !string.is_empty() { + let (invalid, substring) = string.split_at(invalid_length); + + let valid = match str::from_utf8(substring) { + Ok(valid) => { + string = &[]; + valid + } + Err(error) => { + let (valid, substring) = + substring.split_at(error.valid_up_to()); + + let invalid_char_length = + error.error_len().unwrap_or_else(|| substring.len()); + if valid.is_empty() { + invalid_length += invalid_char_length; + continue; + } + string = substring; + invalid_length = invalid_char_length; + + // SAFETY: This slice was validated to be UTF-8. + unsafe { str::from_utf8_unchecked(valid) } + } + }; + + raw::debug(invalid, f)?; + Display::fmt(&valid.escape_debug(), f)?; + } + + f.write_str("\"") + } +} + +macro_rules! r#impl { + ( $type:ty ) => { + impl Debug for $type { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_tuple(stringify!($type)) + .field(&Buffer(&self.0)) + .finish() + } + } + }; +} +r#impl!(RawOsStr); +r#impl!(RawOsString); + +macro_rules! 
r#impl { + ( $type:ty , $other_type:ty ) => { + impl PartialEq<$other_type> for $type { + #[inline] + fn eq(&self, other: &$other_type) -> bool { + let raw: &RawOsStr = self; + let other: &RawOsStr = other.as_ref(); + raw == other + } + } + + impl PartialEq<$type> for $other_type { + #[inline] + fn eq(&self, other: &$type) -> bool { + other == self + } + } + }; +} +r#impl!(RawOsStr, RawOsString); +r#impl!(&RawOsStr, RawOsString); +r#impl!(RawOsStr, str); +r#impl!(RawOsStr, String); +r#impl!(&RawOsStr, String); +r#impl!(RawOsString, str); +r#impl!(RawOsString, &str); +r#impl!(RawOsString, String); + +#[cfg(feature = "print_bytes")] +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "print_bytes")))] +mod print_bytes { + use print_bytes::ByteStr; + use print_bytes::ToBytes; + #[cfg(windows)] + use print_bytes::WideStr; + + #[cfg(windows)] + use crate::imp::raw; + + use super::RawOsStr; + use super::RawOsString; + + impl ToBytes for RawOsStr { + #[inline] + fn to_bytes(&self) -> ByteStr<'_> { + self.0.to_bytes() + } + + #[cfg(windows)] + #[inline] + fn to_wide(&self) -> Option { + Some(WideStr::new(raw::encode_wide_unchecked(&self.0).collect())) + } + } + + impl ToBytes for RawOsString { + #[inline] + fn to_bytes(&self) -> ByteStr<'_> { + (**self).to_bytes() + } + + #[cfg(windows)] + #[inline] + fn to_wide(&self) -> Option { + (**self).to_wide() + } + } +} + +#[cfg(feature = "uniquote")] +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "uniquote")))] +mod uniquote { + use uniquote::Formatter; + use uniquote::Quote; + use uniquote::Result; + + use crate::imp::raw; + + use super::RawOsStr; + use super::RawOsString; + + impl Quote for RawOsStr { + #[inline] + fn escape(&self, f: &mut Formatter<'_>) -> Result { + raw::uniquote::escape(&self.0, f) + } + } + + impl Quote for RawOsString { + #[inline] + fn escape(&self, f: &mut Formatter<'_>) -> Result { + (**self).escape(f) + } + } +} diff --git a/vendor/os_str_bytes/src/util.rs b/vendor/os_str_bytes/src/util.rs new 
file mode 100644 index 000000000..bd28b7be1 --- /dev/null +++ b/vendor/os_str_bytes/src/util.rs @@ -0,0 +1,10 @@ +pub(super) const BYTE_SHIFT: u8 = 6; + +pub(super) const CONT_MASK: u8 = (1 << BYTE_SHIFT) - 1; + +pub(super) const CONT_TAG: u8 = 0b1000_0000; + +#[cfg_attr(not(windows), allow(dead_code))] +pub(super) const fn is_continuation(byte: u8) -> bool { + byte & !CONT_MASK == CONT_TAG +} diff --git a/vendor/os_str_bytes/src/wasm32/mod.rs b/vendor/os_str_bytes/src/wasm32/mod.rs new file mode 100644 index 000000000..f8ae36861 --- /dev/null +++ b/vendor/os_str_bytes/src/wasm32/mod.rs @@ -0,0 +1,56 @@ +use std::borrow::Cow; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::result; +use std::str; +use std::str::Utf8Error; + +if_raw_str! { + pub(super) mod raw; +} + +#[derive(Debug, Eq, PartialEq)] +pub(super) struct EncodingError(Utf8Error); + +impl Display for EncodingError { + fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + write!(formatter, "os_str_bytes: {}", self.0) + } +} + +impl Error for EncodingError {} + +type Result = result::Result; + +macro_rules! 
expect_utf8 { + ( $result:expr ) => { + $result.expect( + "platform string contains invalid UTF-8, which should not be \ + possible", + ) + }; +} + +pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { + str::from_utf8(string) + .map(|x| Cow::Borrowed(OsStr::new(x))) + .map_err(EncodingError) +} + +pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { + Cow::Borrowed(expect_utf8!(os_string.to_str()).as_bytes()) +} + +pub(super) fn os_string_from_vec(string: Vec) -> Result { + String::from_utf8(string) + .map(Into::into) + .map_err(|x| EncodingError(x.utf8_error())) +} + +pub(super) fn os_string_into_vec(os_string: OsString) -> Vec { + expect_utf8!(os_string.into_string()).into_bytes() +} diff --git a/vendor/os_str_bytes/src/wasm32/raw.rs b/vendor/os_str_bytes/src/wasm32/raw.rs new file mode 100644 index 000000000..564590057 --- /dev/null +++ b/vendor/os_str_bytes/src/wasm32/raw.rs @@ -0,0 +1,39 @@ +use std::fmt; +use std::fmt::Formatter; +use std::str; + +pub(crate) use crate::util::is_continuation; + +pub(crate) fn decode_code_point(string: &[u8]) -> u32 { + let string = str::from_utf8(string).expect("invalid string"); + let mut chars = string.chars(); + let ch = chars + .next() + .expect("cannot parse code point from empty string"); + assert_eq!(None, chars.next(), "multiple code points found"); + ch.into() +} + +pub(crate) fn ends_with(string: &[u8], suffix: &[u8]) -> bool { + string.ends_with(suffix) +} + +pub(crate) fn starts_with(string: &[u8], prefix: &[u8]) -> bool { + string.starts_with(prefix) +} + +pub(crate) fn debug(string: &[u8], _: &mut Formatter<'_>) -> fmt::Result { + assert!(string.is_empty()); + Ok(()) +} + +#[cfg(feature = "uniquote")] +pub(crate) mod uniquote { + use uniquote::Formatter; + use uniquote::Quote; + use uniquote::Result; + + pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result { + string.escape(f) + } +} diff --git a/vendor/os_str_bytes/src/windows/mod.rs b/vendor/os_str_bytes/src/windows/mod.rs 
new file mode 100644
index 000000000..3b6105b27
--- /dev/null
+++ b/vendor/os_str_bytes/src/windows/mod.rs
@@ -0,0 +1,152 @@
+// These functions are necessarily inefficient, because they must revert
+// encoding conversions performed by the standard library. However, there is
+// currently no better alternative.
+
+use std::borrow::Cow;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::os::windows::ffi::OsStrExt;
+use std::os::windows::ffi::OsStringExt;
+use std::result;
+use std::str;
+
+if_raw_str! {
+    pub(super) mod raw;
+}
+
+mod wtf8;
+use wtf8::encode_wide;
+use wtf8::DecodeWide;
+
+// Describes where decoding failed: at an unexpected byte, at a code point
+// that cannot be represented, or at the end of the input.
+#[derive(Debug, Eq, PartialEq)]
+pub(super) enum EncodingError {
+    Byte(u8),
+    CodePoint(u32),
+    End(),
+}
+
+impl EncodingError {
+    // Returns a description of the failure location for the error message.
+    fn position(&self) -> Cow<'_, str> {
+        match self {
+            Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)),
+            Self::CodePoint(code_point) => {
+                Cow::Owned(format!("code point U+{:04X}", code_point))
+            }
+            Self::End() => Cow::Borrowed("end of string"),
+        }
+    }
+}
+
+impl Display for EncodingError {
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
+        write!(
+            formatter,
+            "byte sequence is not representable in the platform encoding; \
+             error at {}",
+            self.position(),
+        )
+    }
+}
+
+impl Error for EncodingError {}
+
+type Result<T> = result::Result<T, EncodingError>;
+
+// Encodes `string` to UTF-16 code units and builds an `OsString` from them,
+// stopping at the first encoding error.
+fn from_bytes(string: &[u8]) -> Result<OsString> {
+    let encoder = encode_wide(string);
+
+    // Collecting an iterator into a result ignores the size hint:
+    // https://github.com/rust-lang/rust/issues/48994
+    let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
+    for wchar in encoder {
+        encoded_string.push(wchar?);
+    }
+    Ok(OsStringExt::from_wide(&encoded_string))
+}
+
+// Converts the UTF-16 code units of `os_string` into bytes via `DecodeWide`.
+fn to_bytes(os_string: &OsStr) -> Vec<u8> {
+    let encoder = OsStrExt::encode_wide(os_string);
+
+    let mut string = Vec::with_capacity(encoder.size_hint().0);
+    string.extend(DecodeWide::new(encoder));
+    string
+}
+
+// The four entry points below always allocate on this platform, because the
+// bytes have to be re-encoded in both directions.
+pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
+    from_bytes(string).map(Cow::Owned)
+}
+
+pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
+    Cow::Owned(to_bytes(os_string))
+}
+
+pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
+    from_bytes(&string)
+}
+
+pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
+    to_bytes(&os_string)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ffi::OsStr;
+
+    use crate::OsStrBytes;
+
+    use super::EncodingError;
+
+    // Each case pairs a malformed byte string with the exact error that
+    // decoding it must report.
+    #[test]
+    fn test_invalid() {
+        use EncodingError::Byte;
+        use EncodingError::CodePoint;
+        use EncodingError::End;
+
+        test_error(Byte(b'\x83'), b"\x0C\x83\xD7\x3E");
+        test_error(Byte(b'\x52'), b"\x19\xF7\x52\x84");
+        test_error(Byte(b'\xB8'), b"\x70\xB8\x1F\x66");
+        test_error(CodePoint(0x34_0388), b"\x70\xFD\x80\x8E\x88");
+        test_error(Byte(b'\x80'), b"\x80");
+        test_error(Byte(b'\x80'), b"\x80\x80");
+        test_error(Byte(b'\x80'), b"\x80\x80\x80");
+        test_error(Byte(b'\x81'), b"\x81");
+        test_error(Byte(b'\x88'), b"\x88\xB4\xC7\x46");
+        test_error(Byte(b'\x97'), b"\x97\xCE\x06");
+        test_error(Byte(b'\x00'), b"\xC2\x00");
+        test_error(Byte(b'\x7F'), b"\xC2\x7F");
+        test_error(Byte(b'\x09'), b"\xCD\x09\x95");
+        test_error(Byte(b'\x43'), b"\xCD\x43\x5F\xA0");
+        test_error(Byte(b'\x69'), b"\xD7\x69\xB2");
+        test_error(CodePoint(0x528), b"\xE0\x94\xA8");
+        test_error(CodePoint(0x766), b"\xE0\x9D\xA6\x12\xAE");
+        test_error(Byte(b'\xFD'), b"\xE2\xAB\xFD\x51");
+        test_error(Byte(b'\xC4'), b"\xE3\xC4");
+        test_error(CodePoint(0xDC00), b"\xED\xA0\x80\xED\xB0\x80");
+        test_error(End(), b"\xF1");
+        test_error(End(), b"\xF1\x80");
+        test_error(End(), b"\xF1\x80\x80");
+        test_error(Byte(b'\xF1'), b"\xF1\x80\x80\xF1");
+        test_error(CodePoint(0x11_09CC), b"\xF4\x90\xA7\x8C");
+        test_error(CodePoint(0x15_EC46), b"\xF5\x9E\xB1\x86");
+        test_error(End(), b"\xFB");
+        test_error(End(), b"\xFB\x80");
+        test_error(End(), b"\xFB\x80\x80");
+        test_error(CodePoint(0x2C_0000), b"\xFB\x80\x80\x80");
+        test_error(End(), b"\xFF");
+        test_error(End(), b"\xFF\x80");
+        test_error(End(), b"\xFF\x80\x80");
+        test_error(CodePoint(0x3C_0000), b"\xFF\x80\x80\x80");
+        test_error(CodePoint(0x3C_6143), b"\xFF\x86\x85\x83");
+
+        fn test_error(error: EncodingError, string: &[u8]) {
+            assert_eq!(
+                Err(error),
+                OsStr::from_raw_bytes(string).map_err(|x| x.0),
+            );
+        }
+    }
+}
diff --git a/vendor/os_str_bytes/src/windows/raw.rs b/vendor/os_str_bytes/src/windows/raw.rs
new file mode 100644
index 000000000..630eb01ea
--- /dev/null
+++ b/vendor/os_str_bytes/src/windows/raw.rs
@@ -0,0 +1,42 @@
+use std::fmt;
+use std::fmt::Formatter;
+
+pub(crate) use crate::util::is_continuation;
+
+use super::wtf8;
+pub(crate) use super::wtf8::ends_with;
+pub(crate) use super::wtf8::starts_with;
+use super::wtf8::CodePoints;
+
+// Encodes `string` to UTF-16 code units, panicking with "invalid string" on
+// the first encoding error.
+pub(crate) fn encode_wide_unchecked(
+    string: &[u8],
+) -> impl '_ + Iterator<Item = u16> {
+    wtf8::encode_wide(string).map(|x| x.expect("invalid string"))
+}
+
+// Decodes `string` as exactly one code point.
+//
+// Panics if `string` is empty, fails to decode, or holds more than one code
+// point.
+pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
+    let mut code_points = CodePoints::new(string.iter().copied());
+    let code_point = code_points
+        .next()
+        .expect("cannot parse code point from empty string")
+        .expect("invalid string");
+    assert_eq!(None, code_points.next(), "multiple code points found");
+    code_point
+}
+
+// Writes every UTF-16 code unit of `string` as a `\u{…}` escape.
+pub(crate) fn debug(string: &[u8], f: &mut Formatter<'_>) -> fmt::Result {
+    for wchar in encode_wide_unchecked(string) {
+        write!(f, "\\u{{{:X}}}", wchar)?;
+    }
+    Ok(())
+}
+
+#[cfg(feature = "uniquote")]
+pub(crate) mod uniquote {
+    use uniquote::Formatter;
+    use uniquote::Result;
+
+    pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result {
+        f.escape_utf16(super::encode_wide_unchecked(string))
+    }
+}
diff --git a/vendor/os_str_bytes/src/windows/wtf8/code_points.rs b/vendor/os_str_bytes/src/windows/wtf8/code_points.rs
new file mode 100644
index 000000000..b265db332
--- /dev/null
+++ b/vendor/os_str_bytes/src/windows/wtf8/code_points.rs
@@ -0,0 +1,117 @@
+use std::iter::Peekable;
+use std::mem; + +use crate::util::is_continuation; +use crate::util::BYTE_SHIFT; +use crate::util::CONT_MASK; + +use super::EncodingError; +use super::Result; + +pub(in super::super) struct CodePoints +where + I: Iterator, +{ + iter: Peekable, + surrogate: bool, +} + +impl CodePoints +where + I: Iterator, +{ + pub(in super::super) fn new(string: S) -> Self + where + S: IntoIterator, + { + Self { + iter: string.into_iter().peekable(), + surrogate: false, + } + } + + fn consume_next(&mut self, code_point: &mut u32) -> Result<()> { + if let Some(&byte) = self.iter.peek() { + if !is_continuation(byte) { + self.surrogate = false; + // Not consuming this byte will be useful if this crate ever + // offers a way to encode lossily. + return Err(EncodingError::Byte(byte)); + } + *code_point = + (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK); + + let removed = self.iter.next(); + debug_assert_eq!(Some(byte), removed); + } else { + return Err(EncodingError::End()); + } + Ok(()) + } + + pub(super) fn inner_size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl Iterator for CodePoints +where + I: Iterator, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let byte = self.iter.next()?; + let mut code_point: u32 = byte.into(); + + macro_rules! consume_next { + () => {{ + if let Err(error) = self.consume_next(&mut code_point) { + return Some(Err(error)); + } + }}; + } + + let prev_surrogate = mem::replace(&mut self.surrogate, false); + + let mut invalid = false; + if !byte.is_ascii() { + if byte < 0xC2 { + return Some(Err(EncodingError::Byte(byte))); + } + + if byte < 0xE0 { + code_point &= 0x1F; + } else { + code_point &= 0x0F; + consume_next!(); + + if byte >= 0xF0 { + if code_point.wrapping_sub(0x10) >= 0x100 { + invalid = true; + } + consume_next!(); + + // This condition is optimized to detect surrogate code points. 
+ } else if code_point & 0xFE0 == 0x360 { + if code_point & 0x10 == 0 { + self.surrogate = true; + } else if prev_surrogate { + // Decoding a broken surrogate pair would be lossy. + invalid = true; + } + } + + if code_point < 0x20 { + invalid = true; + } + } + consume_next!(); + } + if invalid { + return Some(Err(EncodingError::CodePoint(code_point))); + } + + Some(Ok(code_point)) + } +} diff --git a/vendor/os_str_bytes/src/windows/wtf8/convert.rs b/vendor/os_str_bytes/src/windows/wtf8/convert.rs new file mode 100644 index 000000000..75843f5b3 --- /dev/null +++ b/vendor/os_str_bytes/src/windows/wtf8/convert.rs @@ -0,0 +1,166 @@ +use std::char; +use std::char::DecodeUtf16; +use std::num::NonZeroU16; + +use crate::util::BYTE_SHIFT; +use crate::util::CONT_MASK; +use crate::util::CONT_TAG; + +use super::CodePoints; +use super::Result; + +const MIN_HIGH_SURROGATE: u16 = 0xD800; + +const MIN_LOW_SURROGATE: u16 = 0xDC00; + +const MIN_SURROGATE_CODE: u32 = (u16::MAX as u32) + 1; + +macro_rules! static_assert { + ( $condition:expr ) => { + const _: () = [()][if $condition { 0 } else { 1 }]; + }; +} + +pub(in super::super) struct DecodeWide +where + I: Iterator, +{ + iter: DecodeUtf16, + code_point: u32, + shift: u8, +} + +impl DecodeWide +where + I: Iterator, +{ + pub(in super::super) fn new(string: S) -> Self + where + S: IntoIterator, + { + Self { + iter: char::decode_utf16(string), + code_point: 0, + shift: 0, + } + } +} + +impl Iterator for DecodeWide +where + I: Iterator, +{ + type Item = u8; + + fn next(&mut self) -> Option { + if let Some(shift) = self.shift.checked_sub(BYTE_SHIFT) { + self.shift = shift; + return Some( + ((self.code_point >> self.shift) as u8 & CONT_MASK) | CONT_TAG, + ); + } + + self.code_point = self + .iter + .next()? + .map(Into::into) + .unwrap_or_else(|x| x.unpaired_surrogate().into()); + + macro_rules! decode { + ( $tag:expr ) => { + Some((self.code_point >> self.shift) as u8 | $tag) + }; + } + macro_rules! 
try_decode { + ( $tag:expr , $upper_bound:expr ) => { + if self.code_point < $upper_bound { + return decode!($tag); + } + self.shift += BYTE_SHIFT; + }; + } + try_decode!(0, 0x80); + try_decode!(0xC0, 0x800); + try_decode!(0xE0, MIN_SURROGATE_CODE); + decode!(0xF0) + } + + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.iter.size_hint(); + let shift = self.shift.into(); + ( + low.saturating_add(shift), + high.and_then(|x| x.checked_mul(4)) + .and_then(|x| x.checked_add(shift)), + ) + } +} + +struct EncodeWide +where + I: Iterator, +{ + iter: CodePoints, + surrogate: Option, +} + +impl EncodeWide +where + I: Iterator, +{ + pub(in super::super) fn new(string: S) -> Self + where + S: IntoIterator, + { + Self { + iter: CodePoints::new(string), + surrogate: None, + } + } +} + +impl Iterator for EncodeWide +where + I: Iterator, +{ + type Item = Result; + + fn next(&mut self) -> Option { + if let Some(surrogate) = self.surrogate.take() { + return Some(Ok(surrogate.get())); + } + + self.iter.next().map(|code_point| { + code_point.map(|code_point| { + code_point + .checked_sub(MIN_SURROGATE_CODE) + .map(|offset| { + static_assert!(MIN_LOW_SURROGATE != 0); + + self.surrogate = Some(unsafe { + NonZeroU16::new_unchecked( + (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE, + ) + }); + (offset >> 10) as u16 | MIN_HIGH_SURROGATE + }) + .unwrap_or(code_point as u16) + }) + }) + } + + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.iter.inner_size_hint(); + let additional = self.surrogate.is_some().into(); + ( + (low.saturating_add(2) / 3).saturating_add(additional), + high.and_then(|x| x.checked_add(additional)), + ) + } +} + +pub(in super::super) fn encode_wide( + string: &[u8], +) -> impl '_ + Iterator> { + EncodeWide::new(string.iter().copied()) +} diff --git a/vendor/os_str_bytes/src/windows/wtf8/mod.rs b/vendor/os_str_bytes/src/windows/wtf8/mod.rs new file mode 100644 index 000000000..d8b0dc4a7 --- /dev/null +++ 
b/vendor/os_str_bytes/src/windows/wtf8/mod.rs @@ -0,0 +1,18 @@ +// This module implements the WTF-8 encoding specification: +// https://simonsapin.github.io/wtf-8/ + +use super::EncodingError; +use super::Result; + +mod code_points; +pub(super) use code_points::CodePoints; + +mod convert; +pub(super) use convert::encode_wide; +pub(super) use convert::DecodeWide; + +if_raw_str! { + mod string; + pub(crate) use string::ends_with; + pub(crate) use string::starts_with; +} diff --git a/vendor/os_str_bytes/src/windows/wtf8/string.rs b/vendor/os_str_bytes/src/windows/wtf8/string.rs new file mode 100644 index 000000000..10b8fafb6 --- /dev/null +++ b/vendor/os_str_bytes/src/windows/wtf8/string.rs @@ -0,0 +1,63 @@ +use crate::util::is_continuation; + +use super::encode_wide; + +const SURROGATE_LENGTH: usize = 3; + +pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool { + let index = match string.len().checked_sub(suffix.len()) { + Some(index) => index, + None => return false, + }; + if let Some(&byte) = string.get(index) { + if is_continuation(byte) { + let index = index.checked_sub(1).expect("invalid string"); + let mut wide_surrogate = match suffix.get(..SURROGATE_LENGTH) { + Some(surrogate) => encode_wide(surrogate), + None => return false, + }; + let surrogate_wchar = wide_surrogate + .next() + .expect("failed decoding non-empty suffix"); + + if wide_surrogate.next().is_some() + || encode_wide(&string[index..]) + .take_while(Result::is_ok) + .nth(1) + != Some(surrogate_wchar) + { + return false; + } + suffix = &suffix[SURROGATE_LENGTH..]; + } + } + string.ends_with(suffix) +} + +pub(crate) fn starts_with(string: &[u8], mut prefix: &[u8]) -> bool { + if let Some(&byte) = string.get(prefix.len()) { + if is_continuation(byte) { + let index = match prefix.len().checked_sub(SURROGATE_LENGTH) { + Some(index) => index, + None => return false, + }; + let (substring, surrogate) = prefix.split_at(index); + let mut wide_surrogate = encode_wide(surrogate); + let 
surrogate_wchar = wide_surrogate + .next() + .expect("failed decoding non-empty prefix"); + + if surrogate_wchar.is_err() + || wide_surrogate.next().is_some() + || encode_wide(&string[index..]) + .next() + .expect("failed decoding non-empty substring") + != surrogate_wchar + { + return false; + } + prefix = substring; + } + } + string.starts_with(prefix) +} -- cgit v1.2.3