diff options
Diffstat (limited to 'third_party/rust/unix_str')
-rw-r--r-- | third_party/rust/unix_str/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/unix_str/Cargo.toml | 31 | ||||
-rw-r--r-- | third_party/rust/unix_str/LICENSE-APACHE | 176 | ||||
-rw-r--r-- | third_party/rust/unix_str/LICENSE-MIT | 23 | ||||
-rw-r--r-- | third_party/rust/unix_str/Readme.md | 30 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/lib.rs | 1385 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/lossy.rs | 222 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys.rs | 256 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys_common.rs | 39 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys_common/bytestring.rs | 45 |
10 files changed, 2208 insertions, 0 deletions
diff --git a/third_party/rust/unix_str/.cargo-checksum.json b/third_party/rust/unix_str/.cargo-checksum.json new file mode 100644 index 0000000000..d88b406350 --- /dev/null +++ b/third_party/rust/unix_str/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"94d1006cba1c84ee250e4e7fa8627d103bc0004553051a8fc086fc71922447ac","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","Readme.md":"193e04332e6cdecdcaafc73380a28b3faa1d0415ebf4b45afe2025ac81878eb9","src/lib.rs":"a80fdc043cf480188120c8c65c8c07ae3add9b75b79336e801626ed79fa5f493","src/lossy.rs":"38776b5b86f06b1432748c15e8c342b6fdc63488e1c27665d03c5e6361330531","src/sys.rs":"26e47413ca142003e6547cc44ed80f0f3bd948f0892d6e65ba8c8e8c54a4f3c5","src/sys_common.rs":"37105a8e48fb185534c1fc88f519dccbe97686427fe5b979b69f8fcbfca07899","src/sys_common/bytestring.rs":"1ab26007a3bccc3ba771cacbfef5307437d83af1c7f327c990d5d765b704c262"},"package":"2ace0b4755d0a2959962769239d56267f8a024fef2d9b32666b3dcd0946b0906"}
\ No newline at end of file diff --git a/third_party/rust/unix_str/Cargo.toml b/third_party/rust/unix_str/Cargo.toml new file mode 100644 index 0000000000..0250bfc6a4 --- /dev/null +++ b/third_party/rust/unix_str/Cargo.toml @@ -0,0 +1,31 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "unix_str" +version = "1.0.0" +authors = ["SnejUgal <contact@snejugal.ru>"] +description = "Unix-compatible strings regardless of platform." +readme = "./Readme.md" +keywords = ["unix", "str", "string"] +categories = ["no-std"] +license = "MIT OR Apache-2.0" +repository = "https://gitlab.com/SnejUgal/unix_str" + +[features] +alloc = [] +default = ["std"] +shrink_to = [] +std = ["alloc"] +toowned_clone_into = [] +unixstring_ascii = [] diff --git a/third_party/rust/unix_str/LICENSE-APACHE b/third_party/rust/unix_str/LICENSE-APACHE new file mode 100644 index 0000000000..1b5ec8b78e --- /dev/null +++ b/third_party/rust/unix_str/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/third_party/rust/unix_str/LICENSE-MIT b/third_party/rust/unix_str/LICENSE-MIT new file mode 100644 index 0000000000..31aa79387f --- /dev/null +++ b/third_party/rust/unix_str/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/unix_str/Readme.md b/third_party/rust/unix_str/Readme.md new file mode 100644 index 0000000000..93f0349c26 --- /dev/null +++ b/third_party/rust/unix_str/Readme.md @@ -0,0 +1,30 @@ +# `unix_str` + +Unix-compatible strings regardless of platform, including `#![no_std]` +environments. This crate is extracted from `std`. + +## Features + +- `shrink_to`: implements the unstable `shrink_to` method; +- `unixstring_ascii`: ASCII transformations, `std`'s unstable feature; +- `toowned_clone_into`: implements `ToOwned::clone_into`, an unstable method; +- `alloc`: implements `UnixString` and transformations with `Box`, `Rc` + and `Arc`; +- `std`: an alias for `alloc`. 
+ +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) + or http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) + or http://opensource.org/licenses/MIT) + +at your option. + +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. diff --git a/third_party/rust/unix_str/src/lib.rs b/third_party/rust/unix_str/src/lib.rs new file mode 100644 index 0000000000..bf669233c5 --- /dev/null +++ b/third_party/rust/unix_str/src/lib.rs @@ -0,0 +1,1385 @@ +//! Strings that are compatible with Unix-like operating systems. +//! +//! * [`UnixString`] and [`UnixStr`] are useful when you need to work with Unix strings. +//! Conversions between [`UnixString`], [`UnixStr`] and Rust strings work similarly +//! to those for `CString` and `CStr`. +//! +//! * [`UnixString`] represents an owned string in Unix's preferred +//! representation. +//! +//! * [`UnixStr`] represents a borrowed reference to a string in a format that +//! can be passed to a Unix-like operating system. It can be converted into +//! a UTF-8 Rust string slice in a similar way to [`UnixString`]. +//! +//! # Conversions +//! +//! [`UnixStr`] implements two methods, [`from_bytes`] and [`as_bytes`]. +//! These do inexpensive conversions from and to byte slices. +//! +//! Additionally, [`UnixString`] provides [`from_vec`] and [`into_vec`] methods +//! that consume their arguments, and take or produce vectors of [`u8`]. +//! +//! [`UnixString`]: struct.UnixString.html +//! [`UnixStr`]: struct.UnixStr.html +//! [`from_vec`]: struct.UnixString.html#method.from_vec +//! [`into_vec`]: struct.UnixString.html#method.into_vec +//! [`from_bytes`]: struct.UnixStrExt.html#method.from_bytes +//! 
[`as_bytes`]: struct.UnixStrExt.html#method.as_bytes + +#![cfg_attr(feature = "shrink_to", feature(shrink_to))] +#![cfg_attr(feature = "toowned_clone_into", feature(toowned_clone_into))] +#![no_std] + +#[cfg(feature = "alloc")] +extern crate alloc; + +use core::cmp; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::mem; + +#[cfg(feature = "alloc")] +use alloc::borrow::{Borrow, Cow, ToOwned}; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +#[cfg(feature = "alloc")] +use core::ops; +#[cfg(feature = "alloc")] +use core::str::FromStr; + +mod lossy; + +mod sys; +#[cfg(feature = "alloc")] +use sys::Buf; +use sys::Slice; + +mod sys_common; +use sys_common::AsInner; +#[cfg(feature = "alloc")] +use sys_common::{FromInner, IntoInner}; + +/// A type that can represent owned, mutable Unix strings, but is cheaply +/// inter-convertible with Rust strings. +/// +/// The need for this type arises from the fact that: +/// +/// * On Unix systems, strings are often arbitrary sequences of non-zero +/// bytes, in many cases interpreted as UTF-8. +/// +/// * In Rust, strings are always valid UTF-8, which may contain zeros. +/// +/// `UnixString` and [`UnixStr`] bridge this gap by simultaneously representing +/// Rust and platform-native string values, and in particular allowing a Rust +/// string to be converted into a “Unix” string with no cost if possible. +/// A consequence of this is that `UnixString` instances are *not* `NULL` +/// terminated; in order to pass to e.g., Unix system call, you should create +/// a `CStr`. +/// +/// `UnixString` is to [`&UnixStr`] as `String` is to `&str`: the former +/// in each pair are owned strings; the latter are borrowed references. 
+/// +/// Note, `UnixString` and [`UnixStr`] internally do not necessarily hold strings in the form native +/// to the platform: `UnixString`s are stored as a sequence of 8-bit values. +/// +/// # Creating an `UnixString` +/// +/// **From a Rust string**: `UnixString` implements `From<String>`, so you can +/// use `my_string.into()` to create an `UnixString` from a normal Rust string. +/// +/// **From slices:** Just like you can start with an empty Rust [`String`] +/// and then [`push_str`][String.push_str] `&str` sub-string slices into it, +/// you can create an empty `UnixString` with the [`new`] method and then push +/// string slices into it with the [`push`] method. +/// +/// # Extracting a borrowed reference to the whole OS string +/// +/// You can use the [`as_unix_str`] method to get a [`&UnixStr`] from +/// a `UnixString`; this is effectively a borrowed reference to the whole +/// string. +/// +/// # Conversions +/// +/// See the [module's toplevel documentation about conversions][conversions] +/// for a discussion on the traits which `UnixString` implements for +/// [conversions] from/to native representations. +/// +/// [`UnixStr`]: struct.UnixStr.html +/// [`&UnixStr`]: struct.UnixStr.html +/// [`CStr`]: struct.CStr.html +/// [`new`]: #method.new +/// [`push`]: #method.push +/// [`as_unix_str`]: #method.as_unix_str +/// [conversions]: index.html#conversions +#[derive(Clone)] +#[cfg(feature = "alloc")] +pub struct UnixString { + inner: Buf, +} + +/// Borrowed reference to a Unix string (see [`UnixString`]). +/// +/// This type represents a borrowed reference to a string in Unix's preferred +/// representation. +/// +/// `&UnixStr` is to [`UnixString`] as `&str` is to `String`: the former +/// in each pair are borrowed references; the latter are owned strings. +/// +/// See the [module's toplevel documentation about conversions][conversions] +/// for a discussion on the traits which `UnixStr` implements for [conversions] +/// from/to native representations. 
+/// +/// [`UnixString`]: struct.UnixString.html +/// [conversions]: index.html#conversions +// FIXME: +// `UnixStr::from_inner` current implementation relies on `UnixStr` being +// layout-compatible with `Slice`. When attribute privacy is implemented, +// `UnixStr` should be annotated as `#[repr(transparent)]`. Anyway, `UnixStr` +// representation and layout are considered implementation detail, are +// not documented and must not be relied upon. +pub struct UnixStr { + inner: Slice, +} + +#[cfg(feature = "alloc")] +impl UnixString { + /// Constructs a new empty `UnixString`. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::new(); + /// ``` + pub fn new() -> Self { + Self { + inner: Buf::from_string(String::new()), + } + } + + /// Converts to an [`UnixStr`] slice. + /// + /// [`UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixString, UnixStr}; + /// + /// let unix_string = UnixString::from("foo"); + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_string.as_unix_str(), unix_str); + /// ``` + pub fn as_unix_str(&self) -> &UnixStr { + self + } + + /// Converts the `UnixString` into a `String` if it contains valid Unicode data. + /// + /// On failure, ownership of the original `UnixString` is returned. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::from("foo"); + /// let string = unix_string.into_string(); + /// assert_eq!(string, Ok(String::from("foo"))); + /// ``` + pub fn into_string(self) -> Result<String, UnixString> { + self.inner + .into_string() + .map_err(|buf| UnixString { inner: buf }) + } + + /// Extends the string with the given [`&UnixStr`] slice. 
+ /// + /// [`&UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::from("foo"); + /// unix_string.push("bar"); + /// assert_eq!(&unix_string, "foobar"); + /// ``` + pub fn push<T: AsRef<UnixStr>>(&mut self, s: T) { + self.inner.push_slice(&s.as_ref().inner) + } + + /// Creates a new `UnixString` with the given capacity. + /// + /// The string will be able to hold exactly `capacity` length units of other + /// OS strings without reallocating. If `capacity` is 0, the string will not + /// allocate. + /// + /// See main `UnixString` documentation information about encoding. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::with_capacity(10); + /// let capacity = unix_string.capacity(); + /// + /// // This push is done without reallocating + /// unix_string.push("foo"); + /// + /// assert_eq!(capacity, unix_string.capacity()); + /// ``` + pub fn with_capacity(capacity: usize) -> Self { + Self { + inner: Buf::with_capacity(capacity), + } + } + + /// Truncates the `UnixString` to zero length. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::from("foo"); + /// assert_eq!(&unix_string, "foo"); + /// + /// unix_string.clear(); + /// assert_eq!(&unix_string, ""); + /// ``` + pub fn clear(&mut self) { + self.inner.clear() + } + + /// Returns the capacity this `UnixString` can hold without reallocating. + /// + /// See `UnixString` introduction for information about encoding. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::with_capacity(10); + /// assert!(unix_string.capacity() >= 10); + /// ``` + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Reserves capacity for at least `additional` more capacity to be inserted + /// in the given `UnixString`. 
+ /// + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::new(); + /// s.reserve(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + /// Reserves the minimum capacity for exactly `additional` more capacity to + /// be inserted in the given `UnixString`. Does nothing if the capacity is + /// already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer reserve if future insertions are expected. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::new(); + /// s.reserve_exact(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + /// Shrinks the capacity of the `UnixString` to match its length. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Shrinks the capacity of the `UnixString` with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// Panics if the current capacity is smaller than the supplied + /// minimum capacity. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(shrink_to)] + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to(10); + /// assert!(s.capacity() >= 10); + /// s.shrink_to(0); + /// assert!(s.capacity() >= 3); + /// ``` + #[inline] + #[cfg(feature = "shrink_to")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + /// Converts this `UnixString` into a boxed [`UnixStr`]. + /// + /// [`UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixString, UnixStr}; + /// + /// let s = UnixString::from("hello"); + /// + /// let b: Box<UnixStr> = s.into_boxed_unix_str(); + /// ``` + pub fn into_boxed_unix_str(self) -> Box<UnixStr> { + let rw = Box::into_raw(self.inner.into_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } + + /// Creates a `UnixString` from a byte vector. + /// + /// See the module documentation for an example. + /// + pub fn from_vec(vec: Vec<u8>) -> Self { + FromInner::from_inner(Buf { inner: vec }) + } + + /// Yields the underlying byte vector of this `UnixString`. + /// + /// See the module documentation for an example. + pub fn into_vec(self) -> Vec<u8> { + self.into_inner().inner + } +} + +#[cfg(feature = "alloc")] +impl From<String> for UnixString { + /// Converts a `String` into a [`UnixString`]. + /// + /// The conversion copies the data, and includes an allocation on the heap. 
+ /// + /// [`UnixString`]: struct.UnixString.html + fn from(s: String) -> Self { + UnixString { + inner: Buf::from_string(s), + } + } +} + +#[cfg(feature = "alloc")] +impl<T: ?Sized + AsRef<UnixStr>> From<&T> for UnixString { + fn from(s: &T) -> Self { + s.as_ref().to_unix_string() + } +} + +#[cfg(feature = "alloc")] +impl ops::Index<ops::RangeFull> for UnixString { + type Output = UnixStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &UnixStr { + UnixStr::from_inner(self.inner.as_slice()) + } +} + +#[cfg(feature = "alloc")] +impl ops::IndexMut<ops::RangeFull> for UnixString { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut UnixStr { + UnixStr::from_inner_mut(self.inner.as_mut_slice()) + } +} + +#[cfg(feature = "alloc")] +impl ops::Deref for UnixString { + type Target = UnixStr; + + #[inline] + fn deref(&self) -> &UnixStr { + &self[..] + } +} + +#[cfg(feature = "alloc")] +impl ops::DerefMut for UnixString { + #[inline] + fn deref_mut(&mut self) -> &mut UnixStr { + &mut self[..] + } +} + +#[cfg(feature = "alloc")] +impl Default for UnixString { + /// Constructs an empty `UnixString`. 
+ #[inline] + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "alloc")] +impl fmt::Debug for UnixString { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +#[cfg(feature = "alloc")] +impl PartialEq for UnixString { + fn eq(&self, other: &Self) -> bool { + &**self == &**other + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<str> for UnixString { + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<UnixString> for str { + fn eq(&self, other: &UnixString) -> bool { + &**other == self + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<&str> for UnixString { + fn eq(&self, other: &&str) -> bool { + **self == **other + } +} + +#[cfg(feature = "alloc")] +impl<'a> PartialEq<UnixString> for &'a str { + fn eq(&self, other: &UnixString) -> bool { + **other == **self + } +} + +#[cfg(feature = "alloc")] +impl Eq for UnixString {} + +#[cfg(feature = "alloc")] +impl PartialOrd for UnixString { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + (&**self).partial_cmp(&**other) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + &**self < &**other + } + #[inline] + fn le(&self, other: &Self) -> bool { + &**self <= &**other + } + #[inline] + fn gt(&self, other: &Self) -> bool { + &**self > &**other + } + #[inline] + fn ge(&self, other: &Self) -> bool { + &**self >= &**other + } +} + +#[cfg(feature = "alloc")] +impl PartialOrd<str> for UnixString { + #[inline] + fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> { + (&**self).partial_cmp(other) + } +} + +#[cfg(feature = "alloc")] +impl Ord for UnixString { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + (&**self).cmp(&**other) + } +} + +#[cfg(feature = "alloc")] +impl Hash for UnixString { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + (&**self).hash(state) + } +} + +impl UnixStr { + /// Coerces into an `UnixStr` slice. 
+ /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new("foo"); + /// ``` + #[inline] + pub fn new<S: AsRef<UnixStr> + ?Sized>(s: &S) -> &UnixStr { + s.as_ref() + } + + #[inline] + fn from_inner(inner: &Slice) -> &UnixStr { + // Safety: UnixStr is just a wrapper of Slice, + // therefore converting &Slice to &UnixStr is safe. + unsafe { &*(inner as *const Slice as *const UnixStr) } + } + + #[inline] + #[cfg(feature = "alloc")] + fn from_inner_mut(inner: &mut Slice) -> &mut UnixStr { + // Safety: UnixStr is just a wrapper of Slice, + // therefore converting &mut Slice to &mut UnixStr is safe. + // Any method that mutates UnixStr must be careful not to + // break platform-specific encoding, in particular Wtf8 on Windows. + unsafe { &mut *(inner as *mut Slice as *mut UnixStr) } + } + + /// Yields a `&str` slice if the `UnixStr` is valid Unicode. + /// + /// This conversion may entail doing a check for UTF-8 validity. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_str.to_str(), Some("foo")); + /// ``` + pub fn to_str(&self) -> Option<&str> { + self.inner.to_str() + } + + /// Converts an `UnixStr` to a `Cow<str>`. + /// + /// Any non-Unicode sequences are replaced with + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// + /// # Examples + /// + /// Calling `to_string_lossy` on an `UnixStr` with invalid unicode: + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + /// // respectively. The value 0x80 is a lone continuation byte, invalid + /// // in a UTF-8 sequence. 
+ /// let source = [0x66, 0x6f, 0x80, 0x6f]; + /// let unix_str = UnixStr::from_bytes(&source[..]); + /// + /// assert_eq!(unix_str.to_string_lossy(), "fo�o"); + /// ``` + #[cfg(feature = "alloc")] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + self.inner.to_string_lossy() + } + + /// Copies the slice into an owned [`UnixString`]. + /// + /// [`UnixString`]: struct.UnixString.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixStr, UnixString}; + /// + /// let unix_str = UnixStr::new("foo"); + /// let unix_string = unix_str.to_unix_string(); + /// assert_eq!(unix_string, UnixString::from("foo")); + /// ``` + #[cfg(feature = "alloc")] + pub fn to_unix_string(&self) -> UnixString { + UnixString { + inner: self.inner.to_owned(), + } + } + + /// Checks whether the `UnixStr` is empty. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new(""); + /// assert!(unix_str.is_empty()); + /// + /// let unix_str = UnixStr::new("foo"); + /// assert!(!unix_str.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.inner.is_empty() + } + + /// Returns the length of this `UnixStr`. + /// + /// Note that this does **not** return the number of bytes in the string in + /// OS string form. + /// + /// The length returned is that of the underlying storage used by `UnixStr`. + /// As discussed in the [`UnixString`] introduction, [`UnixString`] and + /// `UnixStr` store strings in a form best suited for cheap inter-conversion + /// between native-platform and Rust string forms, which may differ + /// significantly from both of them, including in storage size and encoding. + /// + /// This number is simply useful for passing to other methods, like + /// [`UnixString::with_capacity`] to avoid reallocations. 
+ /// + /// [`UnixString`]: struct.UnixString.html + /// [`UnixString::with_capacity`]: struct.UnixString.html#method.with_capacity + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new(""); + /// assert_eq!(unix_str.len(), 0); + /// + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_str.len(), 3); + /// ``` + pub fn len(&self) -> usize { + self.inner.inner.len() + } + + /// Converts a `Box<UnixStr>` into a [`UnixString`] without copying or + /// allocating. + /// + /// [`UnixString`]: struct.UnixString.html + #[cfg(feature = "alloc")] + pub fn into_unix_string(self: Box<UnixStr>) -> UnixString { + // Safety: relies on UnixStr being just a wrapper of Slice, so the + // pointer taken out of the Box<UnixStr> can be rebuilt as a Box<Slice>. + let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) }; + UnixString { + inner: Buf::from_box(boxed), + } + } + + /// Gets the underlying byte representation. + /// + /// Note: it is *crucial* that this API is private, to avoid + /// revealing the internal, platform-specific encodings. + #[inline] + fn bytes(&self) -> &[u8] { + // Safety: relies on Slice being layout-compatible with [u8]. + unsafe { &*(&self.inner as *const _ as *const [u8]) } + } + + /// Converts this string to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', but non-ASCII letters + /// are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("GRÜßE, JÜRGEN ❤"); + /// + /// s.make_ascii_lowercase(); + /// + /// assert_eq!("grÜße, jÜrgen ❤", s); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + /// Converts this string to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. 
+ /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// s.make_ascii_uppercase(); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// let s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_lowercase(&self) -> UnixString { + UnixString::from_inner(self.inner.to_ascii_lowercase()) + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 
+ /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// let s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_uppercase(&self) -> UnixString { + UnixString::from_inner(self.inner.to_ascii_uppercase()) + } + + /// Checks if all characters in this string are within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let ascii = UnixString::from("hello!\n"); + /// let non_ascii = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + /// Checks that two strings are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// assert!(UnixString::from("Ferris").eq_ignore_ascii_case("FERRIS")); + /// assert!(UnixString::from("Ferrös").eq_ignore_ascii_case("FERRöS")); + /// assert!(!UnixString::from("Ferrös").eq_ignore_ascii_case("FERRÖS")); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn eq_ignore_ascii_case<S: ?Sized + AsRef<UnixStr>>(&self, other: &S) -> bool { + self.inner.eq_ignore_ascii_case(&other.as_ref().inner) + } + + /// Creates a `UnixStr` from a byte slice. + /// + /// See the module documentation for an example. + pub fn from_bytes(slice: &[u8]) -> &Self { + unsafe { mem::transmute(slice) } + } + + /// Gets the underlying byte view of the `UnixStr` slice. + /// + /// See the module documentation for an example. 
+ pub fn as_bytes(&self) -> &[u8] { + &self.as_inner().inner + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Box<UnixStr> { + fn from(s: &UnixStr) -> Self { + let rw = Box::into_raw(s.inner.into_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } +} + +#[cfg(feature = "alloc")] +impl From<Cow<'_, UnixStr>> for Box<UnixStr> { + #[inline] + fn from(cow: Cow<'_, UnixStr>) -> Self { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[cfg(feature = "alloc")] +impl From<Box<UnixStr>> for UnixString { + /// Converts a `Box<UnixStr>` into a `UnixString` without copying or + /// allocating. + /// + /// [`UnixStr`]: struct.UnixStr.html + fn from(boxed: Box<UnixStr>) -> Self { + boxed.into_unix_string() + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Box<UnixStr> { + /// Converts a [`UnixString`] into a `Box<UnixStr>` without copying or + /// allocating. + /// + /// [`UnixString`]: struct.UnixString.html + fn from(s: UnixString) -> Self { + s.into_boxed_unix_str() + } +} + +#[cfg(feature = "alloc")] +impl Clone for Box<UnixStr> { + #[inline] + fn clone(&self) -> Self { + self.to_unix_string().into_boxed_unix_str() + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Arc<UnixStr> { + /// Converts a [`UnixString`] into an `Arc<UnixStr>` by copying the string + /// data into a newly allocated `Arc` buffer. + /// + /// [`UnixString`]: struct.UnixString.html + #[inline] + fn from(s: UnixString) -> Self { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Arc<UnixStr> { + #[inline] + fn from(s: &UnixStr) -> Self { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Rc<UnixStr> { + /// Converts a [`UnixString`] into an `Rc<UnixStr>` by copying the string + /// data into a newly allocated `Rc` buffer. 
+ /// + /// [`UnixString`]: struct.UnixString.html + #[inline] + fn from(s: UnixString) -> Self { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Rc<UnixStr> { + #[inline] + fn from(s: &UnixStr) -> Self { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<UnixString> for Cow<'a, UnixStr> { + #[inline] + fn from(s: UnixString) -> Self { + Cow::Owned(s) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UnixStr> for Cow<'a, UnixStr> { + #[inline] + fn from(s: &'a UnixStr) -> Self { + Cow::Borrowed(s) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UnixString> for Cow<'a, UnixStr> { + #[inline] + fn from(s: &'a UnixString) -> Self { + Cow::Borrowed(s.as_unix_str()) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<Cow<'a, UnixStr>> for UnixString { + #[inline] + fn from(s: Cow<'a, UnixStr>) -> Self { + s.into_owned() + } +} + +#[cfg(feature = "alloc")] +impl Default for Box<UnixStr> { + fn default() -> Self { + let rw = Box::into_raw(Slice::empty_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } +} + +impl Default for &UnixStr { + /// Creates an empty `UnixStr`. 
+ #[inline] + fn default() -> Self { + UnixStr::new("") + } +} + +impl PartialEq for UnixStr { + #[inline] + fn eq(&self, other: &UnixStr) -> bool { + self.bytes().eq(other.bytes()) + } +} + +impl PartialEq<str> for UnixStr { + #[inline] + fn eq(&self, other: &str) -> bool { + *self == *UnixStr::new(other) + } +} + +impl PartialEq<UnixStr> for str { + #[inline] + fn eq(&self, other: &UnixStr) -> bool { + *other == *UnixStr::new(self) + } +} + +impl Eq for UnixStr {} + +impl PartialOrd for UnixStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + self.bytes().partial_cmp(other.bytes()) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + self.bytes().lt(other.bytes()) + } + #[inline] + fn le(&self, other: &Self) -> bool { + self.bytes().le(other.bytes()) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + self.bytes().gt(other.bytes()) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + self.bytes().ge(other.bytes()) + } +} + +impl PartialOrd<str> for UnixStr { + #[inline] + fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> { + self.partial_cmp(Self::new(other)) + } +} + +// FIXME (#19470): cannot provide PartialOrd<UnixStr> for str until we +// have more flexible coherence rules. + +impl Ord for UnixStr { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.bytes().cmp(other.bytes()) + } +} + +#[cfg(feature = "alloc")] +macro_rules! 
impl_cmp { + ($lhs:ty, $rhs: ty) => { + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + <UnixStr as PartialEq>::eq(self, other) + } + } + + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + <UnixStr as PartialEq>::eq(self, other) + } + } + + impl<'a, 'b> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option<cmp::Ordering> { + <UnixStr as PartialOrd>::partial_cmp(self, other) + } + } + + impl<'a, 'b> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option<cmp::Ordering> { + <UnixStr as PartialOrd>::partial_cmp(self, other) + } + } + }; +} + +#[cfg(feature = "alloc")] +impl_cmp!(UnixString, UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(UnixString, &'a UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, &'b UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, UnixString); + +impl Hash for UnixStr { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.bytes().hash(state) + } +} + +impl fmt::Debug for UnixStr { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +#[cfg(feature = "alloc")] +impl Borrow<UnixStr> for UnixString { + fn borrow(&self) -> &UnixStr { + &self[..] 
+ } +} + +#[cfg(feature = "alloc")] +impl ToOwned for UnixStr { + type Owned = UnixString; + fn to_owned(&self) -> Self::Owned { + self.to_unix_string() + } + #[cfg(feature = "toowned_clone_into")] + fn clone_into(&self, target: &mut Self::Owned) { + self.inner.clone_into(&mut target.inner) + } +} + +impl AsRef<UnixStr> for UnixStr { + fn as_ref(&self) -> &UnixStr { + self + } +} + +#[cfg(feature = "alloc")] +impl AsRef<UnixStr> for UnixString { + #[inline] + fn as_ref(&self) -> &UnixStr { + self + } +} + +impl AsRef<UnixStr> for str { + #[inline] + fn as_ref(&self) -> &UnixStr { + UnixStr::from_inner(Slice::from_str(self)) + } +} + +#[cfg(feature = "alloc")] +impl AsRef<UnixStr> for String { + #[inline] + fn as_ref(&self) -> &UnixStr { + (&**self).as_ref() + } +} + +#[cfg(feature = "alloc")] +impl FromInner<Buf> for UnixString { + fn from_inner(buf: Buf) -> UnixString { + UnixString { inner: buf } + } +} + +#[cfg(feature = "alloc")] +impl IntoInner<Buf> for UnixString { + fn into_inner(self) -> Buf { + self.inner + } +} + +impl AsInner<Slice> for UnixStr { + #[inline] + fn as_inner(&self) -> &Slice { + &self.inner + } +} + +#[cfg(feature = "alloc")] +impl FromStr for UnixString { + type Err = core::convert::Infallible; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + Ok(UnixString::from(s)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use sys_common::{AsInner, IntoInner}; + + use alloc::rc::Rc; + use alloc::sync::Arc; + + #[test] + fn test_unix_string_with_capacity() { + let unix_string = UnixString::with_capacity(0); + assert_eq!(0, unix_string.inner.into_inner().capacity()); + + let unix_string = UnixString::with_capacity(10); + assert_eq!(10, unix_string.inner.into_inner().capacity()); + + let mut unix_string = UnixString::with_capacity(0); + unix_string.push("abc"); + assert!(unix_string.inner.into_inner().capacity() >= 3); + } + + #[test] + fn test_unix_string_clear() { + let mut unix_string = UnixString::from("abc"); + assert_eq!(3, 
unix_string.inner.as_inner().len()); + + unix_string.clear(); + assert_eq!(&unix_string, ""); + assert_eq!(0, unix_string.inner.as_inner().len()); + } + + #[test] + fn test_unix_string_capacity() { + let unix_string = UnixString::with_capacity(0); + assert_eq!(0, unix_string.capacity()); + + let unix_string = UnixString::with_capacity(10); + assert_eq!(10, unix_string.capacity()); + + let mut unix_string = UnixString::with_capacity(0); + unix_string.push("abc"); + assert!(unix_string.capacity() >= 3); + } + + #[test] + fn test_unix_string_reserve() { + let mut unix_string = UnixString::new(); + assert_eq!(unix_string.capacity(), 0); + + unix_string.reserve(2); + assert!(unix_string.capacity() >= 2); + + for _ in 0..16 { + unix_string.push("a"); + } + + assert!(unix_string.capacity() >= 16); + unix_string.reserve(16); + assert!(unix_string.capacity() >= 32); + + unix_string.push("a"); + + unix_string.reserve(16); + assert!(unix_string.capacity() >= 33) + } + + #[test] + fn test_unix_string_reserve_exact() { + let mut unix_string = UnixString::new(); + assert_eq!(unix_string.capacity(), 0); + + unix_string.reserve_exact(2); + assert!(unix_string.capacity() >= 2); + + for _ in 0..16 { + unix_string.push("a"); + } + + assert!(unix_string.capacity() >= 16); + unix_string.reserve_exact(16); + assert!(unix_string.capacity() >= 32); + + unix_string.push("a"); + + unix_string.reserve_exact(16); + assert!(unix_string.capacity() >= 33) + } + + #[test] + fn test_unix_string_default() { + let unix_string: UnixString = Default::default(); + assert_eq!("", &unix_string); + } + + #[test] + fn test_unix_str_is_empty() { + let mut unix_string = UnixString::new(); + assert!(unix_string.is_empty()); + + unix_string.push("abc"); + assert!(!unix_string.is_empty()); + + unix_string.clear(); + assert!(unix_string.is_empty()); + } + + #[test] + fn test_unix_str_len() { + let mut unix_string = UnixString::new(); + assert_eq!(0, unix_string.len()); + + unix_string.push("abc"); + 
assert_eq!(3, unix_string.len()); + + unix_string.clear(); + assert_eq!(0, unix_string.len()); + } + + #[test] + fn test_unix_str_default() { + let unix_str: &UnixStr = Default::default(); + assert_eq!("", unix_str); + } + + #[test] + fn into_boxed() { + let orig = "Hello, world!"; + let unix_str = UnixStr::new(orig); + let boxed: Box<UnixStr> = Box::from(unix_str); + let unix_string = unix_str.to_owned().into_boxed_unix_str().into_unix_string(); + assert_eq!(unix_str, &*boxed); + assert_eq!(&*boxed, &*unix_string); + assert_eq!(&*unix_string, unix_str); + } + + #[test] + fn boxed_default() { + let boxed = <Box<UnixStr>>::default(); + assert!(boxed.is_empty()); + } + + #[test] + #[cfg(feature = "toowned_clone_into")] + fn test_unix_str_clone_into() { + let mut unix_string = UnixString::with_capacity(123); + unix_string.push("hello"); + let unix_str = UnixStr::new("bonjour"); + unix_str.clone_into(&mut unix_string); + assert_eq!(unix_str, unix_string); + assert!(unix_string.capacity() >= 123); + } + + #[test] + fn into_rc() { + let orig = "Hello, world!"; + let unix_str = UnixStr::new(orig); + let rc: Rc<UnixStr> = Rc::from(unix_str); + let arc: Arc<UnixStr> = Arc::from(unix_str); + + assert_eq!(&*rc, unix_str); + assert_eq!(&*arc, unix_str); + + let rc2: Rc<UnixStr> = Rc::from(unix_str.to_owned()); + let arc2: Arc<UnixStr> = Arc::from(unix_str.to_owned()); + + assert_eq!(&*rc2, unix_str); + assert_eq!(&*arc2, unix_str); + } +} diff --git a/third_party/rust/unix_str/src/lossy.rs b/third_party/rust/unix_str/src/lossy.rs new file mode 100644 index 0000000000..270ae30d71 --- /dev/null +++ b/third_party/rust/unix_str/src/lossy.rs @@ -0,0 +1,222 @@ +use core::char; +use core::fmt::{self, Write}; +use core::mem; +use core::str as core_str; + +// https://tools.ietf.org/html/rfc3629 +static UTF8_CHAR_WIDTH: [u8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x3F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x7F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0x9F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0xBF + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, // 0xDF + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF +]; + +/// Given a first byte, determines how many bytes are in this UTF-8 character. +#[inline] +pub fn utf8_char_width(b: u8) -> usize { + UTF8_CHAR_WIDTH[b as usize] as usize +} + +/// Lossy UTF-8 string. +pub struct Utf8Lossy { + bytes: [u8], +} + +impl Utf8Lossy { + pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy { + // SAFETY: Both use the same memory layout, and UTF-8 correctness isn't required. + unsafe { mem::transmute(bytes) } + } + + pub fn chunks(&self) -> Utf8LossyChunksIter<'_> { + Utf8LossyChunksIter { + source: &self.bytes, + } + } +} + +/// Iterator over lossy UTF-8 string +#[allow(missing_debug_implementations)] +pub struct Utf8LossyChunksIter<'a> { + source: &'a [u8], +} + +#[derive(PartialEq, Eq, Debug)] +pub struct Utf8LossyChunk<'a> { + /// Sequence of valid chars. + /// Can be empty between broken UTF-8 chars. + pub valid: &'a str, + /// Single broken char, empty if none. + /// Empty iff iterator item is last. 
+ pub broken: &'a [u8], +} + +impl<'a> Iterator for Utf8LossyChunksIter<'a> { + type Item = Utf8LossyChunk<'a>; + + fn next(&mut self) -> Option<Utf8LossyChunk<'a>> { + if self.source.is_empty() { + return None; + } + + const TAG_CONT_U8: u8 = 128; + fn safe_get(xs: &[u8], i: usize) -> u8 { + *xs.get(i).unwrap_or(&0) + } + + let mut i = 0; + while i < self.source.len() { + let i_ = i; + + // SAFETY: `i` starts at `0`, is less than `self.source.len()`, and + // only increases, so `0 <= i < self.source.len()`. + let byte = unsafe { *self.source.get_unchecked(i) }; + i += 1; + + if byte < 128 { + } else { + let w = utf8_char_width(byte); + + macro_rules! error { + () => {{ + // SAFETY: We have checked up to `i` that source is valid UTF-8. + unsafe { + let r = Utf8LossyChunk { + valid: core_str::from_utf8_unchecked(&self.source[0..i_]), + broken: &self.source[i_..i], + }; + self.source = &self.source[i..]; + return Some(r); + } + }}; + } + + match w { + 2 => { + if safe_get(self.source, i) & 192 != TAG_CONT_U8 { + error!(); + } + i += 1; + } + 3 => { + match (byte, safe_get(self.source, i)) { + (0xE0, 0xA0..=0xBF) => (), + (0xE1..=0xEC, 0x80..=0xBF) => (), + (0xED, 0x80..=0x9F) => (), + (0xEE..=0xEF, 0x80..=0xBF) => (), + _ => { + error!(); + } + } + i += 1; + if safe_get(self.source, i) & 192 != TAG_CONT_U8 { + error!(); + } + i += 1; + } + 4 => { + match (byte, safe_get(self.source, i)) { + (0xF0, 0x90..=0xBF) => (), + (0xF1..=0xF3, 0x80..=0xBF) => (), + (0xF4, 0x80..=0x8F) => (), + _ => { + error!(); + } + } + i += 1; + if safe_get(self.source, i) & 192 != TAG_CONT_U8 { + error!(); + } + i += 1; + if safe_get(self.source, i) & 192 != TAG_CONT_U8 { + error!(); + } + i += 1; + } + _ => { + error!(); + } + } + } + } + + let r = Utf8LossyChunk { + // SAFETY: We have checked that the entire source is valid UTF-8. 
+ valid: unsafe { core_str::from_utf8_unchecked(self.source) }, + broken: &[], + }; + self.source = &[]; + Some(r) + } +} + +impl fmt::Display for Utf8Lossy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // If we're the empty string then our iterator won't actually yield + // anything, so perform the formatting manually + if self.bytes.is_empty() { + return "".fmt(f); + } + + for Utf8LossyChunk { valid, broken } in self.chunks() { + // If we successfully decoded the whole chunk as a valid string then + // we can return a direct formatting of the string which will also + // respect various formatting flags if possible. + if valid.len() == self.bytes.len() { + assert!(broken.is_empty()); + return valid.fmt(f); + } + + f.write_str(valid)?; + if !broken.is_empty() { + f.write_char(char::REPLACEMENT_CHARACTER)?; + } + } + Ok(()) + } +} + +impl fmt::Debug for Utf8Lossy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('"')?; + + for Utf8LossyChunk { valid, broken } in self.chunks() { + // Valid part. + // Here we partially parse UTF-8 again which is suboptimal. + { + let mut from = 0; + for (i, c) in valid.char_indices() { + let esc = c.escape_debug(); + // If char needs escaping, flush backlog so far and write, else skip + if esc.len() != 1 { + f.write_str(&valid[from..i])?; + for c in esc { + f.write_char(c)?; + } + from = i + c.len_utf8(); + } + } + f.write_str(&valid[from..])?; + } + + // Broken parts of string as hex escape. + for &b in broken { + write!(f, "\\x{:02x}", b)?; + } + } + + f.write_char('"') + } +} diff --git a/third_party/rust/unix_str/src/sys.rs b/third_party/rust/unix_str/src/sys.rs new file mode 100644 index 0000000000..cf1de79ee4 --- /dev/null +++ b/third_party/rust/unix_str/src/sys.rs @@ -0,0 +1,256 @@ +//! The underlying UnixString/UnixStr implementation: just a `Vec<u8>`/`[u8]`. 
+ +use crate::sys_common::bytestring::debug_fmt_bytestring; +#[cfg(feature = "alloc")] +use crate::sys_common::{AsInner, IntoInner}; +use core::fmt; +use core::mem; +use core::str; + +#[cfg(feature = "alloc")] +use alloc::borrow::Cow; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +#[cfg(all(feature = "alloc", feature = "toowned_clone_into"))] +use alloc::borrow::ToOwned; + +#[cfg(feature = "alloc")] +#[derive(Clone, Hash)] +pub(crate) struct Buf { + pub inner: Vec<u8>, +} + +// FIXME: +// `Buf::as_slice` current implementation relies +// on `Slice` being layout-compatible with `[u8]`. +// When attribute privacy is implemented, `Slice` should be annotated as `#[repr(transparent)]`. +// Anyway, `Slice` representation and layout are considered implementation detail, are +// not documented and must not be relied upon. 
+pub(crate) struct Slice { + pub inner: [u8], +} + +impl fmt::Debug for Slice { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + debug_fmt_bytestring(&self.inner, formatter) + } +} + +#[cfg(feature = "alloc")] +impl IntoInner<Vec<u8>> for Buf { + fn into_inner(self) -> Vec<u8> { + self.inner + } +} + +#[cfg(feature = "alloc")] +impl AsInner<[u8]> for Buf { + fn as_inner(&self) -> &[u8] { + &self.inner + } +} + +#[cfg(feature = "alloc")] +impl Buf { + pub fn from_string(s: String) -> Self { + Self { + inner: s.into_bytes(), + } + } + + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Buf { + inner: Vec::with_capacity(capacity), + } + } + + #[inline] + pub fn clear(&mut self) { + self.inner.clear() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + #[inline] + #[cfg(feature = "shrink_to")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + #[inline] + pub fn as_slice(&self) -> &Slice { + // Safety: Slice just wraps [u8], + // and &*self.inner is &[u8], therefore + // transmuting &[u8] to &Slice is safe. + unsafe { mem::transmute(&*self.inner) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Slice { + // Safety: Slice just wraps [u8], + // and &mut *self.inner is &mut [u8], therefore + // transmuting &mut [u8] to &mut Slice is safe. 
+ unsafe { mem::transmute(&mut *self.inner) } + } + + pub fn into_string(self) -> Result<String, Self> { + String::from_utf8(self.inner).map_err(|p| Self { + inner: p.into_bytes(), + }) + } + + pub fn push_slice(&mut self, s: &Slice) { + self.inner.extend_from_slice(&s.inner) + } + + #[inline] + pub fn into_box(self) -> Box<Slice> { + unsafe { mem::transmute(self.inner.into_boxed_slice()) } + } + + #[inline] + pub fn from_box(boxed: Box<Slice>) -> Self { + let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Self { + inner: inner.into_vec(), + } + } + + #[inline] + pub fn into_arc(&self) -> Arc<Slice> { + self.as_slice().into_arc() + } + + #[inline] + pub fn into_rc(&self) -> Rc<Slice> { + self.as_slice().into_rc() + } +} + +impl Slice { + #[inline] + fn from_u8_slice(s: &[u8]) -> &Self { + unsafe { mem::transmute(s) } + } + + #[inline] + pub fn from_str(s: &str) -> &Self { + Self::from_u8_slice(s.as_bytes()) + } + + pub fn to_str(&self) -> Option<&str> { + str::from_utf8(&self.inner).ok() + } + + #[cfg(feature = "alloc")] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.inner) + } + + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> Buf { + Buf { + inner: self.inner.to_vec(), + } + } + + #[cfg(all(feature = "alloc", feature = "toowned_clone_into"))] + pub fn clone_into(&self, buf: &mut Buf) { + self.inner.clone_into(&mut buf.inner) + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_box(&self) -> Box<Self> { + let boxed: Box<[u8]> = self.inner.into(); + unsafe { mem::transmute(boxed) } + } + + #[cfg(feature = "alloc")] + pub fn empty_box() -> Box<Self> { + let boxed: Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_arc(&self) -> Arc<Self> { + let arc: Arc<[u8]> = Arc::from(&self.inner); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Self) } + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_rc(&self) -> Rc<Self> { + let 
rc: Rc<[u8]> = Rc::from(&self.inner); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Self) } + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + #[inline] + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_lowercase(&self) -> Buf { + Buf { + inner: self.inner.to_ascii_lowercase(), + } + } + + #[inline] + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_uppercase(&self) -> Buf { + Buf { + inner: self.inner.to_ascii_uppercase(), + } + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + self.inner.eq_ignore_ascii_case(&other.inner) + } +} diff --git a/third_party/rust/unix_str/src/sys_common.rs b/third_party/rust/unix_str/src/sys_common.rs new file mode 100644 index 0000000000..c18994e4a2 --- /dev/null +++ b/third_party/rust/unix_str/src/sys_common.rs @@ -0,0 +1,39 @@ +//! Platform-independent platform abstraction +//! +//! This is the platform-independent portion of the standard library's +//! platform abstraction layer, whereas `std::sys` is the +//! platform-specific portion. +//! +//! The relationship between `std::sys_common`, `std::sys` and the +//! rest of `std` is complex, with dependencies going in all +//! directions: `std` depending on `sys_common`, `sys_common` +//! depending on `sys`, and `sys` depending on `sys_common` and `std`. +//! Ideally `sys_common` would be split into two and the dependencies +//! between them all would form a dag, facilitating the extraction of +//! `std::sys` from the standard library. 
+ +pub mod bytestring; + +/// A trait for viewing representations from std types +#[doc(hidden)] +pub trait AsInner<Inner: ?Sized> { + fn as_inner(&self) -> &Inner; +} + +/// A trait for viewing representations from std types +#[doc(hidden)] +pub trait AsInnerMut<Inner: ?Sized> { + fn as_inner_mut(&mut self) -> &mut Inner; +} + +/// A trait for extracting representations from std types +#[doc(hidden)] +pub trait IntoInner<Inner> { + fn into_inner(self) -> Inner; +} + +/// A trait for creating std types from internal representations +#[doc(hidden)] +pub trait FromInner<Inner> { + fn from_inner(inner: Inner) -> Self; +} diff --git a/third_party/rust/unix_str/src/sys_common/bytestring.rs b/third_party/rust/unix_str/src/sys_common/bytestring.rs new file mode 100644 index 0000000000..ac6b7e893f --- /dev/null +++ b/third_party/rust/unix_str/src/sys_common/bytestring.rs @@ -0,0 +1,45 @@ +use crate::lossy::{Utf8Lossy, Utf8LossyChunk}; +use core::fmt::{Formatter, Result, Write}; + +pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter<'_>) -> Result { + // Writes out a valid unicode string with the correct escape sequences + fn write_str_escaped(f: &mut Formatter<'_>, s: &str) -> Result { + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + f.write_str("\"")?; + for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() { + write_str_escaped(f, valid)?; + for b in broken { + write!(f, "\\x{:02X}", b)?; + } + } + f.write_str("\"") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fmt::{Debug, Formatter, Result}; + use alloc::format; + + #[test] + fn smoke() { + struct Helper<'a>(&'a [u8]); + + impl Debug for Helper<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + debug_fmt_bytestring(self.0, f) + } + } + + let input = b"\xF0hello,\tworld"; + let expected = r#""\xF0hello,\tworld""#; + let output = format!("{:?}", Helper(input)); + + assert!(output == expected); + } +} |