diff options
Diffstat (limited to 'vendor/byteyarn')
-rw-r--r-- | vendor/byteyarn/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | vendor/byteyarn/Cargo.toml | 26 | ||||
-rw-r--r-- | vendor/byteyarn/LICENSE.md | 202 | ||||
-rw-r--r-- | vendor/byteyarn/README.md | 59 | ||||
-rw-r--r-- | vendor/byteyarn/rust-toolchain.toml | 3 | ||||
-rw-r--r-- | vendor/byteyarn/rustfmt.toml | 2 | ||||
-rw-r--r-- | vendor/byteyarn/src/boxed.rs | 714 | ||||
-rw-r--r-- | vendor/byteyarn/src/convert.rs | 248 | ||||
-rw-r--r-- | vendor/byteyarn/src/lib.rs | 113 | ||||
-rw-r--r-- | vendor/byteyarn/src/raw.rs | 469 | ||||
-rw-r--r-- | vendor/byteyarn/src/reffed.rs | 410 | ||||
-rw-r--r-- | vendor/byteyarn/src/utf8.rs | 151 |
12 files changed, 0 insertions, 2398 deletions
diff --git a/vendor/byteyarn/.cargo-checksum.json b/vendor/byteyarn/.cargo-checksum.json deleted file mode 100644 index 57d38528b..000000000 --- a/vendor/byteyarn/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"Cargo.toml":"39d79ab0a70c48eb71cbe5005efb07e91ebdd33ea140e16fcecb637f47396c8c","LICENSE.md":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","README.md":"bd28680ccef99a774747ced223550280b6beebf0621d5c69804e5b338c405a62","rust-toolchain.toml":"eec2edd95c031d416738b93e3e243e96e2fee9eb72db010be08a07bc695f6c1b","rustfmt.toml":"98164c76a38c770fdaf8fcc7d787aab3ca9fe56326bfbe4ae4dac44ffaa3d0cf","src/boxed.rs":"fbeed4e00d5afcf7b996ed1bcbfca047ccd0ff293e0aeb515f078b7ee1f2dbd1","src/convert.rs":"ee8e1200fde41022d7d512cef9cfb55b6f708f15f5a73340352cdc18951c4ef8","src/lib.rs":"4073f25466d1efa5c1b4a48ec36f81f01109cb13829d026887b56fa6e18a6ebf","src/raw.rs":"025d32775afb4753aeea63dbc436addce03aedb1f3bea487381d573cee823552","src/reffed.rs":"ab67ace303a05ed7e33955e1cf888e8e8f540deb60446860d38cf9dae0d49bca","src/utf8.rs":"1beb18af4074e5d90dba2849724c870955682c040eb262a66499c98bd0f85032"},"package":"a7534301c0ea17abb4db06d75efc7b4b0fa360fce8e175a4330d721c71c942ff"}
\ No newline at end of file diff --git a/vendor/byteyarn/Cargo.toml b/vendor/byteyarn/Cargo.toml deleted file mode 100644 index 0fad5b067..000000000 --- a/vendor/byteyarn/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2021" -name = "byteyarn" -version = "0.2.3" -authors = ["Miguel Young de la Sota <mcyoung@mit.edu>"] -description = "hyper-compact strings" -homepage = "https://github.com/mcy/byteyarn" -readme = "README.md" -keywords = [ - "string", - "text", - "binary", -] -license = "Apache-2.0" -repository = "https://github.com/mcy/byteyarn" diff --git a/vendor/byteyarn/LICENSE.md b/vendor/byteyarn/LICENSE.md deleted file mode 100644 index d64569567..000000000 --- a/vendor/byteyarn/LICENSE.md +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/byteyarn/README.md b/vendor/byteyarn/README.md deleted file mode 100644 index 34d096c78..000000000 --- a/vendor/byteyarn/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# byteyarn - -`byteyarn` - Space-efficient byte strings π§Άπββ¬ - -A `Yarn` is a highly optimized string type that provides a number of -useful properties over `String`: - -* Always two pointers wide, so it is always passed into and out of functions - in registers. -* Small string optimization (SSO) up to 15 bytes on 64-bit architectures. -* Can be either an owned buffer or a borrowed buffer (like `Cow<str>`). -* Can be upcast to `'static` lifetime if it was constructed from a - known-static string. - -The main caveat is that `Yarn`s cannot be easily appended to, since they -do not track an internal capacity, and the slice returned by -`Yarn::as_slice()` does not have the same pointer stability properties as -`String` (these are rarely needed, though). - ---- - -Yarns are useful for situations in which a copy-on-write string is necessary -and most of the strings are relatively small. Although `Yarn` itself is -not `Copy`, there is a separate `YarnRef` type that is. These types -have equivalent representations, and can be cheaply cast between each other. - -The easiest way to create a yarn is with the `yarn!()` and `byarn!()` -macros, which are similar to `format!()`. 
- -```rust -// Create a new yarn via `fmt`ing. -let yarn = yarn!("Answer: {}", 42); - -// Convert that yarn into a reference. -let ry: YarnRef<str> = yarn.as_ref(); - -// Try up-casting the yarn into an "immortal yarn" without copying. -let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); - -assert_eq!(yarn, copy); -``` - -Yarns are intended for storing text, either as UTF-8 or as -probably-UTF-8 bytes; `Yarn<str>` and `Yarn<[u8]>` serve these purposes, -and can be inter-converted with each other. The `Yarn::utf8_chunks()` -function can be used to iterate over definitely-valid-UTF-8 chunks within -a string. - -Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as -strings would. In particular, invalid UTF-8 is converted into either `\xNN` -escapes or replacement characters (for `Debug` and `Display` respectively). - -```rust -let invalid = ByteYarn::from_byte(0xff); -assert_eq!(format!("{invalid:?}"), r#""\xFF""#); -assert_eq!(format!("{invalid}"), "οΏ½"); -``` - -License: Apache-2.0 diff --git a/vendor/byteyarn/rust-toolchain.toml b/vendor/byteyarn/rust-toolchain.toml deleted file mode 100644 index f0d51c34a..000000000 --- a/vendor/byteyarn/rust-toolchain.toml +++ /dev/null @@ -1,3 +0,0 @@ -[toolchain] -channel = "1.66.0" -profile = "default" diff --git a/vendor/byteyarn/rustfmt.toml b/vendor/byteyarn/rustfmt.toml deleted file mode 100644 index 5ca604090..000000000 --- a/vendor/byteyarn/rustfmt.toml +++ /dev/null @@ -1,2 +0,0 @@ -max_width = 80 -tab_spaces = 2
\ No newline at end of file diff --git a/vendor/byteyarn/src/boxed.rs b/vendor/byteyarn/src/boxed.rs deleted file mode 100644 index c14beae88..000000000 --- a/vendor/byteyarn/src/boxed.rs +++ /dev/null @@ -1,714 +0,0 @@ -use std::cmp::Ordering; -use std::fmt; -use std::hash::Hash; -use std::hash::Hasher; -use std::marker::PhantomData; -use std::mem; -use std::ops::Deref; -use std::str; -use std::str::Utf8Error; - -use crate::raw::RawYarn; -use crate::Utf8Chunks; -use crate::YarnRef; - -#[cfg(doc)] -use crate::*; - -/// An optimized, possibly heap-allocated string type. -/// -/// This is the core data structure of `byteyarn`. It is a string that can be -/// borrowed, boxed, or inlined. Generally, you'll want to use the [`Yarn`] -/// or [`ByteYarn`] type aliases directly, instead. -/// -/// The lifetime `'a` is the shortest lifetime this yarn can borrow for; often, -/// this will be `'static`. -/// -/// See the [crate documentation](crate) for general information. -#[repr(transparent)] -pub struct YarnBox<'a, Buf = [u8]> -where - Buf: crate::Buf + ?Sized, -{ - raw: RawYarn, - _ph: PhantomData<&'a Buf>, -} - -impl<'a, Buf> YarnBox<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - /// Returns a reference to an empty yarn of any lifetime. - /// - /// ``` - /// # use byteyarn::*; - /// let empty: &Yarn = Yarn::empty(); - /// assert_eq!(empty, ""); - /// ``` - /// - /// This will also be found by the `Default` impl for `&YarnBox`. - pub fn empty<'b>() -> &'b Self { - unsafe { - // SAFETY: YarnBox is a transparent wrapper over RawYarn; even though - // YarnBox has a destructor, this is fine, because this lifetime is 'static - // and will thus never run a destructor. - mem::transmute::<&'b RawYarn, &'b Self>(RawYarn::empty()) - } - } - - /// Returns a yarn pointing to the given slice, without copying. 
- /// - /// ``` - /// # use byteyarn::*; - /// let foo = Yarn::new("Byzantium"); - /// assert_eq!(foo.len(), 9); - /// ``` - pub const fn new(buf: &'a Buf) -> Self { - YarnRef::new(buf).to_box() - } - - /// Returns a new yarn containing the contents of the given slice. - /// This function will always return an inlined string, or `None` if the - /// given buffer is too big. - /// - /// Note that the maximum inlined size is architecture-dependent. - /// - /// ``` - /// # use byteyarn::*; - /// let smol = Yarn::inlined("smol"); - /// assert_eq!(smol.unwrap(), "smol"); - /// - /// let big = Yarn::inlined("biiiiiiiiiiiiiiig"); - /// assert!(big.is_none()); - /// ``` - pub const fn inlined(buf: &Buf) -> Option<Self> { - match YarnRef::inlined(buf) { - Some(y) => Some(y.to_box()), - None => None, - } - } - - /// Returns a new yarn that aliases the contents of this yarn. - /// - /// In effect, this is like `Copy`ing out of `*self`, by shortening the - /// lifetime of the yarn. - /// - /// ``` - /// # use byteyarn::*; - /// /// Joins two yarns with "and", but re-uses the buffer if one of them is - /// /// `None`. - /// fn and<'a>(a: Option<&'a YarnBox<str>>, b: Option<&'a YarnBox<str>>) -> YarnBox<'a, str> { - /// match (a, b) { - /// (Some(a), Some(b)) => yarn!("{a} and {b}"), - /// (Some(a), None) => a.aliased(), - /// (None, Some(b)) => b.aliased(), - /// (None, None) => Yarn::default(), - /// } - /// } - /// - /// assert_eq!(and(Some(&yarn!("apples")), Some(&yarn!("oranges"))), "apples and oranges"); - /// assert_eq!(and(Some(&yarn!("apples")), None), "apples"); - /// assert_eq!(and(None, None), ""); - /// ``` - /// - /// This function will be found by `From` impls from `&YarnBox`. - /// - /// Note also that unlike `YarnBox::new(y.as_ref())`, this will ensure the - /// yarn remembers that it's a static string. 
- /// - /// ``` - /// # use byteyarn::*; - /// use std::ptr; - /// - /// let lit = Yarn::from_static("nice long static string constant"); - /// - /// // Immortalizing the aliased yarn does not require a new heap allocation. - /// assert!(ptr::eq(lit.aliased().immortalize().as_slice(), lit.as_slice())); - /// - /// // We forgot this yarn was static, so immortalization requires a copy. - /// assert!(!ptr::eq(YarnBox::<str>::new(&lit).immortalize().as_slice(), lit.as_slice())); - /// ``` - pub const fn aliased(&self) -> YarnBox<Buf> { - // NOTE: going through YarnRef will ensure we preserve static-ness. - self.as_ref().to_box() - } - - /// Returns a yarn containing a single UTF-8-encoded Unicode scalar. - /// This function does not allocate: every `char` fits in an inlined yarn. - /// - /// ``` - /// # use byteyarn::*; - /// let a = Yarn::from_char('a'); - /// assert_eq!(a, "a"); - /// ``` - pub const fn from_char(c: char) -> Self { - YarnRef::<Buf>::from_char(c).to_box() - } - - /// Returns a yarn by taking ownership of an allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let str = String::from("big string box").into_boxed_str(); - /// let yarn = Yarn::from_boxed_str(str); - /// assert_eq!(yarn, "big string box"); - /// ``` - pub fn from_boxed_str(string: Box<str>) -> Self { - let raw = RawYarn::from_heap(string.into()); - unsafe { - // SAFETY: both [u8] and str can be safely constructed from a str. We have - // unique ownership of raw's allocation because from_heap guarantees it. - Self::from_raw(raw) - } - } - - /// Returns a yarn by taking ownership of an allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let str = String::from("big string box"); - /// let yarn = Yarn::from_string(str); - /// assert_eq!(yarn, "big string box"); - /// ``` - pub fn from_string(string: String) -> Self { - Self::from_boxed_str(string.into()) - } - - /// Checks whether this yarn is empty. 
- /// - /// ``` - /// # use byteyarn::*; - /// assert!(yarn!("").is_empty()); - /// assert!(!yarn!("xyz").is_empty()); - /// ``` - pub const fn is_empty(&self) -> bool { - self.as_ref().is_empty() - } - - /// Returns the length of this yarn, in bytes. - /// - /// ``` - /// # use byteyarn::*; - /// assert_eq!(yarn!("").len(), 0); - /// assert_eq!(yarn!("42").len(), 2); - /// assert_eq!(yarn!("η«").len(), 3); - /// assert_eq!(yarn!("πββ¬").len(), 10); - /// - /// assert_eq!(ByteYarn::new(b"").len(), 0); - /// assert_eq!(ByteYarn::new(b"xyz").len(), 3); - /// assert_eq!(ByteYarn::new(&[1, 2, 3]).len(), 3); - /// ``` - pub const fn len(&self) -> usize { - self.as_ref().len() - } - - /// Converts this yarn into a slice. - /// - /// ``` - /// # use byteyarn::*; - /// let yarn = yarn!("jellybeans"); - /// let s: &str = yarn.as_slice(); - /// assert_eq!(s, "jellybeans"); - /// - /// let yarn = ByteYarn::new(b"jellybeans"); - /// let s: &[u8] = yarn.as_slice(); - /// assert_eq!(s, b"jellybeans"); - /// ``` - pub const fn as_slice(&self) -> &Buf { - unsafe { - // SAFETY: converting back to buf from raw is ok here because this is - // evidently a round-trip. - YarnRef::raw2buf(self.as_bytes()) - } - } - - /// Converts this owning yarn into a reference yarn. - /// - /// ``` - /// # use byteyarn::*; - /// let yarn = yarn!("jellybeans"); - /// let ry = yarn.as_ref(); - /// assert_eq!(ry, "jellybeans"); - /// ``` - pub const fn as_ref(&self) -> YarnRef<Buf> { - if let Some(inl) = YarnRef::inlined(self.as_slice()) { - return inl; - } - - let raw = match self.raw.on_heap() { - true => unsafe { - // SAFETY: The returned YarnRef will prevent self from being used - // until this raw yarn goes away, because it borrows self. - RawYarn::alias_slice(self.as_bytes()) - }, - false => self.raw, - }; - - unsafe { - // SAFETY: The lifetime of the output is shorter than that of - // the input, so raw is valid for a yarn reference. 
Even in the case - // that self.on_heap, the aliased slice will not outlive the &self of - // this function. - YarnRef::from_raw(raw) - } - } - - /// Converts this owning yarn into a reference yarn, with the same lifetime - /// as this yarn. - /// - /// Note that if this yarn is on the heap, this function will return `None`. - /// - /// ``` - /// # use byteyarn::*; - /// let yarn = yarn!("lots and lots of jellybeans"); - /// assert_eq!(yarn.to_ref().unwrap(), "lots and lots of jellybeans"); - /// - /// let boxed = Yarn::from_string(String::from("lots and lots of jellybeans")); - /// assert!(boxed.to_ref().is_none()); - /// ``` - pub const fn to_ref(&self) -> Option<YarnRef<'a, Buf>> { - if self.raw.on_heap() { - return None; - } - - unsafe { - // SAFETY: The lifetime of the output is equal than that of - // the input, so raw is valid for a yarn reference. We have excluded the - // on_heap case above. - Some(YarnRef::from_raw(self.raw)) - } - } - - /// Converts this yarn into a byte slice. - /// ``` - /// # use byteyarn::*; - /// assert_eq!(yarn!("").as_bytes(), b""); - /// assert_eq!(yarn!("η«").as_bytes(), b"\xE7\x8C\xAB"); - /// - /// assert_eq!(ByteYarn::new(b"xyz").as_bytes(), b"xyz"); - /// assert_eq!(ByteYarn::new(&[1, 2, 3]).as_bytes(), [1, 2, 3]); - /// ``` - pub const fn as_bytes(&self) -> &[u8] { - self.raw.as_slice() - } - - /// Converts this yarn into a boxed slice, potentially by copying it. - /// - /// ``` - /// # use byteyarn::*; - /// let boxed = yarn!("jellybeans").into_boxed_bytes(); - /// assert_eq!(&boxed[..], b"jellybeans"); - /// ``` - pub fn into_boxed_bytes(self) -> Box<[u8]> { - let mut raw = self.into_raw(); - if !raw.on_heap() { - return raw.as_slice().into(); - } - - unsafe { - // SAFETY: raw is guaranteed to be on the heap, so this slice is on the - // heap with the correct layout; because we called into_raw(), this - // reference is uniquely owned. 
- Box::from_raw(raw.as_mut_slice()) - } - } - - /// Converts this yarn into a vector, potentially by copying it. - /// - /// ``` - /// # use byteyarn::*; - /// let mut vec = ByteYarn::new(b"jellybeans").into_vec(); - /// vec.extend_from_slice(b" & KNUCKLES"); - /// let yarn = ByteYarn::from_vec(vec); - /// - /// assert_eq!(yarn, b"jellybeans & KNUCKLES"); - /// ``` - pub fn into_vec(self) -> Vec<u8> { - self.into_boxed_bytes().into() - } - - /// Converts this yarn into a byte yarn. - pub const fn into_bytes(self) -> YarnBox<'a, [u8]> { - unsafe { - // SAFETY: The lifetimes are the same, and [u8] is constructible from - // either a [u8] or str, so this is just weakening the user-facing type. - YarnBox::from_raw(self.into_raw()) - } - } - - /// Extends the lifetime of this yarn if this yarn is dynamically known to - /// point to immortal memory. - /// - /// If it doesn't, the contents are copied into a fresh heap allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let bytes = Vec::from(*b"crunchcrunchcrunch"); - /// let yarn = YarnBox::new(&*bytes); - /// - /// let immortal: ByteYarn = yarn.immortalize(); - /// drop(bytes); // Show that yarn continues to exist despite `bytes` going - /// // away. - /// - /// assert_eq!(immortal, b"crunchcrunchcrunch"); - /// ``` - pub fn immortalize(self) -> YarnBox<'static, Buf> { - if self.raw.is_immortal() { - unsafe { - // SAFETY: We just validated that this raw is in fact suitable for use - // with 'static lifetime, and all this cast is doing is extending the - // lifetime on self. - return YarnBox::from_raw(self.into_raw()); - } - } - - let raw = RawYarn::copy_slice(self.as_bytes()); - unsafe { - // SAFETY: RawYarn::copy_slice always returns an immortal, uniquely-owned - // value. - YarnBox::from_raw(raw) - } - } - - /// Returns a yarn consisting of the concatenation of the given slices. - /// - /// Does not allocate if the resulting concatenation can be inlined. 
- /// - /// ``` - /// # use byteyarn::*; - /// let yarn = Yarn::concat(&["foo", "bar", "baz"]); - /// assert_eq!(yarn, "foobarbaz"); - /// ``` - pub fn concat(bufs: &[impl AsRef<Buf>]) -> Self { - let total_len = bufs - .iter() - .map(|b| YarnRef::buf2raw(b.as_ref()).len()) - .sum(); - let iter = bufs.iter().map(|b| YarnRef::buf2raw(b.as_ref())); - - unsafe { Self::from_raw(RawYarn::concat(total_len, iter)) } - } - - /// Tries to inline this yarn, if it's small enough. - /// - /// This operation has no directly visible side effects, and is only intended - /// to provide a way to relieve memory pressure. In general, you should not - /// have to call this function directly. - pub fn inline_in_place(&mut self) { - if let Some(inlined) = Self::inlined(self.as_slice()) { - *self = inlined; - } - } - - /// Leaks any heap allocation associated with this yarn. - /// - /// The allocation is tagged as "static", so upcasting via - /// [`Yarn::immortalize()`] will not need to reallocate. - pub fn leak(&mut self) { - if !self.raw.on_heap() { - return; - } - - unsafe { - // SAFETY: We have unique ownership of this yarn, and we know it's HEAP, - // so updating the tag from HEAP to STATIC will not change anything - // except to make it immutable and to inhibit the destructor. - self.raw = RawYarn::from_ptr_len_tag( - self.as_bytes().as_ptr(), - self.len(), - RawYarn::STATIC, - ); - } - } - - /// Returns an iterator over the UTF-8 (or otherwise) chunks in this string. - /// - /// This iterator is also used for the `Debug` and `Display` formatter - /// implementations. 
- /// - /// ``` - /// # use byteyarn::*; - /// let yarn = ByteYarn::new(b"abc\xFF\xFE\xFF\xF0\x9F\x90\x88\xE2\x80\x8D\xE2\xAC\x9B!"); - /// let chunks = yarn.utf8_chunks().collect::<Vec<_>>(); - /// assert_eq!(chunks, [ - /// Ok("abc"), - /// Err(&[0xff][..]), - /// Err(&[0xfe][..]), - /// Err(&[0xff][..]), - /// Ok("πββ¬!"), - /// ]); - /// - /// assert_eq!(format!("{yarn:?}"), r#""abc\xFF\xFE\xFFπ\u{200d}β¬!""#); - /// assert_eq!(format!("{yarn}"), "abcοΏ½οΏ½οΏ½πββ¬!"); - /// ``` - pub fn utf8_chunks(&self) -> Utf8Chunks { - Utf8Chunks::new(self.as_bytes()) - } - - /// Returns a new yarn wrapping the given raw yarn. - /// - /// # Safety - /// - /// If `raw` is aliased, its lifetime must not be shorter than 'a. - /// - /// If `raw` is heap-allocated, no other yarn must be holding it. - pub(crate) const unsafe fn from_raw(raw: RawYarn) -> Self { - Self { - raw, - _ph: PhantomData, - } - } - - /// Consumes self, inhibits the destructor, and returns the raw yarn. - pub(crate) const fn into_raw(self) -> RawYarn { - let raw = self.raw; - mem::forget(self); - raw - } -} - -impl<Buf> YarnBox<'static, Buf> -where - Buf: crate::Buf + ?Sized, -{ - /// Returns a yarn pointing to the given slice, without copying. This function - /// has the benefit of creating a yarn that remembers that it came from a - /// static string, meaning that it can be dynamically upcast back to a - /// `'static` lifetime. - /// - /// This function will *not* be found by `From` impls. - pub const fn from_static(buf: &'static Buf) -> Self { - YarnRef::from_static(buf).to_box() - } -} - -impl<'a> YarnBox<'a, [u8]> { - /// Returns a yarn containing a single byte, without allocating. - /// - /// ``` - /// # use byteyarn::*; - /// let a = ByteYarn::from_byte(0x20); - /// assert_eq!(a, b" "); - /// ``` - pub const fn from_byte(c: u8) -> Self { - YarnRef::from_byte(c).to_box() - } - - /// Returns a yarn by taking ownership of the given allocation. 
- /// - /// ``` - /// # use byteyarn::*; - /// let str = Box::new([0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]); - /// let yarn = ByteYarn::from_boxed_bytes(str); - /// assert_eq!(yarn, "πββ¬".as_bytes()); - /// ``` - pub fn from_boxed_bytes(bytes: Box<[u8]>) -> Self { - let raw = RawYarn::from_heap(bytes); - unsafe { Self::from_raw(raw) } - } - - /// Returns a yarn by taking ownership of the given allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let str = vec![0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]; - /// let yarn = ByteYarn::from_vec(str); - /// assert_eq!(yarn, "πββ¬".as_bytes()); - /// ``` - pub fn from_vec(bytes: Vec<u8>) -> Self { - Self::from_boxed_bytes(bytes.into_boxed_slice()) - } - - /// Tries to convert this yarn into a UTF-8 yarn via [`str::from_utf8()`]. - /// - /// ``` - /// # use byteyarn::*; - /// let yarn = ByteYarn::new(&[0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]); - /// assert_eq!(yarn.to_utf8().unwrap(), "πββ¬"); - /// - /// assert!(ByteYarn::from_byte(0xff).to_utf8().is_err()); - /// ``` - pub fn to_utf8(self) -> Result<YarnBox<'a, str>, Utf8Error> { - self.to_utf8_or_bytes().map_err(|(_, e)| e) - } - - /// Tries to convert this yarn into a UTF-8 yarn via [`str::from_utf8()`]. - /// - /// If conversion fails, the original yarn is returned with the error. - /// - /// ``` - /// # use byteyarn::*; - /// let blob = ByteYarn::new(&[0xff; 5]); - /// let (bad, _) = blob.to_utf8_or_bytes().unwrap_err(); - /// - /// assert_eq!(bad, &[0xff; 5]); - /// ``` - pub fn to_utf8_or_bytes(self) -> Result<YarnBox<'a, str>, (Self, Utf8Error)> { - if let Err(e) = str::from_utf8(self.as_bytes()) { - return Err((self, e)); - } - unsafe { Ok(YarnBox::from_raw(self.into_raw())) } - } - - /// Returns a mutable reference into this yarn's internal buffer. 
- /// - /// If the buffer is not uniquely owned (e.g., it is an alias of some other - /// buffer or a string constant) this function will first perform a copy and - /// possibly a heap allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let mut yarn = ByteYarn::new(b"const but very long"); - /// assert!(yarn.try_mut().is_none()); - /// - /// let mut smol = ByteYarn::new(b"smol const"); - /// smol.try_mut().unwrap()[3] = b'g'; - /// assert_eq!(smol, b"smog const"); - /// ``` - pub fn try_mut(&mut self) -> Option<&mut [u8]> { - self.inline_in_place(); - if !self.raw.on_heap() && !self.raw.is_small() { - return None; - } - - Some(self.as_mut()) - } - - /// Returns a mutable reference into this yarn's internal buffer. - /// - /// If the buffer is not uniquely owned (e.g., it is an alias of some other - /// buffer or a string constant) this function will first perform a copy and - /// possibly a heap allocation. - /// - /// ``` - /// # use byteyarn::*; - /// let mut yarn = ByteYarn::new(b"const but very long"); - /// yarn.as_mut()[17] = b'_'; - /// assert_eq!(yarn, b"const but very lo_g"); - /// ``` - #[allow(clippy::should_implement_trait)] - pub fn as_mut(&mut self) -> &mut [u8] { - self.inline_in_place(); - if !self.raw.on_heap() && !self.raw.is_small() { - *self = Self::from_boxed_bytes(mem::take(self).into_boxed_bytes()); - } - - unsafe { self.raw.as_mut_slice() } - } -} - -impl YarnBox<'_, str> { - /// Builds a new yarn from the given formatting arguments - /// (see [`format_args!()`]), allocating only when absolutely necessary. - /// - /// In general, you'll want to use the [`yarn!()`] macro, instead. - pub fn from_fmt(args: fmt::Arguments) -> Self { - unsafe { YarnBox::from_raw(RawYarn::from_fmt_args(args)) } - } - - /// Converts this yarn into a string slice. - pub fn as_str(&self) -> &str { - self.as_slice() - } - - /// Converts this yarn into a boxed slice, potentially by copying it. 
- pub fn into_boxed_str(self) -> Box<str> { - self.into_string().into() - } - - /// Converts this yarn into a string, potentially by copying it. - pub fn into_string(self) -> String { - unsafe { String::from_utf8_unchecked(self.into_vec()) } - } -} - -impl<Buf> Deref for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - type Target = Buf; - fn deref(&self) -> &Buf { - self.as_slice() - } -} - -impl<Buf> Drop for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn drop(&mut self) { - unsafe { self.raw.destroy() } - } -} - -impl<Buf> Clone for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn clone(&self) -> Self { - if let Some(yr) = self.to_ref() { - return yr.to_box(); - } - - let copy = RawYarn::copy_slice(self.as_bytes()); - unsafe { Self::from_raw(copy) } - } -} - -impl<Buf: crate::Buf + ?Sized> fmt::Debug for YarnBox<'_, Buf> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&self.as_ref(), f) - } -} - -impl<Buf: crate::Buf + ?Sized> fmt::Display for YarnBox<'_, Buf> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.as_ref(), f) - } -} - -impl<Slice, Buf> PartialEq<Slice> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, - Slice: AsRef<Buf> + ?Sized, -{ - fn eq(&self, that: &Slice) -> bool { - self.as_slice() == that.as_ref() - } -} - -impl<Buf: crate::Buf + Eq + ?Sized> Eq for YarnBox<'_, Buf> {} - -impl<Slice, Buf> PartialOrd<Slice> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, - Slice: AsRef<Buf> + ?Sized, -{ - fn partial_cmp(&self, that: &Slice) -> Option<Ordering> { - self.as_slice().partial_cmp(that.as_ref()) - } -} - -impl<Buf: crate::Buf + ?Sized> Ord for YarnBox<'_, Buf> { - fn cmp(&self, that: &Self) -> Ordering { - self.as_slice().cmp(that.as_slice()) - } -} - -impl<Buf: crate::Buf + ?Sized> Hash for YarnBox<'_, Buf> { - fn hash<H: Hasher>(&self, state: &mut H) { - self.as_slice().hash(state) - } -} - -impl<Buf: crate::Buf + ?Sized> Default for 
YarnBox<'_, Buf> { - fn default() -> Self { - <&Self>::default().clone() - } -} - -impl<Buf: crate::Buf + ?Sized> Default for &YarnBox<'_, Buf> { - fn default() -> Self { - YarnBox::empty() - } -} diff --git a/vendor/byteyarn/src/convert.rs b/vendor/byteyarn/src/convert.rs deleted file mode 100644 index 2c4ea5357..000000000 --- a/vendor/byteyarn/src/convert.rs +++ /dev/null @@ -1,248 +0,0 @@ -use std::borrow::Borrow; -use std::fmt; -use std::str::Utf8Error; - -use crate::YarnBox; -use crate::YarnRef; - -#[derive(Clone, Debug)] -pub struct NonCopy(()); - -impl fmt::Display for NonCopy { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("cannot convert yarn to non-owning yarn") - } -} - -impl<'a, Buf> TryFrom<YarnBox<'a, Buf>> for YarnRef<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - type Error = NonCopy; - - fn try_from(y: YarnBox<'a, Buf>) -> Result<Self, NonCopy> { - y.to_ref().ok_or(NonCopy(())) - } -} - -impl<'a> TryFrom<YarnBox<'a, [u8]>> for YarnBox<'a, str> { - type Error = Utf8Error; - - fn try_from(y: YarnBox<'a, [u8]>) -> Result<Self, Utf8Error> { - y.to_utf8() - } -} - -impl<'a> TryFrom<YarnRef<'a, [u8]>> for YarnRef<'a, str> { - type Error = Utf8Error; - - fn try_from(y: YarnRef<'a, [u8]>) -> Result<Self, Utf8Error> { - y.to_utf8() - } -} - -impl<'a> From<YarnBox<'a, str>> for YarnBox<'a, [u8]> { - fn from(y: YarnBox<'a, str>) -> Self { - y.into_bytes() - } -} - -impl<'a> From<YarnRef<'a, str>> for YarnRef<'a, [u8]> { - fn from(y: YarnRef<'a, str>) -> Self { - y.into_bytes() - } -} - -impl From<u8> for YarnBox<'_, [u8]> { - fn from(c: u8) -> Self { - Self::from_byte(c) - } -} - -impl From<u8> for YarnRef<'_, [u8]> { - fn from(c: u8) -> Self { - Self::from_byte(c) - } -} - -impl<Buf> From<char> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(c: char) -> Self { - Self::from_char(c) - } -} - -impl<Buf> From<char> for YarnRef<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(c: char) -> Self { - 
Self::from_char(c) - } -} - -impl<'a, Buf> From<&'a Buf> for YarnBox<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: &'a Buf) -> Self { - Self::new(s) - } -} - -impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnBox<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: &'a YarnBox<'a, Buf>) -> Self { - s.aliased() - } -} - -impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnRef<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: &'a YarnBox<'a, Buf>) -> Self { - s.as_ref() - } -} - -impl<'a, Buf> From<&'a Buf> for YarnRef<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: &'a Buf) -> Self { - Self::new(s) - } -} - -impl From<Box<[u8]>> for YarnBox<'_, [u8]> { - fn from(s: Box<[u8]>) -> Self { - Self::from_boxed_bytes(s) - } -} - -impl From<Vec<u8>> for YarnBox<'_, [u8]> { - fn from(s: Vec<u8>) -> Self { - Self::from_vec(s) - } -} - -impl<Buf> From<Box<str>> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: Box<str>) -> Self { - Self::from_boxed_str(s) - } -} - -impl<Buf> From<String> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn from(s: String) -> Self { - Self::from_string(s) - } -} - -impl<Buf> From<YarnBox<'_, Buf>> for Box<[u8]> -where - Buf: crate::Buf + ?Sized, -{ - fn from(y: YarnBox<Buf>) -> Self { - y.into_boxed_bytes() - } -} - -impl<Buf> From<YarnRef<'_, Buf>> for Box<[u8]> -where - Buf: crate::Buf + ?Sized, -{ - fn from(y: YarnRef<Buf>) -> Self { - y.to_boxed_bytes() - } -} - -impl<Buf> From<YarnBox<'_, Buf>> for Vec<u8> -where - Buf: crate::Buf + ?Sized, -{ - fn from(y: YarnBox<Buf>) -> Self { - y.into_vec() - } -} - -impl<Buf> From<YarnRef<'_, Buf>> for Vec<u8> -where - Buf: crate::Buf + ?Sized, -{ - fn from(y: YarnRef<Buf>) -> Self { - y.to_vec() - } -} - -impl From<YarnBox<'_, str>> for Box<str> { - fn from(y: YarnBox<str>) -> Self { - y.into_boxed_str() - } -} - -impl From<YarnRef<'_, str>> for Box<str> { - fn from(y: YarnRef<str>) -> Self { - y.to_boxed_str() - } -} - -impl 
From<YarnBox<'_, str>> for String { - fn from(y: YarnBox<str>) -> Self { - y.into_string() - } -} - -impl From<YarnRef<'_, str>> for String { - fn from(y: YarnRef<str>) -> Self { - y.to_string() - } -} - -// AsRef / Borrow - -impl<Buf> AsRef<Buf> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn as_ref(&self) -> &Buf { - self.as_slice() - } -} - -impl<Buf> AsRef<Buf> for YarnRef<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn as_ref(&self) -> &Buf { - self.as_slice() - } -} - -impl<Buf> Borrow<Buf> for YarnBox<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn borrow(&self) -> &Buf { - self.as_slice() - } -} - -impl<Buf> Borrow<Buf> for YarnRef<'_, Buf> -where - Buf: crate::Buf + ?Sized, -{ - fn borrow(&self) -> &Buf { - self.as_slice() - } -} diff --git a/vendor/byteyarn/src/lib.rs b/vendor/byteyarn/src/lib.rs deleted file mode 100644 index 38dfc111c..000000000 --- a/vendor/byteyarn/src/lib.rs +++ /dev/null @@ -1,113 +0,0 @@ -//! `byteyarn` - Space-efficient byte strings π§Άπββ¬ -//! -//! A [`Yarn`] is a highly optimized string type that provides a number of -//! useful properties over [`String`]: -//! -//! * Always two pointers wide, so it is always passed into and out of functions -//! in registers. -//! * Small string optimization (SSO) up to 15 bytes on 64-bit architectures. -//! * Can be either an owned buffer or a borrowed buffer (like [`Cow<str>`]). -//! * Can be upcast to `'static` lifetime if it was constructed from a -//! known-static string. -//! -//! The main caveat is that [`Yarn`]s cannot be easily appended to, since they -//! do not track an internal capacity, and the slice returned by -//! [`Yarn::as_slice()`] does not have the same pointer stability properties as -//! [`String`] (these are rarely needed, though). -//! -//! --- -//! -//! Yarns are useful for situations in which a copy-on-write string is necessary -//! and most of the strings are relatively small. Although [`Yarn`] itself is -//! 
not [`Copy`], there is a separate [`YarnRef`] type that is. These types -//! have equivalent representations, and can be cheaply cast between each other. -//! -//! The easiest way to create a yarn is with the [`yarn!()`] -//! macro, which is similar to [`format!()`]. -//! -//! ``` -//! # use byteyarn::*; -//! // Create a new yarn via `fmt`ing. -//! let yarn = yarn!("Answer: {}", 42); -//! -//! // Convert that yarn into a reference. -//! let ry: YarnRef<str> = yarn.as_ref(); -//! -//! // Try up-casting the yarn into an "immortal yarn" without copying. -//! let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); -//! -//! assert_eq!(yarn, copy); -//! ``` -//! -//! Yarns are intended for storing text, either as UTF-8 or as -//! probably-UTF-8 bytes; [`Yarn<str>`] and [`Yarn<[u8]>`] serve these purposes, -//! and can be inter-converted with each other. The [`Yarn::utf8_chunks()`] -//! function can be used to iterate over definitely-valid-UTF-8 chunks within -//! a string. -//! -//! Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as -//! strings would. In particular, invalid UTF-8 is converted into either `\xNN` -//! escapes or replacement characters (for `Debug` and `Display` respectively). -//! -//! ``` -//! # use byteyarn::*; -//! let invalid = ByteYarn::from_byte(0xff); -//! assert_eq!(format!("{invalid:?}"), r#""\xFF""#); -//! assert_eq!(format!("{invalid}"), "οΏ½"); -//! ``` - -#![deny(missing_docs)] - -use std::hash::Hash; - -#[cfg(doc)] -use std::borrow::Cow; - -mod boxed; -mod convert; -mod raw; -mod reffed; -mod utf8; - -pub use boxed::YarnBox; -pub use reffed::YarnRef; -pub use utf8::Utf8Chunks; - -mod z { - pub trait Sealed {} -} - -// Macro stuff. -#[doc(hidden)] -pub mod m { - pub extern crate std; -} - -/// A trait for abstracting over `str` and `[u8]`. 
-pub trait Buf: z::Sealed + Eq + Ord + Hash {} - -impl z::Sealed for [u8] {} -impl z::Sealed for str {} - -impl Buf for [u8] {} -impl Buf for str {} - -/// An optimized Unicode string. -/// -/// See [`YarnBox`] for full type documentation. -pub type Yarn = YarnBox<'static, str>; - -/// An optimized raw byte string. -/// -/// See [`YarnBox`] for full type documentation. -pub type ByteYarn = YarnBox<'static, [u8]>; - -/// Similar to [`format!()`], but returns a [`Yarn`], instead. -/// -/// This macro calls out to [`Yarn::from_fmt()`] internally. -#[macro_export] -macro_rules! yarn { - ($($args:tt)*) => { - $crate::Yarn::from_fmt($crate::m::std::format_args!($($args)*)) - }; -} diff --git a/vendor/byteyarn/src/raw.rs b/vendor/byteyarn/src/raw.rs deleted file mode 100644 index b424df549..000000000 --- a/vendor/byteyarn/src/raw.rs +++ /dev/null @@ -1,469 +0,0 @@ -use std::alloc; -use std::fmt; -use std::fmt::Write; -use std::mem; -use std::mem::MaybeUninit; -use std::num::NonZeroUsize; -use std::slice; - -/// The core implementation of yarns. -/// -/// This type encapsulates the various size optimizations that yarns make; this -/// wrapper is shared between both owning and non-owning yarns. -#[repr(C)] -#[derive(Copy, Clone)] -pub struct RawYarn { - ptr: PtrOrBytes, - len: NonZeroUsize, -} - -#[repr(C)] -#[derive(Copy, Clone)] -union PtrOrBytes { - bytes: [u8; mem::size_of::<*const u8>()], - ptr: *const u8, -} - -#[repr(C)] -#[derive(Copy, Clone)] -struct Small { - data: [u8; mem::size_of::<RawYarn>() - 1], - len: u8, -} - -#[repr(C)] -#[derive(Copy, Clone)] -struct Slice { - ptr: *const u8, - len: usize, -} - -enum Layout<'a> { - Small(&'a Small), - Slice(&'a Slice), -} - -enum LayoutMut<'a> { - Small(&'a mut Small), - Slice(&'a mut Slice), -} - -// RawYarn does not expose &mut through &self. 
-unsafe impl Send for RawYarn {} -unsafe impl Sync for RawYarn {} - -#[test] -fn has_niche() { - assert_eq!(mem::size_of::<RawYarn>(), mem::size_of::<Option<RawYarn>>()); -} - -impl RawYarn { - /// The number of bytes beyond the length byte that are usable for data. - /// This is 7 on 32-bit and 15 on 64-bit. - pub const SSO_LEN: usize = { - let bytes_usable = mem::size_of::<usize>() * 2 - 1; - let max_len = 1 << (8 - 2); - - let sso_len = if bytes_usable < max_len { - bytes_usable - } else { - max_len - }; - - assert!( - sso_len >= 4, - "yarns are not supported on architectures with pointers this small" - ); - - sso_len - }; - - /// The tag for an SSO yarn. - pub const SMALL: u8 = 0b11; - /// The tag for a yarn that came from an immortal string slice. - pub const STATIC: u8 = 0b01; - /// The tag for a yarn that points to a dynamic string slice, on the heap, - /// that we uniquely own. - pub const HEAP: u8 = 0b10; - /// The tag for a yarn that points to a dynamic string slice we don't - /// uniquely own. - /// - /// Because the first word can never be zero, aliased yarns can never have - /// zero length. - pub const ALIASED: u8 = 0b00; - - /// Mask for extracting the tag out of the lowest byte of the yarn. - const SHIFT8: u32 = u8::BITS - 2; - const SHIFT: u32 = usize::BITS - 2; - - const MASK8: usize = !0 << Self::SHIFT8; - const MASK: usize = !0 << Self::SHIFT; - - /// Returns the kind of yarn this is (one of the constants above). - #[inline(always)] - pub const fn kind(&self) -> u8 { - // This used to be - // - // let ptr = self as *const Self as *const u8; - // let hi_byte = unsafe { - // // SAFETY: ptr is valid by construction; regardless of which union member - // // is engaged, the lowest byte is always initialized. - // *ptr.add(std::mem::size_of::<Self>() - 1) - // }; - // hi_byte >> Self::SHIFT8 - // - // But LLVM apparently upgrades this to a word-aligned load (i.e. the code - // below) regardless. 
:D - - (self.len.get() >> Self::SHIFT) as u8 - } - - /// Creates a new, non-`SMALL` yarn with the given pointer, length, and tag. - /// - /// # Safety - /// - /// `ptr` must be valid for reading `len` bytes. - /// - /// If tag is `STATIC`, then `ptr` must never be deallocated. If the tag is - /// `HEAP`, `ptr` must be free-able via dealloc with a (len, 1) layout and - /// valid for writing `len` bytes. - #[inline(always)] - pub const unsafe fn from_ptr_len_tag( - ptr: *const u8, - len: usize, - tag: u8, - ) -> Self { - assert!( - len < usize::MAX / 4, - "yarns cannot be larger than a quarter of the address space" - ); - debug_assert!( - tag != 0 || len != 0, - "zero-length and zero tag are not permitted simultaneously." - ); - debug_assert!(tag != Self::SMALL); - - Self { - ptr: PtrOrBytes { ptr }, - len: NonZeroUsize::new_unchecked(len | (tag as usize) << Self::SHIFT), - } - } - - /// Returns the currently valid union variant for this yarn. - #[inline(always)] - const fn layout(&self) -> Layout { - match self.is_small() { - true => unsafe { - // SAFETY: When self.is_small, the small variant is always active. - Layout::Small(mem::transmute::<&RawYarn, &Small>(self)) - }, - false => unsafe { - // SAFETY: Otherwise, the slice variant is always active. - Layout::Slice(mem::transmute::<&RawYarn, &Slice>(self)) - }, - } - } - - /// Returns the currently valid union variant for this yarn. - #[inline(always)] - fn layout_mut(&mut self) -> LayoutMut { - match self.is_small() { - true => unsafe { - // SAFETY: When self.is_small, the small variant is always active. - LayoutMut::Small(mem::transmute::<&mut RawYarn, &mut Small>(self)) - }, - false => unsafe { - // SAFETY: Otherwise, the slice variant is always active. - LayoutMut::Slice(mem::transmute::<&mut RawYarn, &mut Slice>(self)) - }, - } - } - - /// Returns a reference to an empty `RawYarn` of any lifetime. 
- #[inline] - pub fn empty<'a>() -> &'a RawYarn { - static STORAGE: MaybeUninit<RawYarn> = MaybeUninit::new(RawYarn::new(b"")); - unsafe { - // SAFETY: MaybeUninit::new() creates well-initialized memory. - STORAGE.assume_init_ref() - } - } - - /// Returns a `RawYarn` pointing to the given static string, without copying. - #[inline] - pub const fn new(s: &'static [u8]) -> Self { - if s.len() < Self::SSO_LEN { - unsafe { - // SAFETY: We just checked s.len() < Self::SSO_LEN. - return Self::from_slice_inlined_unchecked(s.as_ptr(), s.len()); - } - } - - unsafe { - // SAFETY: s is a static string, because the argument is 'static. - Self::from_ptr_len_tag(s.as_ptr(), s.len(), Self::STATIC) - } - } - - /// Returns an empty `RawYarn`. - #[inline(always)] - pub const fn len(self) -> usize { - match self.layout() { - Layout::Small(s) => s.len as usize & !Self::MASK8, - Layout::Slice(s) => s.len & !Self::MASK, - } - } - - /// Returns whether this `RawYarn` needs to be dropped (i.e., if it is holding - /// onto memory resources). - #[inline(always)] - pub const fn on_heap(self) -> bool { - self.kind() == Self::HEAP - } - - /// Returns whether this `RawYarn` is SSO. - #[inline(always)] - pub const fn is_small(self) -> bool { - self.kind() == Self::SMALL - } - - /// Returns whether this `RawYarn` is SSO. - #[inline(always)] - pub const fn is_immortal(self) -> bool { - self.kind() != Self::ALIASED - } - - /// Frees heap memory owned by this raw yarn. - /// - /// # Safety - /// - /// This function must be called at most once, when the raw yarn is being - /// disposed of. - #[inline(always)] - pub unsafe fn destroy(self) { - if !self.on_heap() { - return; - } - - debug_assert!(self.len() > 0); - let layout = alloc::Layout::for_value(self.as_slice()); - alloc::dealloc(self.ptr.ptr as *mut u8, layout) - } - - /// Returns a pointer into the data for this raw yarn. 
- #[inline(always)] - pub const fn as_ptr(&self) -> *const u8 { - match self.layout() { - Layout::Small(s) => s.data.as_ptr().cast(), - Layout::Slice(s) => s.ptr, - } - } - - /// Returns a pointer into the data for this raw yarn. - #[inline(always)] - pub fn as_mut_ptr(&mut self) -> *mut u8 { - match self.layout_mut() { - LayoutMut::Small(s) => s.data.as_mut_ptr().cast(), - LayoutMut::Slice(s) => s.ptr.cast_mut(), - } - } - - /// Converts this RawYarn into a byte slice. - #[inline(always)] - pub const fn as_slice(&self) -> &[u8] { - unsafe { - // SAFETY: the output lifetime ensures that `self` cannot move away. - slice::from_raw_parts(self.as_ptr(), self.len()) - } - } - - /// Converts this RawYarn into a mutable byte slice. - /// - /// # Safety - /// - /// This must only be called on `SMALL` or `HEAP` yarns. - #[inline(always)] - pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { - debug_assert!(self.is_small() || self.on_heap()); - unsafe { - // SAFETY: the output lifetime ensures that `self` cannot move away. - slice::from_raw_parts_mut(self.as_mut_ptr(), self.len()) - } - } - - /// Returns a `RawYarn` by making a copy of the given slice. - #[inline(always)] - pub fn copy_slice(s: &[u8]) -> Self { - match Self::from_slice_inlined(s) { - Some(inl) => inl, - None => Self::from_heap(s.into()), - } - } - - /// Returns a `RawYarn` by making an alias of the given slice. - /// - /// # Safety - /// - /// `s` must outlive all uses of the returned yarn. - #[inline(always)] - pub const unsafe fn alias_slice(s: &[u8]) -> Self { - if let Some(inlined) = Self::from_slice_inlined(s) { - return inlined; - } - - Self::from_ptr_len_tag(s.as_ptr(), s.len(), Self::ALIASED) - } - - /// Returns a new `RawYarn` containing the contents of the given slice. - /// - /// # Safety - /// - /// `len < Self::SSO`, and `ptr` must be valid for reading `len` bytes. 
- #[inline] - pub const unsafe fn from_slice_inlined_unchecked( - ptr: *const u8, - len: usize, - ) -> Self { - debug_assert!(len <= Self::SSO_LEN); - - let mut small = Small { - data: [0; Self::SSO_LEN], - len: (len as u8) | Self::SMALL << Self::SHIFT8, - }; - - // There's no way to get an *mut to `small.data`, so we do an iteration, - // instead. This loop can be trivially converted into a memcpy by the - // optimizer. - let mut i = 0; - while i < len { - small.data[i] = *ptr.add(i); - i += 1; - } - - // Small and RawYarn are both POD. - mem::transmute::<Small, RawYarn>(small) - } - - /// Returns a new `RawYarn` containing the contents of the given slice. - /// - /// This function will always return an inlined string. - #[inline] - pub const fn from_slice_inlined(s: &[u8]) -> Option<Self> { - if s.len() > Self::SSO_LEN { - return None; - } - - unsafe { - // SAFETY: s.len() is within bounds; we just checked it above. - Some(Self::from_slice_inlined_unchecked(s.as_ptr(), s.len())) - } - } - - /// Returns a `RawYarn` containing a single UTF-8-encoded Unicode scalar. - /// - /// This function does not allocate: every `char` fits in an inlined `RawYarn`. - #[inline(always)] - pub const fn from_char(c: char) -> Self { - let (data, len) = crate::utf8::encode_utf8(c); - unsafe { - // SAFETY: len is at most 4, 4 < Self::SSO_LEN. - Self::from_slice_inlined_unchecked(data.as_ptr(), len) - } - } - - /// Returns a `RawYarn` containing a single byte, without allocating. - #[inline(always)] - pub const fn from_byte(c: u8) -> Self { - unsafe { - // SAFETY: 1 < Self::SSO_LEN. - Self::from_slice_inlined_unchecked(&c, 1) - } - } - - /// Returns a `RawYarn` consisting of the concatenation of the given slices. - /// - /// Does not allocate if the resulting concatenation can be inlined. - /// - /// # Safety - /// - /// `total_len < Self::SSO_LEN`. 
- pub unsafe fn concat<'a>( - total_len: usize, - iter: impl IntoIterator<Item = &'a [u8]>, - ) -> Self { - if total_len > Self::SSO_LEN { - let mut buf = Vec::with_capacity(total_len); - for b in iter { - buf.extend_from_slice(b); - } - - return Self::from_heap(buf.into()); - } - - let mut cursor = 0; - let mut data = [0; Self::SSO_LEN]; - for b in iter { - data[cursor..cursor + b.len()].copy_from_slice(b); - cursor += b.len(); - } - - Self::from_slice_inlined(&data[..cursor]).unwrap_unchecked() - } - - /// Returns a `RawYarn` by taking ownership of the given allocation. - #[inline] - pub fn from_heap(s: Box<[u8]>) -> Self { - if let Some(inline) = Self::from_slice_inlined(&s) { - return inline; - } - - let len = s.len(); - let ptr = Box::into_raw(s) as *mut u8; - unsafe { - // SAFETY: s is a heap allocation of the appropriate layout for HEAP, - // which we own uniquely because we dismantled it from a box. - Self::from_ptr_len_tag(ptr, len, Self::HEAP) - } - } - - /// Builds a new yarn from the given formatting arguments, without allocating - /// in the trival and small cases. 
- pub fn from_fmt_args(args: fmt::Arguments) -> Self { - if let Some(constant) = args.as_str() { - return Self::new(constant.as_bytes()); - } - - enum Buf { - Sso(usize, [u8; RawYarn::SSO_LEN]), - Vec(Vec<u8>), - } - impl fmt::Write for Buf { - fn write_str(&mut self, s: &str) -> fmt::Result { - match self { - Self::Sso(len, bytes) => { - let new_len = *len + s.len(); - if new_len > RawYarn::SSO_LEN { - let mut vec = Vec::from(&bytes[..*len]); - vec.extend_from_slice(s.as_bytes()); - - *self = Self::Vec(vec); - } else { - let _ = &bytes[*len..new_len].copy_from_slice(s.as_bytes()); - *len = new_len; - } - } - Self::Vec(vec) => vec.extend_from_slice(s.as_bytes()), - } - - Ok(()) - } - } - - let mut w = Buf::Sso(0, [0; RawYarn::SSO_LEN]); - let _ = w.write_fmt(args); - match w { - Buf::Sso(len, bytes) => Self::from_slice_inlined(&bytes[..len]).unwrap(), - Buf::Vec(vec) => Self::from_heap(vec.into()), - } - } -} diff --git a/vendor/byteyarn/src/reffed.rs b/vendor/byteyarn/src/reffed.rs deleted file mode 100644 index a988b4085..000000000 --- a/vendor/byteyarn/src/reffed.rs +++ /dev/null @@ -1,410 +0,0 @@ -use std::cmp::Ordering; -use std::fmt; -use std::fmt::Write; -use std::hash::Hash; -use std::hash::Hasher; -use std::marker::PhantomData; -use std::mem; -use std::ops::Deref; -use std::str; -use std::str::Utf8Error; - -use crate::raw::RawYarn; -use crate::Utf8Chunks; -use crate::YarnBox; - -#[cfg(doc)] -use crate::*; - -/// An optimized, freely copyable string type. -/// -/// Like a [`Yarn`], but [`Copy`]. -/// -/// In general, prefer to use [`Yarn`] except when you absolutely need the type -/// to be [`Copy`]. [`YarnRef`] is very similar to [`Yarn`], although it can't -/// provide full functionality because it can't own a heap allocation. -/// -/// See the [crate documentation](crate) for general information. 
-#[repr(transparent)] -pub struct YarnRef<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - raw: RawYarn, - _ph: PhantomData<&'a Buf>, -} - -impl<'a, Buf> YarnRef<'a, Buf> -where - Buf: crate::Buf + ?Sized, -{ - pub(crate) const fn buf2raw(buf: &Buf) -> &[u8] { - let ptr = &buf as *const &Buf as *const &[u8]; - unsafe { - // SAFETY: The safety rules of `Buf` make this valid. - *ptr - } - } - - pub(crate) const unsafe fn raw2buf(buf: &[u8]) -> &Buf { - let ptr = &buf as *const &[u8] as *const &Buf; - *ptr - } - - pub(crate) const unsafe fn from_raw(raw: RawYarn) -> Self { - debug_assert!(!raw.on_heap()); - Self { - raw, - _ph: PhantomData, - } - } - - /// Returns a reference to an empty yarn of any lifetime. - /// - /// ``` - /// # use byteyarn::*; - /// let empty: &YarnRef<str> = YarnRef::empty(); - /// assert_eq!(empty, ""); - /// ``` - /// - /// This will also be found by the `Default` impl for `&YarnRef`. - pub fn empty<'b>() -> &'b Self { - unsafe { - // SAFETY: YarnRef is a transparent wrapper over RawYarn; even though - // YarnRef has a destructor, this is fine. - mem::transmute::<&'b RawYarn, &'b Self>(RawYarn::empty()) - } - } - - /// Returns a yarn pointing to the given slice, without copying. - /// - /// ``` - /// # use byteyarn::*; - /// let foo = YarnRef::new("Byzantium"); - /// assert_eq!(foo.len(), 9); - /// ``` - pub const fn new(buf: &'a Buf) -> Self { - unsafe { - // SAFETY: We copy the lifetime from buf into self, so this alias slice - // must go away before buf can. - let raw = RawYarn::alias_slice(Self::buf2raw(buf)); - - // SAFETY: buf is a valid slice by construction, and alias_slice() never - // returns a HEAP yarn. - Self::from_raw(raw) - } - } - - /// Returns a new yarn containing the contents of the given slice. - /// This function will always return an inlined string, or `None` if the - /// given buffer is too big. - /// - /// Note that the maximum inlined size is architecture-dependent. 
- /// - /// ``` - /// # use byteyarn::*; - /// let smol = YarnRef::inlined("smol"); - /// assert_eq!(smol.unwrap(), "smol"); - /// - /// let big = YarnRef::inlined("biiiiiiiiiiiiiiig"); - /// assert!(big.is_none()); - /// ``` - pub const fn inlined(buf: &Buf) -> Option<Self> { - // This is a const fn, hence no ?. - let Some(raw) = RawYarn::from_slice_inlined(Self::buf2raw(buf)) else { - return None; - }; - - unsafe { - // SAFETY: from_slice_inlined() always returns a SMALL yarn. - Some(Self::from_raw(raw)) - } - } - - /// Returns a yarn containing a single UTF-8-encoded Unicode scalar. - /// This function does not allocate: every `char` fits in an inlined yarn. - /// - /// ``` - /// # use byteyarn::*; - /// let a = YarnRef::<str>::from_char('a'); - /// assert_eq!(a, "a"); - /// ``` - pub const fn from_char(c: char) -> Self { - let raw = RawYarn::from_char(c); - unsafe { - // SAFETY: from_char() always returns a SMALL yarn. - Self::from_raw(raw) - } - } - - /// Checks whether this yarn is empty. - pub const fn is_empty(self) -> bool { - self.len() == 0 - } - - /// Returns the length of this yarn, in bytes. - pub const fn len(self) -> usize { - self.raw.len() - } - - /// Converts this yarn into a slice. - pub const fn as_slice(&self) -> &Buf { - unsafe { Self::raw2buf(self.as_bytes()) } - } - - /// Converts this yarn into a byte slice. - pub const fn as_bytes(&self) -> &[u8] { - self.raw.as_slice() - } - - /// Converts this reference yarn into a owning yarn of the same lifetime. - /// - /// This function does not make copies or allocations. - pub const fn to_box(self) -> YarnBox<'a, Buf> { - unsafe { - // SAFETY: self is never HEAP, and the output lifetime is the same as the - // input so if self is ALIASED it will not become invalid before the - // returned yarn goes out of scope. - YarnBox::from_raw(self.raw) - } - } - - /// Converts this yarn into a boxed slice by copying it. 
- pub fn to_boxed_bytes(self) -> Box<[u8]> { - self.to_box().into_boxed_bytes() - } - - /// Converts this yarn into a vector by copying it. - pub fn to_vec(self) -> Vec<u8> { - self.to_box().into_vec() - } - - /// Converts this yarn into a byte yarn. - pub const fn into_bytes(self) -> YarnRef<'a, [u8]> { - unsafe { - // SAFETY: [u8] can be constructed from either str or [u8], so this - // type parameter change is valid. - YarnRef::from_raw(self.raw) - } - } - - /// Extends the lifetime of this yarn if this yarn is dynamically known to - /// point to immortal memory. - /// - /// If it doesn't, this function returns `None`. - /// - /// ``` - /// # use byteyarn::*; - /// let yarn = YarnRef::<[u8]>::from_static(b"crunchcrunchcrunch"); - /// - /// let immortal: YarnRef<'static, [u8]> = yarn.immortalize().unwrap(); - /// assert_eq!(immortal, b"crunchcrunchcrunch"); - /// - /// let borrowed = YarnRef::new(&*immortal); - /// assert!(borrowed.immortalize().is_none()); - /// ``` - pub fn immortalize(self) -> Option<YarnRef<'static, Buf>> { - if !self.raw.is_immortal() { - return None; - } - - unsafe { - // SAFETY: We just checked that self.raw is guaranteed immortal (and - // can therefore be used for a 'static lifetime). - Some(YarnRef::<'static, Buf>::from_raw(self.raw)) - } - } - - /// Tries to inline this yarn, if it's small enough. - /// - /// This operation has no directly visible side effects, and is only intended - /// to provide a way to relieve memory pressure. In general, you should not - /// have to call this function directly. - pub fn inline_in_place(&mut self) { - if let Some(inlined) = Self::inlined(self.as_slice()) { - *self = inlined; - } - } - - /// Returns an iterator over the UTF-8 (or otherwise) chunks in this string. - /// - /// This iterator is also used for the `Debug` and `Display` formatter - /// implementations. 
  ///
  /// ```
  /// # use byteyarn::*;
  /// let yarn = ByteYarn::new(b"abc\xFF\xFE\xFF\xF0\x9F\x90\x88\xE2\x80\x8D\xE2\xAC\x9B!");
  /// let yr = yarn.as_ref();
  /// let chunks = yr.utf8_chunks().collect::<Vec<_>>();
  /// assert_eq!(chunks, [
  ///   Ok("abc"),
  ///   Err(&[0xff][..]),
  ///   Err(&[0xfe][..]),
  ///   Err(&[0xff][..]),
  ///   Ok("\u{1F408}\u{200D}\u{2B1B}!"),
  /// ]);
  ///
  /// assert_eq!(format!("{yarn:?}"), r#""abc\xFF\xFE\xFF🐈\u{200d}⬛!""#);
  /// assert_eq!(format!("{yarn}"), "abc\u{FFFD}\u{FFFD}\u{FFFD}\u{1F408}\u{200D}\u{2B1B}!");
  /// ```
  pub fn utf8_chunks(&self) -> Utf8Chunks {
    Utf8Chunks::new(self.as_bytes())
  }
}

impl<Buf> YarnRef<'static, Buf>
where
  Buf: crate::Buf + ?Sized,
{
  /// Returns a yarn pointing to the given slice, without copying. This function
  /// has the benefit of creating a yarn that remembers that it came from a
  /// static string, meaning that it can be dynamically upcast back to a
  /// `'static` lifetime.
  ///
  /// This function will *not* be found by `From` impls.
  pub const fn from_static(buf: &'static Buf) -> Self {
    let raw = RawYarn::new(Self::buf2raw(buf));
    // NOTE(review): the original gives no SAFETY justification here.
    // Presumably `raw` is valid for `Buf` because it was produced from `buf`
    // by `buf2raw` -- confirm against the contract of `from_raw`.
    unsafe { Self::from_raw(raw) }
  }
}

impl<'a> YarnRef<'a, [u8]> {
  /// Returns a yarn containing a single byte, without allocating.
  ///
  /// This function will be found by `From` impls.
  pub const fn from_byte(c: u8) -> Self {
    let raw = RawYarn::from_byte(c);
    // NOTE(review): the original gives no SAFETY justification here.
    // Presumably any single byte is valid content for a `[u8]` yarn --
    // confirm against the contract of `from_raw`.
    unsafe { Self::from_raw(raw) }
  }

  /// Tries to convert this yarn into a UTF-8 yarn via [`str::from_utf8()`].
  ///
  /// # Errors
  ///
  /// Returns the [`Utf8Error`] reported by [`str::from_utf8()`] if the bytes
  /// are not valid UTF-8.
  ///
  /// ```
  /// # use byteyarn::*;
  /// let yarn = ByteYarn::new(&[0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]);
  /// assert_eq!(yarn.as_ref().to_utf8().unwrap(), "\u{1F408}\u{200D}\u{2B1B}");
  ///
  /// assert!(ByteYarn::from_byte(0xff).as_ref().to_utf8().is_err());
  /// ```
  pub fn to_utf8(self) -> Result<YarnRef<'a, str>, Utf8Error> {
    // Validation only: the resulting `&str` is discarded, and `?` bails out
    // with the error before the reinterpretation below can happen.
    str::from_utf8(self.as_bytes())?;
    // NOTE(review): no SAFETY comment in the original. The bytes were just
    // validated as UTF-8 above, which is presumably what `from_raw` requires
    // for a `str` yarn -- confirm.
    unsafe { Ok(YarnRef::from_raw(self.raw)) }
  }
}

impl<'a> YarnRef<'a, str> {
  /// Converts this yarn into a string slice.
  pub fn as_str(&self) -> &str {
    self.as_slice()
  }

  /// Converts this yarn into a boxed slice by copying it.
  pub fn to_boxed_str(self) -> Box<str> {
    self.to_box().into_boxed_str()
  }

  /// Converts this yarn into a string by copying it.
  // This does the same thing as to_string, but more efficiently. :)
  // The clippy diagnostic also seems wrong, because it says something about
  // this method taking &self? Very odd.
  #[allow(clippy::inherent_to_string_shadow_display)]
  pub fn to_string(self) -> String {
    self.to_box().into_string()
  }
}

/// Dereferences to the underlying buffer, as returned by `as_slice()`.
impl<Buf> Deref for YarnRef<'_, Buf>
where
  Buf: crate::Buf + ?Sized,
{
  type Target = Buf;
  fn deref(&self) -> &Buf {
    self.as_slice()
  }
}

// `YarnRef` is `Copy` for every buffer type, so `clone()` is a plain copy.
impl<Buf> Copy for YarnRef<'_, Buf> where Buf: crate::Buf + ?Sized {}
impl<Buf> Clone for YarnRef<'_, Buf>
where
  Buf: crate::Buf + ?Sized,
{
  fn clone(&self) -> Self {
    *self
  }
}

/// Formats the yarn as a double-quoted string.
///
/// Valid UTF-8 chunks are written through `escape_debug()`; every byte of an
/// invalid chunk is written as a `\xNN` escape with two uppercase hex digits.
impl<Buf: crate::Buf + ?Sized> fmt::Debug for YarnRef<'_, Buf> {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    write!(f, "\"")?;
    for chunk in self.utf8_chunks() {
      match chunk {
        Ok(utf8) => write!(f, "{}", utf8.escape_debug())?,
        Err(bytes) => {
          for b in bytes {
            write!(f, "\\x{:02X}", b)?;
          }
        }
      }
    }
    write!(f, "\"")
  }
}

/// Writes valid UTF-8 chunks verbatim and one U+FFFD replacement character for
/// each invalid chunk yielded by `utf8_chunks()`.
impl<Buf: crate::Buf + ?Sized> fmt::Display for YarnRef<'_, Buf> {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for chunk in self.utf8_chunks() {
      match chunk {
        Ok(utf8) => f.write_str(utf8)?,
        // The invalid bytes themselves are ignored; only their presence
        // matters here.
        Err(..) => f.write_char(char::REPLACEMENT_CHARACTER)?,
      }
    }

    Ok(())
  }
}

/// Equality against anything that can be viewed as a `Buf` through `AsRef`;
/// the comparison is done on the underlying slices.
impl<Slice, Buf> PartialEq<Slice> for YarnRef<'_, Buf>
where
  Buf: crate::Buf + ?Sized,
  Slice: AsRef<Buf> + ?Sized,
{
  fn eq(&self, that: &Slice) -> bool {
    self.as_slice() == that.as_ref()
  }
}

// Only available when the buffer type itself is `Eq`.
impl<Buf: crate::Buf + Eq + ?Sized> Eq for YarnRef<'_, Buf> {}

/// Ordering against anything that can be viewed as a `Buf` through `AsRef`;
/// the comparison is done on the underlying slices.
impl<Slice, Buf> PartialOrd<Slice> for YarnRef<'_, Buf>
where
  Buf: crate::Buf + ?Sized,
  Slice: AsRef<Buf> + ?Sized,
{
  fn partial_cmp(&self, that: &Slice) -> Option<Ordering> {
    self.as_slice().partial_cmp(that.as_ref())
  }
}

/// Total order between two yarns, delegated to their underlying slices.
impl<Buf: crate::Buf + ?Sized> Ord for YarnRef<'_, Buf> {
  fn cmp(&self, that: &Self) -> Ordering {
    self.as_slice().cmp(that.as_slice())
  }
}

/// Hashes the underlying slice, so a yarn feeds the hasher exactly what
/// `as_slice()` would.
impl<Buf: crate::Buf + ?Sized> Hash for YarnRef<'_, Buf> {
  fn hash<H: Hasher>(&self, state: &mut H) {
    self.as_slice().hash(state)
  }
}

impl<Buf: crate::Buf + ?Sized> Default for YarnRef<'_, Buf> {
  fn default() -> Self {
    // Copies the value out of the default *reference* supplied by the
    // `Default for &YarnRef` impl directly below.
    *<&Self>::default()
  }
}

impl<Buf: crate::Buf + ?Sized> Default for &YarnRef<'_, Buf> {
  fn default() -> Self {
    YarnRef::empty()
  }
}

// ---------------------------------------------------------------------------
// The diff moves on to a second deleted file at this point (the code above is
// presumably the tail of src/reffed.rs, going by the diffstat order):
//
//   diff --git a/vendor/byteyarn/src/utf8.rs b/vendor/byteyarn/src/utf8.rs
//   deleted file mode 100644
//   index a0006807e..000000000
//   --- a/vendor/byteyarn/src/utf8.rs
//   +++ /dev/null
//   @@ -1,151 +0,0 @@
// ---------------------------------------------------------------------------

//! UTF-8 utilities not provided by the standard library.

use std::str;

#[cfg(doc)]
use crate::*;

/// An iterator over UTF-8 chunks in a byte buffer.
///
/// Any time non-UTF-8 bytes are encountered, they are returned as `Err`s
/// from the iterator.
///
/// See [`Yarn::utf8_chunks()`].
#[derive(Copy, Clone)]
pub struct Utf8Chunks<'a> {
  // The bytes that have not been yielded yet.
  buf: &'a [u8],
  // When `Some(n)`, the first `n` bytes of `buf` are a run already known to be
  // invalid; the next call to `next()` yields them as an `Err` before doing
  // any further decoding.
  invalid_prefix: Option<usize>,
}

impl<'a> Utf8Chunks<'a> {
  /// Returns the rest of the underlying byte buffer that has not been yielded.
  pub fn rest(self) -> &'a [u8] {
    self.buf
  }

  /// Creates an iterator over all of `buf`, with no invalid run pending.
  pub(crate) fn new(buf: &'a [u8]) -> Self {
    Self {
      buf,
      invalid_prefix: None,
    }
  }

  /// Splits the first `len` bytes off the front of the remaining buffer and
  /// returns them.
  ///
  /// # Safety
  ///
  /// `len` must not exceed `self.buf.len()`: the split is done with
  /// `get_unchecked`, and the bound is only checked by a `debug_assert!`.
  unsafe fn take(&mut self, len: usize) -> &'a [u8] {
    debug_assert!(len <= self.buf.len());

    let pre = self.buf.get_unchecked(..len);
    self.buf = self.buf.get_unchecked(len..);
    pre
  }
}

impl<'a> Iterator for Utf8Chunks<'a> {
  /// `Ok` carries a run of valid UTF-8; `Err` carries a run of invalid bytes.
  type Item = Result<&'a str, &'a [u8]>;

  fn next(&mut self) -> Option<Self::Item> {
    // An invalid run recorded by the previous call is yielded first.
    // `Option::take` also clears the field, so it is reported only once.
    if let Some(prefix) = self.invalid_prefix.take() {
      let bytes = unsafe {
        // SAFETY: self.invalid_prefix is only ever written to in this function,
        // where it gets set to a value that is known to be in-range.
        self.take(prefix)
      };

      return Some(Err(bytes));
    }

    if self.buf.is_empty() {
      return None;
    }

    let utf8 = match str::from_utf8(self.buf) {
      // Everything that is left is valid: yield it all and leave the buffer
      // empty so that the next call returns `None`.
      Ok(utf8) => {
        self.buf = &[];
        utf8
      }
      Err(e) => {
        // Split off the valid prefix; `self.buf` now starts at the first
        // offending byte.
        let bytes = unsafe {
          // SAFETY: valid_up_to() always returns a value in range of self.buf.
          self.take(e.valid_up_to())
        };

        // Builds with debug assertions re-validate the prefix; other builds
        // skip the second check.
        let utf8 = match cfg!(debug_assertions) {
          true => str::from_utf8(bytes).unwrap(),

          // SAFETY: the value of valid_up_to() delimits valid UTF-8, by
          // definition.
          false => unsafe { str::from_utf8_unchecked(bytes) },
        };

        // Remember how many of the bytes now at the front are invalid. When
        // error_len() is `None` the input ended in the middle of a sequence,
        // so everything still in `self.buf` belongs to the invalid run.
        self.invalid_prefix = match e.error_len() {
          Some(len) => Some(len),
          None => Some(self.buf.len()),
        };

        // The invalid bytes were at the very front, so there is no valid text
        // to yield; go around again to yield the pending `Err` instead of an
        // empty `Ok`.
        if utf8.is_empty() {
          return self.next();
        }

        utf8
      }
    };

    Some(Ok(utf8))
  }
}

/// `const`-enabled UTF-8 encoding.
///
/// Returns the encoded bytes in a static array, and the number of those bytes
/// that are pertinent.
pub const fn encode_utf8(c: char) -> ([u8; 4], usize) {
  // Marker bits for the lead byte of a 2-, 3- and 4-byte sequence, and for
  // every continuation byte. A 1-byte sequence carries no marker bits.
  const LEAD_2: u8 = 0b1100_0000;
  const LEAD_3: u8 = 0b1110_0000;
  const LEAD_4: u8 = 0b1111_0000;
  const TRAIL: u8 = 0b1000_0000;

  // Shifts `code` right by `shift` bits, truncates to a byte and keeps only
  // the bits selected by `mask`.
  const fn payload(code: u32, shift: u32, mask: u8) -> u8 {
    (code >> shift) as u8 & mask
  }

  // Builds a continuation byte out of the six payload bits found at `shift`.
  const fn trail(code: u32, shift: u32) -> u8 {
    TRAIL | payload(code, shift, 0b0011_1111)
  }

  let code = c as u32;
  let len = c.len_utf8();

  // Unused trailing slots of the array are left as zero.
  let bytes = match len {
    1 => [payload(code, 0, 0b0111_1111), 0, 0, 0],
    2 => [
      LEAD_2 | payload(code, 6, 0b0001_1111),
      trail(code, 0),
      0,
      0,
    ],
    3 => [
      LEAD_3 | payload(code, 12, 0b0000_1111),
      trail(code, 6),
      trail(code, 0),
      0,
    ],
    4 => [
      LEAD_4 | payload(code, 18, 0b0000_0111),
      trail(code, 12),
      trail(code, 6),
      trail(code, 0),
    ],
    _ => unreachable!(),
  };

  (bytes, len)
}