From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/heck/src/lib.rs | 209 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 third_party/rust/heck/src/lib.rs (limited to 'third_party/rust/heck/src/lib.rs') diff --git a/third_party/rust/heck/src/lib.rs b/third_party/rust/heck/src/lib.rs new file mode 100644 index 0000000000..49bfb0efde --- /dev/null +++ b/third_party/rust/heck/src/lib.rs @@ -0,0 +1,209 @@ +//! **heck** is a case conversion library. +//! +//! This library exists to provide case conversion between common cases like +//! CamelCase and snake_case. It is intended to be unicode aware, internally +//! consistent, and reasonably well performing. +//! +//! ## Definition of a word boundary +//! +//! Word boundaries are defined as the "unicode words" defined in the +//! `unicode_segmentation` library, as well as within those words in this +//! manner: +//! +//! 1. All underscore characters are considered word boundaries. +//! 2. If an uppercase character is followed by lowercase letters, a word +//! boundary is considered to be just prior to that uppercase character. +//! 3. If multiple uppercase characters are consecutive, they are considered to +//! be within a single word, except that the last will be part of the next word +//! if it is followed by lowercase characters (see rule 2). +//! +//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is +//! segmented `XML|Http|Request`. +//! +//! Characters not within words (such as spaces, punctuations, and underscores) +//! are not included in the output string except as they are a part of the case +//! being converted to. Multiple adjacent word boundaries (such as a series of +//! underscores) are folded into one. ("hello__world" in snake case is therefore +//! "hello_world", not the exact same string). Leading or trailing word boundary +//! indicators are dropped, except insofar as CamelCase capitalizes the first +//! word. +//! +//! ### Cases contained in this library: +//! +//! 1. UpperCamelCase +//! 2. lowerCamelCase +//! 3. snake_case +//! 4. kebab-case +//! 5. SHOUTY_SNAKE_CASE +//! 6. Title Case +//! 7. SHOUTY-KEBAB-CASE +//! 8. Train-Case +#![deny(missing_docs)] +#![forbid(unsafe_code)] + +mod kebab; +mod lower_camel; +mod shouty_kebab; +mod shouty_snake; +mod snake; +mod title; +mod train; +mod upper_camel; + +pub use kebab::{AsKebabCase, ToKebabCase}; +pub use lower_camel::{AsLowerCamelCase, ToLowerCamelCase}; +pub use shouty_kebab::{AsShoutyKebabCase, ToShoutyKebabCase}; +pub use shouty_snake::{ + AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase, +}; +pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase}; +pub use title::{AsTitleCase, ToTitleCase}; +pub use train::{AsTrainCase, ToTrainCase}; +pub use upper_camel::{ + AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase, +}; + +use std::fmt; + +#[cfg(feature = "unicode")] +fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords { + use unicode_segmentation::UnicodeSegmentation; + s.unicode_words() +} +#[cfg(not(feature = "unicode"))] +fn get_iterator(s: &str) -> impl Iterator { + s.split(|letter: char| !letter.is_ascii_alphanumeric()) +} + +fn transform( + s: &str, + mut with_word: F, + mut boundary: G, + f: &mut fmt::Formatter, +) -> fmt::Result +where + F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result, + G: FnMut(&mut fmt::Formatter) -> fmt::Result, +{ + /// Tracks the current 'mode' of the transformation algorithm as it scans + /// the input string. + /// + /// The mode is a tri-state which tracks the case of the last cased + /// character of the current word. If there is no cased character + /// (either lowercase or uppercase) since the previous word boundary, + /// than the mode is `Boundary`. If the last cased character is lowercase, + /// then the mode is `Lowercase`. Othertherwise, the mode is + /// `Uppercase`. + #[derive(Clone, Copy, PartialEq)] + enum WordMode { + /// There have been no lowercase or uppercase characters in the current + /// word. + Boundary, + /// The previous cased character in the current word is lowercase. + Lowercase, + /// The previous cased character in the current word is uppercase. + Uppercase, + } + + let mut first_word = true; + + for word in get_iterator(s) { + let mut char_indices = word.char_indices().peekable(); + let mut init = 0; + let mut mode = WordMode::Boundary; + + while let Some((i, c)) = char_indices.next() { + // Skip underscore characters + if c == '_' { + if init == i { + init += 1; + } + continue; + } + + if let Some(&(next_i, next)) = char_indices.peek() { + // The mode including the current character, assuming the + // current character does not result in a word boundary. + let next_mode = if c.is_lowercase() { + WordMode::Lowercase + } else if c.is_uppercase() { + WordMode::Uppercase + } else { + mode + }; + + // Word boundary after if next is underscore or current is + // not uppercase and next is uppercase + if next == '_' || (next_mode == WordMode::Lowercase && next.is_uppercase()) { + if !first_word { + boundary(f)?; + } + with_word(&word[init..next_i], f)?; + first_word = false; + init = next_i; + mode = WordMode::Boundary; + + // Otherwise if current and previous are uppercase and next + // is lowercase, word boundary before + } else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() { + if !first_word { + boundary(f)?; + } else { + first_word = false; + } + with_word(&word[init..i], f)?; + init = i; + mode = WordMode::Boundary; + + // Otherwise no word boundary, just update the mode + } else { + mode = next_mode; + } + } else { + // Collect trailing characters as a word + if !first_word { + boundary(f)?; + } else { + first_word = false; + } + with_word(&word[init..], f)?; + break; + } + } + } + + Ok(()) +} + +fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result { + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c == 'Σ' && chars.peek().is_none() { + write!(f, "ς")?; + } else { + write!(f, "{}", c.to_lowercase())?; + } + } + + Ok(()) +} + +fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result { + for c in s.chars() { + write!(f, "{}", c.to_uppercase())?; + } + + Ok(()) +} + +fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result { + let mut char_indices = s.char_indices(); + if let Some((_, c)) = char_indices.next() { + write!(f, "{}", c.to_uppercase())?; + if let Some((i, _)) = char_indices.next() { + lowercase(&s[i..], f)?; + } + } + + Ok(()) +} -- cgit v1.2.3