1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
//! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
//! a string conforms to
use crate::mixed_script::AugmentedScriptSet;
use crate::GeneralSecurityProfile;
use unicode_script::Script;
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// that a string conforms to.
///
/// Variants are declared from the most restrictive level to the least
/// restrictive one. The derived `PartialOrd`/`Ord` follow declaration order,
/// so a smaller value denotes a stricter level; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// The [ASCII-Only](https://www.unicode.org/reports/tr39/#ascii_only) level.
    ASCIIOnly,
    /// The [Single Script](https://www.unicode.org/reports/tr39/#single_script) level.
    SingleScript,
    /// The [Highly Restrictive](https://www.unicode.org/reports/tr39/#highly_restrictive) level.
    HighlyRestrictive,
    /// The [Moderately Restrictive](https://www.unicode.org/reports/tr39/#moderately_restrictive) level.
    ModeratelyRestrictive,
    /// The [Minimally Restrictive](https://www.unicode.org/reports/tr39/#minimally_restrictive) level.
    MinimallyRestrictive,
    /// The [Unrestricted](https://www.unicode.org/reports/tr39/#unrestricted) level.
    Unrestricted,
}
/// Determines which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a value satisfies.
pub trait RestrictionLevelDetection: Sized {
    /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
    /// of `self`.
    ///
    /// Identifier well-formedness is _not_ checked here, because different
    /// applications may have different notions of well-formedness.
    fn detect_restriction_level(self) -> RestrictionLevel;

    /// Check whether `self` satisfies the supplied
    /// [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection).
    ///
    /// Returns `true` when the level reported by
    /// [`detect_restriction_level`](Self::detect_restriction_level) compares
    /// less than or equal to `level`.
    ///
    /// Identifier well-formedness is _not_ checked here, because different
    /// applications may have different notions of well-formedness.
    fn check_restriction_level(self, level: RestrictionLevel) -> bool {
        let detected = self.detect_restriction_level();
        level >= detected
    }
}
impl RestrictionLevelDetection for &'_ str {
    /// Detect the restriction level of this string.
    ///
    /// The string is scanned once, character by character:
    ///
    /// * any character rejected by `GeneralSecurityProfile::identifier_allowed`
    ///   makes the whole string `Unrestricted` immediately;
    /// * otherwise, if every character is ASCII (including the empty string),
    ///   the result is `ASCIIOnly`;
    /// * otherwise, if the intersection of all characters' script sets is
    ///   non-empty, the result is `SingleScript`;
    /// * otherwise the intersection computed over the characters whose script
    ///   set does not contain Latin decides between `HighlyRestrictive`,
    ///   `ModeratelyRestrictive` and `MinimallyRestrictive`.
    ///
    /// Identifier well-formedness is _not_ checked.
    fn detect_restriction_level(self) -> RestrictionLevel {
        let mut ascii_only = true;
        // Intersection of the script sets of every character seen so far.
        let mut set = AugmentedScriptSet::default();
        // Same intersection, restricted to characters whose set lacks Latin.
        let mut exclude_latin_set = AugmentedScriptSet::default();

        for ch in self.chars() {
            if !GeneralSecurityProfile::identifier_allowed(ch) {
                return RestrictionLevel::Unrestricted;
            }
            // A single non-ASCII character disqualifies the ASCII-only level.
            if !ch.is_ascii() {
                ascii_only = false;
            }
            let ch_set = ch.into();
            set.intersect_with(ch_set);
            if !ch_set.base.contains_script(Script::Latin) {
                exclude_latin_set.intersect_with(ch_set);
            }
        }

        if ascii_only {
            return RestrictionLevel::ASCIIOnly;
        }
        if !set.is_empty() {
            return RestrictionLevel::SingleScript;
        }
        if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
            return RestrictionLevel::HighlyRestrictive;
        }
        if exclude_latin_set.base.len() == 1 {
            // Exactly one script remains once Latin is set aside; it qualifies
            // only if it is recommended and is neither Cyrillic nor Greek.
            if let Some(script) = exclude_latin_set.base.iter().next() {
                if script.is_recommended()
                    && script != Script::Cyrillic
                    && script != Script::Greek
                {
                    return RestrictionLevel::ModeratelyRestrictive;
                }
            }
        }
        RestrictionLevel::MinimallyRestrictive
    }
}
|