summaryrefslogtreecommitdiffstats
path: root/intl/icu_capi/src/segmenter_grapheme.rs
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu_capi/src/segmenter_grapheme.rs')
-rw-r--r--intl/icu_capi/src/segmenter_grapheme.rs154
1 files changed, 154 insertions, 0 deletions
diff --git a/intl/icu_capi/src/segmenter_grapheme.rs b/intl/icu_capi/src/segmenter_grapheme.rs
new file mode 100644
index 0000000000..5d3f65fc05
--- /dev/null
+++ b/intl/icu_capi/src/segmenter_grapheme.rs
@@ -0,0 +1,154 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#[diplomat::bridge]
+pub mod ffi {
+ use crate::errors::ffi::ICU4XError;
+ use crate::provider::ffi::ICU4XDataProvider;
+ use alloc::boxed::Box;
+ use core::convert::TryFrom;
+ use icu_segmenter::{
+ GraphemeClusterBreakIteratorLatin1, GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8,
+ GraphemeClusterBreakIteratorUtf16, GraphemeClusterSegmenter,
+ };
+
+ #[diplomat::opaque]
+ /// An ICU4X grapheme-cluster-break segmenter, capable of finding grapheme cluster breakpoints
+ /// in strings.
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter, Struct)]
+ pub struct ICU4XGraphemeClusterSegmenter(GraphemeClusterSegmenter);
+
+ #[diplomat::opaque]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
+ #[diplomat::rust_link(
+ icu::segmenter::GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8,
+ Typedef,
+ hidden
+ )]
+ pub struct ICU4XGraphemeClusterBreakIteratorUtf8<'a>(
+ GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>,
+ );
+
+ #[diplomat::opaque]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorUtf16, Typedef, hidden)]
+ pub struct ICU4XGraphemeClusterBreakIteratorUtf16<'a>(
+ GraphemeClusterBreakIteratorUtf16<'a, 'a>,
+ );
+
+ #[diplomat::opaque]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorLatin1, Typedef, hidden)]
+ pub struct ICU4XGraphemeClusterBreakIteratorLatin1<'a>(
+ GraphemeClusterBreakIteratorLatin1<'a, 'a>,
+ );
+
+ impl ICU4XGraphemeClusterSegmenter {
+ /// Construct an [`ICU4XGraphemeClusterSegmenter`].
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)]
+ pub fn create(
+ provider: &ICU4XDataProvider,
+ ) -> Result<Box<ICU4XGraphemeClusterSegmenter>, ICU4XError> {
+ Ok(Box::new(ICU4XGraphemeClusterSegmenter(call_constructor!(
+ GraphemeClusterSegmenter::new [r => Ok(r)],
+ GraphemeClusterSegmenter::try_new_with_any_provider,
+ GraphemeClusterSegmenter::try_new_with_buffer_provider,
+ provider,
+ )?)))
+ }
+
+ /// Segments a (potentially ill-formed) UTF-8 string.
+ #[diplomat::rust_link(
+ icu::segmenter::GraphemeClusterSegmenter::segment_str,
+ FnInStruct,
+ hidden
+ )]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf8, FnInStruct)]
+ pub fn segment_utf8<'a>(
+ &'a self,
+ input: &'a str,
+ ) -> Box<ICU4XGraphemeClusterBreakIteratorUtf8<'a>> {
+ let input = input.as_bytes(); // #2520
+ Box::new(ICU4XGraphemeClusterBreakIteratorUtf8(
+ self.0.segment_utf8(input),
+ ))
+ }
+
+ /// Segments a UTF-16 string.
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf16, FnInStruct)]
+ pub fn segment_utf16<'a>(
+ &'a self,
+ input: &'a [u16],
+ ) -> Box<ICU4XGraphemeClusterBreakIteratorUtf16<'a>> {
+ Box::new(ICU4XGraphemeClusterBreakIteratorUtf16(
+ self.0.segment_utf16(input),
+ ))
+ }
+
+ /// Segments a Latin-1 string.
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_latin1, FnInStruct)]
+ pub fn segment_latin1<'a>(
+ &'a self,
+ input: &'a [u8],
+ ) -> Box<ICU4XGraphemeClusterBreakIteratorLatin1<'a>> {
+ Box::new(ICU4XGraphemeClusterBreakIteratorLatin1(
+ self.0.segment_latin1(input),
+ ))
+ }
+ }
+
+ impl<'a> ICU4XGraphemeClusterBreakIteratorUtf8<'a> {
+ /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
+ /// out of range of a 32-bit signed integer.
+ #[allow(clippy::should_implement_trait)]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
+ #[diplomat::rust_link(
+ icu::segmenter::GraphemeClusterBreakIterator::Item,
+ AssociatedTypeInStruct,
+ hidden
+ )]
+ pub fn next(&mut self) -> i32 {
+ self.0
+ .next()
+ .and_then(|u| i32::try_from(u).ok())
+ .unwrap_or(-1)
+ }
+ }
+
+ impl<'a> ICU4XGraphemeClusterBreakIteratorUtf16<'a> {
+ /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
+ /// out of range of a 32-bit signed integer.
+ #[allow(clippy::should_implement_trait)]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
+ #[diplomat::rust_link(
+ icu::segmenter::GraphemeClusterBreakIterator::Item,
+ AssociatedTypeInStruct,
+ hidden
+ )]
+ pub fn next(&mut self) -> i32 {
+ self.0
+ .next()
+ .and_then(|u| i32::try_from(u).ok())
+ .unwrap_or(-1)
+ }
+ }
+
+ impl<'a> ICU4XGraphemeClusterBreakIteratorLatin1<'a> {
+ /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
+ /// out of range of a 32-bit signed integer.
+ #[allow(clippy::should_implement_trait)]
+ #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
+ #[diplomat::rust_link(
+ icu::segmenter::GraphemeClusterBreakIterator::Item,
+ AssociatedTypeInStruct,
+ hidden
+ )]
+ pub fn next(&mut self) -> i32 {
+ self.0
+ .next()
+ .and_then(|u| i32::try_from(u).ok())
+ .unwrap_or(-1)
+ }
+ }
+}