diff options
Diffstat (limited to 'intl/lwbrk/gtest/TestSegmenter.cpp')
-rw-r--r-- | intl/lwbrk/gtest/TestSegmenter.cpp | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/intl/lwbrk/gtest/TestSegmenter.cpp b/intl/lwbrk/gtest/TestSegmenter.cpp new file mode 100644 index 0000000000..42d04b8e03 --- /dev/null +++ b/intl/lwbrk/gtest/TestSegmenter.cpp @@ -0,0 +1,209 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "mozilla/intl/Segmenter.h" +#include "mozilla/Preferences.h" + +namespace mozilla::intl { + +TEST(IntlSegmenter, TestLineBreakIteratorUtf16SeekOld) +{ + nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", false); + EXPECT_TRUE(rv == NS_OK); + + const SegmenterOptions options{SegmenterGranularity::Line}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto lineSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + lineSegmenter->Segment(MakeStringSpan(text)); + + // Seek to space between "hello" and "world". + ASSERT_EQ(segIter->Seek(5u), Some(11u)); + + ASSERT_EQ(segIter->Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter->Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestLineBreakIteratorUtf16Seek) +{ + nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); + EXPECT_TRUE(rv == NS_OK); + + const SegmenterOptions options{SegmenterGranularity::Line}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto lineSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + lineSegmenter->Segment(MakeStringSpan(text)); + + // Seek to space between "hello" and "world". + // UAX#14 rule returns before "w". + ASSERT_EQ(segIter->Seek(5u), Some(6u)); + + ASSERT_EQ(segIter->Next(), Some(11u)); + + ASSERT_EQ(segIter->Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter->Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestWordBreakIteratorUtf16Simple) +{ + const SegmenterOptions options{SegmenterGranularity::Word}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto wordSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + wordSegmenter->Segment(MakeStringSpan(text)); + + ASSERT_EQ(segIter->Next(), Some(5u)); + ASSERT_EQ(segIter->Next(), Some(6u)); + ASSERT_EQ(segIter->Next(), Some(11u)); + ASSERT_EQ(segIter->Next(), Nothing()); +} + +TEST(IntlSegmenter, TestWordBreakIteratorUtf16Seek) +{ + const SegmenterOptions options{SegmenterGranularity::Word}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto wordSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + wordSegmenter->Segment(MakeStringSpan(text)); + + // Seek to the space between "hello" and "world" + ASSERT_EQ(segIter->Seek(5u), Some(6u)); + + ASSERT_EQ(segIter->Next(), Some(11u)); + ASSERT_EQ(segIter->Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter->Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestGraphemeClusterBreakIteratorUtf16Simple) +{ + SegmenterOptions options{SegmenterGranularity::Grapheme}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto graphemeClusterSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + graphemeClusterSegmenter->Segment(MakeStringSpan(text)); + + ASSERT_EQ(segIter->Next(), Some(1u)); + ASSERT_EQ(segIter->Next(), Some(2u)); + ASSERT_EQ(segIter->Next(), Some(3u)); + ASSERT_EQ(segIter->Next(), Some(4u)); + ASSERT_EQ(segIter->Next(), Some(5u)); + ASSERT_EQ(segIter->Next(), Some(6u)); + ASSERT_EQ(segIter->Next(), Some(7u)); + ASSERT_EQ(segIter->Next(), Some(8u)); + ASSERT_EQ(segIter->Next(), Some(9u)); + ASSERT_EQ(segIter->Next(), Some(10u)); + ASSERT_EQ(segIter->Next(), Some(11u)); + ASSERT_EQ(segIter->Next(), Nothing()); +} + +TEST(IntlSegmenter, TestGraphemeClusterBreakIteratorUtf16Seek) +{ + SegmenterOptions options{SegmenterGranularity::Grapheme}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto graphemeClusterSegmenter = result.unwrap(); + + const char16_t text[] = u"hello world"; + UniquePtr<SegmentIteratorUtf16> segIter = + graphemeClusterSegmenter->Segment(MakeStringSpan(text)); + + // Seek to the space between "hello" and "world" + ASSERT_EQ(segIter->Seek(5u), Some(6u)); + + ASSERT_EQ(segIter->Next(), Some(7u)); + ASSERT_EQ(segIter->Next(), Some(8u)); + ASSERT_EQ(segIter->Next(), Some(9u)); + ASSERT_EQ(segIter->Next(), Some(10u)); + ASSERT_EQ(segIter->Next(), Some(11u)); + ASSERT_EQ(segIter->Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter->Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestGraphemeClusterBreakReverseIteratorUtf16) +{ + const char16_t text[] = u"hello world"; + GraphemeClusterBreakReverseIteratorUtf16 segIter(MakeStringSpan(text)); + + // Seek to the space between "hello" and "world" + ASSERT_EQ(segIter.Seek(6u), Some(5u)); + + ASSERT_EQ(segIter.Next(), Some(4u)); + ASSERT_EQ(segIter.Next(), Some(3u)); + ASSERT_EQ(segIter.Next(), Some(2u)); + ASSERT_EQ(segIter.Next(), Some(1u)); + ASSERT_EQ(segIter.Next(), Some(0u)); + ASSERT_EQ(segIter.Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter.Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestSentenceBreakIteratorUtf16) +{ + nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); + EXPECT_TRUE(rv == NS_OK); + + SegmenterOptions options{SegmenterGranularity::Sentence}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto sentenceSegmenter = result.unwrap(); + + const char16_t text[] = u"Hello world. Hello world."; + UniquePtr<SegmentIteratorUtf16> segIter = + sentenceSegmenter->Segment(MakeStringSpan(text)); + + ASSERT_EQ(segIter->Next(), Some(13u)); + ASSERT_EQ(segIter->Next(), Some(25u)); + ASSERT_EQ(segIter->Next(), Nothing()); + + // Same as calling Next(). + ASSERT_EQ(segIter->Seek(0u), Nothing()); +} + +TEST(IntlSegmenter, TestSentenceBreakIteratorUtf16Seek) +{ + nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); + EXPECT_TRUE(rv == NS_OK); + + SegmenterOptions options{SegmenterGranularity::Sentence}; + auto result = Segmenter::TryCreate("en", options); + ASSERT_TRUE(result.isOk()); + auto sentenceSegmenter = result.unwrap(); + + const char16_t text[] = u"Hello world. Hello world."; + UniquePtr<SegmentIteratorUtf16> segIter = + sentenceSegmenter->Segment(MakeStringSpan(text)); + + ASSERT_EQ(segIter->Seek(5u), Some(13u)); +} + +} // namespace mozilla::intl |