// path: root/intl/lwbrk/gtest/TestSegmenter.cpp
// blob: 21c44a078f92babe1b18d627d2ab8a68aaff96e5 (plain)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "gtest/gtest.h"

#include "mozilla/intl/Segmenter.h"

namespace mozilla::intl {

// Line granularity over u"hello world": seeking from the space is expected to
// report the boundary at 11, after which the iterator has nothing left.
TEST(IntlSegmenter, TestLineBreakIteratorUtf16)
{
  const SegmenterOptions lineOptions{SegmenterGranularity::Line};
  auto created = Segmenter::TryCreate("en", lineOptions);
  ASSERT_TRUE(created.isOk());
  auto segmenter = created.unwrap();

  const char16_t kInput[] = u"hello world";
  UniquePtr<SegmentIteratorUtf16> iter(
      segmenter->Segment(MakeStringSpan(kInput)));

  // Offset 5 is the space separating "hello" from "world".
  ASSERT_EQ(iter->Seek(5u), Some(11u));

  // Nothing further is reported after the boundary at 11.
  ASSERT_EQ(iter->Next(), Nothing());

  // Seek() on the exhausted iterator is expected to act like Next().
  ASSERT_EQ(iter->Seek(0u), Nothing());
}

// Word granularity over u"hello world": seeking from the space is expected to
// report boundary 6, then 11, and then nothing more.
TEST(IntlSegmenter, TestWordBreakIteratorUtf16)
{
  const SegmenterOptions wordOptions{SegmenterGranularity::Word};
  auto created = Segmenter::TryCreate("en", wordOptions);
  ASSERT_TRUE(created.isOk());
  auto segmenter = created.unwrap();

  const char16_t kInput[] = u"hello world";
  UniquePtr<SegmentIteratorUtf16> iter(
      segmenter->Segment(MakeStringSpan(kInput)));

  // Offset 5 is the space separating "hello" from "world".
  ASSERT_EQ(iter->Seek(5u), Some(6u));

  // One more boundary at offset 11, then the iterator is exhausted.
  ASSERT_EQ(iter->Next(), Some(11u));
  ASSERT_EQ(iter->Next(), Nothing());

  // Seek() on the exhausted iterator is expected to act like Next().
  ASSERT_EQ(iter->Seek(0u), Nothing());
}

// Grapheme granularity over u"hello world": after seeking from the space, the
// iterator is expected to report every following offset (6 through 11) one at
// a time and then run out.
TEST(IntlSegmenter, TestGraphemeClusterBreakIteratorUtf16)
{
  // The options are never modified, so they are declared const like in the
  // Line and Word tests.
  const SegmenterOptions options{SegmenterGranularity::Grapheme};
  auto result = Segmenter::TryCreate("en", options);
  ASSERT_TRUE(result.isOk());
  auto graphemeClusterSegmenter = result.unwrap();

  const char16_t text[] = u"hello world";
  UniquePtr<SegmentIteratorUtf16> segIter =
      graphemeClusterSegmenter->Segment(MakeStringSpan(text));

  // Seek to the space between "hello" and "world" (offset 5); the next
  // boundary is expected right after it.
  ASSERT_EQ(segIter->Seek(5u), Some(6u));

  // Each remaining character of "world" is expected to yield its own boundary.
  ASSERT_EQ(segIter->Next(), Some(7u));
  ASSERT_EQ(segIter->Next(), Some(8u));
  ASSERT_EQ(segIter->Next(), Some(9u));
  ASSERT_EQ(segIter->Next(), Some(10u));
  ASSERT_EQ(segIter->Next(), Some(11u));
  ASSERT_EQ(segIter->Next(), Nothing());

  // Seek() on the exhausted iterator is expected to act like Next().
  ASSERT_EQ(segIter->Seek(0u), Nothing());
}

// Reverse grapheme iteration over u"hello world": starting from offset 6, the
// iterator is expected to walk back through offsets 5, 4, 3, 2, 1, 0 and then
// report nothing.
TEST(IntlSegmenter, TestGraphemeClusterBreakReverseIteratorUtf16)
{
  const char16_t kInput[] = u"hello world";
  GraphemeClusterBreakReverseIteratorUtf16 iter(MakeStringSpan(kInput));

  // Seeking from offset 6 should step back to offset 5, where the space
  // between "hello" and "world" sits.
  ASSERT_EQ(iter.Seek(6u), Some(5u));

  // Every further step is expected to move back by one: 4, 3, 2, 1, 0.
  for (unsigned int remaining = 5u; remaining > 0u; --remaining) {
    ASSERT_EQ(iter.Next(), Some(remaining - 1u));
  }
  ASSERT_EQ(iter.Next(), Nothing());

  // Seek() on the exhausted iterator is expected to act like Next().
  ASSERT_EQ(iter.Seek(0u), Nothing());
}

// Sentence granularity: creating the segmenter is expected to fail.
// NOTE(review): presumably because sentence segmentation is not supported by
// this implementation yet -- confirm against Segmenter::TryCreate.
TEST(IntlSegmenter, TestSentenceBreakIteratorUtf16)
{
  // The options are never modified, so they are declared const like in the
  // Line and Word tests.
  const SegmenterOptions options{SegmenterGranularity::Sentence};
  auto result = Segmenter::TryCreate("en", options);
  ASSERT_TRUE(result.isErr());
}

}  // namespace mozilla::intl