summaryrefslogtreecommitdiffstats
path: root/js/src/tests/non262/Intl/Segmenter/grapheme.js
blob: c51de7f8d0670ff6693d3b186d0971dcfda83aff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// |reftest| skip-if(!this.hasOwnProperty('Intl')||!this.Intl.Segmenter)

// Grapheme segmentation does not depend on the locale, so each locale listed
// here is run against the same expected results.
const locales = [
  "en",
  "de",
  "fr",
  "ar",
  "ja",
  "zh",
  "th",
];

// Maps each input string to the list of grapheme segments expected for it.
let strings = (() => {
  // Expected result: every UTF-16 code unit of |input| is its own segment.
  const perCodeUnit = (input) => [input, input.split("")];

  // Expected result: the whole |input| forms exactly one segment.
  const singleCluster = (input) => [input, [input]];

  return Object.fromEntries([
    // Empty string
    ["", []],

    // Ascii
    perCodeUnit("test"),
    perCodeUnit("hello world"),
    perCodeUnit("hello\0world"),
    singleCluster("\r\n"),

    // Latin-1
    perCodeUnit("äöü éèê µß \xff"),

    // Two-Byte
    perCodeUnit("中文字"),

    // Grapheme Clusters: https://www.unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
    singleCluster("e\u0300"),
    singleCluster("\u1100\u1161\u11A8"), // Hangul syllable "gag"
    singleCluster("\u0E01\u0E33"), // Thai kam
    singleCluster("\u0937\u093F"), // Devanagari ssi

    // Emojis
    singleCluster("\u263A\uFE0F"), // Variant selector
    singleCluster("\u{1F385}\u{1F3FB}"), // Skin tone selector
    singleCluster("\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}"), // ZWJ
    singleCluster("\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}\u{FE0F}"), // ZWJ + VS
    singleCluster("\u{1F926}\u{1F3FC}\u{200D}\u{2642}\u{FE0F}"), // ZWJ + VS with BMP modifier
    singleCluster("\u{1F1E9}\u{1F1EA}"), // Flags
    singleCluster("\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}"), // Subdivision flags
  ]);
})();

// Asserts that |obj| looks like a Segment Data object: a plain object whose
// own keys are "segment", "index" and "input" (in that order), where |segment|
// is the non-empty substring of |input| located at |index|.
function assertIsSegmentDataObject(obj) {
  // Segment Data objects inherit directly from %Object.prototype%.
  assertEq(Object.getPrototypeOf(obj), Object.prototype);

  // There are exactly three own property keys, in this order.
  const expectedKeys = ["segment", "index", "input"];
  const ownKeys = Reflect.ownKeys(obj);
  assertEq(ownKeys.length, expectedKeys.length);
  expectedKeys.forEach((key, position) => assertEq(ownKeys[position], key));

  // Every property holds a value of the expected type.
  const expectedTypes = [
    ["segment", "string"],
    ["index", "number"],
    ["input", "string"],
  ];
  for (const [key, type] of expectedTypes) {
    assertEq(typeof obj[key], type);
  }

  const {segment, index, input} = obj;

  // |index| is a non-negative integer index into |input|.
  assertEq(Number.isInteger(index), true);
  assertEq(index >= 0, true);
  assertEq(index < input.length, true);

  // A segment is never empty.
  assertEq(segment.length > 0, true);

  // |input| contains |segment| starting exactly at |index|.
  assertEq(input.slice(index, index + segment.length), segment);
}

// Collects the segments of |string| by repeatedly calling
// %Segments.prototype%.containing, starting at index 0 and advancing by the
// length of each returned segment until a falsy value comes back.
function segmentsFromContaining(segmenter, string) {
  const segments = segmenter.segment(string);
  const collected = [];

  let position = 0;
  let data = segments.containing(position);
  while (data) {
    collected.push(data);
    position += data.segment.length;
    data = segments.containing(position);
  }

  return collected;
}

// For every locale, segment each sample string with grapheme granularity and
// check the resulting Segment Data objects against the expected graphemes.
for (const locale of locales) {
  const segmenter = new Intl.Segmenter(locale, {granularity: "grapheme"});

  // The resolved options echo the requested locale and granularity.
  const {locale: resolvedLocale, granularity} = segmenter.resolvedOptions();
  assertEq(resolvedLocale, locale);
  assertEq(granularity, "grapheme");

  for (const [string, graphemes] of Object.entries(strings)) {
    const segments = Array.from(segmenter.segment(string));

    // Every produced element must be a valid Segment Data object.
    for (const data of segments) {
      assertIsSegmentDataObject(data);
    }

    // Joining the segment texts must reproduce the input.
    const segmentTexts = segments.map(({segment}) => segment);
    assertEq(segmentTexts.join(""), string);

    // Each "input" property is the original input string.
    const hasOriginalInput = ({input}) => input === string;
    assertEq(segments.every(hasOriginalInput), true);

    // The indices are strictly ascending.
    let outOfOrder = false;
    let previousIndex = -Infinity;
    for (const {index} of segments) {
      if (!(index > previousIndex)) {
        outOfOrder = true;
        break;
      }
      previousIndex = index;
    }
    assertEq(outOfOrder, false);

    // The segment texts equal the expected graphemes.
    assertEqArray(segmentTexts, graphemes);

    // Iterating the segments and walking them via
    // %Segments.prototype%.containing must agree.
    assertDeepEq(segmentsFromContaining(segmenter, string), segments);
  }
}

// |reportCompare| is not defined in this file (presumably supplied by the test
// harness); only call it when it exists as a function.
if (typeof reportCompare === "function") {
  reportCompare(0, 0);
}