diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/tests/non262/Intl/Segmenter/sentence-latin.js | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/tests/non262/Intl/Segmenter/sentence-latin.js')
-rw-r--r-- | js/src/tests/non262/Intl/Segmenter/sentence-latin.js | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/js/src/tests/non262/Intl/Segmenter/sentence-latin.js b/js/src/tests/non262/Intl/Segmenter/sentence-latin.js new file mode 100644 index 0000000000..cebb029a40 --- /dev/null +++ b/js/src/tests/non262/Intl/Segmenter/sentence-latin.js @@ -0,0 +1,96 @@ +// |reftest| skip-if(!this.hasOwnProperty('Intl')||!this.Intl.Segmenter) + +// https://www.unicode.org/reports/tr29/#Sentence_Boundary_Rules + +const strings = { + // SB1, SB2 + "": [], + + // SB3 + "\r\n": ["\r\n"], + + // SB4 + "First paragraph.\nSecond paragraph.": ["First paragraph.\n", "Second paragraph."], + "First paragraph.\rSecond paragraph.": ["First paragraph.\r", "Second paragraph."], + "First paragraph.\r\nSecond paragraph.": ["First paragraph.\r\n", "Second paragraph."], + "First paragraph.\x85Second paragraph.": ["First paragraph.\x85", "Second paragraph."], + + // SB5 + "\xADWo\xADrd\xAD.\xAD": ["\xADWo\xADrd\xAD.\xAD"], + "Word.\n\xAD": ["Word.\n", "\xAD"], + "Word.\r\xAD\n": ["Word.\r", "\xAD\n"], + + // SB6 + ".2": [".2"], + "1.2": ["1.2"], + "!2": ["!", "2"], + "1!2": ["1!", "2"], + + // SB7 + "A.B": ["A.B"], + "a.B": ["a.B"], + "A. B": ["A. ", "B"], + "a. B": ["a. ", "B"], + + // SB8 + "#.a": ["#.a"], + "#. a": ["#. a"], + "#. # a": ["#. # a"], + "#. 1 a": ["#. 1 a"], + "#. , a": ["#. , a"], + "#. Aa": ["#. ", "Aa"], + + // SB8a + "Word..": ["Word.."], + "Word . , ": ["Word . , "], + "Word.'\t , ": ["Word.'\t , "], + + // SB9, SB10, SB11 + "Word.''": ["Word.''"], + "Word.'\t ": ["Word.'\t "], + "Word.'\t \n": ["Word.'\t \n"], +}; + +function assertSegments(string, sentences) { + let seg = segmenter.segment(string); + let segments = [...seg]; + + // The computed segments match the expected value. + assertEqArray(segments.map(({segment}) => segment), sentences); + + // |containing()| should return the same result. + for (let expected of segments) { + let {segment, index} = expected; + for (let i = index; i < index + segment.length; ++i) { + let actual = seg.containing(i); + assertDeepEq(actual, expected); + } + } +} + +let segmenter = new Intl.Segmenter("en", {granularity: "sentence"}); + +for (let [string, words] of Object.entries(strings)) { + assertSegments(string, words); +} + +// Locale-dependent sentence segmentation. +{ + // https://en.wikipedia.org/wiki/Greek_question_mark#Greek_question_mark + let string = "A sentence; semicolon separated."; + + let english = new Intl.Segmenter("en", {granularity: "sentence"}); + let greek = new Intl.Segmenter("el", {granularity: "sentence"}); + + // A single sentence in English. + assertEq([...english.segment(string)].length, 1); + + // ICU4C: Two sentences in Greek. + // assertEq([...greek.segment(string)].length, 2); + + // ICU4X: A single sentence in Greek. + assertEq([...greek.segment(string)].length, 1); +} + +if (typeof reportCompare === "function") + reportCompare(0, 0); |