summaryrefslogtreecommitdiffstats
path: root/intl/lwbrk/gtest/TestSegmenterPerf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/lwbrk/gtest/TestSegmenterPerf.cpp')
-rw-r--r--intl/lwbrk/gtest/TestSegmenterPerf.cpp276
1 files changed, 276 insertions, 0 deletions
diff --git a/intl/lwbrk/gtest/TestSegmenterPerf.cpp b/intl/lwbrk/gtest/TestSegmenterPerf.cpp
new file mode 100644
index 0000000000..772e284fa8
--- /dev/null
+++ b/intl/lwbrk/gtest/TestSegmenterPerf.cpp
@@ -0,0 +1,276 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <fstream>
+#include <sstream>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH
+#include "mozilla/intl/LineBreaker.h"
+#include "mozilla/intl/Segmenter.h"
+#include "mozilla/Preferences.h"
+#include "nsAtom.h"
+#include "nsLineBreaker.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+namespace mozilla::intl {
+
+using mozilla::intl::LineBreakRule;
+using mozilla::intl::WordBreakRule;
+
+// Default repetition count for the benchmark helpers in this file; it is the
+// default value of their aCount parameter.
+constexpr size_t kIterations = 100;
+
+/**
+ * Reads the whole file at aPath and returns its contents as a std::string.
+ *
+ * If the file cannot be opened, a non-fatal gtest failure is recorded and an
+ * empty string is returned. Without that check a missing input file would go
+ * unnoticed and the benchmarks would time segmentation of empty text.
+ *
+ * @param aPath Path of the file to read, resolved by std::ifstream.
+ * @return The file contents, or an empty string if nothing could be read.
+ */
+static std::string ReadFileIntoString(const char* aPath) {
+  std::ifstream file(aPath);
+  EXPECT_TRUE(file.is_open()) << "Unable to open benchmark input: " << aPath;
+
+  // Only output is needed here, so an ostringstream is sufficient.
+  std::ostringstream contents;
+  contents << file.rdbuf();
+  return contents.str();
+}
+
+/**
+ * Fixture shared by the segmenter benchmarks in this file.
+ *
+ * SetUp() reads one sample text file per language tag, keeps the text both as
+ * UTF-8 (std::string) and as UTF-16 (nsString), and creates an atom for the
+ * corresponding language tag.
+ */
+class SegmenterPerf : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // The test files live in xpcom/tests/gtest/wikipedia.
+    // For each language tag: read the file, convert it to UTF-16, then
+    // atomize the tag.
+    mArUtf8 = ReadFileIntoString("ar.txt");
+    CopyUTF8toUTF16(mArUtf8, mArUtf16);
+    mAr = NS_Atomize(u"ar");
+
+    mDeUtf8 = ReadFileIntoString("de.txt");
+    CopyUTF8toUTF16(mDeUtf8, mDeUtf16);
+    mDe = NS_Atomize(u"de");
+
+    mJaUtf8 = ReadFileIntoString("ja.txt");
+    CopyUTF8toUTF16(mJaUtf8, mJaUtf16);
+    mJa = NS_Atomize(u"ja");
+
+    mRuUtf8 = ReadFileIntoString("ru.txt");
+    CopyUTF8toUTF16(mRuUtf8, mRuUtf16);
+    mRu = NS_Atomize(u"ru");
+
+    mThUtf8 = ReadFileIntoString("th.txt");
+    CopyUTF8toUTF16(mThUtf8, mThUtf16);
+    mTh = NS_Atomize(u"th");
+
+    mTrUtf8 = ReadFileIntoString("tr.txt");
+    CopyUTF8toUTF16(mTrUtf8, mTrUtf16);
+    mTr = NS_Atomize(u"tr");
+
+    mViUtf8 = ReadFileIntoString("vi.txt");
+    CopyUTF8toUTF16(mViUtf8, mViUtf16);
+    mVi = NS_Atomize(u"vi");
+  }
+
+ public:
+  // Raw file contents, as read from disk.
+  std::string mArUtf8;
+  std::string mDeUtf8;
+  std::string mJaUtf8;
+  std::string mRuUtf8;
+  std::string mThUtf8;
+  std::string mTrUtf8;
+  std::string mViUtf8;
+
+  // UTF-16 conversions of the strings above; these are what the benchmarks
+  // pass to the segmenters.
+  nsString mArUtf16;
+  nsString mDeUtf16;
+  nsString mJaUtf16;
+  nsString mRuUtf16;
+  nsString mThUtf16;
+  nsString mTrUtf16;
+  nsString mViUtf16;
+
+  // Atoms for the language tags, passed to the nsLineBreaker benchmarks.
+  RefPtr<nsAtom> mAr;
+  RefPtr<nsAtom> mDe;
+  RefPtr<nsAtom> mJa;
+  RefPtr<nsAtom> mRu;
+  RefPtr<nsAtom> mTh;
+  RefPtr<nsAtom> mTr;
+  RefPtr<nsAtom> mVi;
+};
+
+/**
+ * Scoped override of the "intl.icu4x.segmenter.enabled" preference.
+ *
+ * The constructor sets the pref to aValue and records a non-fatal gtest
+ * failure if that call does not return NS_OK. The destructor clears the user
+ * value of the pref again.
+ *
+ * Copying is disabled: the destructor of a copy would clear the pref while
+ * the original guard is still in scope.
+ */
+class AutoSetSegmenter final {
+ public:
+  explicit AutoSetSegmenter(bool aValue) {
+    nsresult rv = mozilla::Preferences::SetBool(kPrefName, aValue);
+    EXPECT_TRUE(rv == NS_OK);
+  }
+
+  AutoSetSegmenter(const AutoSetSegmenter&) = delete;
+  AutoSetSegmenter& operator=(const AutoSetSegmenter&) = delete;
+
+  ~AutoSetSegmenter() { mozilla::Preferences::ClearUser(kPrefName); }
+
+ private:
+  // Single definition of the pref name so that set and clear cannot diverge.
+  static constexpr char kPrefName[] = "intl.icu4x.segmenter.enabled";
+};
+
+/**
+ * Calls LineBreaker::ComputeBreakPositions on aStr aCount times, using
+ * WordBreakRule::Normal and LineBreakRule::Strict.
+ *
+ * @param aStr      UTF-16 text to segment.
+ * @param aIsJaOrZh Forwarded unchanged to ComputeBreakPositions.
+ * @param aCount    Number of repetitions; defaults to kIterations.
+ *
+ * The computed break positions go into a scratch array that is overwritten on
+ * every repetition and never inspected.
+ */
+static void TestSegmenterBench(const nsString& aStr, bool aIsJaOrZh,
+                               size_t aCount = kIterations) {
+  // Scratch output buffer with aStr.Length() entries.
+  nsTArray<uint8_t> scratch;
+  scratch.SetLength(aStr.Length());
+
+  for (size_t remaining = aCount; remaining > 0; --remaining) {
+    LineBreaker::ComputeBreakPositions(aStr.get(), aStr.Length(),
+                                       WordBreakRule::Normal,
+                                       LineBreakRule::Strict, aIsJaOrZh,
+                                       scratch.Elements());
+  }
+}
+
+// LineBreaker benchmarks with "intl.icu4x.segmenter.enabled" set to false for
+// the duration of each run. Only the ja text is passed with aIsJaOrZh = true.
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAROld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mArUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDEOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mDeUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJAOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mJaUtf16, true);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRUOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mRuUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTHOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mThUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTROld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mTrUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVIOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestSegmenterBench(mViUtf16, false);
+});
+
+// Counterpart of PerfLineBreakAROld with "intl.icu4x.segmenter.enabled" set to
+// true. The pref must be enabled here; with it disabled this benchmark would
+// measure exactly the same configuration as the "Old" variant.
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAR, [this] {
+  AutoSetSegmenter set(true);
+  TestSegmenterBench(mArUtf16, false);
+});
+
+// LineBreaker benchmarks with "intl.icu4x.segmenter.enabled" set to true for
+// the duration of each run. Only the ja text is passed with aIsJaOrZh = true.
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDE, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestSegmenterBench(mDeUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJA, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestSegmenterBench(mJaUtf16, true);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRU, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestSegmenterBench(mRuUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTH, [this] {
+  AutoSetSegmenter prefOn(true);
+  // LSTM segmenter is too slow, so run only 3 repetitions instead of the
+  // default kIterations.
+  TestSegmenterBench(mThUtf16, false, 3);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTR, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestSegmenterBench(mTrUtf16, false);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVI, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestSegmenterBench(mViUtf16, false);
+});
+
+/**
+ * nsILineBreakSink implementation whose callbacks are empty, so nsLineBreaker
+ * can be driven without its results being stored anywhere.
+ */
+class LBSink final : public nsILineBreakSink {
+ public:
+  LBSink() = default;
+  ~LBSink() = default;
+
+  // Both callbacks intentionally ignore every argument.
+  void SetBreaks(uint32_t, uint32_t, uint8_t*) override {}
+  void SetCapitalization(uint32_t, uint32_t, bool*) override {}
+};
+
+/**
+ * Feeds aStr through a freshly constructed nsLineBreaker aCount times.
+ *
+ * @param aStr   UTF-16 text handed to nsLineBreaker::AppendText.
+ * @param aLang  Language atom passed as the first argument of AppendText.
+ * @param aCount Number of repetitions; defaults to kIterations.
+ *
+ * Results are delivered to an LBSink, whose callbacks are empty, and the
+ * return value of AppendText is not checked.
+ */
+static void TestDOMSegmenterBench(const nsString& aStr, nsAtom* aLang,
+                                  size_t aCount = kIterations) {
+  LBSink discardingSink;
+  bool hadTrailingBreak;  // Out-param of Reset(); never read afterwards.
+
+  for (size_t remaining = aCount; remaining > 0; --remaining) {
+    // A new breaker per repetition, so construction is part of the loop body.
+    nsLineBreaker lineBreaker;
+    lineBreaker.AppendText(aLang, aStr.get(), aStr.Length(), 0,
+                           &discardingSink);
+    lineBreaker.Reset(&hadTrailingBreak);
+  }
+}
+
+// nsLineBreaker benchmarks with "intl.icu4x.segmenter.enabled" set to false
+// for the duration of each run. Each one passes the text together with the
+// atom for the same language tag.
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAROld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mArUtf16, mAr);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDEOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mDeUtf16, mDe);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJAOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mJaUtf16, mJa);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRUOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mRuUtf16, mRu);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTHOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mThUtf16, mTh);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTROld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mTrUtf16, mTr);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVIOld, [this] {
+  AutoSetSegmenter prefOff(false);
+  TestDOMSegmenterBench(mViUtf16, mVi);
+});
+
+// nsLineBreaker benchmarks with "intl.icu4x.segmenter.enabled" set to true for
+// the duration of each run. Each one passes the text together with the atom
+// for the same language tag.
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAR, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mArUtf16, mAr);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDE, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mDeUtf16, mDe);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJA, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mJaUtf16, mJa);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRU, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mRuUtf16, mRu);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTH, [this] {
+  AutoSetSegmenter prefOn(true);
+  // LSTM segmenter is too slow, so run only 3 repetitions instead of the
+  // default kIterations.
+  TestDOMSegmenterBench(mThUtf16, mTh, 3);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTR, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mTrUtf16, mTr);
+});
+
+MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVI, [this] {
+  AutoSetSegmenter prefOn(true);
+  TestDOMSegmenterBench(mViUtf16, mVi);
+});
+
+} // namespace mozilla::intl