/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include "gtest/gtest.h" #include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH #include "mozilla/intl/LineBreaker.h" #include "mozilla/intl/Segmenter.h" #include "mozilla/Preferences.h" #include "nsAtom.h" #include "nsLineBreaker.h" #include "nsString.h" #include "nsTArray.h" namespace mozilla::intl { using mozilla::intl::LineBreakRule; using mozilla::intl::WordBreakRule; constexpr size_t kIterations = 100; static std::string ReadFileIntoString(const char* aPath) { std::ifstream file(aPath); std::stringstream sstr; sstr << file.rdbuf(); return sstr.str(); } class SegmenterPerf : public ::testing::Test { protected: void SetUp() override { // Test files are into xpcom/tests/gtest/wikipedia mArUtf8 = ReadFileIntoString("ar.txt"); mDeUtf8 = ReadFileIntoString("de.txt"); mJaUtf8 = ReadFileIntoString("ja.txt"); mRuUtf8 = ReadFileIntoString("ru.txt"); mThUtf8 = ReadFileIntoString("th.txt"); mTrUtf8 = ReadFileIntoString("tr.txt"); mViUtf8 = ReadFileIntoString("vi.txt"); CopyUTF8toUTF16(mArUtf8, mArUtf16); CopyUTF8toUTF16(mDeUtf8, mDeUtf16); CopyUTF8toUTF16(mJaUtf8, mJaUtf16); CopyUTF8toUTF16(mRuUtf8, mRuUtf16); CopyUTF8toUTF16(mThUtf8, mThUtf16); CopyUTF8toUTF16(mTrUtf8, mTrUtf16); CopyUTF8toUTF16(mViUtf8, mViUtf16); mAr = NS_Atomize(u"ar"); mDe = NS_Atomize(u"de"); mJa = NS_Atomize(u"ja"); mRu = NS_Atomize(u"ru"); mTh = NS_Atomize(u"th"); mTr = NS_Atomize(u"tr"); mVi = NS_Atomize(u"vi"); } public: std::string mArUtf8; std::string mDeUtf8; std::string mJaUtf8; std::string mRuUtf8; std::string mThUtf8; std::string mTrUtf8; std::string mViUtf8; nsString mArUtf16; nsString mDeUtf16; nsString mJaUtf16; nsString mRuUtf16; nsString mThUtf16; nsString mTrUtf16; nsString mViUtf16; RefPtr mAr; RefPtr mDe; RefPtr mJa; RefPtr mRu; RefPtr mTh; RefPtr mTr; RefPtr mVi; }; class AutoSetSegmenter final { public: explicit AutoSetSegmenter(bool aValue) { nsresult rv = mozilla::Preferences::SetBool("intl.icu4x.segmenter.enabled", aValue); EXPECT_TRUE(rv == NS_OK); } ~AutoSetSegmenter() { mozilla::Preferences::ClearUser("intl.icu4x.segmenter.enabled"); } }; static void TestSegmenterBench(const nsString& aStr, bool aIsJaOrZh, size_t aCount = kIterations) { nsTArray breakState; breakState.SetLength(aStr.Length()); for (size_t i = 0; i < aCount; i++) { LineBreaker::ComputeBreakPositions( aStr.get(), aStr.Length(), WordBreakRule::Normal, LineBreakRule::Strict, aIsJaOrZh, breakState.Elements()); } } MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAROld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mArUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDEOld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mDeUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJAOld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mJaUtf16, true); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRUOld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mRuUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTHOld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mThUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTROld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mTrUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVIOld, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mViUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAR, [this] { AutoSetSegmenter set(false); TestSegmenterBench(mArUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDE, [this] { AutoSetSegmenter set(true); TestSegmenterBench(mDeUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJA, [this] { AutoSetSegmenter set(true); TestSegmenterBench(mJaUtf16, true); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRU, [this] { AutoSetSegmenter set(true); TestSegmenterBench(mRuUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTH, [this] { AutoSetSegmenter set(true); // LSTM segmenter is too slow TestSegmenterBench(mThUtf16, false, 3); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTR, [this] { AutoSetSegmenter set(true); TestSegmenterBench(mTrUtf16, false); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVI, [this] { AutoSetSegmenter set(true); TestSegmenterBench(mViUtf16, false); }); class LBSink final : public nsILineBreakSink { public: LBSink() = default; ~LBSink() = default; virtual void SetBreaks(uint32_t, uint32_t, uint8_t*) override {} virtual void SetCapitalization(uint32_t, uint32_t, bool*) override {} }; static void TestDOMSegmenterBench(const nsString& aStr, nsAtom* aLang, size_t aCount = kIterations) { LBSink sink; bool trailingBreak; for (size_t i = 0; i < aCount; i++) { nsLineBreaker breaker; breaker.AppendText(aLang, aStr.get(), aStr.Length(), 0, &sink); breaker.Reset(&trailingBreak); } } MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAROld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mArUtf16, mAr); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDEOld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mDeUtf16, mDe); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJAOld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mJaUtf16, mJa); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRUOld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mRuUtf16, mRu); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTHOld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mThUtf16, mTh); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTROld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mTrUtf16, mTr); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVIOld, [this] { AutoSetSegmenter set(false); TestDOMSegmenterBench(mViUtf16, mVi); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAR, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mArUtf16, mAr); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDE, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mDeUtf16, mDe); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJA, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mJaUtf16, mJa); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRU, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mRuUtf16, mRu); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTH, [this] { AutoSetSegmenter set(true); // LSTM segmenter is too slow TestDOMSegmenterBench(mThUtf16, mTh, 3); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTR, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mTrUtf16, mTr); }); MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVI, [this] { AutoSetSegmenter set(true); TestDOMSegmenterBench(mViUtf16, mVi); }); } // namespace mozilla::intl