diff options
Diffstat (limited to 'intl/lwbrk/gtest/TestSegmenterPerf.cpp')
-rw-r--r-- | intl/lwbrk/gtest/TestSegmenterPerf.cpp | 276 |
1 files changed, 276 insertions, 0 deletions
diff --git a/intl/lwbrk/gtest/TestSegmenterPerf.cpp b/intl/lwbrk/gtest/TestSegmenterPerf.cpp new file mode 100644 index 0000000000..772e284fa8 --- /dev/null +++ b/intl/lwbrk/gtest/TestSegmenterPerf.cpp @@ -0,0 +1,276 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <fstream> + +#include "gtest/gtest.h" +#include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH +#include "mozilla/intl/LineBreaker.h" +#include "mozilla/intl/Segmenter.h" +#include "mozilla/Preferences.h" +#include "nsAtom.h" +#include "nsLineBreaker.h" +#include "nsString.h" +#include "nsTArray.h" + +namespace mozilla::intl { + +using mozilla::intl::LineBreakRule; +using mozilla::intl::WordBreakRule; + +constexpr size_t kIterations = 100; + +static std::string ReadFileIntoString(const char* aPath) { + std::ifstream file(aPath); + std::stringstream sstr; + sstr << file.rdbuf(); + return sstr.str(); +} + +class SegmenterPerf : public ::testing::Test { + protected: + void SetUp() override { + // Test files are into xpcom/tests/gtest/wikipedia + mArUtf8 = ReadFileIntoString("ar.txt"); + mDeUtf8 = ReadFileIntoString("de.txt"); + mJaUtf8 = ReadFileIntoString("ja.txt"); + mRuUtf8 = ReadFileIntoString("ru.txt"); + mThUtf8 = ReadFileIntoString("th.txt"); + mTrUtf8 = ReadFileIntoString("tr.txt"); + mViUtf8 = ReadFileIntoString("vi.txt"); + + CopyUTF8toUTF16(mArUtf8, mArUtf16); + CopyUTF8toUTF16(mDeUtf8, mDeUtf16); + CopyUTF8toUTF16(mJaUtf8, mJaUtf16); + CopyUTF8toUTF16(mRuUtf8, mRuUtf16); + CopyUTF8toUTF16(mThUtf8, mThUtf16); + CopyUTF8toUTF16(mTrUtf8, mTrUtf16); + CopyUTF8toUTF16(mViUtf8, mViUtf16); + + mAr = NS_Atomize(u"ar"); + mDe = NS_Atomize(u"de"); + mJa = NS_Atomize(u"ja"); + mRu = NS_Atomize(u"ru"); + mTh = NS_Atomize(u"th"); + mTr = NS_Atomize(u"tr"); + mVi = NS_Atomize(u"vi"); + } + + public: + std::string mArUtf8; + std::string mDeUtf8; + std::string mJaUtf8; + std::string mRuUtf8; + std::string mThUtf8; + std::string mTrUtf8; + std::string mViUtf8; + + nsString mArUtf16; + nsString mDeUtf16; + nsString mJaUtf16; + nsString mRuUtf16; + nsString mThUtf16; + nsString mTrUtf16; + nsString mViUtf16; + + RefPtr<nsAtom> mAr; + RefPtr<nsAtom> mDe; + RefPtr<nsAtom> mJa; + RefPtr<nsAtom> mRu; + RefPtr<nsAtom> mTh; + RefPtr<nsAtom> mTr; + RefPtr<nsAtom> mVi; +}; + +class AutoSetSegmenter final { + public: + explicit AutoSetSegmenter(bool aValue) { + nsresult rv = + mozilla::Preferences::SetBool("intl.icu4x.segmenter.enabled", aValue); + EXPECT_TRUE(rv == NS_OK); + } + + ~AutoSetSegmenter() { + mozilla::Preferences::ClearUser("intl.icu4x.segmenter.enabled"); + } +}; + +static void TestSegmenterBench(const nsString& aStr, bool aIsJaOrZh, + size_t aCount = kIterations) { + nsTArray<uint8_t> breakState; + breakState.SetLength(aStr.Length()); + + for (size_t i = 0; i < aCount; i++) { + LineBreaker::ComputeBreakPositions( + aStr.get(), aStr.Length(), WordBreakRule::Normal, LineBreakRule::Strict, + aIsJaOrZh, breakState.Elements()); + } +} + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAROld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mArUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDEOld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mDeUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJAOld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mJaUtf16, true); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRUOld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mRuUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTHOld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mThUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTROld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mTrUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVIOld, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mViUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAR, [this] { + AutoSetSegmenter set(false); + TestSegmenterBench(mArUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDE, [this] { + AutoSetSegmenter set(true); + TestSegmenterBench(mDeUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJA, [this] { + AutoSetSegmenter set(true); + TestSegmenterBench(mJaUtf16, true); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRU, [this] { + AutoSetSegmenter set(true); + TestSegmenterBench(mRuUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTH, [this] { + AutoSetSegmenter set(true); + // LSTM segmenter is too slow + TestSegmenterBench(mThUtf16, false, 3); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTR, [this] { + AutoSetSegmenter set(true); + TestSegmenterBench(mTrUtf16, false); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVI, [this] { + AutoSetSegmenter set(true); + TestSegmenterBench(mViUtf16, false); +}); + +class LBSink final : public nsILineBreakSink { + public: + LBSink() = default; + ~LBSink() = default; + + virtual void SetBreaks(uint32_t, uint32_t, uint8_t*) override {} + virtual void SetCapitalization(uint32_t, uint32_t, bool*) override {} +}; + +static void TestDOMSegmenterBench(const nsString& aStr, nsAtom* aLang, + size_t aCount = kIterations) { + LBSink sink; + bool trailingBreak; + + for (size_t i = 0; i < aCount; i++) { + nsLineBreaker breaker; + breaker.AppendText(aLang, aStr.get(), aStr.Length(), 0, &sink); + breaker.Reset(&trailingBreak); + } +} + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAROld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mArUtf16, mAr); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDEOld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mDeUtf16, mDe); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJAOld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mJaUtf16, mJa); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRUOld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mRuUtf16, mRu); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTHOld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mThUtf16, mTh); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTROld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mTrUtf16, mTr); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVIOld, [this] { + AutoSetSegmenter set(false); + TestDOMSegmenterBench(mViUtf16, mVi); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAR, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mArUtf16, mAr); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDE, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mDeUtf16, mDe); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJA, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mJaUtf16, mJa); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRU, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mRuUtf16, mRu); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTH, [this] { + AutoSetSegmenter set(true); + // LSTM segmenter is too slow + TestDOMSegmenterBench(mThUtf16, mTh, 3); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTR, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mTrUtf16, mTr); +}); + +MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVI, [this] { + AutoSetSegmenter set(true); + TestDOMSegmenterBench(mViUtf16, mVi); +}); + +} // namespace mozilla::intl |