diff options
Diffstat (limited to 'intl/lwbrk/WordBreaker.h')
-rw-r--r-- | intl/lwbrk/WordBreaker.h | 65 |
1 files changed, 65 insertions, 0 deletions
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_intl_WordBreaker_h__
#define mozilla_intl_WordBreaker_h__

#include <cstdint>

// Sentinel returned by WordBreaker::Next() when aPos >= aLen, i.e. there is no
// text left to scan. Parenthesized so the negative literal always expands as a
// single operand regardless of the surrounding expression.
#define NS_WORDBREAKER_NEED_MORE_TEXT (-1)

namespace mozilla {
namespace intl {

// Half-open range [mBegin, mEnd) of offsets describing one word, as returned
// by WordBreaker::FindWord().
struct WordRange {
  // Offset of the first character in the word.
  uint32_t mBegin;
  // Offset of the last character in the word plus 1.
  uint32_t mEnd;
};

class WordBreaker final {
 public:
  // WordBreaker is a utility class with only static methods. No need to
  // instantiate it.
  WordBreaker() = delete;
  ~WordBreaker() = delete;

  // Find the word boundary by scanning forward and backward from aPos.
  //
  // @param aText the text to scan.
  // @param aLen the length of aText.
  // @param aPos the offset in aText to start scanning from.
  //
  // @return WordRange where mBegin equals the offset of the first character in
  // the word and mEnd equals the offset of the last character plus 1. mEnd
  // can be aLen if the desired word is at the end of aText.
  //
  // If aPos is already at the end of aText or beyond, both mBegin and mEnd
  // equal aLen.
  static WordRange FindWord(const char16_t* aText, uint32_t aLen,
                            uint32_t aPos);

  // Find the next word break opportunity starting from aPos + 1. It can return
  // aLen if there's no break opportunity between [aPos + 1, aLen - 1].
  //
  // If aPos is already at the end of aText or beyond, i.e. aPos >= aLen, return
  // NS_WORDBREAKER_NEED_MORE_TEXT.
  //
  // DEPRECATED: Use WordBreakIteratorUtf16 instead.
  static int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos);

 private:
  // Character classes produced by GetClass().
  // NOTE(review): how each class affects break decisions is defined in the
  // implementation file, which is not visible from this header.
  enum WordBreakClass : uint8_t {
    kWbClassSpace = 0,
    kWbClassAlphaLetter,
    kWbClassPunct,
    kWbClassHanLetter,
    kWbClassKatakanaLetter,
    kWbClassHiraganaLetter,
    kWbClassHWKatakanaLetter,
    kWbClassScriptioContinua
  };

  // Map a single UTF-16 code unit to its WordBreakClass.
  static WordBreakClass GetClass(char16_t aChar);
};

}  // namespace intl
}  // namespace mozilla

#endif /* mozilla_intl_WordBreaker_h__ */