diff options
Diffstat (limited to 'intl/icu/source/common/lstmbe.h')
-rw-r--r-- | intl/icu/source/common/lstmbe.h | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/intl/icu/source/common/lstmbe.h b/intl/icu/source/common/lstmbe.h new file mode 100644 index 0000000000..77c97d85fa --- /dev/null +++ b/intl/icu/source/common/lstmbe.h @@ -0,0 +1,88 @@ +// © 2021 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef LSTMBE_H +#define LSTMBE_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uniset.h" +#include "unicode/ures.h" +#include "unicode/utext.h" +#include "unicode/utypes.h" + +#include "brkeng.h" +#include "dictbe.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +class Vectorizer; +struct LSTMData; + +/******************************************************************* + * LSTMBreakEngine + */ + +/** + * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses an + * LSTM to determine language-specific breaks.</p> + * + * <p>After it is constructed an LSTMBreakEngine may be shared between + * threads without synchronization.</p> + */ +class LSTMBreakEngine : public DictionaryBreakEngine { +public: + /** + * <p>Constructor.</p> + * + * @param data The LSTM model data for this engine. NOTE(review): ownership of + * the pointer is not stated in this header; confirm in the implementation. + * @param set A UnicodeSet supplied by the caller; presumably the characters + * this engine handles (TODO confirm against the implementation). + * @param status Information on any errors encountered. + */ + LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~LSTMBreakEngine(); + + /** + * @return A char16_t string naming this engine. NOTE(review): where the name + * comes from and who owns the returned pointer are not visible in this header. + */ + virtual const char16_t* name() const; + +protected: + /** + * <p>Divide up a range of known dictionary characters handled by this break engine.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output vector (UVector32) for the break positions found + * @param isPhraseBreaking UBool flag supplied by the caller. NOTE(review): presumably + * requests phrase-based breaking; its effect is not visible in this header. + * @param status Information on any errors encountered. 
+ * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange(UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks, + UBool isPhraseBreaking, + UErrorCode& status) const override; +private: + const LSTMData* fData; + const Vectorizer* fVectorizer; +}; + +U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine( + UScriptCode script, const LSTMData* data, UErrorCode& status); + +U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData( + UResourceBundle* rb, UErrorCode& status); + +U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript( + UScriptCode script, UErrorCode& status); + +U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data); +U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data); + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif /* LSTMBE_H */ |