diff options
Diffstat (limited to 'intl/components/src/Bidi.h')
-rw-r--r-- | intl/components/src/Bidi.h | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/intl/components/src/Bidi.h b/intl/components/src/Bidi.h new file mode 100644 index 0000000000..9b7fba73e2 --- /dev/null +++ b/intl/components/src/Bidi.h @@ -0,0 +1,160 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef intl_components_Bidi_h_ +#define intl_components_Bidi_h_ + +#include "mozilla/intl/BidiEmbeddingLevel.h" +#include "mozilla/intl/ICU4CGlue.h" + +struct UBiDi; + +namespace mozilla::intl { + +/** + * This component is a Mozilla-focused API for working with bidirectional (bidi) + * text. Text is commonly displayed left to right (LTR), especially for + * Latin-based alphabets. However, languages like Arabic and Hebrew displays + * text right to left (RTL). When displaying text, LTR and RTL text can be + * combined together in the same paragraph. This class gives tools for working + * with unidirectional, and mixed direction paragraphs. + * + * See the Unicode Bidirectional Algorithm document for implementation details: + * https://unicode.org/reports/tr9/ + */ +class Bidi final { + public: + Bidi(); + ~Bidi(); + + // Not copyable or movable + Bidi(const Bidi&) = delete; + Bidi& operator=(const Bidi&) = delete; + + /** + * This enum indicates the text direction for the set paragraph. Some + * paragraphs are unidirectional, where they only have one direction, or a + * paragraph could use both LTR and RTL. In this case the paragraph's + * direction would be mixed. + */ + enum class ParagraphDirection { LTR, RTL, Mixed }; + + /** + * Set the current paragraph of text to analyze for its bidi properties. This + * performs the Unicode bidi algorithm as specified by: + * https://unicode.org/reports/tr9/ + * + * After setting the text, the other getter methods can be used to find out + * the directionality of the paragraph text. + */ + ICUResult SetParagraph(Span<const char16_t> aParagraph, + BidiEmbeddingLevel aLevel); + + /** + * Get the embedding level for the paragraph that was set by SetParagraph. + */ + BidiEmbeddingLevel GetParagraphEmbeddingLevel() const; + + /** + * Get the directionality of the paragraph text that was set by SetParagraph. + */ + ParagraphDirection GetParagraphDirection() const; + + /** + * Get the number of runs. This function may invoke the actual reordering on + * the Bidi object, after SetParagraph may have resolved only the levels of + * the text. Therefore, `CountRuns` may have to allocate memory, and may fail + * doing so. + */ + Result<int32_t, ICUError> CountRuns(); + + /** + * Get the next logical run. The logical runs are a run of text that has the + * same directionality and embedding level. These runs are in memory order, + * and not in display order. + * + * Important! `Bidi::CountRuns` must be called before calling this method. + * + * @param aLogicalStart is the offset into the paragraph text that marks the + * logical start of the text. + * @param aLogicalLimitOut is an out param that is the length of the string + * that makes up the logical run. + * @param aLevelOut is an out parameter that returns the embedding level for + * the run + */ + void GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut, + BidiEmbeddingLevel* aLevelOut); + + /** + * This is a convenience function that does not use the ICU Bidi object. + * It is intended to be used for when an application has determined the + * embedding levels of objects (character sequences) and just needs to have + * them reordered (L2). + * + * @param aLevels is an array with `aLength` levels that have been + * determined by the application. + * + * @param aLength is the number of levels in the array, or, semantically, + * the number of objects to be reordered. It must be greater than 0. + * + * @param aIndexMap is a pointer to an array of `aLength` + * indexes which will reflect the reordering of the characters. + * The array does not need to be initialized. + * The index map will result in + * `aIndexMap[aVisualIndex]==aLogicalIndex`. + */ + static void ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength, + int32_t* aIndexMap); + + /** + * This enum indicates the bidi character type of the first strong character + * for the set paragraph. + * LTR: bidi character type 'L'. + * RTL: bidi character type 'R' or 'AL'. + * Neutral: The rest of bidi character types. + */ + enum class BaseDirection { LTR, RTL, Neutral }; + + /** + * Get the base direction of the paragraph. + */ + static BaseDirection GetBaseDirection(Span<const char16_t> aParagraph); + + /** + * Get one run's logical start, length, and directionality. In an RTL run, the + * character at the logical start is visually on the right of the displayed + * run. The length is the number of characters in the run. + * `Bidi::CountRuns` should be called before the runs are retrieved. + * + * @param aRunIndex is the number of the run in visual order, in the + * range `[0..CountRuns-1]`. + * + * @param aLogicalStart is the first logical character index in the text. + * The pointer may be `nullptr` if this index is not needed. + * + * @param aLength is the number of characters (at least one) in the run. + * The pointer may be `nullptr` if this is not needed. + * + * Note that in right-to-left runs, the code places modifier letters before + * base characters and second surrogates before first ones. + */ + BidiDirection GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, + int32_t* aLength); + + private: + ICUPointer<UBiDi> mBidi = ICUPointer<UBiDi>(nullptr); + + /** + * An array of levels that is the same length as the paragraph from + * `Bidi::SetParagraph`. + */ + const BidiEmbeddingLevel* mLevels = nullptr; + + /** + * The length of the paragraph from `Bidi::SetParagraph`. + */ + int32_t mLength = 0; +}; + +} // namespace mozilla::intl +#endif |