diff options
Diffstat (limited to 'extensions/spellcheck/src/mozInlineSpellWordUtil.h')
-rw-r--r-- | extensions/spellcheck/src/mozInlineSpellWordUtil.h | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/extensions/spellcheck/src/mozInlineSpellWordUtil.h b/extensions/spellcheck/src/mozInlineSpellWordUtil.h new file mode 100644 index 0000000000..78ab9f0b7d --- /dev/null +++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h @@ -0,0 +1,211 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozInlineSpellWordUtil_h +#define mozInlineSpellWordUtil_h + +#include "mozilla/Attributes.h" +#include "mozilla/dom/Document.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsTArray.h" + +//#define DEBUG_SPELLCHECK + +class nsRange; +class nsINode; + +namespace mozilla { +class TextEditor; + +namespace dom { +class Document; +} +} // namespace mozilla + +struct NodeOffset { + nsCOMPtr<nsINode> mNode; + int32_t mOffset; + + NodeOffset() : mOffset(0) {} + NodeOffset(nsINode* aNode, int32_t aOffset) + : mNode(aNode), mOffset(aOffset) {} + + bool operator==(const NodeOffset& aOther) const { + return mNode == aOther.mNode && mOffset == aOther.mOffset; + } + + bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); } + + nsINode* Node() const { return mNode.get(); } + int32_t Offset() const { return mOffset; } +}; + +class NodeOffsetRange { + private: + NodeOffset mBegin; + NodeOffset mEnd; + + public: + NodeOffsetRange() {} + NodeOffsetRange(NodeOffset b, NodeOffset e) : mBegin(b), mEnd(e) {} + + NodeOffset Begin() const { return mBegin; } + + NodeOffset End() const { return mEnd; } +}; + +/** + * This class extracts text from the DOM and builds it into a single string. + * The string includes whitespace breaks whereever non-inline elements begin + * and end. This string is broken into "real words", following somewhat + * complex rules; for example substrings that look like URLs or + * email addresses are treated as single words, but otherwise many kinds of + * punctuation are treated as word separators. GetNextWord provides a way + * to iterate over these "real words". + * + * The basic operation is: + * + * 1. Call Init with the weak pointer to the editor that you're using. + * 2. Call SetPositionAndEnd to to initialize the current position inside the + * previously given range and set where you want to stop spellchecking. + * We'll stop at the word boundary after that. If SetEnd is not called, + * we'll stop at the end of the root element. + * 3. Call GetNextWord over and over until it returns false. + */ + +class MOZ_STACK_CLASS mozInlineSpellWordUtil { + public: + mozInlineSpellWordUtil() + : mIsContentEditableOrDesignMode(false), + mRootNode(nullptr), + mSoftBegin(nullptr, 0), + mSoftEnd(nullptr, 0), + mNextWordIndex(-1), + mSoftTextValid(false) {} + + nsresult Init(mozilla::TextEditor* aTextEditor); + + // sets the current position, this should be inside the range. If we are in + // the middle of a word, we'll move to its start. + nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset, + nsINode* aEndNode, int32_t aEndOffset); + + // Given a point inside or immediately following a word, this returns the + // DOM range that exactly encloses that word's characters. The current + // position will be at the end of the word. This will find the previous + // word if the current position is space, so if you care that the point is + // inside the word, you should check the range. + // + // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called + // before you actually generate the range you are interested in and iterate + // the words in it. + nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset, + nsRange** aRange); + + // Convenience functions, object must be initialized + nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); + static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange); + + // Moves to the the next word in the range, and retrieves it's text and range. + // false is returned when we are done checking. + // aSkipChecking will be set if the word is "special" and shouldn't be + // checked (e.g., an email address). + bool GetNextWord(nsAString& aText, NodeOffsetRange* aNodeOffsetRange, + bool* aSkipChecking); + + // Call to normalize some punctuation. This function takes an autostring + // so we can access characters directly. + static void NormalizeWord(nsAString& aWord); + + mozilla::dom::Document* GetDocument() const { return mDocument; } + nsINode* GetRootNode() { return mRootNode; } + + private: + // cached stuff for the editor, set by Init + RefPtr<mozilla::dom::Document> mDocument; + bool mIsContentEditableOrDesignMode; + + // range to check, see SetPosition and SetEnd + nsINode* mRootNode; + NodeOffset mSoftBegin; + NodeOffset mSoftEnd; + + // DOM text covering the soft range, with newlines added at block boundaries + nsString mSoftText; + // A list of where we extracted text from, ordered by mSoftTextOffset. A given + // DOM node appears at most once in this list. + struct DOMTextMapping { + NodeOffset mNodeOffset; + int32_t mSoftTextOffset; + int32_t mLength; + + DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, + int32_t aLength) + : mNodeOffset(aNodeOffset), + mSoftTextOffset(aSoftTextOffset), + mLength(aLength) {} + }; + nsTArray<DOMTextMapping> mSoftTextDOMMapping; + + // A list of the "real words" in mSoftText, ordered by mSoftTextOffset + struct RealWord { + int32_t mSoftTextOffset; + uint32_t mLength : 31; + uint32_t mCheckableWord : 1; + + RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable) + : mSoftTextOffset(aOffset), + mLength(aLength), + mCheckableWord(aCheckable) { + static_assert(sizeof(RealWord) == 8, + "RealWord should be limited to 8 bytes"); + MOZ_ASSERT(aLength < INT32_MAX, + "Word length is too large to fit in the bitfield"); + } + + int32_t EndOffset() const { return mSoftTextOffset + mLength; } + }; + nsTArray<RealWord> mRealWords; + int32_t mNextWordIndex; + + bool mSoftTextValid; + + void InvalidateWords() { mSoftTextValid = false; } + nsresult EnsureWords(); + + int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); + // Map an offset into mSoftText to a DOM position. Note that two DOM positions + // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb + // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, + // aHintBefore controls which position we return ... if aHint is eEnd + // then the position indicates the END of a range so we return (A,4). + // Otherwise the position indicates the START of a range so we return (B,0). + enum DOMMapHint { HINT_BEGIN, HINT_END }; + NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, + DOMMapHint aHint); + // Finds the index of the real word containing aSoftTextOffset, or -1 if none + // If it's exactly between two words, then if aHint is HINT_BEGIN, return the + // later word (favouring the assumption that it's the BEGINning of a word), + // otherwise return the earlier word (assuming it's the END of a word). + // If aSearchForward is true, then if we don't find a word at the given + // position, search forward until we do find a word and return that (if + // found). + int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, + bool aSearchForward); + + // build mSoftText and mSoftTextDOMMapping + void BuildSoftText(); + // Build mRealWords array + nsresult BuildRealWords(); + + nsresult SplitDOMWord(int32_t aStart, int32_t aEnd); + + nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); + void MakeNodeOffsetRangeForWord(const RealWord& aWord, + NodeOffsetRange* aNodeOffsetRange); +}; + +#endif |