diff options
Diffstat (limited to 'parser/htmlparser/nsScanner.h')
-rw-r--r-- | parser/htmlparser/nsScanner.h | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/parser/htmlparser/nsScanner.h b/parser/htmlparser/nsScanner.h new file mode 100644 index 0000000000..ca0e51bc5e --- /dev/null +++ b/parser/htmlparser/nsScanner.h @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * MODULE NOTES: + * @update gess 4/1/98 + * + * The scanner is a low-level service class that knows + * how to consume characters out of an (internal) stream. + * This class also offers a series of utility methods + * that most tokenizers want, such as readUntil() + * and SkipWhitespace(). + */ + +#ifndef SCANNER +#define SCANNER + +#include "nsCharsetSource.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsIParser.h" +#include "mozilla/Encoding.h" +#include "nsScannerString.h" +#include "mozilla/CheckedInt.h" + +class nsReadEndCondition { + public: + const char16_t* mChars; + char16_t mFilter; + explicit nsReadEndCondition(const char16_t* aTerminateChars); + + private: + nsReadEndCondition(const nsReadEndCondition& aOther); // No copying + void operator=(const nsReadEndCondition& aOther); // No assigning +}; + +class nsScanner final { + using Encoding = mozilla::Encoding; + template <typename T> + using NotNull = mozilla::NotNull<T>; + + public: + /** + * Use this constructor for the XML fragment parsing case + */ + nsScanner(const nsAString& anHTMLString, bool aIncremental); + + /** + * Use this constructor if you want i/o to be based on + * a file (therefore a stream) or just data you provide via Append(). + */ + explicit nsScanner(nsIURI* aURI); + + ~nsScanner(); + + /** + * retrieve next char from internal input stream + * + * @update gess 3/25/98 + * @param ch is the char to accept new value + * @return error code reflecting read status + */ + nsresult GetChar(char16_t& ch); + + /** + * Records current offset position in input stream. This allows us + * to back up to this point if the need should arise, such as when + * tokenization gets interrupted. + * + * @update gess 5/12/98 + * @param + * @return + */ + int32_t Mark(void); + + /** + * Resets current offset position of input stream to marked position. + * This allows us to back up to this point if the need should arise, + * such as when tokenization gets interrupted. + * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! + * + * @update gess 5/12/98 + * @param + * @return + */ + void RewindToMark(void); + + /** + * + * + * @update harishd 01/12/99 + * @param + * @return + */ + bool UngetReadable(const nsAString& aBuffer); + + /** + * + * + * @update gess 5/13/98 + * @param + * @return + */ + nsresult Append(const nsAString& aBuffer); + + /** + * + * + * @update gess 5/21/98 + * @param + * @return + */ + nsresult Append(const char* aBuffer, uint32_t aLen); + + /** + * Call this to copy bytes out of the scanner that have not yet been consumed + * by the tokenization process. + * + * @update gess 5/12/98 + * @param aCopyBuffer is where the scanner buffer will be copied to + * @return true if OK or false on OOM + */ + bool CopyUnusedData(nsString& aCopyBuffer); + + /** + * Retrieve the URI of the file that the scanner is reading from. + * In some cases, it's just a given name, because the scanner isn't + * really reading from a file. + */ + nsIURI* GetURI(void) const { return mURI; } + + static void SelfTest(); + + /** + * Use this setter to change the scanner's unicode decoder + * + * @update ftang 3/02/99 + * @param aCharset a normalized (alias resolved) charset name + * @param aCharsetSource- where the charset info came from + * @return + */ + nsresult SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource); + + void BindSubstring(nsScannerSubstring& aSubstring, + const nsScannerIterator& aStart, + const nsScannerIterator& aEnd); + void CurrentPosition(nsScannerIterator& aPosition); + void EndReading(nsScannerIterator& aPosition); + void SetPosition(nsScannerIterator& aPosition, bool aTruncate = false); + + /** + * Internal method used to cause the internal buffer to + * be filled with data. + * + * @update gess4/3/98 + */ + bool IsIncremental(void) { return mIncremental; } + void SetIncremental(bool anIncrValue) { mIncremental = anIncrValue; } + + protected: + void AppendToBuffer(nsScannerString::Buffer* aBuffer); + bool AppendToBuffer(const nsAString& aStr) { + nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); + if (!buf) return false; + AppendToBuffer(buf); + return true; + } + + mozilla::UniquePtr<nsScannerString> mSlidingBuffer; + nsScannerIterator mCurrentPosition; // The position we will next read from in + // the scanner buffer + nsScannerIterator + mMarkPosition; // The position last marked (we may rewind to here) + nsScannerIterator mEndPosition; // The current end of the scanner buffer + nsCOMPtr<nsIURI> mURI; + bool mIncremental; + int32_t mCharsetSource = kCharsetUninitialized; + nsCString mCharset; + mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder; + + private: + nsScanner& operator=(const nsScanner&); // Not implemented. +}; + +#endif |