summaryrefslogtreecommitdiffstats
path: root/parser/html/nsHtml5StreamParser.h
blob: 0dacf257bf3411a219f75ee89a6bb93401f6269f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsHtml5StreamParser_h
#define nsHtml5StreamParser_h

#include <tuple>

#include "MainThreadUtils.h"
#include "mozilla/AlreadyAddRefed.h"
#include "mozilla/Assertions.h"
#include "mozilla/Encoding.h"
#include "mozilla/Mutex.h"
#include "mozilla/NotNull.h"
#include "mozilla/RefPtr.h"
#include "mozilla/Span.h"
#include "mozilla/UniquePtr.h"
#include "nsCharsetSource.h"
#include "nsCOMPtr.h"
#include "nsCycleCollectionParticipant.h"
#include "nsDebug.h"
#include "nsHtml5AtomTable.h"
#include "nsIRequestObserver.h"
#include "nsISerialEventTarget.h"
#include "nsISupports.h"
#include "nsStringFwd.h"
#include "nsTArray.h"
#include "nscore.h"

class nsCycleCollectionTraversalCallback;
class nsHtml5OwningUTF16Buffer;
class nsHtml5Parser;
class nsHtml5Speculation;
class nsHtml5String;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5TreeOpExecutor;
class nsIChannel;
class nsIInputStream;
class nsIRequest;
class nsIRunnable;
class nsITimer;
class nsIURI;

namespace mozilla {
class EncodingDetector;
template <typename T>
class Buffer;

namespace dom {
class DocGroup;
}
}  // namespace mozilla

/**
 * The kind of parse a stream parser is asked to perform: a normal parse,
 * one of the View Source flavors, plain text, or a data load.
 */
enum eParserMode {
  /** Ordinary parse of a document as HTML. */
  NORMAL = 0,

  /** Show the document as HTML source. */
  VIEW_SOURCE_HTML,

  /** Show the document as XML source. */
  VIEW_SOURCE_XML,

  /** Show the document as plain text source. */
  VIEW_SOURCE_PLAIN,

  /** Show the document as plain text. */
  PLAIN_TEXT,

  /** Load the document as data (XHR). */
  LOAD_AS_DATA,
};

/**
 * States of the sniffer that looks at the first bytes of the stream for a
 * byte order mark or for a UTF-16 XML-declaration-like ("bogo-XML") prefix.
 */
enum eBomState {
  /** Sniffing for a BOM has not begun yet. */
  BOM_SNIFFING_NOT_STARTED = 0,

  /** Sniffing in progress: first byte of a UTF-16LE BOM seen. */
  SEEN_UTF_16_LE_FIRST_BYTE,

  /** Sniffing in progress: first byte of a UTF-16BE BOM seen. */
  SEEN_UTF_16_BE_FIRST_BYTE,

  /** Sniffing in progress: first byte of a UTF-8 BOM seen. */
  SEEN_UTF_8_FIRST_BYTE,

  /** Sniffing in progress: first and second bytes of a UTF-8 BOM seen. */
  SEEN_UTF_8_SECOND_BYTE,

  /** UTF-16BE bogo-XML declaration: seen \x00 */
  SEEN_UTF_16_BE_XML_FIRST,

  /** UTF-16BE bogo-XML declaration: seen \x00< */
  SEEN_UTF_16_BE_XML_SECOND,

  /** UTF-16BE bogo-XML declaration: seen \x00<\x00 */
  SEEN_UTF_16_BE_XML_THIRD,

  /** UTF-16BE bogo-XML declaration: seen \x00<\x00? */
  SEEN_UTF_16_BE_XML_FOURTH,

  /** UTF-16BE bogo-XML declaration: seen \x00<\x00?\x00 */
  SEEN_UTF_16_BE_XML_FIFTH,

  /** UTF-16LE bogo-XML declaration: seen < */
  SEEN_UTF_16_LE_XML_FIRST,

  /** UTF-16LE bogo-XML declaration: seen <\x00 */
  SEEN_UTF_16_LE_XML_SECOND,

  /** UTF-16LE bogo-XML declaration: seen <\x00? */
  SEEN_UTF_16_LE_XML_THIRD,

  /** UTF-16LE bogo-XML declaration: seen <\x00?\x00 */
  SEEN_UTF_16_LE_XML_FOURTH,

  /** UTF-16LE bogo-XML declaration: seen <\x00?\x00x */
  SEEN_UTF_16_LE_XML_FIFTH,

  /** Sniffing had started but has ended, for whatever reason. */
  BOM_SNIFFING_OVER,
};

/**
 * Phases used for tracking the life cycle of the incoming stream.
 */
enum eHtml5StreamState {
  /** The stream has not started yet. */
  STREAM_NOT_STARTED = 0,

  /** The stream is currently being read. */
  STREAM_BEING_READ,

  /** The stream has ended. */
  STREAM_ENDED,
};

/**
 * Stream parser for the HTML5 parser.
 *
 * Receives the request notifications (OnStartRequest / OnDataAvailable /
 * OnStopRequest), sniffs and decides the character encoding, decodes the
 * incoming bytes and drives the tokenizer and tree builder it owns. Members
 * are touched both from the main thread and from the parser thread
 * (mEventTarget); see the per-member comments and the MOZ_REQUIRES
 * annotations for which mutex guards what.
 */
class nsHtml5StreamParser final : public nsISupports {
  template <typename T>
  using NotNull = mozilla::NotNull<T>;
  using Encoding = mozilla::Encoding;

  // Class-wide compile-time constants. These are static so that they do not
  // occupy storage in every parser instance.
  // NOTE(review): presumably the number of leading bytes that are always
  // scanned for <meta charset> -- confirm against the implementation.
  static constexpr uint32_t UNCONDITIONAL_META_SCAN_BOUNDARY = 1024;
  // NOTE(review): presumably the size of the UTF-16 buffers in the pending
  // buffer queue -- confirm against the implementation.
  static constexpr uint32_t READ_BUFFER_SIZE = 1024;
  static constexpr uint32_t LOCAL_FILE_UTF_8_BUFFER_SIZE =
      1024 * 1024 * 4;  // 4 MB

  friend class nsHtml5RequestStopper;
  friend class nsHtml5DataAvailable;
  friend class nsHtml5StreamParserContinuation;
  friend class nsHtml5TimerKungFu;
  friend class nsHtml5StreamParserPtr;
  friend class nsHtml5StreamListener;

 public:
  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)

  nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, nsHtml5Parser* aOwner,
                      eParserMode aMode);

  nsresult OnStartRequest(nsIRequest* aRequest);

  nsresult OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInStream,
                           uint64_t aSourceOffset, uint32_t aLength);

  nsresult OnStopRequest(nsIRequest* aRequest, nsresult status);

  // EncodingDeclarationHandler
  // https://hg.mozilla.org/projects/htmlparser/file/tip/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
  /**
   * Tree builder uses this to report a late <meta charset>
   */
  bool internalEncodingDeclaration(nsHtml5String aEncoding);

  bool TemplatePushedOrHeadPopped();

  void RememberGt(int32_t aPos);

  // Not from an external interface

  /**
   * Post a runnable to the main thread to perform the speculative load
   * operations without performing the tree operations.
   *
   * This should be called at the end of each data available or stop
   * request runnable running on the parser thread.
   */
  void PostLoadFlusher();

  /**
   * Pass a buffer to chardetng.
   */
  void FeedDetector(mozilla::Span<const uint8_t> aBuffer);

  /**
   * Report EOF to chardetng.
   */
  void DetectorEof();

  /**
   *  Call this method once you've created a parser, and want to instruct it
   *  about what charset to load. Must be called on the main thread before
   *  the stream has started.
   *
   *  @param   aEncoding the charset of a document
   *  @param   aSource the source of the charset
   *  @param   aForceAutoDetection whether auto-detection is being forced;
   *           must not be combined with a source ranking at or above
   *           kCharsetFromOtherComponent
   */
  inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
                                 nsCharsetSource aSource,
                                 bool aForceAutoDetection) {
    MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED,
               "SetDocumentCharset called too late.");
    MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
    MOZ_ASSERT(!(aForceAutoDetection && aSource >= kCharsetFromOtherComponent),
               "Can't force with high-ranking source.");
    mEncoding = aEncoding;
    mCharsetSource = aSource;
    mForceAutoDetection = aForceAutoDetection;
    // Remember whether the charset came from the channel.
    mChannelHadCharset = (aSource == kCharsetFromChannel);
  }

  nsresult GetChannel(nsIChannel** aChannel);

  /**
   * The owner parser must call this after script execution
   * when no scripts are executing and the document.written
   * buffer has been exhausted.
   *
   * If the first two arguments are nullptr, instead of
   * continuing after scripts, this method commits to an
   * internally-discovered encoding.
   */
  void ContinueAfterScriptsOrEncodingCommitment(
      nsHtml5Tokenizer* aTokenizer, nsHtml5TreeBuilder* aTreeBuilder,
      bool aLastWasCR);

  /**
   * Continues the stream parser if the charset switch failed.
   */
  void ContinueAfterFailedCharsetSwitch();

  /**
   * Sets the flag that asks the parser to terminate early.
   */
  void Terminate() { mTerminated = true; }

  void DropTimer();

  /**
   * Sets the URL for View Source title in case this parser ends up being
   * used for View Source. If aURL is a view-source: URL, takes the inner
   * URL. data: URLs are shown with an ellipsis instead of the actual data.
   */
  void SetViewSourceTitle(nsIURI* aURL);

 private:
  virtual ~nsHtml5StreamParser();

#ifdef DEBUG
  // Debug-only helper; only referenced from assertions.
  bool IsParserThread() { return mEventTarget->IsOnCurrentThread(); }
#endif

  void MarkAsBroken(nsresult aRv);

  /**
   * Marks the stream parser as interrupted. If you ever add calls to this
   * method, be sure to review Uninterrupt usage very, very carefully to
   * avoid having a previous in-flight runnable cancel your Interrupt()
   * call on the other thread too soon.
   */
  void Interrupt() {
    MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
    mInterrupted = true;
  }

  /**
   * Clears the interrupted flag. Must be called on the parser thread with
   * mTokenizerMutex held (asserted at run time instead of being checked by
   * the static thread safety analysis).
   */
  void Uninterrupt() MOZ_NO_THREAD_SAFETY_ANALYSIS {
    MOZ_ASSERT(IsParserThread(), "Wrong thread!");
    mTokenizerMutex.AssertCurrentThreadOwns();
    mInterrupted = false;
  }

  /**
   * Flushes the tree ops from the tree builder and disarms the flush
   * timer.
   */
  void FlushTreeOpsAndDisarmTimer();

  void SwitchDecoderIfAsciiSoFar(NotNull<const Encoding*> aEncoding)
      MOZ_REQUIRES(mTokenizerMutex);

  size_t CountGts();

  void DiscardMetaSpeculation();

  bool ProcessLookingForMetaCharset(bool aEof) MOZ_REQUIRES(mTokenizerMutex);

  void ParseAvailableData();

  void DoStopRequest();

  void DoDataAvailableBuffer(mozilla::Buffer<uint8_t>&& aBuffer)
      MOZ_REQUIRES(mTokenizerMutex);

  void DoDataAvailable(mozilla::Span<const uint8_t> aBuffer)
      MOZ_REQUIRES(mTokenizerMutex);

  static nsresult CopySegmentsToParser(nsIInputStream* aInStream,
                                       void* aClosure, const char* aFromSegment,
                                       uint32_t aToOffset, uint32_t aCount,
                                       uint32_t* aWriteCount)
      MOZ_REQUIRES(mTokenizerMutex);

  bool IsTerminatedOrInterrupted() { return mTerminated || mInterrupted; }

  bool IsTerminated() { return mTerminated; }

  /**
   * True when there is a Unicode decoder already
   */
  inline bool HasDecoder() { return !!mUnicodeDecoder; }

  /**
   * Returns 0 if 1) there aren't at least 2 buffers in mBufferedBytes
   * or 2) there is no byte '>' in the second buffer.
   * Otherwise, returns the length of the prefix of the second buffer
   * that is long enough to contain the first byte '>' in the second
   * buffer (including the '>' byte).
   *
   * NOTE(review): the method name says "Lt" while this description talks
   * about the '>' byte -- confirm against the implementation which one is
   * intended.
   */
  size_t LengthOfLtContainingPrefixInSecondBuffer();

  /**
   * Push bytes from network when there is no Unicode decoder yet
   */
  nsresult SniffStreamBytes(mozilla::Span<const uint8_t> aFromSegment,
                            bool aEof) MOZ_REQUIRES(mTokenizerMutex);

  /**
   * Push bytes from network when there is a Unicode decoder already
   */
  nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment)
      MOZ_REQUIRES(mTokenizerMutex);

  /**
   * Set up the Unicode decoder and write the sniffing buffer into it
   * followed by the current network buffer.
   *
   * @param aPrefix the part of the stream that has already been seen
   *                prior to aFromSegment. In practice, these are the
   *                bytes that are baked into the state of the BOM
   *                and UTF-16 XML declaration-like sniffing state
   *                machine state.
   * @param aFromSegment The current network buffer
   */
  nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
      mozilla::Span<const uint8_t> aPrefix,
      mozilla::Span<const uint8_t> aFromSegment) MOZ_REQUIRES(mTokenizerMutex);

  /**
   * Initialize the Unicode decoder, mark the BOM as the source and
   * drop the sniffer.
   *
   * @param aEncoding The encoding for the decoder
   *                  (UTF-16BE, UTF-16LE or UTF-8; the BOM has
   *                  been swallowed)
   */
  void SetupDecodingFromBom(NotNull<const Encoding*> aEncoding);

  void SetupDecodingFromUtf16BogoXml(NotNull<const Encoding*> aEncoding);

  /**
   * When speculatively decoding from file: URL as UTF-8, commit
   * to UTF-8 as the non-speculative encoding and start processing
   * the decoded data.
   */
  [[nodiscard]] nsresult CommitLocalFileToEncoding();

  /**
   * When speculatively decoding from file: URL as UTF-8, redecode
   * using fallback and then continue normally with the fallback.
   */
  [[nodiscard]] nsresult ReDecodeLocalFile() MOZ_REQUIRES(mTokenizerMutex);

  /**
   * Potentially guess the encoding using mozilla::EncodingDetector.
   * Returns the guessed encoding and a telemetry-appropriate source.
   */
  std::tuple<NotNull<const Encoding*>, nsCharsetSource> GuessEncoding(
      bool aInitial);

  /**
   * Become confident or resolve an encoding name to its preferred form.
   * @param aEncoding the value of an internal encoding decl.
   * @return NOTE(review): presumably the encoding the parser needs to start
   *         using, or nullptr if the parser became confident or if the
   *         encoding name did not specify a usable encoding -- confirm
   *         against the implementation. (Earlier wording described a bool
   *         result and an in/out parameter, which does not match this
   *         signature.)
   */
  const Encoding* PreferredForInternalEncodingDecl(const nsAString& aEncoding);

  /**
   * Callback for mFlushTimer.
   */
  static void TimerCallback(nsITimer* aTimer, void* aClosure);

  /**
   * Parser thread entry point for (maybe) flushing the ops and posting
   * a flush runnable back on the main thread.
   */
  void TimerFlush();

  /**
   * Called when speculation fails. Bumps the failure counter that
   * IsSpeculationEnabled() checks.
   */
  void MaybeDisableFutureSpeculation() { mSpeculationFailureCount++; }

  /**
   * Used to check whether we're getting too many speculation failures and
   * should just stop trying.  The 100 is picked pretty randomly to be not too
   * small (so most pages are not affected) but small enough that we don't end
   * up with failed speculations over and over in pathological cases.
   */
  bool IsSpeculationEnabled() { return mSpeculationFailureCount < 100; }

  /**
   * Dispatch an event to a Quantum DOM main thread-ish thread.
   * (Not the parser thread.)
   */
  nsresult DispatchToMain(already_AddRefed<nsIRunnable>&& aRunnable);

  /**
   * Notify any devtools listeners about content newly received for parsing.
   */
  inline void OnNewContent(mozilla::Span<const char16_t> aData);

  /**
   * Notify any devtools listeners after all parse content has been received.
   */
  inline void OnContentComplete();

  nsCOMPtr<nsIRequest> mRequest;

  /**
   * The document title to use if this turns out to be a View Source parser.
   */
  nsCString mViewSourceTitle;

  /**
   * The Unicode decoder
   */
  mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;

  /**
   * BOM sniffing state
   */
  eBomState mBomState;

  // encoding-related stuff
  /**
   * The source (confidence) of the character encoding in use
   */
  nsCharsetSource mCharsetSource;

  nsCharsetSource mEncodingSwitchSource;

  /**
   * The character encoding in use
   */
  NotNull<const Encoding*> mEncoding;

  const Encoding* mNeedsEncodingSwitchTo;

  bool mSeenEligibleMetaCharset;

  bool mChardetEof;

#ifdef DEBUG

  bool mStartedFeedingDetector;

  bool mStartedFeedingDevTools;

#endif

  /**
   * Whether reparse is forbidden
   */
  bool mReparseForbidden;

  /**
   * Whether the Repair Text Encoding menu item was invoked
   */
  bool mForceAutoDetection;

  /**
   * Whether there was a valid charset parameter on the HTTP layer.
   */
  bool mChannelHadCharset;

  /**
   * We are in the process of looking for <meta charset>
   */
  bool mLookingForMetaCharset;

  /**
   * Whether the byte stream started with ASCII <?
   */
  bool mStartsWithLtQuestion;

  /**
   * If we are viewing XML source and are waiting for a '>' from the network.
   */
  bool mLookingForXmlDeclarationForXmlViewSource;

  /**
   * Whether template has been pushed or head popped within the first 1024
   * bytes.
   */
  bool mTemplatePushedOrHeadPopped;

  // Portable parser objects
  /**
   * The first buffer in the pending UTF-16 buffer queue
   */
  RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;

  /**
   * Non-owning pointer to the most recent buffer that contains the most recent
   * remembered greater-than sign. Used only while mLookingForMetaCharset is
   * true. While mLookingForMetaCharset is true, mFirstBuffer is not changed and
   * keeps the whole linked list of buffers alive. This pointer is non-owning to
   * avoid frequent refcounting.
   */
  nsHtml5OwningUTF16Buffer* mGtBuffer;

  int32_t mGtPos;

  /**
   * The last buffer in the pending UTF-16 buffer queue.
   *
   * Weak ref; always points to a buffer of the read buffer size.
   * NOTE(review): this comment used to cite
   * NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, which is not declared in this
   * header; presumably READ_BUFFER_SIZE is meant -- confirm.
   */
  nsHtml5OwningUTF16Buffer* mLastBuffer;

  /**
   * The first buffer of the document if looking for <meta charset> or
   * nullptr afterwards.
   */
  RefPtr<nsHtml5OwningUTF16Buffer> mFirstBufferOfMetaScan;

  /**
   * The tree operation executor
   */
  nsHtml5TreeOpExecutor* mExecutor;

  /**
   * Network event target for mExecutor->mDocument
   */
  nsCOMPtr<nsISerialEventTarget> mNetworkEventTarget;

  /**
   * The HTML5 tree builder
   */
  mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder;

  /**
   * The HTML5 tokenizer
   */
  mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer;

  /**
   * Makes sure the main thread can't mess the tokenizer state while it's
   * tokenizing. This mutex also protects the current speculation.
   */
  mozilla::Mutex mTokenizerMutex;

  /**
   * The scoped atom table
   */
  nsHtml5AtomTable mAtomTable;

  /**
   * The owner parser.
   */
  RefPtr<nsHtml5Parser> mOwner;

  /**
   * Whether the last character tokenized was a carriage return (for CRLF)
   */
  bool mLastWasCR;

  /**
   * For tracking stream life cycle
   */
  eHtml5StreamState mStreamState;

  /**
   * Whether we are speculating.
   */
  bool mSpeculating;

  /**
   * Whether the tokenizer has reached EOF. (Reset when stream rewound.)
   */
  bool mAtEOF;

  /**
   * The speculations. The mutex protects the nsTArray itself.
   * To access the queue of current speculation, mTokenizerMutex must be
   * obtained.
   * The current speculation is the last element
   */
  nsTArray<mozilla::UniquePtr<nsHtml5Speculation>> mSpeculations;
  mozilla::Mutex mSpeculationMutex;

  /**
   * Number of times speculation has failed for this parser.
   */
  mozilla::Atomic<uint32_t> mSpeculationFailureCount;

  /**
   * Number of bytes already buffered into mBufferedBytes.
   */
  uint32_t mNumBytesBuffered;

  nsTArray<mozilla::Buffer<uint8_t>> mBufferedBytes;

  /**
   * True to terminate early.
   */
  mozilla::Atomic<bool> mTerminated;

  /**
   * True to release mTokenizerMutex early.
   */
  mozilla::Atomic<bool> mInterrupted;

  /**
   * The thread this stream parser runs on.
   */
  nsCOMPtr<nsISerialEventTarget> mEventTarget;

  nsCOMPtr<nsIRunnable> mExecutorFlusher;

  nsCOMPtr<nsIRunnable> mLoadFlusher;

  /**
   * This runnable is distinct from the regular flushers to
   * signal the intent of encoding commitment without having to
   * protect mPendingEncodingCommitment in the executor with a
   * mutex.
   */
  nsCOMPtr<nsIRunnable> mEncodingCommitter;

  /**
   * The generic detector.
   */
  mozilla::UniquePtr<mozilla::EncodingDetector> mDetector;

  /**
   * The TLD we're loading from or empty if unknown.
   */
  nsCString mTLD;

  /**
   * Whether the initial charset source was kCharsetFromParentFrame
   */
  bool mInitialEncodingWasFromParentFrame;

  bool mHasHadErrors;

  bool mDetectorHasSeenNonAscii;

  /**
   * If true, we are decoding a local file that lacks an encoding
   * declaration and we are not tokenizing yet.
   */
  bool mDecodingLocalFileWithoutTokenizing;

  /**
   * Whether we are keeping the incoming bytes.
   */
  bool mBufferingBytes;

  /**
   * Timer for flushing tree ops once in a while when not speculating.
   */
  nsCOMPtr<nsITimer> mFlushTimer;

  /**
   * Mutex for protecting access to mFlushTimer (but not for the two
   * mFlushTimerFoo booleans below).
   */
  mozilla::Mutex mFlushTimerMutex;

  /**
   * Keeps track whether mFlushTimer has been armed. Unfortunately,
   * nsITimer doesn't enable querying this from the timer itself.
   */
  bool mFlushTimerArmed;

  /**
   * False initially and true after the timer has fired at least once.
   */
  bool mFlushTimerEverFired;

  /**
   * Whether the parser is doing a normal parse, view source or plain text.
   */
  eParserMode mMode;

  /**
   * If the associated docshell is being watched by the devtools, this is
   * set to the URI associated with the parse. All parse data is sent to the
   * devtools, along with this URI. This URI is cleared out after the parse has
   * been marked as completed.
   */
  nsCOMPtr<nsIURI> mURIToSendToDevtools;

  /**
   * If content is being sent to the devtools, an encoded UUID for the parser.
   */
  nsString mUUIDForDevtools;
};

#endif  // nsHtml5StreamParser_h