// Source path: parser/html/nsHtml5TokenizerHSupplement.h
// (blob 0e1930b93e6d7b5c565be9e387c6a0c8daeac2da)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

private:
/**
 * Notes a carriage return for position tracking. Sets lastCR and marks a
 * line advance as pending; line and col themselves are only updated by the
 * next checkChar() call.
 *
 * NOTE(review): lastCR is presumably consulted by the tokenizer loop to
 * treat a following LF as part of a CRLF pair -- confirm where it is read.
 */
inline void silentCarriageReturn() {
  lastCR = true;
  nextCharOnNewLine = true;
}

/**
 * Notes a line feed for position tracking. Only the pending-line-advance
 * flag is set here; line and col are updated by the next checkChar() call.
 */
inline void silentLineFeed() {
  nextCharOnNewLine = true;
}

/**
 * Reads the UTF-16 code unit at buf[pos] and updates the line/column
 * bookkeeping before handing the code unit back.
 *
 * Despite the name (inherited from the validator), nothing is validated
 * here.
 *
 * @param buf the buffer being tokenized
 * @param pos index of the code unit to read from buf
 * @return the code unit stored at buf[pos]
 */
inline char16_t checkChar(char16_t* buf, int32_t pos) {
  const char16_t codeUnit = buf[pos];

  if (MOZ_UNLIKELY(nextCharOnNewLine)) {
    // The position is advanced lazily, on the first code unit after a line
    // break rather than on the break itself. That way the line break is
    // counted, column-wise, as sitting at the end of the line it terminates.
    nextCharOnNewLine = false;
    col = 1;
    line++;
    return codeUnit;
  }

  if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(codeUnit))) {
    // SpiderMonkey wants columns counted in scalar values, not UTF-16 code
    // units. Skipping low surrogates means a two-code-unit scalar value
    // bumps the count exactly once, via its high surrogate.
    //
    // Whether this is worth the cost is debatable: even on the BMP the
    // scalar count is not a reliable human-perceived "column count",
    // because an extended grapheme cluster can consist of several scalar
    // values. This only makes the count right for non-BMP characters that
    // form a single-scalar grapheme cluster.
    col++;
  }
  return codeUnit;
}

// Current column number. checkChar() resets it to 1 on the first code unit
// after a line break and otherwise increments it for each code unit that is
// not a low surrogate.
int32_t col;
// True while a line break has been seen but line/col have not been advanced
// yet. Set by silentCarriageReturn() and silentLineFeed(); cleared by
// checkChar() and setColumnNumberAndResetNextLine().
bool nextCharOnNewLine;

public:
/**
 * Reports the column counter maintained by checkChar().
 *
 * @return the current value of col
 */
inline int32_t getColumnNumber() {
  return col;
}

/**
 * Restores a previously saved column position and clears any pending line
 * advance.
 *
 * @param aCol the column number to restore
 */
inline void setColumnNumberAndResetNextLine(int32_t aCol) {
  // A restored position always refers to the '>' character of a script
  // tag, never to a line break, so no line advance can be pending and the
  // flag is cleared unconditionally.
  nextCharOnNewLine = false;
  col = aCol;
}

/**
 * Exposes the tokenizer's attributes pointer.
 *
 * @return the value of the attributes member, returned as-is
 */
inline nsHtml5HtmlAttributes* GetAttributes() {
  return attributes;
}

/**
 * Ensures the buffers are large enough to tokenize aLength UTF-16 code
 * units without having to be enlarged again along the way.
 *
 * @param aLength the number of UTF-16 code units that will be tokenized
 *                before the next call to this method.
 * @return true if successful; false if out of memory. Callers are expected
 *         to check the result before tokenizing.
 */
bool EnsureBufferSpace(int32_t aLength);

// Declared here, defined elsewhere.
// NOTE(review): judging by the name, this reports whether a template element
// was pushed or the head element was popped -- confirm against the
// definition.
bool TemplatePushedOrHeadPopped();

// Declared here, defined elsewhere.
// NOTE(review): presumably records aPos as the buffer position of a '>'
// character -- confirm against the definition.
void RememberGt(int32_t aPos);

/**
 * Forwards to suspendAfterCurrentTokenIfNotInText().
 *
 * NOTE(review): judging by the name, callers invoke this when the input
 * position crosses a kilobyte boundary -- confirm at the call sites.
 */
void AtKilobyteBoundary() {
  suspendAfterCurrentTokenIfNotInText();
}

/**
 * Reports the result of suspensionAfterCurrentNonTextTokenPending().
 *
 * @return true when that call reports a pending suspension, false otherwise
 */
bool IsInTokenStartedAtKilobyteBoundary() {
  const bool suspensionPending = suspensionAfterCurrentNonTextTokenPending();
  return suspensionPending;
}

// Highlighter owned by this object through UniquePtr.
// NOTE(review): presumably non-null only after EnableViewSource() has been
// called -- confirm against the definition.
mozilla::UniquePtr<nsHtml5Highlighter> mViewSource;

/**
 * Starts handling text/plain. This is a one-way initialization: there is
 * no corresponding EndPlainText() call, so it cannot be undone.
 */
void StartPlainText();

// View-source support. Only declarations are visible here; the definitions
// live elsewhere, so the notes below are hedged where the behavior cannot be
// read off the signature.

// NOTE(review): takes a raw nsHtml5Highlighter* while mViewSource is a
// UniquePtr of the same type; presumably ownership of aHighlighter is
// transferred to mViewSource -- confirm against the definition.
void EnableViewSource(nsHtml5Highlighter* aHighlighter);

bool ShouldFlushViewSource();

// Returns either a bool (success) or an nsresult (failure).
// NOTE(review): the meaning of the bool is not visible here -- confirm.
mozilla::Result<bool, nsresult> FlushViewSource();

void StartViewSource(const nsAutoString& aTitle);

void StartViewSourceCharacters();

// [[nodiscard]]: callers must not ignore the returned bool.
[[nodiscard]] bool EndViewSource();

void RewindViewSource();

void SetViewSourceOpSink(nsAHtml5TreeOpSink* aOpSink);

// Error-reporting hooks. Only the declarations are visible here; the
// definitions live elsewhere.
// NOTE(review): each name appears to identify one tokenizer parse-error
// condition; hooks taking a char16_t/int32_t presumably receive the offending
// character -- confirm against the definitions.
void errGarbageAfterLtSlash();

void errLtSlashGt();

void errWarnLtSlashInRcdata();

void errCharRefLacksSemicolon();

void errNoDigitsInNCR();

void errGtInSystemId();

void errGtInPublicId();

void errNamelessDoctype();

void errConsecutiveHyphens();

void errPrematureEndOfComment();

void errBogusComment();

void errUnquotedAttributeValOrNull(char16_t c);

void errSlashNotFollowedByGt();

void errNoSpaceBetweenAttributes();

void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char16_t c);

void errAttributeValueMissing();

void errBadCharBeforeAttributeNameOrNull(char16_t c);

void errEqualsSignBeforeAttributeName();

void errBadCharAfterLt(char16_t c);

void errLtGt();

void errProcessingInstruction();

void errUnescapedAmpersandInterpretedAsCharacterReference();

void errNotSemicolonTerminated();

void errNoNamedCharacterMatch();

void errQuoteBeforeAttributeName(char16_t c);

void errQuoteOrLtInAttributeNameOrNull(char16_t c);

void errExpectedPublicId();

void errBogusDoctype();

// NOTE(review): the "maybe" prefix suggests these two report only when their
// argument indicates a problem (attributes present / selfClosing set) --
// confirm against the definitions.
void maybeErrAttributesOnEndTag(nsHtml5HtmlAttributes* attrs);

void maybeErrSlashInEndTag(bool selfClosing);

// Returns a char16_t, unlike most hooks in this group.
// NOTE(review): presumably the character the caller should go on to use --
// confirm.
char16_t errNcrNonCharacter(char16_t ch);

void errAstralNonCharacter(int32_t ch);

void errNcrSurrogate();

// Overloaded: a second, argument-less errNcrControlChar() returning void is
// declared further down in this group.
char16_t errNcrControlChar(char16_t ch);

void errNcrCr();

void errNcrInC1Range();

void errEofInPublicId();

void errEofInComment();

void errEofInDoctype();

void errEofInAttributeValue();

void errEofInAttributeName();

void errEofWithoutGt();

void errEofInTagName();

void errEofInEndTag();

void errEofAfterLt();

void errNcrOutOfRange();

void errNcrUnassigned();

void errDuplicateAttribute();

void errEofInSystemId();

void errExpectedSystemId();

void errMissingSpaceBeforeDoctypeName();

void errNestedComment();

// Overload of errNcrControlChar(char16_t) declared above; this variant takes
// no argument and returns nothing.
void errNcrControlChar();

void errNcrZero();

void errNoSpaceBetweenDoctypeSystemKeywordAndQuote();

void errNoSpaceBetweenPublicAndSystemIds();

void errNoSpaceBetweenDoctypePublicKeywordAndQuote();