1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsCSPParser_h___
#define nsCSPParser_h___
#include "nsCSPUtils.h"
#include "nsCSPContext.h"
#include "nsIURI.h"
#include "PolicyTokenizer.h"
// Character classifiers, declared here and defined elsewhere. Their
// bool(char16_t) signature matches the classifier overloads of
// nsCSPParser::accept() and nsCSPParser::peek() declared further down.
// NOTE(review): presumably isNumberToken tests for a decimal digit and
// isValidHexDig for a hexadecimal digit -- confirm against the definitions.
bool isNumberToken(char16_t aSymbol);
bool isValidHexDig(char16_t aHexDig);
// Single UTF-16 code units for the punctuation characters the parser matches
// against. Note that OPENBRACE / CLOSINGBRACE hold the round parentheses
// '(' and ')'.
// clang-format off
constexpr char16_t COLON        = u':';
constexpr char16_t SEMICOLON    = u';';
constexpr char16_t SLASH        = u'/';
constexpr char16_t PLUS         = u'+';
constexpr char16_t DASH         = u'-';
constexpr char16_t DOT          = u'.';
constexpr char16_t UNDERLINE    = u'_';
constexpr char16_t TILDE        = u'~';
constexpr char16_t WILDCARD     = u'*';
constexpr char16_t SINGLEQUOTE  = u'\'';
constexpr char16_t NUMBER_SIGN  = u'#';
constexpr char16_t QUESTIONMARK = u'?';
constexpr char16_t PERCENT_SIGN = u'%';
constexpr char16_t EXCLAMATION  = u'!';
constexpr char16_t DOLLAR       = u'$';
constexpr char16_t AMPERSAND    = u'&';
constexpr char16_t OPENBRACE    = u'(';
constexpr char16_t CLOSINGBRACE = u')';
constexpr char16_t EQUALS       = u'=';
constexpr char16_t ATSYMBOL     = u'@';
// clang-format on
class nsCSPParser {
 public:
  /**
   * Sole public entry point of the CSP parser.
   *
   * Internally the input string is split into string tokens and policy() is
   * invoked, which starts parsing the policy. From there the parser calls one
   * function after the other according to the source-list grammar from
   * http://www.w3.org/TR/CSP11/#source-list. For example, port() can only be
   * called once host() has processed any possible host, similar to a finite
   * state machine.
   */
  static nsCSPPolicy* parseContentSecurityPolicy(const nsAString& aPolicyString,
                                                 nsIURI* aSelfURI,
                                                 bool aReportOnly,
                                                 nsCSPContext* aCSPContext,
                                                 bool aDeliveredViaMetaTag,
                                                 bool aSuppressLogMessages);

 private:
  // Construction and destruction are private; instances are not created by
  // outside code.
  nsCSPParser(policyTokens& aTokens, nsIURI* aSelfURI,
              nsCSPContext* aCSPContext, bool aDeliveredViaMetaTag,
              bool aSuppressLogMessages);
  ~nsCSPParser();

  // Grammar productions, following the source-list from
  // http://www.w3.org/TR/CSP11/#source-list
  nsCSPPolicy* policy();
  void directive();
  nsCSPDirective* directiveName();
  void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);
  void referrerDirectiveValue(nsCSPDirective* aDir);
  void reportURIList(nsCSPDirective* aDir);
  void sandboxFlagList(nsCSPDirective* aDir);
  void handleRequireTrustedTypesForDirective(nsCSPDirective* aDir);
  void handleTrustedTypesDirective(nsCSPDirective* aDir);
  void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);
  nsCSPBaseSrc* sourceExpression();
  nsCSPSchemeSrc* schemeSource();
  nsCSPHostSrc* hostSource();
  nsCSPBaseSrc* keywordSource();
  nsCSPNonceSrc* nonceSource();
  nsCSPHashSrc* hashSource();
  nsCSPHostSrc* host();
  bool hostChar();
  bool schemeChar();
  bool port();
  bool path(nsCSPHostSrc* aCspHost);

  bool subHost();                        // helper to parse subdomains
  bool atValidUnreservedChar();          // helper to parse unreserved
  bool atValidSubDelimChar();            // helper to parse sub-delims
  bool atValidPctEncodedChar();          // helper to parse pct-encoded
  bool subPath(nsCSPHostSrc* aCspHost);  // helper to parse paths

  // True once mCurChar has reached (or moved past) mEndChar.
  bool atEnd() { return !(mCurChar < mEndChar); }

  // Consumes the current character if it equals aSymbol. Returns true only
  // when a character was actually consumed.
  bool accept(char16_t aSymbol) { return peek(aSymbol) && advance(); }

  // Consumes the current character if aClassifier accepts it. Returns true
  // only when a character was actually consumed.
  bool accept(bool (*aClassifier)(char16_t)) {
    return peek(aClassifier) && advance();
  }

  // Tests, without consuming anything, whether the current character equals
  // aSymbol. Always false at the end of the token.
  bool peek(char16_t aSymbol) {
    if (atEnd()) {
      return false;
    }
    const char16_t current = *mCurChar;
    return current == aSymbol;
  }

  // Tests, without consuming anything, whether aClassifier accepts the
  // current character. Always false at the end of the token.
  bool peek(bool (*aClassifier)(char16_t)) {
    return atEnd() ? false : aClassifier(*mCurChar);
  }

  // Appends the current character to mCurValue and steps mCurChar forward by
  // one. At the end of the token nothing is changed and false is returned.
  bool advance() {
    const bool hasInput = !atEnd();
    if (hasInput) {
      mCurValue.Append(*mCurChar);
      ++mCurChar;
    }
    return hasInput;
  }

  // Truncates mCurValue so that the next value can be accumulated.
  void resetCurValue() { mCurValue.Truncate(); }

  bool atEndOfPath();
  bool atValidPathChar();
  void resetCurChar(const nsAString& aToken);

  void logWarningErrorToConsole(uint32_t aSeverityFlag, const char* aProperty,
                                const nsTArray<nsString>& aParams);
  void MaybeWarnAboutIgnoredSources(const nsTArray<nsCSPBaseSrc*>& aSrcs);
  void MaybeWarnAboutUnsafeInline(const nsCSPDirective& aDirective);
  void MaybeWarnAboutUnsafeEval(const nsCSPDirective& aDirective);

  /**
   * While parsing the policy, the parser relies on the helper members below,
   * which are used and reset as parsing progresses. The following example
   * explains how they are used.
   *
   * The tokenizer separates all input into arrays of arrays of strings, which
   * are stored in mTokens, for example:
   *   mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]
   *
   * When parsing starts, mCurDir always holds the currently processed array
   * of strings. In our example:
   *   mCurDir = [ script-src, http://www.example.com, 'self' ]
   *
   * During parsing, one string of that array is processed/consumed at a time.
   * mCurToken is set to the string currently being processed; in the first
   * case that would be mCurToken = script-src, which allows simple string
   * comparisons to decide whether mCurToken is a valid directive.
   *
   * Continuing, the parser consumes the next string of that array, resetting:
   *   mCurToken = "http://www.example.com"
   *                ^                     ^
   *                mCurChar              mEndChar (points *after* the 'm')
   *   mCurValue = ""
   *
   * After calling advance() the first time, the helpers hold the following
   * values:
   *   mCurToken = "http://www.example.com"
   *                 ^                    ^
   *                 mCurChar             mEndChar (points *after* the 'm')
   *   mCurValue = "h"
   *
   * Parsing continues until all strings of one directive are consumed; then
   * mCurDir is reset to hold the next array of strings and the process starts
   * all over.
   */
  const char16_t* mCurChar;
  const char16_t* mEndChar;
  nsString mCurValue;
  nsString mCurToken;
  nsTArray<nsString> mCurDir;

  // Helpers to allow invalidation of srcs within script-src and style-src
  // if either 'strict-dynamic' or at least a hash or nonce is present.
  bool mHasHashOrNonce;    // false, if no hash or nonce is defined
  bool mHasAnyUnsafeEval;  // false, if no (wasm-)unsafe-eval keyword is used.
  bool mStrictDynamic;     // false, if 'strict-dynamic' is not defined
  nsCSPKeywordSrc* mUnsafeInlineKeywordSrc;  // null, otherwise invalidate()

  // Cache variables for child-src, frame-src and worker-src handling.
  // In CSP 3 child-src is deprecated. For backwards compatibility
  // child-src needs to restrict:
  //   (*) frames, in case frame-src is not explicitly specified
  //   (*) workers, in case worker-src is not explicitly specified
  // If neither worker-src nor child-src is present, then script-src
  // needs to govern workers.
  nsCSPChildSrcDirective* mChildSrc;
  nsCSPDirective* mFrameSrc;
  nsCSPDirective* mWorkerSrc;
  nsCSPScriptSrcDirective* mScriptSrc;
  nsCSPStyleSrcDirective* mStyleSrc;

  // Cache variable to let nsCSPHostSrc know that it is within
  // the frame-ancestors directive.
  bool mParsingFrameAncestorsDir;

  policyTokens mTokens;
  nsIURI* mSelfURI;
  nsCSPPolicy* mPolicy;
  nsCSPContext* mCSPContext;  // used for console logging
  bool mDeliveredViaMetaTag;
  bool mSuppressLogMessages;
};
#endif /* nsCSPParser_h___ */
|