summaryrefslogtreecommitdiffstats
path: root/dom/base/nsTreeSanitizer.h
blob: ed1c49c60ca523e73c10bd5b3fd7c26a1453a3d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsTreeSanitizer_h_
#define nsTreeSanitizer_h_

#include "nsAtom.h"
#include "nsHashKeys.h"
#include "nsHashtablesFwd.h"
#include "nsIPrincipal.h"
#include "nsTArray.h"
#include "nsTHashSet.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/dom/NameSpaceConstants.h"
#include "mozilla/dom/SanitizerBinding.h"

// Forward declarations; this header declares these names without defining
// the types.
class nsIContent;
class nsIGlobalObject;
class nsINode;

namespace mozilla {
class DeclarationBlock;
class ErrorResult;
enum class StyleSanitizationKind : uint8_t;
}  // namespace mozilla

namespace mozilla::dom {
class DocumentFragment;
class Element;
// NOTE(review): the ConvertElements declarations in nsTreeSanitizer spell
// their element type OwningStringOrSanitizerElementNamespace (without the
// extra "Name"); confirm which spelling is intended for this declaration.
class OwningStringOrSanitizerElementNameNamespace;
struct SanitizerAttribute;
}  // namespace mozilla::dom

/**
 * See the documentation of nsIParserUtils::sanitize for documentation
 * about the default behavior and the configuration options of this sanitizer.
 *
 * This header only declares the sanitizer; apart from the small inline
 * helpers in the nested classes, the member functions are defined elsewhere.
 */
class nsTreeSanitizer {
 public:
  /**
   * The constructor.
   *
   * @param aFlags Flags from nsIParserUtils
   */
  explicit nsTreeSanitizer(uint32_t aFlags = 0);

  // Static setup and teardown hooks.
  // NOTE(review): presumably these create and release the static tables and
  // sNullPrincipal declared at the end of this class -- confirm in the
  // implementation file.
  static void InitializeStatics();
  static void ReleaseStatics();

  /**
   * Sanitizes a disconnected DOM fragment freshly obtained from a parser.
   * The fragment must have just come from a parser so that it can't have
   * mutation event listeners set on it.
   */
  void Sanitize(mozilla::dom::DocumentFragment* aFragment);

  /**
   * Sanitizes a disconnected (not in a docshell) document freshly obtained
   * from a parser. The document must not be embedded in a docshell and must
   * not have had a chance to get mutation event listeners attached to it.
   * The root element must be <html>.
   */
  void Sanitize(mozilla::dom::Document* aDocument);

  /**
   * Provides additional options for usage from the Web Sanitizer API
   * which allows modifying the allow-list from above
   *
   * @param aGlobal  the global object associated with the caller
   * @param aOptions the Sanitizer API configuration to apply
   * @param aRv      receives an error if the configuration cannot be applied
   */
  void WithWebSanitizerOptions(nsIGlobalObject* aGlobal,
                               const mozilla::dom::SanitizerConfig& aOptions,
                               mozilla::ErrorResult& aRv);

  /**
   * Removes conditional CSS from this subtree.
   */
  static void RemoveConditionalCSSFromSubtree(nsINode* aRoot);

 private:
  // The boolean flags below have no in-class initializers.
  // NOTE(review): presumably the constructor sets each of them from aFlags --
  // confirm in the implementation file.

  /**
   * Whether <style> and style="" are allowed.
   */
  bool mAllowStyles;

  /**
   * Whether comment nodes are allowed.
   */
  bool mAllowComments;

  /**
   * Whether HTML <font>, <center>, bgcolor="", etc., are dropped.
   */
  bool mDropNonCSSPresentation;

  /**
   * Whether to remove forms and form controls (excluding fieldset/legend).
   */
  bool mDropForms;

  /**
   * Whether only cid: embeds are allowed.
   */
  bool mCidEmbedsOnly;

  /**
   * Whether to drop <img>, <video>, <audio> and <svg>.
   */
  bool mDropMedia;

  /**
   * Whether we are sanitizing a full document (as opposed to a fragment).
   */
  bool mFullDocument;

  /**
   * Whether we should notify to the console for anything that's stripped.
   */
  bool mLogRemovals;

  // WindowID used for logging removals.
  uint64_t mInnerWindowID = 0;

  /**
   * We have various tables of static atoms for elements and attributes.
   *
   * This is a hash set of static-atom pointers; the constructor argument is
   * forwarded unchanged to the nsTHashSet constructor.
   */
  class AtomsTable : public nsTHashSet<const nsStaticAtom*> {
   public:
    explicit AtomsTable(uint32_t aLength)
        : nsTHashSet<const nsStaticAtom*>(aLength) {}

    /**
     * @param aAtom the atom to look up. It is dereferenced unconditionally,
     *              so callers must not pass null.
     * @return true if aAtom is a static atom that has an entry in this set
     */
    bool Contains(nsAtom* aAtom) {
      // Because this table only contains static atoms, if aAtom isn't
      // static we can immediately fail.
      return aAtom->IsStatic() && GetEntry(aAtom->AsStatic());
    }
  };

  // The name of an element combined with its namespace.
  //
  // This class doubles as its own hashtable key: KeyType is a reference to a
  // NamespaceAtom and the entry is constructed by copying the key's fields.
  // NOTE(review): the aliases and static helpers below presumably follow the
  // entry-class interface expected by nsTHashSet/nsTHashMap -- confirm
  // against the hashtable documentation.
  class NamespaceAtom : public PLDHashEntryHdr {
   public:
    using KeyType = const NamespaceAtom&;
    using KeyTypePointer = const NamespaceAtom*;

    // Builds an entry as a copy of the key's namespace ID and local name.
    explicit NamespaceAtom(KeyTypePointer aKey)
        : mNamespaceID(aKey->mNamespaceID), mLocalName(aKey->mLocalName) {}
    // Builds a name from its parts; aLocalName is taken by value and moved
    // into the member.
    NamespaceAtom(int32_t aNamespaceID, RefPtr<nsAtom> aLocalName)
        : mNamespaceID(aNamespaceID), mLocalName(std::move(aLocalName)) {}
    NamespaceAtom(NamespaceAtom&&) = default;
    ~NamespaceAtom() = default;

    // Two names are equal when both the namespace ID and the local-name atom
    // compare equal.
    bool KeyEquals(KeyTypePointer aKey) const {
      return mNamespaceID == aKey->mNamespaceID &&
             mLocalName == aKey->mLocalName;
    }

    static KeyTypePointer KeyToPointer(KeyType aKey) { return &aKey; }
    // Hashes the namespace ID together with the raw atom pointer; a null key
    // hashes to 0.
    static PLDHashNumber HashKey(KeyTypePointer aKey) {
      if (!aKey) {
        return 0;
      }

      return mozilla::HashGeneric(aKey->mNamespaceID, aKey->mLocalName.get());
    }

    // NOTE(review): presumably tells the hashtable that entries may be
    // relocated with memmove -- confirm against the hashtable documentation.
    enum { ALLOW_MEMMOVE = true };

   private:
    // Namespace ID of the name; defaults to kNameSpaceID_None.
    int32_t mNamespaceID = kNameSpaceID_None;
    // Local name of the element or attribute.
    RefPtr<nsAtom> mLocalName;
  };

  // Element names and attribute names share the same representation.
  using ElementName = NamespaceAtom;
  using AttributeName = NamespaceAtom;

  using ElementNameSet = nsTHashSet<ElementName>;
  // nullptr value (ElementNameSet) means all elements (*).
  using AttributesToElementsMap =
      nsTHashMap<AttributeName, mozilla::UniquePtr<ElementNameSet>>;

  // NOTE(review): undocumented in this header; presumably applies the
  // sanitization rules to the nodes below aRoot -- confirm in the
  // implementation file.
  void SanitizeChildren(nsINode* aRoot);

  /**
   * Queries if an element must be replaced with its children.
   * @param aNamespace the namespace of the element the question is about
   * @param aLocal the local name of the element the question is about
   * @return true if the element must be replaced with its children and
   *         false if the element is to be kept
   */
  bool MustFlatten(int32_t aNamespace, nsAtom* aLocal);
  // Takes the same parameters as MustFlatten.
  // NOTE(review): presumably the variant used when mIsForSanitizerAPI is
  // set -- confirm in the implementation file.
  bool MustFlattenForSanitizerAPI(int32_t aNamespace, nsAtom* aLocal);

  /**
   * Queries if an element including its children must be removed.
   * @param aNamespace the namespace of the element the question is about
   * @param aLocal the local name of the element the question is about
   * @param aElement the element node itself for inspecting attributes
   * @return true if the element and its children must be removed and
   *         false if the element is to be kept
   */
  bool MustPrune(int32_t aNamespace, nsAtom* aLocal,
                 mozilla::dom::Element* aElement);
  // Takes the same parameters as MustPrune.
  // NOTE(review): presumably the variant used when mIsForSanitizerAPI is
  // set -- confirm in the implementation file.
  bool MustPruneForSanitizerAPI(int32_t aNamespace, nsAtom* aLocal,
                                mozilla::dom::Element* aElement);

  /**
   * Checks if a given local name (for an attribute) is on the given list
   * of URL attribute names.
   * @param aURLs the list of URL attribute names
   * @param aLocalName the name to search on the list
   * @return true if aLocalName is on the aURLs list and false otherwise
   */
  bool IsURL(const nsStaticAtom* const* aURLs, nsAtom* aLocalName);

  /**
   * Struct for what attributes and their values are allowed.
   *
   * A default-constructed instance has no name table, no URL list and every
   * flag set to false.
   */
  struct AllowedAttributes {
    // The whitelist of permitted local names to use.
    AtomsTable* mNames = nullptr;
    // The local names of URL-valued attributes for URL checking.
    const nsStaticAtom* const* mURLs = nullptr;
    // Whether XLink attributes are allowed.
    bool mXLink = false;
    // Whether the style attribute is allowed.
    bool mStyle = false;
    // Whether to leave the value of the src attribute unsanitized.
    bool mDangerousSrc = false;
  };

  /**
   * Removes dangerous attributes from the element. If the style attribute
   * is allowed, its value is sanitized. The values of URL attributes are
   * sanitized, except src isn't sanitized when it is allowed to remain
   * potentially dangerous.
   *
   * @param aElement the element whose attributes should be sanitized
   * @param aAllowed options for sanitizing attributes (passed by value)
   */
  void SanitizeAttributes(mozilla::dom::Element* aElement,
                          AllowedAttributes aAllowed);
  // Currently only used for the Sanitizer API.
  bool MustDropAttribute(mozilla::dom::Element* aElement,
                         int32_t aAttrNamespace, nsAtom* aAttrLocalName);
  // NOTE(review): undocumented in this header; see the implementation file
  // for which attributes are treated as "funky".
  bool MustDropFunkyAttribute(mozilla::dom::Element* aElement,
                              int32_t aAttrNamespace, nsAtom* aAttrLocalName);

  /**
   * Remove the named URL attribute from the element if the URL fails a
   * security check.
   *
   * @param aElement the element whose attribute to possibly modify
   * @param aNamespace the namespace of the URL attribute
   * @param aLocalName the local name of the URL attribute
   * @param aFragmentsOnly allows same-document references only
   *                       (defaults to false)
   * @return true if the attribute was removed and false otherwise
   */
  bool SanitizeURL(mozilla::dom::Element* aElement, int32_t aNamespace,
                   nsAtom* aLocalName, bool aFragmentsOnly = false);

  /**
   * Checks a style rule for the presence of the 'binding' CSS property and
   * removes that property from the rule.
   *
   * @param aDeclaration The style declaration to check
   * @return true if the rule was modified and false otherwise
   */
  bool SanitizeStyleDeclaration(mozilla::DeclarationBlock* aDeclaration);

  /**
   * Sanitizes an inline style element (an HTML or SVG <style>).
   *
   * Returns whether the style has changed.
   */
  static bool SanitizeInlineStyle(mozilla::dom::Element*,
                                  mozilla::StyleSanitizationKind);

  /**
   * Removes all attributes from an element node.
   */
  static void RemoveAllAttributes(mozilla::dom::Element* aElement);

  /**
   * Removes all attributes from the descendants of an element but not from
   * the element itself.
   */
  static void RemoveAllAttributesFromDescendants(mozilla::dom::Element*);

  // Lookup helpers for the Sanitizer API name sets and attribute maps.
  // NOTE(review): exact matching rules (including how a nullptr
  // ElementNameSet value is treated) are defined in the implementation file.
  static bool MatchesElementName(ElementNameSet& aNames, int32_t aNamespace,
                                 nsAtom* aLocalName);
  static bool MatchesAttributeMatchList(AttributesToElementsMap& aMatchList,
                                        mozilla::dom::Element& aElement,
                                        int32_t aAttrNamespace,
                                        nsAtom* aAttrLocalName);

  // Conversion helpers that build the internal sets and maps from Sanitizer
  // API binding types; each takes an ErrorResult out-parameter and returns
  // an owning pointer to the result.
  static mozilla::UniquePtr<ElementNameSet> ConvertElements(
      const nsTArray<mozilla::dom::OwningStringOrSanitizerElementNamespace>&
          aElements,
      mozilla::ErrorResult& aRv);

  static mozilla::UniquePtr<ElementNameSet> ConvertElements(
      const mozilla::dom::OwningStarOrStringOrSanitizerElementNamespaceSequence&
          aElements,
      mozilla::ErrorResult& aRv);

  static mozilla::UniquePtr<AttributesToElementsMap> ConvertAttributes(
      const nsTArray<mozilla::dom::SanitizerAttribute>& aAttributes,
      mozilla::ErrorResult& aRv);

  /**
   * Log a Console Service message to indicate we removed something.
   * If you pass an element and/or attribute, their information will
   * be appended to the message.
   *
   * @param aMessage the basic message to log.
   * @param aDoc     the base document we're modifying
   *                 (used for the error message)
   * @param aElement optional, the element being removed or modified.
   * @param aAttr    optional, the attribute being removed or modified.
   */
  void LogMessage(const char* aMessage, mozilla::dom::Document* aDoc,
                  mozilla::dom::Element* aElement = nullptr,
                  nsAtom* aAttr = nullptr);

  /**
   * The whitelist of HTML elements.
   */
  static AtomsTable* sElementsHTML;

  /**
   * The whitelist of non-presentational HTML attributes.
   */
  static AtomsTable* sAttributesHTML;

  /**
   * The whitelist of presentational HTML attributes.
   */
  static AtomsTable* sPresAttributesHTML;

  /**
   * The whitelist of SVG elements.
   */
  static AtomsTable* sElementsSVG;

  /**
   * The whitelist of SVG attributes.
   */
  static AtomsTable* sAttributesSVG;

  /**
   * The whitelist of MathML elements.
   */
  static AtomsTable* sElementsMathML;

  /**
   * The whitelist of MathML attributes.
   */
  static AtomsTable* sAttributesMathML;

  /**
   * The built-in baseline attribute allow list used by the Sanitizer API.
   */
  static AtomsTable* sBaselineAttributeAllowlist;

  /**
   * The built-in baseline element allow list used by the Sanitizer API.
   */
  static AtomsTable* sBaselineElementAllowlist;

  /**
   * The default configuration's attribute allow list used by the Sanitizer API.
   */
  static AtomsTable* sDefaultConfigurationAttributeAllowlist;

  /**
   * The default configuration's element allow list used by the Sanitizer API.
   */
  static AtomsTable* sDefaultConfigurationElementAllowlist;

  /**
   * Reusable null principal for URL checks.
   *
   * NOTE(review): stored as a raw pointer; ownership and lifetime are
   * presumably handled by InitializeStatics/ReleaseStatics -- confirm.
   */
  static nsIPrincipal* sNullPrincipal;

  // === Variables used to implement HTML Sanitizer API. ==

  // This nsTreeSanitizer instance should behave like the Sanitizer API.
  bool mIsForSanitizerAPI = false;

  // Sanitizer API options; both default to false.
  bool mAllowCustomElements = false;
  bool mAllowUnknownMarkup = false;

  // An allow-list of elements to keep.
  mozilla::UniquePtr<ElementNameSet> mAllowElements;

  // A deny-list of elements to block. (aka flatten)
  mozilla::UniquePtr<ElementNameSet> mBlockElements;

  // A deny-list of elements to drop. (aka prune)
  mozilla::UniquePtr<ElementNameSet> mDropElements;

  // An allow-list of attributes to keep.
  mozilla::UniquePtr<AttributesToElementsMap> mAllowAttributes;

  // A deny-list of attributes to drop.
  mozilla::UniquePtr<AttributesToElementsMap> mDropAttributes;
};

#endif  // nsTreeSanitizer_h_