1 files changed, 94 insertions, 0 deletions
diff --git a/layout/style/ImportScanner.h b/layout/style/ImportScanner.h
new file mode 100644
index 0000000000..ff1755fb8b
--- /dev/null
+++ b/layout/style/ImportScanner.h
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=78: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_ImportScanner_h
+#define mozilla_ImportScanner_h
+
+/* A simple best-effort scanner for @import rules for the HTML parser */
+
+#include "nsString.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+struct ImportScanner final {
+  ImportScanner() = default;
+
+  // Called when a <style> element starts.
+  //
+  // Note that this function cannot make assumptions about the internal state,
+  // as you can nest <svg:style> elements.
+  void Start();
+
+  // Called when a <style> element ends. Returns the list of URLs scanned.
+  nsTArray<nsString> Stop();
+
+  // Whether Scan() should be called (false when outside <style> or Done).
+  bool ShouldScan() const {
+    return mState != State::OutsideOfStyleElement && mState != State::Done;
+  }
+
+  // Scan() should be called when text content is parsed, and returns an array
+  // of found URLs, if any.
+  //
+  // Asserts ShouldScan() returns true.
+  nsTArray<nsString> Scan(Span<const char16_t> aFragment);
+
+ private:
+  enum class State {
+    // Initial state, doesn't scan anything until Start() is called.
+    OutsideOfStyleElement,
+    // In an idle state during the stylesheet scanning, either at the
+    // beginning or after parsing a rule.
+    Idle,
+    // We've seen a '/' character, but not the '*' yet, so we don't know if
+    // it's a comment.
+    MaybeAtCommentStart,
+    // We're inside a comment.
+    AtComment,
+    // We've seen a '*' while we're in a comment, but we don't know yet whether
+    // '/' comes afterwards (thus ending the comment).
+    MaybeAtCommentEnd,
+    // We're parsing the '@' rule name.
+    AtRuleName,
+    // We're parsing the '@' rule value.
+    AtRuleValue,
+    // We're parsing the '@' rule value and we've seen the delimiter (quote or
+    // url() function) that encloses the url.
+    AtRuleValueDelimited,
+    // We've seen the url, but haven't seen the ';' finishing the rule yet.
+    AfterRuleValue,
+    // We've seen anything that is not an @import or a @charset rule, and thus
+    // further @import / @charset should not be parsed.
+    Done,
+  };
+
+  void ResetState();
+  void EmitUrl();
+  [[nodiscard]] State Scan(char16_t aChar);
+
+  static constexpr const uint32_t kMaxRuleNameLength = 7;  // (charset, import)
+
+  State mState = State::OutsideOfStyleElement;
+  nsAutoStringN<kMaxRuleNameLength> mRuleName;
+  nsAutoStringN<128> mRuleValue;
+  nsAutoStringN<128> mAfterRuleValue;
+  nsTArray<nsString> mUrlsFound;
+
+  // This is conceptually part of the AtRuleValue* / AfterRuleValue states,
+  // and serves to differentiate between @import (where we actually care about
+  // the value) and @charset (where we don't). It's just more convenient this
+  // way than having separate states for them.
+  bool mInImportRule = false;
+  // If we're in the AtRuleValueDelimited state, what is the closing character
+  // that will end the value. This is either a parenthesis (for unquoted
+  // urls), or a quote, either single or double.
+  char16_t mUrlValueDelimiterClosingChar = 0;
+};
+
+}  // namespace mozilla
+
+#endif