summaryrefslogtreecommitdiffstats
path: root/third_party/rust/cssparser
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/cssparser
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cssparser')
-rw-r--r--third_party/rust/cssparser/.cargo-checksum.json1
-rw-r--r--third_party/rust/cssparser/.github/workflows/main.yml78
-rw-r--r--third_party/rust/cssparser/Cargo.toml65
-rw-r--r--third_party/rust/cssparser/LICENSE373
-rw-r--r--third_party/rust/cssparser/README.md57
-rw-r--r--third_party/rust/cssparser/docs/.nojekyll0
-rw-r--r--third_party/rust/cssparser/docs/404.html3
-rw-r--r--third_party/rust/cssparser/docs/index.html3
-rw-r--r--third_party/rust/cssparser/src/color.rs351
-rw-r--r--third_party/rust/cssparser/src/cow_rc_str.rs185
-rw-r--r--third_party/rust/cssparser/src/from_bytes.rs64
-rw-r--r--third_party/rust/cssparser/src/lib.rs108
-rw-r--r--third_party/rust/cssparser/src/macros.rs204
-rw-r--r--third_party/rust/cssparser/src/nth.rs145
-rw-r--r--third_party/rust/cssparser/src/parser.rs1169
-rw-r--r--third_party/rust/cssparser/src/rules_and_declarations.rs507
-rw-r--r--third_party/rust/cssparser/src/serializer.rs593
-rw-r--r--third_party/rust/cssparser/src/size_of_tests.rs52
-rw-r--r--third_party/rust/cssparser/src/tests.rs1362
-rw-r--r--third_party/rust/cssparser/src/tokenizer.rs1403
-rw-r--r--third_party/rust/cssparser/src/unicode_range.rs181
21 files changed, 6904 insertions, 0 deletions
diff --git a/third_party/rust/cssparser/.cargo-checksum.json b/third_party/rust/cssparser/.cargo-checksum.json
new file mode 100644
index 0000000000..2c42f1420e
--- /dev/null
+++ b/third_party/rust/cssparser/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{".github/workflows/main.yml":"9fb6be1c14d9107ac4613e660d111d469722839ddf8a59e781c54a3607676e9e","Cargo.toml":"2c12f0dd7e94af4ca4ae29a741d2de2447c705f83fec0ab601b3548d2b7c64f4","LICENSE":"fab3dd6bdab226f1c08630b1dd917e11fcb4ec5e1e020e2c16f83a0a13863e85","README.md":"53a6805edd80f642473514cb93f1f4197e17a911d66a2dfcefc3dc5e82bac206","docs/.nojekyll":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","docs/404.html":"025861f76f8d1f6d67c20ab624c6e418f4f824385e2dd8ad8732c4ea563c6a2e","docs/index.html":"025861f76f8d1f6d67c20ab624c6e418f4f824385e2dd8ad8732c4ea563c6a2e","src/color.rs":"eedf03d8ba8ca54a744617fdd945c80cbae73f99b6dff06f43a39764a93a3ac5","src/cow_rc_str.rs":"4d172d3633ef55af815784fbaee03cbcf85796a380765a0af09bbb6ca5b6fbab","src/from_bytes.rs":"b1cf15c4e975523fef46b575598737a39f3c63e5ce0b2bfd6ec627c69c6ea54a","src/lib.rs":"13be989c091fb59ecab3e855e76e7c3468f465f63e7391303fa51f251441916a","src/macros.rs":"c6e06fd014ee8c6212c72928e8b474fb1cd13a0b604055e9943ed05179a0e63b","src/nth.rs":"2fc26915f0a36cb22ac45dd9a7ecbdc64c327b2ec135370258ec3db9f9985460","src/parser.rs":"51d86df7f788da4ee6bdef8e92474bf118ac26f8954f82a14d11f1f578b6998e","src/rules_and_declarations.rs":"180c797c75a1f7298c4e47dc819cd5f8c8d911d20492eac88f10d910fd5258d4","src/serializer.rs":"b3d59a3b72a67f7bcd0f949497445d756f584661424682d03a3a1030ed4862b1","src/size_of_tests.rs":"da0cbcaa304f7800e9122e2bce0a11d42a70b9012e646a723cb23ee74a6b858c","src/tests.rs":"aa67c41be76b2a944d4d6dd162c3e8a77be1f877e94ac62e8f065adb5407a669","src/tokenizer.rs":"1f690582d4cdba930a379e5808d54f4085e3c6b60345e55c1141df7e263c722a","src/unicode_range.rs":"20d96f06fbb73921e308cc340c9fe065e27f19843005689fb259007a6a372bcc"},"package":null} \ No newline at end of file
diff --git a/third_party/rust/cssparser/.github/workflows/main.yml b/third_party/rust/cssparser/.github/workflows/main.yml
new file mode 100644
index 0000000000..0d3c0229fb
--- /dev/null
+++ b/third_party/rust/cssparser/.github/workflows/main.yml
@@ -0,0 +1,78 @@
+name: CI
+
+on:
+ push:
+ branches: [master]
+ pull_request:
+ workflow_dispatch:
+ merge_group:
+ types: [checks_requested]
+
+jobs:
+ linux-ci:
+ name: Linux
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ toolchain:
+ - nightly
+ - beta
+ - stable
+ - 1.63.0
+ features:
+ -
+ - --features dummy_match_byte
+ include:
+ - toolchain: nightly
+ features: --features bench
+ - toolchain: nightly
+ features: --features bench,dummy_match_byte
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Install toolchain
+ uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: ${{ matrix.toolchain }}
+ override: true
+ components: ${{ matrix.toolchain == 'nightly' && 'miri,rust-src' || '' }}
+
+ - name: Cargo build
+ run: cargo build ${{ matrix.features }}
+
+ - name: Cargo doc
+ run: cargo doc ${{ matrix.features }}
+
+ - name: Cargo test
+ run: cargo test ${{ matrix.features }}
+
+ - name: macros build
+ run: cargo build
+ working-directory: macros
+
+ - name: Color build
+ run: cargo build
+ working-directory: color
+
+ - name: Color test
+ run: cargo test
+ working-directory: color
+
+ - name: Cargo miri test
+ if: "matrix.toolchain == 'nightly'"
+ run: cargo miri test --features skip_long_tests ${{ matrix.features }}
+
+ build_result:
+ name: Result
+ runs-on: ubuntu-latest
+ needs:
+ - "linux-ci"
+
+ steps:
+ - name: Mark the job as successful
+ run: exit 0
+ if: success()
+ - name: Mark the job as unsuccessful
+ run: exit 1
+ if: "!success()"
diff --git a/third_party/rust/cssparser/Cargo.toml b/third_party/rust/cssparser/Cargo.toml
new file mode 100644
index 0000000000..28312541ae
--- /dev/null
+++ b/third_party/rust/cssparser/Cargo.toml
@@ -0,0 +1,65 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+rust-version = "1.63"
+name = "cssparser"
+version = "0.33.0"
+authors = ["Simon Sapin <simon.sapin@exyr.org>"]
+exclude = [
+ "src/css-parsing-tests/**",
+ "src/big-data-url.css",
+]
+description = "Rust implementation of CSS Syntax Level 3"
+documentation = "https://docs.rs/cssparser/"
+readme = "README.md"
+keywords = [
+ "css",
+ "syntax",
+ "parser",
+]
+license = "MPL-2.0"
+repository = "https://github.com/servo/rust-cssparser"
+
+[dependencies]
+dtoa-short = "0.3"
+itoa = "1.0"
+smallvec = "1.0"
+
+[dependencies.cssparser-macros]
+version = "0.6.1"
+path = "./macros"
+
+[dependencies.phf]
+version = ">=0.8,<=0.11"
+features = ["macros"]
+
+[dependencies.serde]
+version = "1.0"
+optional = true
+
+[dev-dependencies]
+difference = "2.0"
+encoding_rs = "0.8"
+serde_json = "1.0"
+
+[features]
+bench = []
+dummy_match_byte = []
+skip_long_tests = []
+
+[workspace]
+members = [
+ ".",
+ "./macros",
+ "./color",
+]
diff --git a/third_party/rust/cssparser/LICENSE b/third_party/rust/cssparser/LICENSE
new file mode 100644
index 0000000000..14e2f777f6
--- /dev/null
+++ b/third_party/rust/cssparser/LICENSE
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/third_party/rust/cssparser/README.md b/third_party/rust/cssparser/README.md
new file mode 100644
index 0000000000..84d47d9e04
--- /dev/null
+++ b/third_party/rust/cssparser/README.md
@@ -0,0 +1,57 @@
+rust-cssparser
+==============
+
+[![Build Status](https://github.com/servo/rust-cssparser/actions/workflows/main.yml/badge.svg)](https://github.com/servo/rust-cssparser/actions)
+
+[Documentation](https://docs.rs/cssparser/)
+
+Rust implementation of
+[CSS Syntax Module Level 3](https://drafts.csswg.org/css-syntax/)
+
+
+Overview
+--------
+
+Parsing CSS involves a series of steps:
+
+* When parsing from bytes,
+ (e.g. reading a file or fetching an URL from the network,)
+ detect the character encoding
+ (based on a `Content-Type` HTTP header, an `@charset` rule, a BOM, etc.)
+ and decode to Unicode text.
+
+ rust-cssparser does not do this yet and just assumes UTF-8.
+
+ This step is skipped when parsing from Unicode, e.g. in an HTML `<style>` element.
+
+* Tokenization, a.k.a. lexing.
+ The input, a stream of Unicode text, is transformed into a stream of *tokens*.
+ Tokenization never fails, although the output may contain *error tokens*.
+
+* This flat stream of tokens is then transformed into a tree of *component values*,
+ which are either *preserved tokens*,
+ or blocks/functions (`{ … }`, `[ … ]`, `( … )`, `foo( … )`)
+ that contain more component values.
+
+ rust-cssparser does this at the same time as tokenization:
+ raw tokens are never materialized, you only get component values.
+
+* Component values can then be parsed into generic rules or declarations.
+ The header and body of rules as well as the value of declarations
+ are still just lists of component values at this point.
+ See [the `Token` enum](src/tokenizer.rs) for the data structure.
+
+* The last step of a full CSS parser is
+ parsing the remaining component values
+ into [Selectors](https://drafts.csswg.org/selectors/),
+ specific CSS properties, etc.
+
+ By design, rust-cssparser does not do this last step
+ which depends a lot on what you want to do:
+ which properties you want to support, what you want to do with selectors, etc.
+
+ It does however provide some helper functions to parse [CSS colors](src/color.rs)
+ and [An+B](src/nth.rs) (the argument to `:nth-child()` and related selectors.
+
+ See [Servo’s `style` crate](https://github.com/servo/servo/tree/master/components/style)
+ for an example of a parser based on rust-cssparser.
diff --git a/third_party/rust/cssparser/docs/.nojekyll b/third_party/rust/cssparser/docs/.nojekyll
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/rust/cssparser/docs/.nojekyll
diff --git a/third_party/rust/cssparser/docs/404.html b/third_party/rust/cssparser/docs/404.html
new file mode 100644
index 0000000000..bf70be151d
--- /dev/null
+++ b/third_party/rust/cssparser/docs/404.html
@@ -0,0 +1,3 @@
+<meta http-equiv="refresh" content="0; url=https://docs.rs/cssparser/">
+<link rel="canonical" href="https://docs.rs/cssparser/">
+<a href="https://docs.rs/cssparser/">Moved to docs.rs</a>
diff --git a/third_party/rust/cssparser/docs/index.html b/third_party/rust/cssparser/docs/index.html
new file mode 100644
index 0000000000..bf70be151d
--- /dev/null
+++ b/third_party/rust/cssparser/docs/index.html
@@ -0,0 +1,3 @@
+<meta http-equiv="refresh" content="0; url=https://docs.rs/cssparser/">
+<link rel="canonical" href="https://docs.rs/cssparser/">
+<a href="https://docs.rs/cssparser/">Moved to docs.rs</a>
diff --git a/third_party/rust/cssparser/src/color.rs b/third_party/rust/cssparser/src/color.rs
new file mode 100644
index 0000000000..d5f9a5c0e6
--- /dev/null
+++ b/third_party/rust/cssparser/src/color.rs
@@ -0,0 +1,351 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! General color-parsing utilities, independent on the specific color storage and parsing
+//! implementation.
+//!
+//! For a more complete css-color implementation take a look at cssparser-color crate, or at
+//! Gecko's color module.
+
+// Allow text like <color> in docs.
+#![allow(rustdoc::invalid_html_tags)]
+
+/// The opaque alpha value of 1.0.
+pub const OPAQUE: f32 = 1.0;
+
+use crate::ToCss;
+use std::fmt;
+use std::str::FromStr;
+
+/// Clamp a 0..1 number to a 0..255 range to u8.
+///
+/// Whilst scaling by 256 and flooring would provide
+/// an equal distribution of integers to percentage inputs,
+/// this is not what Gecko does so we instead multiply by 255
+/// and round (adding 0.5 and flooring is equivalent to rounding)
+///
+/// Chrome does something similar for the alpha value, but not
+/// the rgb values.
+///
+/// See <https://bugzilla.mozilla.org/show_bug.cgi?id=1340484>
+///
+/// Clamping to 256 and rounding after would let 1.0 map to 256, and
+/// `256.0_f32 as u8` is undefined behavior:
+///
+/// <https://github.com/rust-lang/rust/issues/10184>
+#[inline]
+pub fn clamp_unit_f32(val: f32) -> u8 {
+ clamp_floor_256_f32(val * 255.)
+}
+
+/// Round and clamp a single number to a u8.
+#[inline]
+pub fn clamp_floor_256_f32(val: f32) -> u8 {
+ val.round().clamp(0., 255.) as u8
+}
+
+/// Serialize the alpha copmonent of a color according to the specification.
+/// <https://drafts.csswg.org/css-color-4/#serializing-alpha-values>
+#[inline]
+pub fn serialize_color_alpha(
+ dest: &mut impl fmt::Write,
+ alpha: Option<f32>,
+ legacy_syntax: bool,
+) -> fmt::Result {
+ let alpha = match alpha {
+ None => return dest.write_str(" / none"),
+ Some(a) => a,
+ };
+
+ // If the alpha component is full opaque, don't emit the alpha value in CSS.
+ if alpha == OPAQUE {
+ return Ok(());
+ }
+
+ dest.write_str(if legacy_syntax { ", " } else { " / " })?;
+
+ // Try first with two decimal places, then with three.
+ let mut rounded_alpha = (alpha * 100.).round() / 100.;
+ if clamp_unit_f32(rounded_alpha) != clamp_unit_f32(alpha) {
+ rounded_alpha = (alpha * 1000.).round() / 1000.;
+ }
+
+ rounded_alpha.to_css(dest)
+}
+
+/// A Predefined color space specified in:
+/// <https://drafts.csswg.org/css-color-4/#predefined>
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum PredefinedColorSpace {
+ /// <https://drafts.csswg.org/css-color-4/#predefined-sRGB>
+ Srgb,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-sRGB-linear>
+ SrgbLinear,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-display-p3>
+ DisplayP3,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-a98-rgb>
+ A98Rgb,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-prophoto-rgb>
+ ProphotoRgb,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-rec2020>
+ Rec2020,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-xyz>
+ XyzD50,
+ /// <https://drafts.csswg.org/css-color-4/#predefined-xyz>
+ XyzD65,
+}
+
+impl PredefinedColorSpace {
+ /// Returns the string value of the predefined color space.
+ pub fn as_str(&self) -> &str {
+ match self {
+ PredefinedColorSpace::Srgb => "srgb",
+ PredefinedColorSpace::SrgbLinear => "srgb-linear",
+ PredefinedColorSpace::DisplayP3 => "display-p3",
+ PredefinedColorSpace::A98Rgb => "a98-rgb",
+ PredefinedColorSpace::ProphotoRgb => "prophoto-rgb",
+ PredefinedColorSpace::Rec2020 => "rec2020",
+ PredefinedColorSpace::XyzD50 => "xyz-d50",
+ PredefinedColorSpace::XyzD65 => "xyz-d65",
+ }
+ }
+}
+
+impl FromStr for PredefinedColorSpace {
+ type Err = ();
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ Ok(match_ignore_ascii_case! { s,
+ "srgb" => PredefinedColorSpace::Srgb,
+ "srgb-linear" => PredefinedColorSpace::SrgbLinear,
+ "display-p3" => PredefinedColorSpace::DisplayP3,
+ "a98-rgb" => PredefinedColorSpace::A98Rgb,
+ "prophoto-rgb" => PredefinedColorSpace::ProphotoRgb,
+ "rec2020" => PredefinedColorSpace::Rec2020,
+ "xyz-d50" => PredefinedColorSpace::XyzD50,
+ "xyz" | "xyz-d65" => PredefinedColorSpace::XyzD65,
+
+ _ => return Err(()),
+ })
+ }
+}
+
+impl ToCss for PredefinedColorSpace {
+ fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+ where
+ W: fmt::Write,
+ {
+ dest.write_str(self.as_str())
+ }
+}
+
+/// Parse a color hash, without the leading '#' character.
+#[inline]
+pub fn parse_hash_color(value: &[u8]) -> Result<(u8, u8, u8, f32), ()> {
+ Ok(match value.len() {
+ 8 => (
+ from_hex(value[0])? * 16 + from_hex(value[1])?,
+ from_hex(value[2])? * 16 + from_hex(value[3])?,
+ from_hex(value[4])? * 16 + from_hex(value[5])?,
+ (from_hex(value[6])? * 16 + from_hex(value[7])?) as f32 / 255.0,
+ ),
+ 6 => (
+ from_hex(value[0])? * 16 + from_hex(value[1])?,
+ from_hex(value[2])? * 16 + from_hex(value[3])?,
+ from_hex(value[4])? * 16 + from_hex(value[5])?,
+ OPAQUE,
+ ),
+ 4 => (
+ from_hex(value[0])? * 17,
+ from_hex(value[1])? * 17,
+ from_hex(value[2])? * 17,
+ (from_hex(value[3])? * 17) as f32 / 255.0,
+ ),
+ 3 => (
+ from_hex(value[0])? * 17,
+ from_hex(value[1])? * 17,
+ from_hex(value[2])? * 17,
+ OPAQUE,
+ ),
+ _ => return Err(()),
+ })
+}
+
+ascii_case_insensitive_phf_map! {
+ named_colors -> (u8, u8, u8) = {
+ "black" => (0, 0, 0),
+ "silver" => (192, 192, 192),
+ "gray" => (128, 128, 128),
+ "white" => (255, 255, 255),
+ "maroon" => (128, 0, 0),
+ "red" => (255, 0, 0),
+ "purple" => (128, 0, 128),
+ "fuchsia" => (255, 0, 255),
+ "green" => (0, 128, 0),
+ "lime" => (0, 255, 0),
+ "olive" => (128, 128, 0),
+ "yellow" => (255, 255, 0),
+ "navy" => (0, 0, 128),
+ "blue" => (0, 0, 255),
+ "teal" => (0, 128, 128),
+ "aqua" => (0, 255, 255),
+
+ "aliceblue" => (240, 248, 255),
+ "antiquewhite" => (250, 235, 215),
+ "aquamarine" => (127, 255, 212),
+ "azure" => (240, 255, 255),
+ "beige" => (245, 245, 220),
+ "bisque" => (255, 228, 196),
+ "blanchedalmond" => (255, 235, 205),
+ "blueviolet" => (138, 43, 226),
+ "brown" => (165, 42, 42),
+ "burlywood" => (222, 184, 135),
+ "cadetblue" => (95, 158, 160),
+ "chartreuse" => (127, 255, 0),
+ "chocolate" => (210, 105, 30),
+ "coral" => (255, 127, 80),
+ "cornflowerblue" => (100, 149, 237),
+ "cornsilk" => (255, 248, 220),
+ "crimson" => (220, 20, 60),
+ "cyan" => (0, 255, 255),
+ "darkblue" => (0, 0, 139),
+ "darkcyan" => (0, 139, 139),
+ "darkgoldenrod" => (184, 134, 11),
+ "darkgray" => (169, 169, 169),
+ "darkgreen" => (0, 100, 0),
+ "darkgrey" => (169, 169, 169),
+ "darkkhaki" => (189, 183, 107),
+ "darkmagenta" => (139, 0, 139),
+ "darkolivegreen" => (85, 107, 47),
+ "darkorange" => (255, 140, 0),
+ "darkorchid" => (153, 50, 204),
+ "darkred" => (139, 0, 0),
+ "darksalmon" => (233, 150, 122),
+ "darkseagreen" => (143, 188, 143),
+ "darkslateblue" => (72, 61, 139),
+ "darkslategray" => (47, 79, 79),
+ "darkslategrey" => (47, 79, 79),
+ "darkturquoise" => (0, 206, 209),
+ "darkviolet" => (148, 0, 211),
+ "deeppink" => (255, 20, 147),
+ "deepskyblue" => (0, 191, 255),
+ "dimgray" => (105, 105, 105),
+ "dimgrey" => (105, 105, 105),
+ "dodgerblue" => (30, 144, 255),
+ "firebrick" => (178, 34, 34),
+ "floralwhite" => (255, 250, 240),
+ "forestgreen" => (34, 139, 34),
+ "gainsboro" => (220, 220, 220),
+ "ghostwhite" => (248, 248, 255),
+ "gold" => (255, 215, 0),
+ "goldenrod" => (218, 165, 32),
+ "greenyellow" => (173, 255, 47),
+ "grey" => (128, 128, 128),
+ "honeydew" => (240, 255, 240),
+ "hotpink" => (255, 105, 180),
+ "indianred" => (205, 92, 92),
+ "indigo" => (75, 0, 130),
+ "ivory" => (255, 255, 240),
+ "khaki" => (240, 230, 140),
+ "lavender" => (230, 230, 250),
+ "lavenderblush" => (255, 240, 245),
+ "lawngreen" => (124, 252, 0),
+ "lemonchiffon" => (255, 250, 205),
+ "lightblue" => (173, 216, 230),
+ "lightcoral" => (240, 128, 128),
+ "lightcyan" => (224, 255, 255),
+ "lightgoldenrodyellow" => (250, 250, 210),
+ "lightgray" => (211, 211, 211),
+ "lightgreen" => (144, 238, 144),
+ "lightgrey" => (211, 211, 211),
+ "lightpink" => (255, 182, 193),
+ "lightsalmon" => (255, 160, 122),
+ "lightseagreen" => (32, 178, 170),
+ "lightskyblue" => (135, 206, 250),
+ "lightslategray" => (119, 136, 153),
+ "lightslategrey" => (119, 136, 153),
+ "lightsteelblue" => (176, 196, 222),
+ "lightyellow" => (255, 255, 224),
+ "limegreen" => (50, 205, 50),
+ "linen" => (250, 240, 230),
+ "magenta" => (255, 0, 255),
+ "mediumaquamarine" => (102, 205, 170),
+ "mediumblue" => (0, 0, 205),
+ "mediumorchid" => (186, 85, 211),
+ "mediumpurple" => (147, 112, 219),
+ "mediumseagreen" => (60, 179, 113),
+ "mediumslateblue" => (123, 104, 238),
+ "mediumspringgreen" => (0, 250, 154),
+ "mediumturquoise" => (72, 209, 204),
+ "mediumvioletred" => (199, 21, 133),
+ "midnightblue" => (25, 25, 112),
+ "mintcream" => (245, 255, 250),
+ "mistyrose" => (255, 228, 225),
+ "moccasin" => (255, 228, 181),
+ "navajowhite" => (255, 222, 173),
+ "oldlace" => (253, 245, 230),
+ "olivedrab" => (107, 142, 35),
+ "orange" => (255, 165, 0),
+ "orangered" => (255, 69, 0),
+ "orchid" => (218, 112, 214),
+ "palegoldenrod" => (238, 232, 170),
+ "palegreen" => (152, 251, 152),
+ "paleturquoise" => (175, 238, 238),
+ "palevioletred" => (219, 112, 147),
+ "papayawhip" => (255, 239, 213),
+ "peachpuff" => (255, 218, 185),
+ "peru" => (205, 133, 63),
+ "pink" => (255, 192, 203),
+ "plum" => (221, 160, 221),
+ "powderblue" => (176, 224, 230),
+ "rebeccapurple" => (102, 51, 153),
+ "rosybrown" => (188, 143, 143),
+ "royalblue" => (65, 105, 225),
+ "saddlebrown" => (139, 69, 19),
+ "salmon" => (250, 128, 114),
+ "sandybrown" => (244, 164, 96),
+ "seagreen" => (46, 139, 87),
+ "seashell" => (255, 245, 238),
+ "sienna" => (160, 82, 45),
+ "skyblue" => (135, 206, 235),
+ "slateblue" => (106, 90, 205),
+ "slategray" => (112, 128, 144),
+ "slategrey" => (112, 128, 144),
+ "snow" => (255, 250, 250),
+ "springgreen" => (0, 255, 127),
+ "steelblue" => (70, 130, 180),
+ "tan" => (210, 180, 140),
+ "thistle" => (216, 191, 216),
+ "tomato" => (255, 99, 71),
+ "turquoise" => (64, 224, 208),
+ "violet" => (238, 130, 238),
+ "wheat" => (245, 222, 179),
+ "whitesmoke" => (245, 245, 245),
+ "yellowgreen" => (154, 205, 50),
+ }
+}
+
+/// Returns the named color with the given name.
+/// <https://drafts.csswg.org/css-color-4/#typedef-named-color>
+#[inline]
+pub fn parse_named_color(ident: &str) -> Result<(u8, u8, u8), ()> {
+ named_colors::get(ident).copied().ok_or(())
+}
+
+/// Returns an iterator over all named CSS colors.
+/// <https://drafts.csswg.org/css-color-4/#typedef-named-color>
+#[inline]
+pub fn all_named_colors() -> impl Iterator<Item = (&'static str, (u8, u8, u8))> {
+ named_colors::entries().map(|(k, v)| (*k, *v))
+}
+
+#[inline]
+fn from_hex(c: u8) -> Result<u8, ()> {
+ match c {
+ b'0'..=b'9' => Ok(c - b'0'),
+ b'a'..=b'f' => Ok(c - b'a' + 10),
+ b'A'..=b'F' => Ok(c - b'A' + 10),
+ _ => Err(()),
+ }
+}
diff --git a/third_party/rust/cssparser/src/cow_rc_str.rs b/third_party/rust/cssparser/src/cow_rc_str.rs
new file mode 100644
index 0000000000..ecf14a0a75
--- /dev/null
+++ b/third_party/rust/cssparser/src/cow_rc_str.rs
@@ -0,0 +1,185 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::borrow::{Borrow, Cow};
+use std::rc::Rc;
+use std::{cmp, fmt, hash, marker, mem, ops, slice, str, ptr};
+
+/// A string that is either shared (heap-allocated and reference-counted) or borrowed.
+///
+/// Equivalent to `enum { Borrowed(&'a str), Shared(Rc<String>) }`, but stored more compactly.
+///
+/// * If `borrowed_len_or_max == usize::MAX`, then `ptr` represents `NonZero<*const String>`
+/// from `Rc::into_raw`.
+/// The lifetime parameter `'a` is irrelevant in this case.
+///
+/// * Otherwise, `ptr` represents the `NonZero<*const u8>` data component of `&'a str`,
+/// and `borrowed_len_or_max` its length.
+pub struct CowRcStr<'a> {
+ ptr: ptr::NonNull<()>,
+ borrowed_len_or_max: usize,
+
+ phantom: marker::PhantomData<Result<&'a str, Rc<String>>>,
+}
+
+fn _static_assert_same_size<'a>() {
+ // "Instantiate" the generic function without calling it.
+ let _ = mem::transmute::<CowRcStr<'a>, Option<CowRcStr<'a>>>;
+}
+
+impl<'a> From<Cow<'a, str>> for CowRcStr<'a> {
+ #[inline]
+ fn from(s: Cow<'a, str>) -> Self {
+ match s {
+ Cow::Borrowed(s) => CowRcStr::from(s),
+ Cow::Owned(s) => CowRcStr::from(s),
+ }
+ }
+}
+
+impl<'a> From<&'a str> for CowRcStr<'a> {
+ #[inline]
+ fn from(s: &'a str) -> Self {
+ let len = s.len();
+ assert!(len < usize::MAX);
+ CowRcStr {
+ ptr: unsafe { ptr::NonNull::new_unchecked(s.as_ptr() as *mut ()) },
+ borrowed_len_or_max: len,
+ phantom: marker::PhantomData,
+ }
+ }
+}
+
+impl<'a> From<String> for CowRcStr<'a> {
+ #[inline]
+ fn from(s: String) -> Self {
+ CowRcStr::from_rc(Rc::new(s))
+ }
+}
+
+impl<'a> CowRcStr<'a> {
+ #[inline]
+ fn from_rc(s: Rc<String>) -> Self {
+ let ptr = unsafe { ptr::NonNull::new_unchecked(Rc::into_raw(s) as *mut ()) };
+ CowRcStr {
+ ptr,
+ borrowed_len_or_max: usize::MAX,
+ phantom: marker::PhantomData,
+ }
+ }
+
+ #[inline]
+ fn unpack(&self) -> Result<&'a str, *const String> {
+ if self.borrowed_len_or_max == usize::MAX {
+ Err(self.ptr.as_ptr() as *const String)
+ } else {
+ unsafe {
+ Ok(str::from_utf8_unchecked(slice::from_raw_parts(
+ self.ptr.as_ptr() as *const u8,
+ self.borrowed_len_or_max,
+ )))
+ }
+ }
+ }
+}
+
+impl<'a> Clone for CowRcStr<'a> {
+ #[inline]
+ fn clone(&self) -> Self {
+ match self.unpack() {
+ Err(ptr) => {
+ let rc = unsafe { Rc::from_raw(ptr) };
+ let new_rc = rc.clone();
+ mem::forget(rc); // Don’t actually take ownership of this strong reference
+ CowRcStr::from_rc(new_rc)
+ }
+ Ok(_) => CowRcStr { ..*self },
+ }
+ }
+}
+
+impl<'a> Drop for CowRcStr<'a> {
+ #[inline]
+ fn drop(&mut self) {
+ if let Err(ptr) = self.unpack() {
+ mem::drop(unsafe { Rc::from_raw(ptr) })
+ }
+ }
+}
+
+impl<'a> ops::Deref for CowRcStr<'a> {
+ type Target = str;
+
+ #[inline]
+ fn deref(&self) -> &str {
+ self.unpack().unwrap_or_else(|ptr| unsafe { &**ptr })
+ }
+}
+
+// Boilerplate / trivial impls below.
+
+impl<'a> AsRef<str> for CowRcStr<'a> {
+ #[inline]
+ fn as_ref(&self) -> &str {
+ self
+ }
+}
+
+impl<'a> Borrow<str> for CowRcStr<'a> {
+ #[inline]
+ fn borrow(&self) -> &str {
+ self
+ }
+}
+
+impl<'a> Default for CowRcStr<'a> {
+ #[inline]
+ fn default() -> Self {
+ Self::from("")
+ }
+}
+
+impl<'a> hash::Hash for CowRcStr<'a> {
+ #[inline]
+ fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
+ str::hash(self, hasher)
+ }
+}
+
+impl<'a, T: AsRef<str>> PartialEq<T> for CowRcStr<'a> {
+ #[inline]
+ fn eq(&self, other: &T) -> bool {
+ str::eq(self, other.as_ref())
+ }
+}
+
+impl<'a, T: AsRef<str>> PartialOrd<T> for CowRcStr<'a> {
+ #[inline]
+ fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
+ str::partial_cmp(self, other.as_ref())
+ }
+}
+
+impl<'a> Eq for CowRcStr<'a> {}
+
+impl<'a> Ord for CowRcStr<'a> {
+ #[inline]
+ fn cmp(&self, other: &Self) -> cmp::Ordering {
+ str::cmp(self, other)
+ }
+}
+
+impl<'a> fmt::Display for CowRcStr<'a> {
+ #[inline]
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ str::fmt(self, formatter)
+ }
+}
+
+impl<'a> fmt::Debug for CowRcStr<'a> {
+ #[inline]
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ str::fmt(self, formatter)
+ }
+}
diff --git a/third_party/rust/cssparser/src/from_bytes.rs b/third_party/rust/cssparser/src/from_bytes.rs
new file mode 100644
index 0000000000..78a56d3e14
--- /dev/null
+++ b/third_party/rust/cssparser/src/from_bytes.rs
@@ -0,0 +1,64 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/// Abstraction for avoiding a dependency from cssparser to an encoding library
+pub trait EncodingSupport {
+ /// One character encoding
+ type Encoding;
+
+ /// https://encoding.spec.whatwg.org/#concept-encoding-get
+ fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding>;
+
+ /// Return the UTF-8 encoding
+ fn utf8() -> Self::Encoding;
+
+ /// Whether the given encoding is UTF-16BE or UTF-16LE
+ fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool;
+}
+
+/// Determine the character encoding of a CSS stylesheet.
+///
+/// This is based on the presence of a BOM (Byte Order Mark), an `@charset` rule, and
+/// encoding meta-information.
+///
+/// * `css_bytes`: A byte string.
+/// * `protocol_encoding`: The encoding label, if any, defined by HTTP or equivalent protocol.
+/// (e.g. via the `charset` parameter of the `Content-Type` header.)
+/// * `environment_encoding`: An optional `Encoding` object for the [environment encoding]
+/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
+///
+/// Returns the encoding to use.
+pub fn stylesheet_encoding<E>(
+ css: &[u8],
+ protocol_encoding_label: Option<&[u8]>,
+ environment_encoding: Option<E::Encoding>,
+) -> E::Encoding
+where
+ E: EncodingSupport,
+{
+ // https://drafts.csswg.org/css-syntax/#the-input-byte-stream
+ if let Some(label) = protocol_encoding_label {
+ if let Some(protocol_encoding) = E::from_label(label) {
+ return protocol_encoding;
+ };
+ };
+
+ let prefix = b"@charset \"";
+ if css.starts_with(prefix) {
+ let rest = &css[prefix.len()..];
+ if let Some(label_length) = rest.iter().position(|&b| b == b'"') {
+ if rest[label_length..].starts_with(b"\";") {
+ let label = &rest[..label_length];
+ if let Some(charset_encoding) = E::from_label(label) {
+ if E::is_utf16_be_or_le(&charset_encoding) {
+ return E::utf8();
+ } else {
+ return charset_encoding;
+ }
+ }
+ }
+ }
+ }
+ environment_encoding.unwrap_or_else(E::utf8)
+}
diff --git a/third_party/rust/cssparser/src/lib.rs b/third_party/rust/cssparser/src/lib.rs
new file mode 100644
index 0000000000..dc44fb743c
--- /dev/null
+++ b/third_party/rust/cssparser/src/lib.rs
@@ -0,0 +1,108 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![crate_name = "cssparser"]
+#![crate_type = "rlib"]
+#![cfg_attr(feature = "bench", feature(test))]
+#![deny(missing_docs)]
+
+/*!
+
+Implementation of [CSS Syntax Module Level 3](https://drafts.csswg.org/css-syntax/) for Rust.
+
+# Input
+
+Everything is based on `Parser` objects, which borrow a `&str` input.
+If you have bytes (from a file, the network, or something)
+and want to support character encodings other than UTF-8,
+see the `stylesheet_encoding` function,
+which can be used together with rust-encoding or encoding-rs.
+
+# Conventions for parsing functions
+
+* Take (at least) a `input: &mut cssparser::Parser` parameter
+* Return `Result<_, ()>`
+* When returning `Ok(_)`,
+ the function must have consumed exactly the amount of input that represents the parsed value.
+* When returning `Err(())`, any amount of input may have been consumed.
+
+As a consequence, when calling another parsing function, either:
+
+* Any `Err(())` return value must be propagated.
+ This happens by definition for tail calls,
+ and can otherwise be done with the `?` operator.
+* Or the call must be wrapped in a `Parser::try` call.
+ `try` takes a closure that takes a `Parser` and returns a `Result`,
+ calls it once,
+ and returns itself that same result.
+ If the result is `Err`,
+ it restores the position inside the input to the one saved before calling the closure.
+
+Examples:
+
+```{rust,ignore}
+// 'none' | <image>
+fn parse_background_image(context: &ParserContext, input: &mut Parser)
+ -> Result<Option<Image>, ()> {
+ if input.try_parse(|input| input.expect_ident_matching("none")).is_ok() {
+ Ok(None)
+ } else {
+ Image::parse(context, input).map(Some) // tail call
+ }
+}
+```
+
+```{rust,ignore}
+// [ <length> | <percentage> ] [ <length> | <percentage> ]?
+fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)
+ -> Result<(LengthOrPercentage, LengthOrPercentage), ()> {
+ let first = LengthOrPercentage::parse?;
+ let second = input.try_parse(LengthOrPercentage::parse).unwrap_or(first);
+ (first, second)
+}
+```
+
+*/
+
+#![recursion_limit = "200"] // For color::parse_color_keyword
+
+pub use crate::cow_rc_str::CowRcStr;
+pub use crate::from_bytes::{stylesheet_encoding, EncodingSupport};
+#[doc(hidden)]
+pub use crate::macros::{
+ _cssparser_internal_create_uninit_array, _cssparser_internal_to_lowercase,
+};
+pub use crate::nth::parse_nth;
+pub use crate::parser::{BasicParseError, BasicParseErrorKind, ParseError, ParseErrorKind};
+pub use crate::parser::{Delimiter, Delimiters, Parser, ParserInput, ParserState};
+pub use crate::rules_and_declarations::{parse_important, parse_one_declaration};
+pub use crate::rules_and_declarations::{parse_one_rule, StyleSheetParser};
+pub use crate::rules_and_declarations::{AtRuleParser, QualifiedRuleParser};
+pub use crate::rules_and_declarations::{DeclarationParser, RuleBodyItemParser, RuleBodyParser};
+pub use crate::serializer::{serialize_identifier, serialize_name, serialize_string};
+pub use crate::serializer::{CssStringWriter, ToCss, TokenSerializationType};
+pub use crate::tokenizer::{SourceLocation, SourcePosition, Token};
+pub use crate::unicode_range::UnicodeRange;
+pub use cssparser_macros::*;
+#[doc(hidden)]
+pub use phf as _cssparser_internal_phf;
+
+#[macro_use]
+mod macros;
+
+mod rules_and_declarations;
+mod tokenizer;
+
+pub mod color;
+mod cow_rc_str;
+mod from_bytes;
+mod nth;
+mod parser;
+mod serializer;
+mod unicode_range;
+
+#[cfg(test)]
+mod size_of_tests;
+#[cfg(test)]
+mod tests;
diff --git a/third_party/rust/cssparser/src/macros.rs b/third_party/rust/cssparser/src/macros.rs
new file mode 100644
index 0000000000..fc4b77a194
--- /dev/null
+++ b/third_party/rust/cssparser/src/macros.rs
@@ -0,0 +1,204 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::mem::MaybeUninit;
+
+/// Expands to a `match` expression with string patterns,
+/// matching case-insensitively in the ASCII range.
+///
+/// The patterns must not contain ASCII upper case letters. (They must be already be lower-cased.)
+///
+/// # Example
+///
+/// ```rust
+/// # fn main() {} // Make doctest not wrap everything in its own main
+/// # fn dummy(function_name: &String) { let _ =
+/// cssparser::match_ignore_ascii_case! { &function_name,
+/// "rgb" => parse_rgb(..),
+/// # #[cfg(not(something))]
+/// "rgba" => parse_rgba(..),
+/// "hsl" => parse_hsl(..),
+/// "hsla" => parse_hsla(..),
+/// _ => Err(format!("unknown function: {}", function_name))
+/// }
+/// # ;}
+/// # use std::ops::RangeFull;
+/// # fn parse_rgb(_: RangeFull) -> Result<(), String> { Ok(()) }
+/// # fn parse_rgba(_: RangeFull) -> Result<(), String> { Ok(()) }
+/// # fn parse_hsl(_: RangeFull) -> Result<(), String> { Ok(()) }
+/// # fn parse_hsla(_: RangeFull) -> Result<(), String> { Ok(()) }
+/// ```
+#[macro_export]
+macro_rules! match_ignore_ascii_case {
+ ( $input:expr,
+ $(
+ $( #[$meta: meta] )*
+ $( $pattern: pat )|+ $( if $guard: expr )? => $then: expr
+ ),+
+ $(,)?
+ ) => {
+ {
+ // This dummy module works around the feature gate
+ // `error[E0658]: procedural macros cannot be expanded to statements`
+ // by forcing the macro to be in an item context
+ // rather than expression/statement context,
+ // even though the macro only expands to items.
+ mod cssparser_internal {
+ $crate::_cssparser_internal_max_len! {
+ $( $( $pattern )+ )+
+ }
+ }
+ $crate::_cssparser_internal_to_lowercase!($input, cssparser_internal::MAX_LENGTH => lowercase);
+ // "A" is a short string that we know is different for every string pattern,
+ // since we’ve verified that none of them include ASCII upper case letters.
+ match lowercase.unwrap_or("A") {
+ $(
+ $( #[$meta] )*
+ $( $pattern )|+ $( if $guard )? => $then,
+ )+
+ }
+ }
+ };
+}
+
+/// Define a function `$name(&str) -> Option<&'static $ValueType>`
+///
+/// The function finds a match for the input string
+/// in a [`phf` map](https://github.com/sfackler/rust-phf)
+/// and returns a reference to the corresponding value.
+/// Matching is case-insensitive in the ASCII range.
+///
+/// ## Example:
+///
+/// ```rust
+/// # fn main() {} // Make doctest not wrap everything in its own main
+///
+/// fn color_rgb(input: &str) -> Option<(u8, u8, u8)> {
+/// cssparser::ascii_case_insensitive_phf_map! {
+/// keywords -> (u8, u8, u8) = {
+/// "red" => (255, 0, 0),
+/// "green" => (0, 255, 0),
+/// "blue" => (0, 0, 255),
+/// }
+/// }
+/// keywords::get(input).cloned()
+/// }
+/// ```
+///
+/// You can also iterate over the map entries by using `keywords::entries()`.
+#[macro_export]
+macro_rules! ascii_case_insensitive_phf_map {
+ ($name: ident -> $ValueType: ty = { $( $key: tt => $value: expr ),+ }) => {
+ ascii_case_insensitive_phf_map!($name -> $ValueType = { $( $key => $value, )+ })
+ };
+ ($name: ident -> $ValueType: ty = { $( $key: tt => $value: expr, )+ }) => {
+ use $crate::_cssparser_internal_phf as phf;
+
+ // See macro above for context.
+ mod cssparser_internal {
+ $crate::_cssparser_internal_max_len! {
+ $( $key )+
+ }
+ }
+
+ static MAP: phf::Map<&'static str, $ValueType> = phf::phf_map! {
+ $(
+ $key => $value,
+ )*
+ };
+
+ // While the obvious choice for this would be an inner module, it's not possible to
+ // reference from types from there, see:
+ // <https://github.com/rust-lang/rust/issues/114369>
+ //
+ // So we abuse a struct with static associated functions instead.
+ #[allow(non_camel_case_types)]
+ struct $name;
+ impl $name {
+ #[allow(dead_code)]
+ fn entries() -> impl Iterator<Item = (&'static &'static str, &'static $ValueType)> {
+ MAP.entries()
+ }
+
+ fn get(input: &str) -> Option<&'static $ValueType> {
+ $crate::_cssparser_internal_to_lowercase!(input, cssparser_internal::MAX_LENGTH => lowercase);
+ MAP.get(lowercase?)
+ }
+ }
+ }
+}
+
+/// Create a new array of MaybeUninit<T> items, in an uninitialized state.
+#[inline(always)]
+pub fn _cssparser_internal_create_uninit_array<const N: usize>() -> [MaybeUninit<u8>; N] {
+ unsafe {
+ // SAFETY: An uninitialized `[MaybeUninit<_>; LEN]` is valid.
+ // See: https://doc.rust-lang.org/stable/core/mem/union.MaybeUninit.html#method.uninit_array
+ MaybeUninit::<[MaybeUninit<u8>; N]>::uninit().assume_init()
+ }
+}
+
+/// Implementation detail of match_ignore_ascii_case! and ascii_case_insensitive_phf_map! macros.
+///
+/// **This macro is not part of the public API. It can change or be removed between any versions.**
+///
+/// Define a local variable named `$output`
+/// and assign it the result of calling `_cssparser_internal_to_lowercase`
+/// with a stack-allocated buffer of length `$BUFFER_SIZE`.
+#[macro_export]
+#[doc(hidden)]
+macro_rules! _cssparser_internal_to_lowercase {
+ ($input: expr, $BUFFER_SIZE: expr => $output: ident) => {
+ let mut buffer = $crate::_cssparser_internal_create_uninit_array::<{ $BUFFER_SIZE }>();
+ let input: &str = $input;
+ let $output = $crate::_cssparser_internal_to_lowercase(&mut buffer, input);
+ };
+}
+
+/// Implementation detail of match_ignore_ascii_case! and ascii_case_insensitive_phf_map! macros.
+///
+/// **This function is not part of the public API. It can change or be removed between any versions.**
+///
+/// If `input` is larger than buffer, return `None`.
+/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
+#[doc(hidden)]
+#[allow(non_snake_case)]
+#[inline]
+pub fn _cssparser_internal_to_lowercase<'a>(
+ buffer: &'a mut [MaybeUninit<u8>],
+ input: &'a str,
+) -> Option<&'a str> {
+ let buffer = buffer.get_mut(..input.len())?;
+
+ #[cold]
+ fn make_ascii_lowercase<'a>(
+ buffer: &'a mut [MaybeUninit<u8>],
+ input: &'a str,
+ first_uppercase: usize,
+ ) -> &'a str {
+ // This cast doesn't change the pointer's validity
+ // since `u8` has the same layout as `MaybeUninit<u8>`:
+ let input_bytes =
+ unsafe { &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]) };
+
+ buffer.copy_from_slice(&*input_bytes);
+
+ // Same as above re layout, plus these bytes have been initialized:
+ let buffer = unsafe { &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]) };
+
+ buffer[first_uppercase..].make_ascii_lowercase();
+ // `buffer` was initialized to a copy of `input`
+ // (which is `&str` so well-formed UTF-8)
+ // then ASCII-lowercased (which preserves UTF-8 well-formedness):
+ unsafe { ::std::str::from_utf8_unchecked(buffer) }
+ }
+
+ Some(
+ match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
+ Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
+ // common case: input is already lower-case
+ None => input,
+ },
+ )
+}
diff --git a/third_party/rust/cssparser/src/nth.rs b/third_party/rust/cssparser/src/nth.rs
new file mode 100644
index 0000000000..518de4d9b4
--- /dev/null
+++ b/third_party/rust/cssparser/src/nth.rs
@@ -0,0 +1,145 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use super::{BasicParseError, Parser, ParserInput, Token};
+
+/// Parse the *An+B* notation, as found in the `:nth-child()` selector.
+/// The input is typically the arguments of a function,
+/// in which case the caller needs to check if the arguments’ parser is exhausted.
+/// Return `Ok((A, B))`, or `Err(())` for a syntax error.
+pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), BasicParseError<'i>> {
+ match *input.next()? {
+ Token::Number {
+ int_value: Some(b), ..
+ } => Ok((0, b)),
+ Token::Dimension {
+ int_value: Some(a),
+ ref unit,
+ ..
+ } => {
+ match_ignore_ascii_case! {
+ unit,
+ "n" => Ok(parse_b(input, a)?),
+ "n-" => Ok(parse_signless_b(input, a, -1)?),
+ _ => match parse_n_dash_digits(&*unit) {
+ Ok(b) => Ok((a, b)),
+ Err(()) => {
+ let unit = unit.clone();
+ Err(input.new_basic_unexpected_token_error(Token::Ident(unit)))
+ }
+ }
+ }
+ }
+ Token::Ident(ref value) => {
+ match_ignore_ascii_case! { value,
+ "even" => Ok((2, 0)),
+ "odd" => Ok((2, 1)),
+ "n" => Ok(parse_b(input, 1)?),
+ "-n" => Ok(parse_b(input, -1)?),
+ "n-" => Ok(parse_signless_b(input, 1, -1)?),
+ "-n-" => Ok(parse_signless_b(input, -1, -1)?),
+ _ => {
+ let (slice, a) = if value.starts_with("-") {
+ (&value[1..], -1)
+ } else {
+ (&**value, 1)
+ };
+ match parse_n_dash_digits(slice) {
+ Ok(b) => Ok((a, b)),
+ Err(()) => {
+ let value = value.clone();
+ Err(input.new_basic_unexpected_token_error(Token::Ident(value)))
+ }
+ }
+ }
+ }
+ }
+ Token::Delim('+') => match *input.next_including_whitespace()? {
+ Token::Ident(ref value) => {
+ match_ignore_ascii_case! { value,
+ "n" => parse_b(input, 1),
+ "n-" => parse_signless_b(input, 1, -1),
+ _ => match parse_n_dash_digits(value) {
+ Ok(b) => Ok((1, b)),
+ Err(()) => {
+ let value = value.clone();
+ Err(input.new_basic_unexpected_token_error(Token::Ident(value)))
+ }
+ }
+ }
+ }
+ ref token => {
+ let token = token.clone();
+ Err(input.new_basic_unexpected_token_error(token))
+ }
+ },
+ ref token => {
+ let token = token.clone();
+ Err(input.new_basic_unexpected_token_error(token))
+ }
+ }
+}
+
+fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), BasicParseError<'i>> {
+ let start = input.state();
+ match input.next() {
+ Ok(&Token::Delim('+')) => parse_signless_b(input, a, 1),
+ Ok(&Token::Delim('-')) => parse_signless_b(input, a, -1),
+ Ok(&Token::Number {
+ has_sign: true,
+ int_value: Some(b),
+ ..
+ }) => Ok((a, b)),
+ _ => {
+ input.reset(&start);
+ Ok((a, 0))
+ }
+ }
+}
+
+fn parse_signless_b<'i, 't>(
+ input: &mut Parser<'i, 't>,
+ a: i32,
+ b_sign: i32,
+) -> Result<(i32, i32), BasicParseError<'i>> {
+ // FIXME: remove .clone() when lifetimes are non-lexical.
+ match input.next()?.clone() {
+ Token::Number {
+ has_sign: false,
+ int_value: Some(b),
+ ..
+ } => Ok((a, b_sign * b)),
+ token => Err(input.new_basic_unexpected_token_error(token)),
+ }
+}
+
+fn parse_n_dash_digits(string: &str) -> Result<i32, ()> {
+ let bytes = string.as_bytes();
+ if bytes.len() >= 3
+ && bytes[..2].eq_ignore_ascii_case(b"n-")
+ && bytes[2..].iter().all(|&c| matches!(c, b'0'..=b'9'))
+ {
+ Ok(parse_number_saturate(&string[1..]).unwrap()) // Include the minus sign
+ } else {
+ Err(())
+ }
+}
+
+fn parse_number_saturate(string: &str) -> Result<i32, ()> {
+ let mut input = ParserInput::new(string);
+ let mut parser = Parser::new(&mut input);
+ let int = if let Ok(&Token::Number {
+ int_value: Some(int),
+ ..
+ }) = parser.next_including_whitespace_and_comments()
+ {
+ int
+ } else {
+ return Err(());
+ };
+ if !parser.is_exhausted() {
+ return Err(());
+ }
+ Ok(int)
+}
diff --git a/third_party/rust/cssparser/src/parser.rs b/third_party/rust/cssparser/src/parser.rs
new file mode 100644
index 0000000000..dd7777a2d8
--- /dev/null
+++ b/third_party/rust/cssparser/src/parser.rs
@@ -0,0 +1,1169 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::cow_rc_str::CowRcStr;
+use crate::tokenizer::{SourceLocation, SourcePosition, Token, Tokenizer};
+use smallvec::SmallVec;
+use std::fmt;
+use std::ops::BitOr;
+use std::ops::Range;
+
+/// A capture of the internal state of a `Parser` (including the position within the input),
+/// obtained from the `Parser::position` method.
+///
+/// Can be used with the `Parser::reset` method to restore that state.
+/// Should only be used with the `Parser` instance it came from.
+#[derive(Debug, Clone)]
+pub struct ParserState {
+    // Byte offset from the start of the input.
+    pub(crate) position: usize,
+    // Byte offset at which the current line starts; used to derive columns.
+    pub(crate) current_line_start_position: usize,
+    // Zero-or-more based line counter maintained by the tokenizer.
+    pub(crate) current_line_number: u32,
+    // Set when the last token returned opened a block/function whose
+    // contents have not been consumed yet.
+    pub(crate) at_start_of: Option<BlockType>,
+}
+
+impl ParserState {
+    /// The position from the start of the input, counted in UTF-8 bytes.
+    #[inline]
+    pub fn position(&self) -> SourcePosition {
+        SourcePosition(self.position)
+    }
+
+    /// The line number and column number.
+    ///
+    /// The column is 1-based and measured in UTF-8 bytes from the start of
+    /// the current line (hence the `+ 1`).
+    #[inline]
+    pub fn source_location(&self) -> SourceLocation {
+        SourceLocation {
+            line: self.current_line_number,
+            column: (self.position - self.current_line_start_position + 1) as u32,
+        }
+    }
+}
+
+/// When parsing until a given token, sometimes the caller knows that parsing is going to restart
+/// at some earlier point, and consuming until we find a top level delimiter is just wasted work.
+///
+/// In that case, callers can pass ParseUntilErrorBehavior::Stop to avoid doing all that wasted
+/// work.
+///
+/// This is important for things like CSS nesting, where something like:
+///
+/// foo:is(..) {
+/// ...
+/// }
+///
+/// Would need to scan the whole {} block to find a semicolon, only for parsing getting restarted
+/// as a qualified rule later.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum ParseUntilErrorBehavior {
+ /// Consume until we see the relevant delimiter or the end of the stream.
+ Consume,
+ /// Eagerly error.
+ Stop,
+}
+
+/// Details about a `BasicParseError`
+#[derive(Clone, Debug, PartialEq)]
+pub enum BasicParseErrorKind<'i> {
+ /// An unexpected token was encountered.
+ UnexpectedToken(Token<'i>),
+ /// The end of the input was encountered unexpectedly.
+ EndOfInput,
+ /// An `@` rule was encountered that was invalid.
+ AtRuleInvalid(CowRcStr<'i>),
+ /// The body of an '@' rule was invalid.
+ AtRuleBodyInvalid,
+ /// A qualified rule was encountered that was invalid.
+ QualifiedRuleInvalid,
+}
+
+impl<'i> fmt::Display for BasicParseErrorKind<'i> {
+    /// Render a human-readable description of this error kind.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            BasicParseErrorKind::UnexpectedToken(ref token) => {
+                write!(f, "unexpected token: {:?}", token)
+            }
+            BasicParseErrorKind::EndOfInput => f.write_str("unexpected end of input"),
+            BasicParseErrorKind::AtRuleInvalid(ref rule) => {
+                write!(f, "invalid @ rule encountered: '@{}'", rule)
+            }
+            BasicParseErrorKind::AtRuleBodyInvalid => {
+                f.write_str("invalid @ rule body encountered")
+            }
+            BasicParseErrorKind::QualifiedRuleInvalid => {
+                f.write_str("invalid qualified rule encountered")
+            }
+        }
+    }
+}
+
+/// The fundamental parsing errors that can be triggered by built-in parsing routines.
+#[derive(Clone, Debug, PartialEq)]
+pub struct BasicParseError<'i> {
+ /// Details of this error
+ pub kind: BasicParseErrorKind<'i>,
+ /// Location where this error occurred
+ pub location: SourceLocation,
+}
+
+impl<'i, T> From<BasicParseError<'i>> for ParseError<'i, T> {
+    /// Lift a fundamental error into the extensible error type,
+    /// preserving its source location.
+    #[inline]
+    fn from(this: BasicParseError<'i>) -> ParseError<'i, T> {
+        let BasicParseError { kind, location } = this;
+        ParseError {
+            kind: ParseErrorKind::Basic(kind),
+            location,
+        }
+    }
+}
+
+impl SourceLocation {
+ /// Create a new BasicParseError at this location for an unexpected token
+ #[inline]
+ pub fn new_basic_unexpected_token_error<'i>(self, token: Token<'i>) -> BasicParseError<'i> {
+ BasicParseError {
+ kind: BasicParseErrorKind::UnexpectedToken(token),
+ location: self,
+ }
+ }
+
+ /// Create a new ParseError at this location for an unexpected token
+ #[inline]
+ pub fn new_unexpected_token_error<'i, E>(self, token: Token<'i>) -> ParseError<'i, E> {
+ ParseError {
+ kind: ParseErrorKind::Basic(BasicParseErrorKind::UnexpectedToken(token)),
+ location: self,
+ }
+ }
+
+ /// Create a new custom ParseError at this location
+ #[inline]
+ pub fn new_custom_error<'i, E1: Into<E2>, E2>(self, error: E1) -> ParseError<'i, E2> {
+ ParseError {
+ kind: ParseErrorKind::Custom(error.into()),
+ location: self,
+ }
+ }
+}
+
+/// Details of a `ParseError`
+#[derive(Clone, Debug, PartialEq)]
+pub enum ParseErrorKind<'i, T: 'i> {
+ /// A fundamental parse error from a built-in parsing routine.
+ Basic(BasicParseErrorKind<'i>),
+ /// A parse error reported by downstream consumer code.
+ Custom(T),
+}
+
+impl<'i, T> ParseErrorKind<'i, T> {
+ /// Like `std::convert::Into::into`
+ pub fn into<U>(self) -> ParseErrorKind<'i, U>
+ where
+ T: Into<U>,
+ {
+ match self {
+ ParseErrorKind::Basic(basic) => ParseErrorKind::Basic(basic),
+ ParseErrorKind::Custom(custom) => ParseErrorKind::Custom(custom.into()),
+ }
+ }
+}
+
+impl<'i, E: fmt::Display> fmt::Display for ParseErrorKind<'i, E> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ ParseErrorKind::Basic(ref basic) => basic.fmt(f),
+ ParseErrorKind::Custom(ref custom) => custom.fmt(f),
+ }
+ }
+}
+
+/// Extensible parse errors that can be encountered by client parsing implementations.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ParseError<'i, E> {
+ /// Details of this error
+ pub kind: ParseErrorKind<'i, E>,
+ /// Location where this error occurred
+ pub location: SourceLocation,
+}
+
+impl<'i, T> ParseError<'i, T> {
+ /// Extract the fundamental parse error from an extensible error.
+ pub fn basic(self) -> BasicParseError<'i> {
+ match self.kind {
+ ParseErrorKind::Basic(kind) => BasicParseError {
+ kind,
+ location: self.location,
+ },
+ ParseErrorKind::Custom(_) => panic!("Not a basic parse error"),
+ }
+ }
+
+ /// Like `std::convert::Into::into`
+ pub fn into<U>(self) -> ParseError<'i, U>
+ where
+ T: Into<U>,
+ {
+ ParseError {
+ kind: self.kind.into(),
+ location: self.location,
+ }
+ }
+}
+
+impl<'i, E: fmt::Display> fmt::Display for ParseError<'i, E> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.kind.fmt(f)
+ }
+}
+
+impl<'i, E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'i, E> {}
+
+/// The owned input for a parser.
+pub struct ParserInput<'i> {
+ tokenizer: Tokenizer<'i>,
+ cached_token: Option<CachedToken<'i>>,
+}
+
+struct CachedToken<'i> {
+ token: Token<'i>,
+ start_position: SourcePosition,
+ end_state: ParserState,
+}
+
+impl<'i> ParserInput<'i> {
+    /// Create a new input for a parser.
+    pub fn new(input: &'i str) -> ParserInput<'i> {
+        ParserInput {
+            tokenizer: Tokenizer::new(input),
+            cached_token: None,
+        }
+    }
+
+    /// Borrow the most recently cached token.
+    ///
+    /// Panics if no token has been cached yet; callers must only use this
+    /// right after `cached_token` has been filled in.
+    #[inline]
+    fn cached_token_ref(&self) -> &Token<'i> {
+        &self.cached_token.as_ref().unwrap().token
+    }
+}
+
+/// A CSS parser that borrows its `&str` input,
+/// yields `Token`s,
+/// and keeps track of nested blocks and functions.
+pub struct Parser<'i, 't> {
+    /// The token stream (and its one-token cache) this parser reads from.
+    input: &'t mut ParserInput<'i>,
+    /// If `Some(_)`, .parse_nested_block() can be called.
+    at_start_of: Option<BlockType>,
+    /// For parsers from `parse_until` or `parse_nested_block`
+    stop_before: Delimiters,
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub(crate) enum BlockType {
+ Parenthesis,
+ SquareBracket,
+ CurlyBracket,
+}
+
+impl BlockType {
+    /// The kind of block that `token` opens, if any.
+    fn opening(token: &Token) -> Option<BlockType> {
+        Some(match *token {
+            Token::Function(_) | Token::ParenthesisBlock => BlockType::Parenthesis,
+            Token::SquareBracketBlock => BlockType::SquareBracket,
+            Token::CurlyBracketBlock => BlockType::CurlyBracket,
+            _ => return None,
+        })
+    }
+
+    /// The kind of block that `token` closes, if any.
+    fn closing(token: &Token) -> Option<BlockType> {
+        Some(match *token {
+            Token::CloseParenthesis => BlockType::Parenthesis,
+            Token::CloseSquareBracket => BlockType::SquareBracket,
+            Token::CloseCurlyBracket => BlockType::CurlyBracket,
+            _ => return None,
+        })
+    }
+}
+
+/// A set of characters, to be used with the `Parser::parse_until*` methods.
+///
+/// The union of two sets can be obtained with the `|` operator. Example:
+///
+/// ```{rust,ignore}
+/// input.parse_until_before(Delimiter::CurlyBracketBlock | Delimiter::Semicolon)
+/// ```
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct Delimiters {
+ bits: u8,
+}
+
+/// `Delimiters` constants.
+#[allow(non_upper_case_globals, non_snake_case)]
+pub mod Delimiter {
+ use super::Delimiters;
+
+ /// The empty delimiter set
+ pub const None: Delimiters = Delimiters { bits: 0 };
+ /// The delimiter set with only the `{` opening curly bracket
+ pub const CurlyBracketBlock: Delimiters = Delimiters { bits: 1 << 1 };
+ /// The delimiter set with only the `;` semicolon
+ pub const Semicolon: Delimiters = Delimiters { bits: 1 << 2 };
+ /// The delimiter set with only the `!` exclamation point
+ pub const Bang: Delimiters = Delimiters { bits: 1 << 3 };
+ /// The delimiter set with only the `,` comma
+ pub const Comma: Delimiters = Delimiters { bits: 1 << 4 };
+}
+
+#[allow(non_upper_case_globals, non_snake_case)]
+mod ClosingDelimiter {
+ use super::Delimiters;
+
+ pub const CloseCurlyBracket: Delimiters = Delimiters { bits: 1 << 5 };
+ pub const CloseSquareBracket: Delimiters = Delimiters { bits: 1 << 6 };
+ pub const CloseParenthesis: Delimiters = Delimiters { bits: 1 << 7 };
+}
+
+impl BitOr<Delimiters> for Delimiters {
+    type Output = Delimiters;
+
+    /// The union of two delimiter sets.
+    #[inline]
+    fn bitor(self, other: Delimiters) -> Delimiters {
+        let bits = self.bits | other.bits;
+        Delimiters { bits }
+    }
+}
+
+impl Delimiters {
+    /// Whether the two sets have at least one delimiter in common.
+    #[inline]
+    fn contains(self, other: Delimiters) -> bool {
+        (self.bits & other.bits) != 0
+    }
+
+    /// Map an input byte to the delimiter set it belongs to
+    /// (`Delimiter::None` both for `None` and for non-delimiter bytes).
+    #[inline]
+    pub(crate) fn from_byte(byte: Option<u8>) -> Delimiters {
+        // Dense 256-entry lookup table: every byte value maps directly to
+        // its delimiter set, so the hot path is a single indexed load.
+        const TABLE: [Delimiters; 256] = {
+            let mut table = [Delimiter::None; 256];
+            table[b';' as usize] = Delimiter::Semicolon;
+            table[b'!' as usize] = Delimiter::Bang;
+            table[b',' as usize] = Delimiter::Comma;
+            table[b'{' as usize] = Delimiter::CurlyBracketBlock;
+            table[b'}' as usize] = ClosingDelimiter::CloseCurlyBracket;
+            table[b']' as usize] = ClosingDelimiter::CloseSquareBracket;
+            table[b')' as usize] = ClosingDelimiter::CloseParenthesis;
+            table
+        };
+
+        byte.map_or(Delimiter::None, |b| TABLE[b as usize])
+    }
+}
+
+/// Used in some `fn expect_*` methods.
+///
+/// Saves the source location *before* consuming the next token, matches the
+/// token against the given arms, and turns any unmatched token into an
+/// `UnexpectedToken` error reported at that saved location (so the error
+/// points at the token itself, not past it).
+macro_rules! expect {
+    ($parser: ident, $($branches: tt)+) => {
+        {
+            let start_location = $parser.current_source_location();
+            match *$parser.next()? {
+                $($branches)+
+                ref token => {
+                    return Err(start_location.new_basic_unexpected_token_error(token.clone()))
+                }
+            }
+        }
+    }
+}
+
+impl<'i: 't, 't> Parser<'i, 't> {
+ /// Create a new parser
+ #[inline]
+ pub fn new(input: &'t mut ParserInput<'i>) -> Parser<'i, 't> {
+ Parser {
+ input,
+ at_start_of: None,
+ stop_before: Delimiter::None,
+ }
+ }
+
+ /// Return the current line that is being parsed.
+ pub fn current_line(&self) -> &'i str {
+ self.input.tokenizer.current_source_line()
+ }
+
+    /// Check whether the input is exhausted. That is, whether `.next()` would
+    /// return an error rather than a token.
+    ///
+    /// This ignores whitespace and comments.
+    #[inline]
+    pub fn is_exhausted(&mut self) -> bool {
+        self.expect_exhausted().is_ok()
+    }
+
+    /// Check whether the input is exhausted. That is, whether `.next()` would
+    /// return an error rather than a token.
+    /// Return a `Result` so that the `?` operator can be used: `input.expect_exhausted()?`
+    ///
+    /// This ignores whitespace and comments.
+    #[inline]
+    pub fn expect_exhausted(&mut self) -> Result<(), BasicParseError<'i>> {
+        let start = self.state();
+        let result = match self.next() {
+            Err(BasicParseError {
+                kind: BasicParseErrorKind::EndOfInput,
+                ..
+            }) => Ok(()),
+            // `next()` only ever fails with `EndOfInput`, so any other error
+            // here indicates a bug in the parser itself.
+            Err(e) => unreachable!("Unexpected error encountered: {:?}", e),
+            Ok(t) => Err(start
+                .source_location()
+                .new_basic_unexpected_token_error(t.clone())),
+        };
+        // This is a peek: restore the saved state regardless of the outcome.
+        self.reset(&start);
+        result
+    }
+
+ /// Return the current position within the input.
+ ///
+ /// This can be used with the `Parser::slice` and `slice_from` methods.
+ #[inline]
+ pub fn position(&self) -> SourcePosition {
+ self.input.tokenizer.position()
+ }
+
+ /// The current line number and column number.
+ #[inline]
+ pub fn current_source_location(&self) -> SourceLocation {
+ self.input.tokenizer.current_source_location()
+ }
+
+ /// The source map URL, if known.
+ ///
+ /// The source map URL is extracted from a specially formatted
+ /// comment. The last such comment is used, so this value may
+ /// change as parsing proceeds.
+ pub fn current_source_map_url(&self) -> Option<&str> {
+ self.input.tokenizer.current_source_map_url()
+ }
+
+ /// The source URL, if known.
+ ///
+ /// The source URL is extracted from a specially formatted
+ /// comment. The last such comment is used, so this value may
+ /// change as parsing proceeds.
+ pub fn current_source_url(&self) -> Option<&str> {
+ self.input.tokenizer.current_source_url()
+ }
+
+ /// Create a new BasicParseError at the current location
+ #[inline]
+ pub fn new_basic_error(&self, kind: BasicParseErrorKind<'i>) -> BasicParseError<'i> {
+ BasicParseError {
+ kind,
+ location: self.current_source_location(),
+ }
+ }
+
+ /// Create a new basic ParseError at the current location
+ #[inline]
+ pub fn new_error<E>(&self, kind: BasicParseErrorKind<'i>) -> ParseError<'i, E> {
+ ParseError {
+ kind: ParseErrorKind::Basic(kind),
+ location: self.current_source_location(),
+ }
+ }
+
+ /// Create a new custom BasicParseError at the current location
+ #[inline]
+ pub fn new_custom_error<E1: Into<E2>, E2>(&self, error: E1) -> ParseError<'i, E2> {
+ self.current_source_location().new_custom_error(error)
+ }
+
+ /// Create a new unexpected token BasicParseError at the current location
+ #[inline]
+ pub fn new_basic_unexpected_token_error(&self, token: Token<'i>) -> BasicParseError<'i> {
+ self.new_basic_error(BasicParseErrorKind::UnexpectedToken(token))
+ }
+
+ /// Create a new unexpected token ParseError at the current location
+ #[inline]
+ pub fn new_unexpected_token_error<E>(&self, token: Token<'i>) -> ParseError<'i, E> {
+ self.new_error(BasicParseErrorKind::UnexpectedToken(token))
+ }
+
+ /// Create a new unexpected token or EOF ParseError at the current location
+ #[inline]
+ pub fn new_error_for_next_token<E>(&mut self) -> ParseError<'i, E> {
+ let token = match self.next() {
+ Ok(token) => token.clone(),
+ Err(e) => return e.into(),
+ };
+ self.new_error(BasicParseErrorKind::UnexpectedToken(token))
+ }
+
+ /// Return the current internal state of the parser (including position within the input).
+ ///
+ /// This state can later be restored with the `Parser::reset` method.
+ #[inline]
+ pub fn state(&self) -> ParserState {
+ ParserState {
+ at_start_of: self.at_start_of,
+ ..self.input.tokenizer.state()
+ }
+ }
+
+ /// Advance the input until the next token that’s not whitespace or a comment.
+ #[inline]
+ pub fn skip_whitespace(&mut self) {
+ if let Some(block_type) = self.at_start_of.take() {
+ consume_until_end_of_block(block_type, &mut self.input.tokenizer);
+ }
+
+ self.input.tokenizer.skip_whitespace()
+ }
+
+ #[inline]
+ pub(crate) fn skip_cdc_and_cdo(&mut self) {
+ if let Some(block_type) = self.at_start_of.take() {
+ consume_until_end_of_block(block_type, &mut self.input.tokenizer);
+ }
+
+ self.input.tokenizer.skip_cdc_and_cdo()
+ }
+
+ #[inline]
+ pub(crate) fn next_byte(&self) -> Option<u8> {
+ let byte = self.input.tokenizer.next_byte();
+ if self.stop_before.contains(Delimiters::from_byte(byte)) {
+ return None;
+ }
+ byte
+ }
+
+ /// Restore the internal state of the parser (including position within the input)
+ /// to what was previously saved by the `Parser::position` method.
+ ///
+ /// Should only be used with `SourcePosition` values from the same `Parser` instance.
+ #[inline]
+ pub fn reset(&mut self, state: &ParserState) {
+ self.input.tokenizer.reset(state);
+ self.at_start_of = state.at_start_of;
+ }
+
+ /// Start looking for `var()` / `env()` functions. (See the
+ /// `.seen_var_or_env_functions()` method.)
+ #[inline]
+ pub fn look_for_var_or_env_functions(&mut self) {
+ self.input.tokenizer.look_for_var_or_env_functions()
+ }
+
+ /// Return whether a `var()` or `env()` function has been seen by the
+ /// tokenizer since either `look_for_var_or_env_functions` was called, and
+ /// stop looking.
+ #[inline]
+ pub fn seen_var_or_env_functions(&mut self) -> bool {
+ self.input.tokenizer.seen_var_or_env_functions()
+ }
+
+ /// The old name of `try_parse`, which requires raw identifiers in the Rust 2018 edition.
+ #[inline]
+ pub fn r#try<F, T, E>(&mut self, thing: F) -> Result<T, E>
+ where
+ F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>,
+ {
+ self.try_parse(thing)
+ }
+
+ /// Execute the given closure, passing it the parser.
+ /// If the result (returned unchanged) is `Err`,
+ /// the internal state of the parser (including position within the input)
+ /// is restored to what it was before the call.
+ #[inline]
+ pub fn try_parse<F, T, E>(&mut self, thing: F) -> Result<T, E>
+ where
+ F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>,
+ {
+ let start = self.state();
+ let result = thing(self);
+ if result.is_err() {
+ self.reset(&start)
+ }
+ result
+ }
+
+ /// Return a slice of the CSS input
+ #[inline]
+ pub fn slice(&self, range: Range<SourcePosition>) -> &'i str {
+ self.input.tokenizer.slice(range)
+ }
+
+ /// Return a slice of the CSS input, from the given position to the current one.
+ #[inline]
+ pub fn slice_from(&self, start_position: SourcePosition) -> &'i str {
+ self.input.tokenizer.slice_from(start_position)
+ }
+
+ /// Return the next token in the input that is neither whitespace or a comment,
+ /// and advance the position accordingly.
+ ///
+ /// After returning a `Function`, `ParenthesisBlock`,
+ /// `CurlyBracketBlock`, or `SquareBracketBlock` token,
+ /// the next call will skip until after the matching `CloseParenthesis`,
+ /// `CloseCurlyBracket`, or `CloseSquareBracket` token.
+ ///
+ /// See the `Parser::parse_nested_block` method to parse the content of functions or blocks.
+ ///
+ /// This only returns a closing token when it is unmatched (and therefore an error).
+ pub fn next(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
+ self.skip_whitespace();
+ self.next_including_whitespace_and_comments()
+ }
+
+    /// Same as `Parser::next`, but does not skip whitespace tokens.
+    pub fn next_including_whitespace(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
+        // Pull tokens until something other than a comment shows up.
+        loop {
+            match self.next_including_whitespace_and_comments() {
+                Err(e) => return Err(e),
+                Ok(&Token::Comment(_)) => {}
+                _ => break,
+            }
+        }
+        // Re-borrow the just-produced token from the cache rather than from
+        // the loop above, so the returned reference is not tied to the
+        // match's temporary borrow.
+        Ok(self.input.cached_token_ref())
+    }
+
+    /// Same as `Parser::next`, but does not skip whitespace or comment tokens.
+    ///
+    /// **Note**: This should only be used in contexts like a CSS pre-processor
+    /// where comments are preserved.
+    /// When parsing higher-level values, per the CSS Syntax specification,
+    /// comments should always be ignored between tokens.
+    pub fn next_including_whitespace_and_comments(
+        &mut self,
+    ) -> Result<&Token<'i>, BasicParseError<'i>> {
+        // If the previous token opened a block whose contents were never
+        // parsed, skip to the matching closing token first.
+        if let Some(block_type) = self.at_start_of.take() {
+            consume_until_end_of_block(block_type, &mut self.input.tokenizer);
+        }
+
+        // A byte in this parser's `stop_before` set ends its delimited input.
+        let byte = self.input.tokenizer.next_byte();
+        if self.stop_before.contains(Delimiters::from_byte(byte)) {
+            return Err(self.new_basic_error(BasicParseErrorKind::EndOfInput));
+        }
+
+        let token_start_position = self.input.tokenizer.position();
+        // Reuse the cached token if it was produced at exactly this position;
+        // this happens when the parser was rewound (e.g. via `try_parse`).
+        let using_cached_token = self
+            .input
+            .cached_token
+            .as_ref()
+            .map_or(false, |cached_token| {
+                cached_token.start_position == token_start_position
+            });
+        let token = if using_cached_token {
+            let cached_token = self.input.cached_token.as_ref().unwrap();
+            self.input.tokenizer.reset(&cached_token.end_state);
+            // Replay the side effect of producing a `Function` token, so the
+            // tokenizer's function tracking (cf. `seen_var_or_env_functions`)
+            // stays consistent with a fresh tokenization.
+            match cached_token.token {
+                Token::Function(ref name) => self.input.tokenizer.see_function(name),
+                _ => {}
+            }
+            &cached_token.token
+        } else {
+            let new_token = self
+                .input
+                .tokenizer
+                .next()
+                .map_err(|()| self.new_basic_error(BasicParseErrorKind::EndOfInput))?;
+            self.input.cached_token = Some(CachedToken {
+                token: new_token,
+                start_position: token_start_position,
+                end_state: self.input.tokenizer.state(),
+            });
+            self.input.cached_token_ref()
+        };
+
+        // Remember that the caller may now parse this token's nested block.
+        if let Some(block_type) = BlockType::opening(token) {
+            self.at_start_of = Some(block_type);
+        }
+        Ok(token)
+    }
+
+ /// Have the given closure parse something, then check the the input is exhausted.
+ /// The result is overridden to `Err(())` if some input remains.
+ ///
+ /// This can help tell e.g. `color: green;` from `color: green 4px;`
+ #[inline]
+ pub fn parse_entirely<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
+ where
+ F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ParseError<'i, E>>,
+ {
+ let result = parse(self)?;
+ self.expect_exhausted()?;
+ Ok(result)
+ }
+
+ /// Parse a list of comma-separated values, all with the same syntax.
+ ///
+ /// The given closure is called repeatedly with a "delimited" parser
+ /// (see the `Parser::parse_until_before` method) so that it can over
+ /// consume the input past a comma at this block/function nesting level.
+ ///
+ /// Successful results are accumulated in a vector.
+ ///
+ /// This method returns `Err(())` the first time that a closure call does,
+ /// or if a closure call leaves some input before the next comma or the end
+ /// of the input.
+ #[inline]
+ pub fn parse_comma_separated<F, T, E>(
+ &mut self,
+ parse_one: F,
+ ) -> Result<Vec<T>, ParseError<'i, E>>
+ where
+ F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+ {
+ self.parse_comma_separated_internal(parse_one, /* ignore_errors = */ false)
+ }
+
+ /// Like `parse_comma_separated`, but ignores errors on unknown components,
+ /// rather than erroring out in the whole list.
+ ///
+ /// Caller must deal with the fact that the resulting list might be empty,
+ /// if there's no valid component on the list.
+ #[inline]
+ pub fn parse_comma_separated_ignoring_errors<F, T, E: 'i>(&mut self, parse_one: F) -> Vec<T>
+ where
+ F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+ {
+ match self.parse_comma_separated_internal(parse_one, /* ignore_errors = */ true) {
+ Ok(values) => values,
+ Err(..) => unreachable!(),
+ }
+ }
+
+    // Shared implementation of `parse_comma_separated` and
+    // `parse_comma_separated_ignoring_errors`; with `ignore_errors` set,
+    // failed items are silently dropped instead of aborting the list.
+    #[inline]
+    fn parse_comma_separated_internal<F, T, E>(
+        &mut self,
+        mut parse_one: F,
+        ignore_errors: bool,
+    ) -> Result<Vec<T>, ParseError<'i, E>>
+    where
+        F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+    {
+        // Vec grows from 0 to 4 by default on first push(). So allocate with
+        // capacity 1, so in the somewhat common case of only one item we don't
+        // way overallocate. Note that we always push at least one item if
+        // parsing succeeds.
+        let mut values = Vec::with_capacity(1);
+        loop {
+            self.skip_whitespace(); // Unnecessary for correctness, but may help try() in parse_one rewind less.
+            match self.parse_until_before(Delimiter::Comma, &mut parse_one) {
+                Ok(v) => values.push(v),
+                Err(e) if !ignore_errors => return Err(e),
+                Err(_) => {},
+            }
+            match self.next() {
+                Err(_) => return Ok(values),
+                Ok(&Token::Comma) => continue,
+                // The delimited parse above stops right before a comma, so
+                // the only token that can appear here is the comma itself.
+                Ok(_) => unreachable!(),
+            }
+        }
+    }
+
+ /// Parse the content of a block or function.
+ ///
+ /// This method panics if the last token yielded by this parser
+ /// (from one of the `next*` methods)
+ /// is not a on that marks the start of a block or function:
+ /// a `Function`, `ParenthesisBlock`, `CurlyBracketBlock`, or `SquareBracketBlock`.
+ ///
+ /// The given closure is called with a "delimited" parser
+ /// that stops at the end of the block or function (at the matching closing token).
+ ///
+ /// The result is overridden to `Err(())` if the closure leaves some input before that point.
+ #[inline]
+ pub fn parse_nested_block<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
+ where
+ F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+ {
+ parse_nested_block(self, parse)
+ }
+
+ /// Limit parsing to until a given delimiter or the end of the input. (E.g.
+ /// a semicolon for a property value.)
+ ///
+ /// The given closure is called with a "delimited" parser
+ /// that stops before the first character at this block/function nesting level
+ /// that matches the given set of delimiters, or at the end of the input.
+ ///
+ /// The result is overridden to `Err(())` if the closure leaves some input before that point.
+ #[inline]
+ pub fn parse_until_before<F, T, E>(
+ &mut self,
+ delimiters: Delimiters,
+ parse: F,
+ ) -> Result<T, ParseError<'i, E>>
+ where
+ F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+ {
+ parse_until_before(self, delimiters, ParseUntilErrorBehavior::Consume, parse)
+ }
+
+ /// Like `parse_until_before`, but also consume the delimiter token.
+ ///
+ /// This can be useful when you don’t need to know which delimiter it was
+ /// (e.g. if these is only one in the given set)
+ /// or if it was there at all (as opposed to reaching the end of the input).
+ #[inline]
+ pub fn parse_until_after<F, T, E>(
+ &mut self,
+ delimiters: Delimiters,
+ parse: F,
+ ) -> Result<T, ParseError<'i, E>>
+ where
+ F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+ {
+ parse_until_after(self, delimiters, ParseUntilErrorBehavior::Consume, parse)
+ }
+
+ /// Parse a <whitespace-token> and return its value.
+ #[inline]
+ pub fn expect_whitespace(&mut self) -> Result<&'i str, BasicParseError<'i>> {
+ let start_location = self.current_source_location();
+ match *self.next_including_whitespace()? {
+ Token::WhiteSpace(value) => Ok(value),
+ ref t => Err(start_location.new_basic_unexpected_token_error(t.clone())),
+ }
+ }
+
+ /// Parse a <ident-token> and return the unescaped value.
+ #[inline]
+ pub fn expect_ident(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::Ident(ref value) => Ok(value),
+ }
+ }
+
+ /// expect_ident, but clone the CowRcStr
+ #[inline]
+ pub fn expect_ident_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
+ self.expect_ident().map(|s| s.clone())
+ }
+
+ /// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
+ #[inline]
+ pub fn expect_ident_matching(
+ &mut self,
+ expected_value: &str,
+ ) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
+ }
+ }
+
+ /// Parse a <string-token> and return the unescaped value.
+ #[inline]
+ pub fn expect_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::QuotedString(ref value) => Ok(value),
+ }
+ }
+
+ /// expect_string, but clone the CowRcStr
+ #[inline]
+ pub fn expect_string_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
+ self.expect_string().map(|s| s.clone())
+ }
+
+ /// Parse either a <ident-token> or a <string-token>, and return the unescaped value.
+ #[inline]
+ pub fn expect_ident_or_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::Ident(ref value) => Ok(value),
+ Token::QuotedString(ref value) => Ok(value),
+ }
+ }
+
+ /// Parse a <url-token> and return the unescaped value.
+ #[inline]
+ pub fn expect_url(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::UnquotedUrl(ref value) => Ok(value.clone()),
+ Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+ self.parse_nested_block(|input| {
+ input.expect_string().map_err(Into::into).map(|s| s.clone())
+ })
+ .map_err(ParseError::<()>::basic)
+ }
+ }
+ }
+
+ /// Parse either a <url-token> or a <string-token>, and return the unescaped value.
+ #[inline]
+ pub fn expect_url_or_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::UnquotedUrl(ref value) => Ok(value.clone()),
+ Token::QuotedString(ref value) => Ok(value.clone()),
+ Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+ self.parse_nested_block(|input| {
+ input.expect_string().map_err(Into::into).map(|s| s.clone())
+ })
+ .map_err(ParseError::<()>::basic)
+ }
+ }
+ }
+
+ /// Parse a <number-token> and return the integer value.
+ #[inline]
+ pub fn expect_number(&mut self) -> Result<f32, BasicParseError<'i>> {
+ expect! {self,
+ Token::Number { value, .. } => Ok(value),
+ }
+ }
+
+ /// Parse a <number-token> that does not have a fractional part, and return the integer value.
+ #[inline]
+ pub fn expect_integer(&mut self) -> Result<i32, BasicParseError<'i>> {
+ expect! {self,
+ Token::Number { int_value: Some(int_value), .. } => Ok(int_value),
+ }
+ }
+
+ /// Parse a <percentage-token> and return the value.
+ /// `0%` and `100%` map to `0.0` and `1.0` (not `100.0`), respectively.
+ #[inline]
+ pub fn expect_percentage(&mut self) -> Result<f32, BasicParseError<'i>> {
+ expect! {self,
+ Token::Percentage { unit_value, .. } => Ok(unit_value),
+ }
+ }
+
+ /// Parse a `:` <colon-token>.
+ #[inline]
+ pub fn expect_colon(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Colon => Ok(()),
+ }
+ }
+
+ /// Parse a `;` <semicolon-token>.
+ #[inline]
+ pub fn expect_semicolon(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Semicolon => Ok(()),
+ }
+ }
+
+ /// Parse a `,` <comma-token>.
+ #[inline]
+ pub fn expect_comma(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Comma => Ok(()),
+ }
+ }
+
+ /// Parse a <delim-token> with the given value.
+ #[inline]
+ pub fn expect_delim(&mut self, expected_value: char) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Delim(value) if value == expected_value => Ok(()),
+ }
+ }
+
+ /// Parse a `{ /* ... */ }` curly brackets block.
+ ///
+ /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
+ #[inline]
+ pub fn expect_curly_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::CurlyBracketBlock => Ok(()),
+ }
+ }
+
+ /// Parse a `[ /* ... */ ]` square brackets block.
+ ///
+ /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
+ #[inline]
+ pub fn expect_square_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::SquareBracketBlock => Ok(()),
+ }
+ }
+
+ /// Parse a `( /* ... */ )` parenthesis block.
+ ///
+ /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
+ #[inline]
+ pub fn expect_parenthesis_block(&mut self) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::ParenthesisBlock => Ok(()),
+ }
+ }
+
+ /// Parse a <function> token and return its name.
+ ///
+ /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
+ #[inline]
+ pub fn expect_function(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
+ expect! {self,
+ Token::Function(ref name) => Ok(name),
+ }
+ }
+
+ /// Parse a <function> token whose name is an ASCII-insensitive match for the given value.
+ ///
+ /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
+ #[inline]
+ pub fn expect_function_matching(
+ &mut self,
+ expected_name: &str,
+ ) -> Result<(), BasicParseError<'i>> {
+ expect! {self,
+ Token::Function(ref name) if name.eq_ignore_ascii_case(expected_name) => Ok(()),
+ }
+ }
+
+    /// Parse the input until exhaustion and check that it contains no “error” token.
+    ///
+    /// See `Token::is_parse_error`. This also checks nested blocks and functions recursively.
+    #[inline]
+    pub fn expect_no_error_token(&mut self) -> Result<(), BasicParseError<'i>> {
+        loop {
+            match self.next_including_whitespace_and_comments() {
+                // Recurse into every kind of nested block.
+                Ok(&Token::Function(_))
+                | Ok(&Token::ParenthesisBlock)
+                | Ok(&Token::SquareBracketBlock)
+                | Ok(&Token::CurlyBracketBlock) => self
+                    .parse_nested_block(|input| input.expect_no_error_token().map_err(Into::into))
+                    .map_err(ParseError::<()>::basic)?,
+                Ok(t) => {
+                    // FIXME: maybe these should be separate variants of
+                    // BasicParseError instead?
+                    if t.is_parse_error() {
+                        let token = t.clone();
+                        return Err(self.new_basic_unexpected_token_error(token));
+                    }
+                }
+                // End of input: no error token was found.
+                Err(_) => return Ok(()),
+            }
+        }
+    }
+}
+
+/// Backing implementation of `Parser::parse_until_before`: run `parse` on a
+/// parser delimited by `delimiters` (in addition to the current
+/// `parser.stop_before`), then skip any remaining input up to — but not
+/// including — the first delimiter.
+pub fn parse_until_before<'i: 't, 't, F, T, E>(
+    parser: &mut Parser<'i, 't>,
+    delimiters: Delimiters,
+    error_behavior: ParseUntilErrorBehavior,
+    parse: F,
+) -> Result<T, ParseError<'i, E>>
+where
+    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+{
+    let delimiters = parser.stop_before | delimiters;
+    let result;
+    // Introduce a new scope to limit duration of nested_parser’s borrow
+    {
+        let mut delimited_parser = Parser {
+            input: parser.input,
+            at_start_of: parser.at_start_of.take(),
+            stop_before: delimiters,
+        };
+        result = delimited_parser.parse_entirely(parse);
+        // With `Stop` behavior, bail out without consuming to the delimiter.
+        if error_behavior == ParseUntilErrorBehavior::Stop && result.is_err() {
+            return result;
+        }
+        if let Some(block_type) = delimited_parser.at_start_of {
+            consume_until_end_of_block(block_type, &mut delimited_parser.input.tokenizer);
+        }
+    }
+    // FIXME: have a special-purpose tokenizer method for this that does less work.
+    loop {
+        if delimiters.contains(Delimiters::from_byte(parser.input.tokenizer.next_byte())) {
+            break;
+        }
+        if let Ok(token) = parser.input.tokenizer.next() {
+            // Skip over entire nested blocks so their contents can't match a delimiter.
+            if let Some(block_type) = BlockType::opening(&token) {
+                consume_until_end_of_block(block_type, &mut parser.input.tokenizer);
+            }
+        } else {
+            break;
+        }
+    }
+    result
+}
+
+/// Backing implementation of `Parser::parse_until_after`: like
+/// `parse_until_before`, but also consumes the delimiter itself (and, for `{`,
+/// the whole curly-bracket block that follows).
+pub fn parse_until_after<'i: 't, 't, F, T, E>(
+    parser: &mut Parser<'i, 't>,
+    delimiters: Delimiters,
+    error_behavior: ParseUntilErrorBehavior,
+    parse: F,
+) -> Result<T, ParseError<'i, E>>
+where
+    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+{
+    let result = parse_until_before(parser, delimiters, error_behavior, parse);
+    if error_behavior == ParseUntilErrorBehavior::Stop && result.is_err() {
+        return result;
+    }
+    let next_byte = parser.input.tokenizer.next_byte();
+    // Only consume the delimiter if it is ours, not one inherited from `stop_before`.
+    if next_byte.is_some()
+        && !parser
+            .stop_before
+            .contains(Delimiters::from_byte(next_byte))
+    {
+        debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte)));
+        // We know this byte is ASCII.
+        parser.input.tokenizer.advance(1);
+        if next_byte == Some(b'{') {
+            consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.input.tokenizer);
+        }
+    }
+    result
+}
+
+/// Backing implementation of `Parser::parse_nested_block`: run `parse` inside
+/// the block whose opening token was just consumed, then skip to the matching
+/// closing token.
+///
+/// Panics if no block-opening token was just consumed (see the `expect` below).
+pub fn parse_nested_block<'i: 't, 't, F, T, E>(
+    parser: &mut Parser<'i, 't>,
+    parse: F,
+) -> Result<T, ParseError<'i, E>>
+where
+    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
+{
+    let block_type = parser.at_start_of.take().expect(
+        "\
+         A nested parser can only be created when a Function, \
+         ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
+         token was just consumed.\
+         ",
+    );
+    let closing_delimiter = match block_type {
+        BlockType::CurlyBracket => ClosingDelimiter::CloseCurlyBracket,
+        BlockType::SquareBracket => ClosingDelimiter::CloseSquareBracket,
+        BlockType::Parenthesis => ClosingDelimiter::CloseParenthesis,
+    };
+    let result;
+    // Introduce a new scope to limit duration of nested_parser’s borrow
+    {
+        let mut nested_parser = Parser {
+            input: parser.input,
+            at_start_of: None,
+            stop_before: closing_delimiter,
+        };
+        result = nested_parser.parse_entirely(parse);
+        // If `parse` left an inner block unconsumed, skip past it first.
+        if let Some(block_type) = nested_parser.at_start_of {
+            consume_until_end_of_block(block_type, &mut nested_parser.input.tokenizer);
+        }
+    }
+    // Then skip whatever is left of this block, up to and including its closer.
+    consume_until_end_of_block(block_type, &mut parser.input.tokenizer);
+    result
+}
+
+/// Skip tokens until the block opened with `block_type` — including any blocks
+/// nested inside it — is closed, or until end of input.
+#[inline(never)]
+#[cold]
+fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) {
+    // Stack of currently-open block types; usually shallow, hence SmallVec.
+    let mut stack = SmallVec::<[BlockType; 16]>::new();
+    stack.push(block_type);
+
+    // FIXME: have a special-purpose tokenizer method for this that does less work.
+    while let Ok(ref token) = tokenizer.next() {
+        if let Some(b) = BlockType::closing(token) {
+            // Only a closer matching the innermost open block pops the stack;
+            // mismatched closers are ignored, per CSS error recovery.
+            if *stack.last().unwrap() == b {
+                stack.pop();
+                if stack.is_empty() {
+                    return;
+                }
+            }
+        }
+
+        if let Some(block_type) = BlockType::opening(token) {
+            stack.push(block_type);
+        }
+    }
+}
diff --git a/third_party/rust/cssparser/src/rules_and_declarations.rs b/third_party/rust/cssparser/src/rules_and_declarations.rs
new file mode 100644
index 0000000000..fb33a7d0cd
--- /dev/null
+++ b/third_party/rust/cssparser/src/rules_and_declarations.rs
@@ -0,0 +1,507 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// https://drafts.csswg.org/css-syntax/#parsing
+
+use super::{
+ BasicParseError, BasicParseErrorKind, Delimiter, Delimiters, ParseError, Parser, Token,
+};
+use crate::cow_rc_str::CowRcStr;
+use crate::parser::{parse_nested_block, parse_until_after, ParseUntilErrorBehavior, ParserState};
+
+/// Parse `!important`.
+///
+/// Typical usage is `input.try_parse(parse_important).is_ok()`
+/// at the end of a `DeclarationParser::parse_value` implementation.
+pub fn parse_important<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseError<'i>> {
+    // A `!` delimiter followed by the (ASCII case-insensitive) `important` ident.
+    input.expect_delim('!')?;
+    input.expect_ident_matching("important")
+}
+
+/// A trait to provide various parsing of declaration values.
+///
+/// For example, there could be different implementations for property declarations in style rules
+/// and for descriptors in `@font-face` rules.
+pub trait DeclarationParser<'i> {
+    /// The finished representation of a declaration.
+    type Declaration;
+
+    /// The error type that is included in the ParseError value that can be returned.
+    type Error: 'i;
+
+    /// Parse the value of a declaration with the given `name`.
+    ///
+    /// Return the finished representation for the declaration
+    /// as returned by `DeclarationListParser::next`,
+    /// or an `Err(..)` to ignore the entire declaration as invalid.
+    ///
+    /// Declaration name matching should be case-insensitive in the ASCII range.
+    /// This can be done with `std::ascii::Ascii::eq_ignore_ascii_case`,
+    /// or with the `match_ignore_ascii_case!` macro.
+    ///
+    /// The given `input` is a "delimited" parser
+    /// that ends wherever the declaration value should end.
+    /// (In declaration lists, before the next semicolon or end of the current block.)
+    ///
+    /// If `!important` can be used in a given context,
+    /// `input.try_parse(parse_important).is_ok()` should be used at the end
+    /// of the implementation of this method and the result should be part of the return value.
+    fn parse_value<'t>(
+        &mut self,
+        name: CowRcStr<'i>,
+        input: &mut Parser<'i, 't>,
+    ) -> Result<Self::Declaration, ParseError<'i, Self::Error>> {
+        // Default implementation: reject every declaration by reporting its
+        // name as an unexpected token.
+        Err(input.new_error(BasicParseErrorKind::UnexpectedToken(Token::Ident(name))))
+    }
+}
+
+/// A trait to provide various parsing of at-rules.
+///
+/// For example, there could be different implementations for top-level at-rules
+/// (`@media`, `@font-face`, …)
+/// and for page-margin rules inside `@page`.
+///
+/// Default implementations that reject all at-rules are provided,
+/// so that `impl AtRuleParser<(), ()> for ... {}` can be used
+/// for using `DeclarationListParser` to parse a declarations list with only qualified rules.
+pub trait AtRuleParser<'i> {
+    /// The intermediate representation of the prelude of an at-rule.
+    type Prelude;
+
+    /// The finished representation of an at-rule.
+    type AtRule;
+
+    /// The error type that is included in the ParseError value that can be returned.
+    type Error: 'i;
+
+    /// Parse the prelude of an at-rule with the given `name`.
+    ///
+    /// Return the representation of the prelude and the type of at-rule,
+    /// or an `Err(..)` to ignore the entire at-rule as invalid.
+    ///
+    /// The prelude is the part after the at-keyword
+    /// and before the `;` semicolon or `{ /* ... */ }` block.
+    ///
+    /// At-rule name matching should be case-insensitive in the ASCII range.
+    /// This can be done with `std::ascii::Ascii::eq_ignore_ascii_case`,
+    /// or with the `match_ignore_ascii_case!` macro.
+    ///
+    /// The given `input` is a "delimited" parser
+    /// that ends wherever the prelude should end.
+    /// (Before the next semicolon, the next `{`, or the end of the current block.)
+    fn parse_prelude<'t>(
+        &mut self,
+        name: CowRcStr<'i>,
+        input: &mut Parser<'i, 't>,
+    ) -> Result<Self::Prelude, ParseError<'i, Self::Error>> {
+        // Default implementation: reject every at-rule as invalid.
+        Err(input.new_error(BasicParseErrorKind::AtRuleInvalid(name)))
+    }
+
+    /// End an at-rule which doesn't have block. Return the finished
+    /// representation of the at-rule.
+    ///
+    /// The location passed in is source location of the start of the prelude.
+    ///
+    /// This is only called when `parse_prelude` returned `WithoutBlock`, and
+    /// either the `;` semicolon indeed follows the prelude, or parser is at
+    /// the end of the input.
+    fn rule_without_block(
+        &mut self,
+        prelude: Self::Prelude,
+        start: &ParserState,
+    ) -> Result<Self::AtRule, ()> {
+        // Default implementation: block-less at-rules are rejected.
+        let _ = prelude;
+        let _ = start;
+        Err(())
+    }
+
+    /// Parse the content of a `{ /* ... */ }` block for the body of the at-rule.
+    ///
+    /// The location passed in is source location of the start of the prelude.
+    ///
+    /// Return the finished representation of the at-rule
+    /// as returned by `RuleListParser::next` or `DeclarationListParser::next`,
+    /// or an `Err(..)` to ignore the entire at-rule as invalid.
+    ///
+    /// This is only called when `parse_prelude` returned `WithBlock`, and a block
+    /// was indeed found following the prelude.
+    fn parse_block<'t>(
+        &mut self,
+        prelude: Self::Prelude,
+        start: &ParserState,
+        input: &mut Parser<'i, 't>,
+    ) -> Result<Self::AtRule, ParseError<'i, Self::Error>> {
+        // Default implementation: at-rule bodies are rejected.
+        let _ = prelude;
+        let _ = start;
+        Err(input.new_error(BasicParseErrorKind::AtRuleBodyInvalid))
+    }
+}
+
+/// A trait to provide various parsing of qualified rules.
+///
+/// For example, there could be different implementations for top-level qualified rules (i.e. style
+/// rules with Selectors as prelude) and for qualified rules inside `@keyframes` (keyframe rules
+/// with keyframe selectors as prelude).
+///
+/// Default implementations that reject all qualified rules are provided, so that
+/// `impl QualifiedRuleParser<(), ()> for ... {}` can be used for example for using
+/// `RuleListParser` to parse a rule list with only at-rules (such as inside
+/// `@font-feature-values`).
+pub trait QualifiedRuleParser<'i> {
+    /// The intermediate representation of a qualified rule prelude.
+    type Prelude;
+
+    /// The finished representation of a qualified rule.
+    type QualifiedRule;
+
+    /// The error type that is included in the ParseError value that can be returned.
+    type Error: 'i;
+
+    /// Parse the prelude of a qualified rule. For style rules, this is a Selector list.
+    ///
+    /// Return the representation of the prelude,
+    /// or an `Err(..)` to ignore the entire at-rule as invalid.
+    ///
+    /// The prelude is the part before the `{ /* ... */ }` block.
+    ///
+    /// The given `input` is a "delimited" parser
+    /// that ends where the prelude should end (before the next `{`).
+    fn parse_prelude<'t>(
+        &mut self,
+        input: &mut Parser<'i, 't>,
+    ) -> Result<Self::Prelude, ParseError<'i, Self::Error>> {
+        // Default implementation: reject every qualified rule.
+        Err(input.new_error(BasicParseErrorKind::QualifiedRuleInvalid))
+    }
+
+    /// Parse the content of a `{ /* ... */ }` block for the body of the qualified rule.
+    ///
+    /// The location passed in is source location of the start of the prelude.
+    ///
+    /// Return the finished representation of the qualified rule
+    /// as returned by `RuleListParser::next`,
+    /// or an `Err(..)` to ignore the entire at-rule as invalid.
+    fn parse_block<'t>(
+        &mut self,
+        prelude: Self::Prelude,
+        start: &ParserState,
+        input: &mut Parser<'i, 't>,
+    ) -> Result<Self::QualifiedRule, ParseError<'i, Self::Error>> {
+        // Default implementation: reject every qualified rule body.
+        let _ = prelude;
+        let _ = start;
+        Err(input.new_error(BasicParseErrorKind::QualifiedRuleInvalid))
+    }
+}
+
+/// Provides an iterator for rule bodies and declaration lists.
+pub struct RuleBodyParser<'i, 't, 'a, P, I, E> {
+    /// The input given to the parser.
+    pub input: &'a mut Parser<'i, 't>,
+    /// The parser given to `DeclarationListParser::new`
+    pub parser: &'a mut P,
+
+    // Carries the item (`I`) and error (`E`) type parameters without storing
+    // any value of them.
+    _phantom: std::marker::PhantomData<(I, E)>,
+}
+
+/// A parser for a rule body item.
+///
+/// Combines declaration, qualified-rule, and at-rule parsing with a shared
+/// output type, as required by `RuleBodyParser`.
+pub trait RuleBodyItemParser<'i, DeclOrRule, Error: 'i>:
+    DeclarationParser<'i, Declaration = DeclOrRule, Error = Error>
+    + QualifiedRuleParser<'i, QualifiedRule = DeclOrRule, Error = Error>
+    + AtRuleParser<'i, AtRule = DeclOrRule, Error = Error>
+{
+    /// Whether we should attempt to parse declarations. If you know you won't, returning false
+    /// here is slightly faster.
+    fn parse_declarations(&self) -> bool;
+    /// Whether we should attempt to parse qualified rules. If you know you won't, returning false
+    /// here is slightly faster.
+    fn parse_qualified(&self) -> bool;
+}
+
+impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> {
+    /// Create a new `DeclarationListParser` for the given `input` and `parser`.
+    ///
+    /// Note that all CSS declaration lists can on principle contain at-rules.
+    /// Even if no such valid at-rule exists (yet),
+    /// this affects error handling: at-rules end at `{}` blocks, not just semicolons.
+    ///
+    /// The given `parser` therefore needs to implement
+    /// both `DeclarationParser` and `AtRuleParser` traits.
+    /// However, the latter can be an empty `impl`
+    /// since `AtRuleParser` provides default implementations of its methods.
+    ///
+    /// The return type for finished declarations and at-rules also needs to be the same,
+    /// since `<DeclarationListParser as Iterator>::next` can return either.
+    /// It could be a custom enum.
+    pub fn new(input: &'a mut Parser<'i, 't>, parser: &'a mut P) -> Self {
+        Self {
+            input,
+            parser,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+/// https://drafts.csswg.org/css-syntax/#consume-a-blocks-contents
+impl<'i, 't, 'a, I, P, E: 'i> Iterator for RuleBodyParser<'i, 't, 'a, P, I, E>
+where
+    P: RuleBodyItemParser<'i, I, E>,
+{
+    type Item = Result<I, (ParseError<'i, E>, &'i str)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            self.input.skip_whitespace();
+            let start = self.input.state();
+            // `.ok()?` turns end-of-input into iterator exhaustion.
+            match self.input.next_including_whitespace_and_comments().ok()? {
+                Token::CloseCurlyBracket |
+                Token::WhiteSpace(..) |
+                Token::Semicolon |
+                Token::Comment(..) => continue,
+                Token::AtKeyword(ref name) => {
+                    let name = name.clone();
+                    return Some(parse_at_rule(&start, name, self.input, &mut *self.parser));
+                }
+                // https://drafts.csswg.org/css-syntax/#consume-a-declaration bails out just to
+                // keep parsing as a qualified rule if the token is not an ident, so we implement
+                // that in a slightly more straight-forward way
+                Token::Ident(ref name) if self.parser.parse_declarations() => {
+                    let name = name.clone();
+                    let parse_qualified = self.parser.parse_qualified();
+                    let result = {
+                        // When qualified rules are also possible, stop at the first
+                        // error so we can retry the same span as a qualified rule.
+                        let error_behavior = if parse_qualified {
+                            ParseUntilErrorBehavior::Stop
+                        } else {
+                            ParseUntilErrorBehavior::Consume
+                        };
+                        let parser = &mut self.parser;
+                        parse_until_after(
+                            self.input,
+                            Delimiter::Semicolon,
+                            error_behavior,
+                            |input| {
+                                input.expect_colon()?;
+                                parser.parse_value(name, input)
+                            },
+                        )
+                    };
+                    if result.is_err() && parse_qualified {
+                        self.input.reset(&start);
+                        // We ignore the resulting error here. The property declaration parse error
+                        // is likely to be more relevant.
+                        if let Ok(qual) = parse_qualified_rule(
+                            &start,
+                            self.input,
+                            &mut *self.parser,
+                            Delimiter::Semicolon | Delimiter::CurlyBracketBlock,
+                        ) {
+                            return Some(Ok(qual))
+                        }
+                    }
+
+                    return Some(result.map_err(|e| (e, self.input.slice_from(start.position()))));
+                }
+                token => {
+                    let result = if self.parser.parse_qualified() {
+                        self.input.reset(&start);
+                        let delimiters = if self.parser.parse_declarations() {
+                            Delimiter::Semicolon | Delimiter::CurlyBracketBlock
+                        } else {
+                            Delimiter::CurlyBracketBlock
+                        };
+                        parse_qualified_rule(&start, self.input, &mut *self.parser, delimiters)
+                    } else {
+                        // Qualified rules are not allowed here: report the token
+                        // and skip the rest of the (pseudo-)declaration.
+                        let token = token.clone();
+                        self.input.parse_until_after(Delimiter::Semicolon, |_| {
+                            Err(start.source_location().new_unexpected_token_error(token))
+                        })
+                    };
+                    return Some(result.map_err(|e| (e, self.input.slice_from(start.position()))));
+                }
+            }
+        }
+    }
+}
+
+/// Provides an iterator for rule list parsing at the top-level of a stylesheet.
+pub struct StyleSheetParser<'i, 't, 'a, P> {
+    /// The input given.
+    pub input: &'a mut Parser<'i, 't>,
+
+    /// The parser given.
+    pub parser: &'a mut P,
+
+    // Whether any rule has been seen yet; used to special-case a leading
+    // `@charset` rule (see `Iterator::next` for this type).
+    any_rule_so_far: bool,
+}
+
+impl<'i, 't, 'a, R, P, E: 'i> StyleSheetParser<'i, 't, 'a, P>
+where
+    P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E>
+        + AtRuleParser<'i, AtRule = R, Error = E>,
+{
+    /// The given `parser` needs to implement both `QualifiedRuleParser` and `AtRuleParser` traits.
+    /// However, either of them can be an empty `impl` since the traits provide default
+    /// implementations of their methods.
+    ///
+    /// The return type for finished qualified rules and at-rules also needs to be the same,
+    /// since `<RuleListParser as Iterator>::next` can return either. It could be a custom enum.
+    pub fn new(input: &'a mut Parser<'i, 't>, parser: &'a mut P) -> Self {
+        Self {
+            input,
+            parser,
+            // No rule has been parsed yet (enables the leading-`@charset` special case).
+            any_rule_so_far: false,
+        }
+    }
+}
+
+/// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or `Err(())` for an invalid one.
+impl<'i, 't, 'a, R, P, E: 'i> Iterator for StyleSheetParser<'i, 't, 'a, P>
+where
+    P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E>
+        + AtRuleParser<'i, AtRule = R, Error = E>,
+{
+    type Item = Result<R, (ParseError<'i, E>, &'i str)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // Top-level-only: `<!--` and `-->` tokens are ignored.
+            self.input.skip_cdc_and_cdo();
+            let start = self.input.state();
+            // Peek for `@`; `next_byte()` returning `None` (end of input) ends iteration.
+            let at_keyword = match self.input.next_byte()? {
+                b'@' => match self.input.next_including_whitespace_and_comments() {
+                    Ok(&Token::AtKeyword(ref name)) => Some(name.clone()),
+                    _ => {
+                        self.input.reset(&start);
+                        None
+                    }
+                },
+                _ => None,
+            };
+
+            if let Some(name) = at_keyword {
+                let first_stylesheet_rule = !self.any_rule_so_far;
+                self.any_rule_so_far = true;
+                // A leading `@charset` rule is consumed and discarded, not yielded.
+                if first_stylesheet_rule && name.eq_ignore_ascii_case("charset") {
+                    let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock;
+                    let _: Result<(), ParseError<()>> =
+                        self.input.parse_until_after(delimiters, |_| Ok(()));
+                } else {
+                    return Some(parse_at_rule(
+                        &start,
+                        name.clone(),
+                        self.input,
+                        &mut *self.parser,
+                    ));
+                }
+            } else {
+                self.any_rule_so_far = true;
+                let result = parse_qualified_rule(
+                    &start,
+                    self.input,
+                    &mut *self.parser,
+                    Delimiter::CurlyBracketBlock,
+                );
+                return Some(result.map_err(|e| (e, self.input.slice_from(start.position()))));
+            }
+        }
+    }
+}
+
+/// Parse a single declaration, such as an `( /* ... */ )` parenthesis in an `@supports` prelude.
+pub fn parse_one_declaration<'i, 't, P, E>(
+    input: &mut Parser<'i, 't>,
+    parser: &mut P,
+) -> Result<<P as DeclarationParser<'i>>::Declaration, (ParseError<'i, E>, &'i str)>
+where
+    P: DeclarationParser<'i, Error = E>,
+{
+    let start_position = input.position();
+    input
+        .parse_entirely(|input| {
+            // `<ident> : <value>`, with the value handled by the caller's parser.
+            let name = input.expect_ident()?.clone();
+            input.expect_colon()?;
+            parser.parse_value(name, input)
+        })
+        // On error, also report the slice of input that was covered.
+        .map_err(|e| (e, input.slice_from(start_position)))
+}
+
+/// Parse a single rule, such as for CSSOM’s `CSSStyleSheet.insertRule`.
+pub fn parse_one_rule<'i, 't, R, P, E>(
+    input: &mut Parser<'i, 't>,
+    parser: &mut P,
+) -> Result<R, ParseError<'i, E>>
+where
+    P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E>
+        + AtRuleParser<'i, AtRule = R, Error = E>,
+{
+    input.parse_entirely(|input| {
+        input.skip_whitespace();
+        let start = input.state();
+        // Peek for `@` to decide between an at-rule and a qualified rule.
+        let at_keyword = if input.next_byte() == Some(b'@') {
+            match *input.next_including_whitespace_and_comments()? {
+                Token::AtKeyword(ref name) => Some(name.clone()),
+                _ => {
+                    input.reset(&start);
+                    None
+                }
+            }
+        } else {
+            None
+        };
+
+        if let Some(name) = at_keyword {
+            // Drop the slice part of the error pair; only the ParseError is returned here.
+            parse_at_rule(&start, name, input, parser).map_err(|e| e.0)
+        } else {
+            parse_qualified_rule(&start, input, parser, Delimiter::CurlyBracketBlock)
+        }
+    })
+}
+
+/// Parse one at-rule: prelude up to `;` or `{`, then either `rule_without_block`
+/// or `parse_block` on the caller's `AtRuleParser`.
+///
+/// Errors are paired with the input slice covered by the rule.
+fn parse_at_rule<'i, 't, P, E>(
+    start: &ParserState,
+    name: CowRcStr<'i>,
+    input: &mut Parser<'i, 't>,
+    parser: &mut P,
+) -> Result<<P as AtRuleParser<'i>>::AtRule, (ParseError<'i, E>, &'i str)>
+where
+    P: AtRuleParser<'i, Error = E>,
+{
+    let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock;
+    let result = input.parse_until_before(delimiters, |input| parser.parse_prelude(name, input));
+    match result {
+        Ok(prelude) => {
+            let result = match input.next() {
+                // `;` or end of input: a block-less at-rule.
+                Ok(&Token::Semicolon) | Err(_) => parser
+                    .rule_without_block(prelude, start)
+                    .map_err(|()| input.new_unexpected_token_error(Token::Semicolon)),
+                Ok(&Token::CurlyBracketBlock) => {
+                    parse_nested_block(input, |input| parser.parse_block(prelude, start, input))
+                }
+                // `parse_until_before` guarantees the next token is a delimiter.
+                Ok(_) => unreachable!(),
+            };
+            result.map_err(|e| (e, input.slice_from(start.position())))
+        }
+        Err(error) => {
+            let end_position = input.position();
+            // Still consume the `;` or `{...}` so the caller can continue after the rule.
+            match input.next() {
+                Ok(&Token::CurlyBracketBlock) | Ok(&Token::Semicolon) | Err(_) => {}
+                _ => unreachable!(),
+            };
+            Err((error, input.slice(start.position()..end_position)))
+        }
+    }
+}
+
+/// Parse one qualified rule: prelude up to its `{` block, then the block body
+/// via the caller's `QualifiedRuleParser`.
+fn parse_qualified_rule<'i, 't, P, E>(
+    start: &ParserState,
+    input: &mut Parser<'i, 't>,
+    parser: &mut P,
+    delimiters: Delimiters,
+) -> Result<<P as QualifiedRuleParser<'i>>::QualifiedRule, ParseError<'i, E>>
+where
+    P: QualifiedRuleParser<'i, Error = E>,
+{
+    let prelude = input.parse_until_before(delimiters, |input| parser.parse_prelude(input));
+    input.expect_curly_bracket_block()?;
+    // Do this here so that we consume the `{` even if the prelude is `Err`.
+    let prelude = prelude?;
+    parse_nested_block(input, |input| parser.parse_block(prelude, &start, input))
+}
diff --git a/third_party/rust/cssparser/src/serializer.rs b/third_party/rust/cssparser/src/serializer.rs
new file mode 100644
index 0000000000..09c224022d
--- /dev/null
+++ b/third_party/rust/cssparser/src/serializer.rs
@@ -0,0 +1,593 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::match_byte;
+use dtoa_short::{self, Notation};
+use itoa;
+use std::fmt::{self, Write};
+use std::str;
+
+use super::Token;
+
+/// Trait for things that can serialize themselves in CSS syntax.
+pub trait ToCss {
+    /// Serialize `self` in CSS syntax, writing to `dest`.
+    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+    where
+        W: fmt::Write;
+
+    /// Serialize `self` in CSS syntax and return a string.
+    ///
+    /// (This is a convenience wrapper for `to_css` and probably should not be overridden.)
+    #[inline]
+    fn to_css_string(&self) -> String {
+        let mut s = String::new();
+        // Writing to a String cannot fail, hence the unwrap.
+        self.to_css(&mut s).unwrap();
+        s
+    }
+}
+
+/// Serialize a numeric token value, optionally with a leading `+` sign, and
+/// with a `.0` suffix when needed to round-trip as a non-integer.
+#[inline]
+fn write_numeric<W>(value: f32, int_value: Option<i32>, has_sign: bool, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    // Note: `is_sign_positive()` is false for -0.0, which is handled below.
+    if has_sign && value.is_sign_positive() {
+        dest.write_str("+")?;
+    }
+
+    let notation = if value == 0.0 && value.is_sign_negative() {
+        // Negative zero. Work around #20596.
+        dest.write_str("-0")?;
+        Notation {
+            decimal_point: false,
+            scientific: false,
+        }
+    } else {
+        dtoa_short::write(dest, value)?
+    };
+
+    // A non-integer token with a whole-number value still needs `.0` so it
+    // re-tokenizes as a number rather than an integer.
+    if int_value.is_none() && value.fract() == 0. {
+        if !notation.decimal_point && !notation.scientific {
+            dest.write_str(".0")?;
+        }
+    }
+    Ok(())
+}
+
+impl<'a> ToCss for Token<'a> {
+    /// Serialize this token back to CSS text, escaping identifiers, names,
+    /// strings, and URLs as required.
+    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+    where
+        W: fmt::Write,
+    {
+        match *self {
+            Token::Ident(ref value) => serialize_identifier(&**value, dest)?,
+            Token::AtKeyword(ref value) => {
+                dest.write_str("@")?;
+                serialize_identifier(&**value, dest)?;
+            }
+            Token::Hash(ref value) => {
+                dest.write_str("#")?;
+                serialize_name(value, dest)?;
+            }
+            Token::IDHash(ref value) => {
+                dest.write_str("#")?;
+                serialize_identifier(&**value, dest)?;
+            }
+            Token::QuotedString(ref value) => serialize_string(&**value, dest)?,
+            Token::UnquotedUrl(ref value) => {
+                dest.write_str("url(")?;
+                serialize_unquoted_url(&**value, dest)?;
+                dest.write_str(")")?;
+            }
+            Token::Delim(value) => dest.write_char(value)?,
+
+            Token::Number {
+                value,
+                int_value,
+                has_sign,
+            } => write_numeric(value, int_value, has_sign, dest)?,
+            Token::Percentage {
+                unit_value,
+                int_value,
+                has_sign,
+            } => {
+                // `unit_value` is stored as a fraction; scale back to percent.
+                write_numeric(unit_value * 100., int_value, has_sign, dest)?;
+                dest.write_str("%")?;
+            }
+            Token::Dimension {
+                value,
+                int_value,
+                has_sign,
+                ref unit,
+            } => {
+                write_numeric(value, int_value, has_sign, dest)?;
+                // Disambiguate with scientific notation.
+                let unit = &**unit;
+                // TODO(emilio): This doesn't handle e.g. 100E1m, which gets us
+                // an unit of "E1m"...
+                if unit == "e" || unit == "E" || unit.starts_with("e-") || unit.starts_with("E-") {
+                    dest.write_str("\\65 ")?;
+                    serialize_name(&unit[1..], dest)?;
+                } else {
+                    serialize_identifier(unit, dest)?;
+                }
+            }
+
+            Token::WhiteSpace(content) => dest.write_str(content)?,
+            Token::Comment(content) => {
+                dest.write_str("/*")?;
+                dest.write_str(content)?;
+                dest.write_str("*/")?
+            }
+            Token::Colon => dest.write_str(":")?,
+            Token::Semicolon => dest.write_str(";")?,
+            Token::Comma => dest.write_str(",")?,
+            Token::IncludeMatch => dest.write_str("~=")?,
+            Token::DashMatch => dest.write_str("|=")?,
+            Token::PrefixMatch => dest.write_str("^=")?,
+            Token::SuffixMatch => dest.write_str("$=")?,
+            Token::SubstringMatch => dest.write_str("*=")?,
+            Token::CDO => dest.write_str("<!--")?,
+            Token::CDC => dest.write_str("-->")?,
+
+            // Note: only the name and the opening `(` are written here; the
+            // matching `)` is a separate CloseParenthesis token.
+            Token::Function(ref name) => {
+                serialize_identifier(&**name, dest)?;
+                dest.write_str("(")?;
+            }
+            Token::ParenthesisBlock => dest.write_str("(")?,
+            Token::SquareBracketBlock => dest.write_str("[")?,
+            Token::CurlyBracketBlock => dest.write_str("{")?,
+
+            Token::BadUrl(ref contents) => {
+                dest.write_str("url(")?;
+                dest.write_str(contents)?;
+                dest.write_char(')')?;
+            }
+            Token::BadString(ref value) => {
+                // During tokenization, an unescaped newline after a quote causes
+                // the token to be a BadString instead of a QuotedString.
+                // The BadString token ends just before the newline
+                // (which is in a separate WhiteSpace token),
+                // and therefore does not have a closing quote.
+                dest.write_char('"')?;
+                CssStringWriter::new(dest).write_str(value)?;
+            }
+            Token::CloseParenthesis => dest.write_str(")")?,
+            Token::CloseSquareBracket => dest.write_str("]")?,
+            Token::CloseCurlyBracket => dest.write_str("}")?,
+        }
+        Ok(())
+    }
+}
+
+/// Write `ascii_byte` as a CSS hex escape, e.g. `\1f ` (the trailing space
+/// terminates the escape sequence).
+fn hex_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    static HEX_DIGITS: &'static [u8; 16] = b"0123456789abcdef";
+    let b3;
+    let b4;
+    // One or two hex digits depending on the byte's value.
+    let bytes = if ascii_byte > 0x0F {
+        let high = (ascii_byte >> 4) as usize;
+        let low = (ascii_byte & 0x0F) as usize;
+        b4 = [b'\\', HEX_DIGITS[high], HEX_DIGITS[low], b' '];
+        &b4[..]
+    } else {
+        b3 = [b'\\', HEX_DIGITS[ascii_byte as usize], b' '];
+        &b3[..]
+    };
+    // SAFETY: `bytes` only contains ASCII (backslash, hex digits, space),
+    // which is always valid UTF-8.
+    dest.write_str(unsafe { str::from_utf8_unchecked(&bytes) })
+}
+
+/// Write `ascii_byte` escaped with a single backslash, e.g. `\(`.
+fn char_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    let bytes = [b'\\', ascii_byte];
+    // SAFETY: a backslash plus an ASCII byte (as the parameter name implies)
+    // is valid UTF-8.
+    dest.write_str(unsafe { str::from_utf8_unchecked(&bytes) })
+}
+
+/// Write a CSS identifier, escaping characters as necessary.
+pub fn serialize_identifier<W>(mut value: &str, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    if value.is_empty() {
+        return Ok(());
+    }
+
+    if value.starts_with("--") {
+        // Custom-property-like names: `--` passes through, rest as a name.
+        dest.write_str("--")?;
+        serialize_name(&value[2..], dest)
+    } else if value == "-" {
+        // A lone `-` is not a valid identifier; escape it.
+        dest.write_str("\\-")
+    } else {
+        if value.as_bytes()[0] == b'-' {
+            dest.write_str("-")?;
+            value = &value[1..];
+        }
+        // A leading digit (after an optional `-`) must be hex-escaped to
+        // keep the result tokenizing as an identifier.
+        if let digit @ b'0'..=b'9' = value.as_bytes()[0] {
+            hex_escape(digit, dest)?;
+            value = &value[1..];
+        }
+        serialize_name(value, dest)
+    }
+}
+
+/// Write a CSS name, like a custom property name.
+///
+/// You should only use this when you know what you're doing, when in doubt,
+/// consider using `serialize_identifier`.
+pub fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    // Emit unescaped runs in chunks; `chunk_start` tracks the start of the
+    // current run of bytes that need no escaping.
+    let mut chunk_start = 0;
+    for (i, b) in value.bytes().enumerate() {
+        let escaped = match_byte! { b,
+            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' => continue,
+            b'\0' => Some("\u{FFFD}"),
+            b => {
+                // Non-ASCII bytes (UTF-8 lead/continuation bytes) pass through unescaped.
+                if !b.is_ascii() {
+                    continue;
+                }
+                None
+            },
+        };
+        dest.write_str(&value[chunk_start..i])?;
+        if let Some(escaped) = escaped {
+            dest.write_str(escaped)?;
+        } else if (b >= b'\x01' && b <= b'\x1F') || b == b'\x7F' {
+            // Control characters get hex escapes; everything else a char escape.
+            hex_escape(b, dest)?;
+        } else {
+            char_escape(b, dest)?;
+        }
+        chunk_start = i + 1;
+    }
+    dest.write_str(&value[chunk_start..])
+}
+
+/// Escape the contents of an unquoted `url(...)` token: whitespace and
+/// controls get hex escapes; URL-delimiting punctuation gets char escapes.
+fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    // Same chunked strategy as `serialize_name`.
+    let mut chunk_start = 0;
+    for (i, b) in value.bytes().enumerate() {
+        let hex = match_byte! { b,
+            b'\0'..=b' ' | b'\x7F' => true,
+            b'(' | b')' | b'"' | b'\'' | b'\\' => false,
+            _ => continue,
+        };
+        dest.write_str(&value[chunk_start..i])?;
+        if hex {
+            hex_escape(b, dest)?;
+        } else {
+            char_escape(b, dest)?;
+        }
+        chunk_start = i + 1;
+    }
+    dest.write_str(&value[chunk_start..])
+}
+
+/// Write a double-quoted CSS string token, escaping content as necessary.
+pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    dest.write_str("\"")?;
+    // Escaping of the contents is delegated to `CssStringWriter`.
+    CssStringWriter::new(dest).write_str(value)?;
+    dest.write_str("\"")?;
+    Ok(())
+}
+
+/// A `fmt::Write` adapter that escapes text for writing as a double-quoted CSS string.
+/// Quotes are not included.
+///
+/// Typical usage:
+///
+/// ```{rust,ignore}
+/// fn write_foo<W>(foo: &Foo, dest: &mut W) -> fmt::Result where W: fmt::Write {
+/// dest.write_str("\"")?;
+/// {
+/// let mut string_dest = CssStringWriter::new(dest);
+/// // Write into string_dest...
+/// }
+/// dest.write_str("\"")?;
+/// Ok(())
+/// }
+/// ```
+pub struct CssStringWriter<'a, W> {
+    // The wrapped writer; escaping happens in this type's `fmt::Write` impl.
+    inner: &'a mut W,
+}
+
+impl<'a, W> CssStringWriter<'a, W>
+where
+    W: fmt::Write,
+{
+    /// Wrap a text writer to create a `CssStringWriter`.
+    pub fn new(inner: &'a mut W) -> CssStringWriter<'a, W> {
+        CssStringWriter { inner }
+    }
+}
+
+impl<'a, W> fmt::Write for CssStringWriter<'a, W>
+where
+    W: fmt::Write,
+{
+    /// Forward `s` to the inner writer, escaping quotes, backslashes, NUL,
+    /// and control characters as needed inside a double-quoted CSS string.
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        // Emit unescaped runs in chunks, same strategy as `serialize_name`.
+        let mut chunk_start = 0;
+        for (i, b) in s.bytes().enumerate() {
+            let escaped = match_byte! { b,
+                b'"' => Some("\\\""),
+                b'\\' => Some("\\\\"),
+                b'\0' => Some("\u{FFFD}"),
+                // `None` means "hex-escape this byte" below.
+                b'\x01'..=b'\x1F' | b'\x7F' => None,
+                _ => continue,
+            };
+            self.inner.write_str(&s[chunk_start..i])?;
+            match escaped {
+                Some(x) => self.inner.write_str(x)?,
+                None => hex_escape(b, self.inner)?,
+            };
+            chunk_start = i + 1;
+        }
+        self.inner.write_str(&s[chunk_start..])
+    }
+}
+
+// Implement `ToCss` for an integer type via `itoa`, avoiding the generic
+// `fmt::Display` machinery.
+macro_rules! impl_tocss_for_int {
+    ($T: ty) => {
+        impl<'a> ToCss for $T {
+            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+            where
+                W: fmt::Write,
+            {
+                let mut buf = itoa::Buffer::new();
+                dest.write_str(buf.format(*self))
+            }
+        }
+    };
+}
+
+impl_tocss_for_int!(i8);
+impl_tocss_for_int!(u8);
+impl_tocss_for_int!(i16);
+impl_tocss_for_int!(u16);
+impl_tocss_for_int!(i32);
+impl_tocss_for_int!(u32);
+impl_tocss_for_int!(i64);
+impl_tocss_for_int!(u64);
+
+// Implement `ToCss` for a float type via `dtoa_short`; the `Notation` value
+// it returns is not needed here, hence the `map(|_| ())`.
+macro_rules! impl_tocss_for_float {
+    ($T: ty) => {
+        impl<'a> ToCss for $T {
+            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+            where
+                W: fmt::Write,
+            {
+                dtoa_short::write(dest, *self).map(|_| ())
+            }
+        }
+    };
+}
+
+impl_tocss_for_float!(f32);
+impl_tocss_for_float!(f64);
+
+/// A category of token. See the `needs_separator_when_before` method.
+///
+/// Used to decide whether an empty comment `/**/` must be inserted between
+/// two tokens serialized next to each other without whitespace.
+#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
+pub enum TokenSerializationType {
+    /// No token serialization type.
+    #[default]
+    Nothing,
+
+    /// The [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram)
+    /// type.
+    WhiteSpace,
+
+    /// The [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram)
+    /// type, the "[`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with
+    /// the type flag set to 'unrestricted'" type, or the
+    /// "[`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type
+    /// flag set to 'id'" type.
+    AtKeywordOrHash,
+
+    /// The [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram) type.
+    Number,
+
+    /// The [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram)
+    /// type.
+    Dimension,
+
+    /// The [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram)
+    /// type.
+    Percentage,
+
+    /// The [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or
+    /// `<bad-url-token>` type.
+    UrlOrBadUrl,
+
+    /// The [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram) type.
+    Function,
+
+    /// The [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram) type.
+    Ident,
+
+    /// The `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram) type.
+    CDC,
+
+    /// The `|=`
+    /// [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram) type.
+    DashMatch,
+
+    /// The `*=`
+    /// [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram)
+    /// type.
+    SubstringMatch,
+
+    /// The `<(-token>` type.
+    OpenParen,
+
+    /// The `#` `<delim-token>` type.
+    DelimHash,
+
+    /// The `@` `<delim-token>` type.
+    DelimAt,
+
+    /// The `.` or `+` `<delim-token>` type.
+    DelimDotOrPlus,
+
+    /// The `-` `<delim-token>` type.
+    DelimMinus,
+
+    /// The `?` `<delim-token>` type.
+    DelimQuestion,
+
+    /// The `$`, `^`, or `~` `<delim-token>` type.
+    DelimAssorted,
+
+    /// The `=` `<delim-token>` type.
+    DelimEquals,
+
+    /// The `|` `<delim-token>` type.
+    DelimBar,
+
+    /// The `/` `<delim-token>` type.
+    DelimSlash,
+
+    /// The `*` `<delim-token>` type.
+    DelimAsterisk,
+
+    /// The `%` `<delim-token>` type.
+    DelimPercent,
+
+    /// A type indicating any other token.
+    Other,
+}
+
+impl TokenSerializationType {
+    /// Return a value that represents the absence of a token, e.g. before the start of the input.
+    #[deprecated(
+        since = "0.32.1",
+        note = "use TokenSerializationType::Nothing or TokenSerializationType::default() instead"
+    )]
+    pub fn nothing() -> TokenSerializationType {
+        Default::default()
+    }
+
+    /// If this value is `TokenSerializationType::Nothing`, set it to the given value instead.
+    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
+        if matches!(self, TokenSerializationType::Nothing) {
+            *self = new_value
+        }
+    }
+
+    /// Return true if, when a token of category `self` is serialized just before
+    /// a token of category `other` with no whitespace in between,
+    /// an empty comment `/**/` needs to be inserted between them
+    /// so that they are not re-parsed as a single token.
+    ///
+    /// See https://drafts.csswg.org/css-syntax/#serialization
+    ///
+    /// See https://github.com/w3c/csswg-drafts/issues/4088 for the
+    /// `DelimPercent` bits.
+    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
+        use self::TokenSerializationType::*;
+        // Each arm below transcribes one row of the pair table in the
+        // css-syntax serialization section (plus the DelimPercent additions
+        // from the issue linked above). Do not reorder without consulting it.
+        match self {
+            Ident => matches!(
+                other,
+                Ident
+                    | Function
+                    | UrlOrBadUrl
+                    | DelimMinus
+                    | Number
+                    | Percentage
+                    | Dimension
+                    | CDC
+                    | OpenParen
+            ),
+            AtKeywordOrHash | Dimension => matches!(
+                other,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension | CDC
+            ),
+            DelimHash | DelimMinus => matches!(
+                other,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension
+            ),
+            Number => matches!(
+                other,
+                Ident
+                    | Function
+                    | UrlOrBadUrl
+                    | DelimMinus
+                    | Number
+                    | Percentage
+                    | DelimPercent
+                    | Dimension
+            ),
+            DelimAt => matches!(other, Ident | Function | UrlOrBadUrl | DelimMinus),
+            DelimDotOrPlus => matches!(other, Number | Percentage | Dimension),
+            DelimAssorted | DelimAsterisk => matches!(other, DelimEquals),
+            DelimBar => matches!(other, DelimEquals | DelimBar | DashMatch),
+            DelimSlash => matches!(other, DelimAsterisk | SubstringMatch),
+            // The remaining categories never require a separator.
+            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen
+            | DashMatch | SubstringMatch | DelimQuestion | DelimEquals | DelimPercent | Other => {
+                false
+            }
+        }
+    }
+}
+
+impl<'a> Token<'a> {
+    /// Categorize a token into a type that determines when `/**/` needs to be inserted
+    /// between two tokens when serialized next to each other without whitespace in between.
+    ///
+    /// See the `TokenSerializationType::needs_separator_when_before` method.
+    pub fn serialization_type(&self) -> TokenSerializationType {
+        use self::TokenSerializationType::*;
+        match self {
+            Token::Ident(_) => Ident,
+            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
+            Token::UnquotedUrl(_) | Token::BadUrl(_) => UrlOrBadUrl,
+            Token::Delim('#') => DelimHash,
+            Token::Delim('@') => DelimAt,
+            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
+            Token::Delim('-') => DelimMinus,
+            Token::Delim('?') => DelimQuestion,
+            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
+            Token::Delim('%') => DelimPercent,
+            Token::Delim('=') => DelimEquals,
+            Token::Delim('|') => DelimBar,
+            Token::Delim('/') => DelimSlash,
+            Token::Delim('*') => DelimAsterisk,
+            Token::Number { .. } => Number,
+            Token::Percentage { .. } => Percentage,
+            Token::Dimension { .. } => Dimension,
+            Token::WhiteSpace(_) => WhiteSpace,
+            // A serialized comment begins with `/`, so it is categorized like
+            // the `/` delimiter.
+            Token::Comment(_) => DelimSlash,
+            Token::DashMatch => DashMatch,
+            Token::SubstringMatch => SubstringMatch,
+            Token::CDC => CDC,
+            Token::Function(_) => Function,
+            Token::ParenthesisBlock => OpenParen,
+            // Everything else never needs a separator on either side.
+            Token::SquareBracketBlock
+            | Token::CurlyBracketBlock
+            | Token::CloseParenthesis
+            | Token::CloseSquareBracket
+            | Token::CloseCurlyBracket
+            | Token::QuotedString(_)
+            | Token::BadString(_)
+            | Token::Delim(_)
+            | Token::Colon
+            | Token::Semicolon
+            | Token::Comma
+            | Token::CDO
+            | Token::IncludeMatch
+            | Token::PrefixMatch
+            | Token::SuffixMatch => Other,
+        }
+    }
+}
diff --git a/third_party/rust/cssparser/src/size_of_tests.rs b/third_party/rust/cssparser/src/size_of_tests.rs
new file mode 100644
index 0000000000..edd2b439f0
--- /dev/null
+++ b/third_party/rust/cssparser/src/size_of_tests.rs
@@ -0,0 +1,52 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::cow_rc_str::CowRcStr;
+use crate::tokenizer::Token;
+
+// Assert at test time that `size_of::<$t>()` lies within an expected range
+// (or equals an exact value, with the two-argument form). Shrinking below the
+// minimum is reported as good news; growing past the maximum as a regression.
+macro_rules! size_of_test {
+    ($testname: ident, $t: ty, $expected_min_size: expr, $expected_max_size: expr) => {
+        #[test]
+        fn $testname() {
+            let new = ::std::mem::size_of::<$t>();
+            if new < $expected_min_size {
+                panic!(
+                    "Your changes have decreased the stack size of {} from {} to {}. \
+                     Good work! Please update the expected size in {}.",
+                    stringify!($t),
+                    $expected_min_size,
+                    new,
+                    file!()
+                )
+            } else if new > $expected_max_size {
+                panic!(
+                    "Your changes have increased the stack size of {} from {} to {}. \
+                     Please consider choosing a design which avoids this increase. \
+                     If you feel that the increase is necessary, update the size in {}.",
+                    stringify!($t),
+                    $expected_max_size,
+                    new,
+                    file!()
+                )
+            }
+        }
+    };
+    ($testname: ident, $t: ty, $expected_size: expr) => {
+        size_of_test!($testname, $t, $expected_size, $expected_size);
+    };
+}
+
+// Some of these assume 64-bit pointers/usize; min/max ranges accommodate
+// platform differences where they are known to vary.
+size_of_test!(token, Token, 32);
+size_of_test!(std_cow_str, std::borrow::Cow<'static, str>, 24, 32);
+size_of_test!(cow_rc_str, CowRcStr, 16);
+
+size_of_test!(tokenizer, crate::tokenizer::Tokenizer, 72);
+size_of_test!(parser_input, crate::parser::ParserInput, 136);
+size_of_test!(parser, crate::parser::Parser, 16);
+size_of_test!(source_position, crate::SourcePosition, 8);
+size_of_test!(parser_state, crate::ParserState, 24);
+
+size_of_test!(basic_parse_error, crate::BasicParseError, 40, 48);
+size_of_test!(parse_error_lower_bound, crate::ParseError<()>, 40, 48);
diff --git a/third_party/rust/cssparser/src/tests.rs b/third_party/rust/cssparser/src/tests.rs
new file mode 100644
index 0000000000..f9dea19325
--- /dev/null
+++ b/third_party/rust/cssparser/src/tests.rs
@@ -0,0 +1,1362 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#[cfg(feature = "bench")]
+extern crate test;
+
+use encoding_rs;
+use serde_json::{self, json, Map, Value};
+
+#[cfg(feature = "bench")]
+use self::test::Bencher;
+
+use super::{
+ parse_important, parse_nth, parse_one_declaration, parse_one_rule, stylesheet_encoding,
+ AtRuleParser, BasicParseError, BasicParseErrorKind, CowRcStr, DeclarationParser, Delimiter,
+ EncodingSupport, ParseError, ParseErrorKind, Parser, ParserInput, ParserState,
+ QualifiedRuleParser, RuleBodyItemParser, RuleBodyParser, SourceLocation, StyleSheetParser,
+ ToCss, Token, TokenSerializationType, UnicodeRange,
+};
+
+// Build a `serde_json::Value::Array` from a comma-separated list of
+// expressions, converting each with the local `ToJson` trait. The first rule
+// strips an optional trailing comma before delegating to the second.
+macro_rules! JArray {
+    ($($e: expr,)*) => { JArray![ $( $e ),* ] };
+    ($($e: expr),*) => { Value::Array(vec!( $( $e.to_json() ),* )) }
+}
+
+/// Structural comparison of two JSON values, with numbers compared using a
+/// relative tolerance of 1e-6 so float round-tripping through JSON does not
+/// cause spurious mismatches. Object comparison is intentionally unimplemented.
+fn almost_equals(a: &Value, b: &Value) -> bool {
+    match (a, b) {
+        (Value::Number(x), Value::Number(y)) => {
+            let x = x.as_f64().unwrap();
+            let y = y.as_f64().unwrap();
+            (x - y).abs() <= x.abs() * 1e-6
+        }
+        (Value::Bool(x), Value::Bool(y)) => x == y,
+        (Value::String(x), Value::String(y)) => x == y,
+        // Arrays match when lengths agree and all corresponding elements match.
+        (Value::Array(x), Value::Array(y)) => {
+            x.len() == y.len() && x.iter().zip(y.iter()).all(|(x, y)| almost_equals(x, y))
+        }
+        (Value::Object(_), Value::Object(_)) => panic!("Not implemented"),
+        (Value::Null, Value::Null) => true,
+        _ => false,
+    }
+}
+
+/// Normalize expected-result JSON in place: the upstream css-parsing-tests
+/// distinguish several error kinds ("extra-input", "empty") that this crate
+/// reports uniformly as "invalid", so collapse them before comparing.
+fn normalize(json: &mut Value) {
+    match json {
+        // Recurse into arrays.
+        Value::Array(list) => list.iter_mut().for_each(normalize),
+        Value::String(s) if matches!(s.as_str(), "extra-input" | "empty") => {
+            *s = "invalid".to_string()
+        }
+        _ => {}
+    }
+}
+
+// Compare parser output against the (normalized) expected JSON; on mismatch,
+// print a pretty-printed diff of the two values before panicking with `message`.
+fn assert_json_eq(results: Value, mut expected: Value, message: &str) {
+    normalize(&mut expected);
+    if !almost_equals(&results, &expected) {
+        println!(
+            "{}",
+            ::difference::Changeset::new(
+                &serde_json::to_string_pretty(&results).unwrap(),
+                &serde_json::to_string_pretty(&expected).unwrap(),
+                "\n",
+            )
+        );
+        panic!("{}", message)
+    }
+}
+
+/// Drive a css-parsing-tests JSON file: the top-level value must be an array
+/// with an even number of items, alternating (input, expected-result) pairs;
+/// `run` is invoked once per pair.
+fn run_raw_json_tests<F: Fn(Value, Value) -> ()>(json_data: &str, run: F) {
+    let items = match serde_json::from_str(json_data) {
+        Ok(Value::Array(items)) => items,
+        other => panic!("Invalid JSON: {:?}", other),
+    };
+    assert!(items.len() % 2 == 0);
+    let mut iter = items.into_iter();
+    while let Some(input) = iter.next() {
+        // The even-length assertion above guarantees the partner item exists.
+        let expected = iter.next().unwrap();
+        run(input, expected);
+    }
+}
+
+// Like `run_raw_json_tests`, but each input must be a JSON string of CSS
+// source; it is fed through a fresh `Parser` and the result compared with
+// `assert_json_eq`.
+fn run_json_tests<F: Fn(&mut Parser) -> Value>(json_data: &str, parse: F) {
+    run_raw_json_tests(json_data, |input, expected| match input {
+        Value::String(input) => {
+            let mut parse_input = ParserInput::new(&input);
+            let result = parse(&mut Parser::new(&mut parse_input));
+            assert_json_eq(result, expected, &input);
+        }
+        _ => panic!("Unexpected JSON"),
+    });
+}
+
+// Tokenize a full input into a flat list of component values.
+#[test]
+fn component_value_list() {
+    run_json_tests(
+        include_str!("css-parsing-tests/component_value_list.json"),
+        |input| Value::Array(component_values_to_json(input)),
+    );
+}
+
+// Parse exactly one component value; trailing input is an error.
+#[test]
+fn one_component_value() {
+    run_json_tests(
+        include_str!("css-parsing-tests/one_component_value.json"),
+        |input| {
+            let result: Result<Value, ParseError<()>> = input.parse_entirely(|input| {
+                Ok(one_component_value_to_json(input.next()?.clone(), input))
+            });
+            result.unwrap_or(JArray!["error", "invalid"])
+        },
+    );
+}
+
+// Parse a declaration list (rule body); per-item errors become ["error", "invalid"].
+#[test]
+fn declaration_list() {
+    run_json_tests(
+        include_str!("css-parsing-tests/declaration_list.json"),
+        |input| {
+            Value::Array(
+                RuleBodyParser::new(input, &mut JsonParser)
+                    .map(|result| result.unwrap_or(JArray!["error", "invalid"]))
+                    .collect(),
+            )
+        },
+    );
+}
+
+// Parse a single declaration.
+#[test]
+fn one_declaration() {
+    run_json_tests(
+        include_str!("css-parsing-tests/one_declaration.json"),
+        |input| {
+            parse_one_declaration(input, &mut JsonParser).unwrap_or(JArray!["error", "invalid"])
+        },
+    );
+}
+
+// Parse a rule list using the rule-body parser.
+#[test]
+fn rule_list() {
+    run_json_tests(include_str!("css-parsing-tests/rule_list.json"), |input| {
+        Value::Array(
+            RuleBodyParser::new(input, &mut JsonParser)
+                .map(|result| result.unwrap_or(JArray!["error", "invalid"]))
+                .collect(),
+        )
+    });
+}
+
+// Parse a full stylesheet (top-level rule parsing differs from rule bodies).
+#[test]
+fn stylesheet() {
+    run_json_tests(include_str!("css-parsing-tests/stylesheet.json"), |input| {
+        Value::Array(
+            StyleSheetParser::new(input, &mut JsonParser)
+                .map(|result| result.unwrap_or(JArray!["error", "invalid"]))
+                .collect(),
+        )
+    });
+}
+
+// Parse a single rule.
+#[test]
+fn one_rule() {
+    run_json_tests(include_str!("css-parsing-tests/one_rule.json"), |input| {
+        parse_one_rule(input, &mut JsonParser).unwrap_or(JArray!["error", "invalid"])
+    });
+}
+
+// Exercise `stylesheet_encoding` end-to-end: decode raw bytes using the
+// detected encoding (via an `encoding_rs`-backed `EncodingSupport` impl),
+// parse the result, and compare both rules and the encoding name used.
+#[test]
+fn stylesheet_from_bytes() {
+    pub struct EncodingRs;
+
+    impl EncodingSupport for EncodingRs {
+        type Encoding = &'static encoding_rs::Encoding;
+
+        fn utf8() -> Self::Encoding {
+            encoding_rs::UTF_8
+        }
+
+        fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool {
+            *encoding == encoding_rs::UTF_16LE || *encoding == encoding_rs::UTF_16BE
+        }
+
+        fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding> {
+            encoding_rs::Encoding::for_label(ascii_label)
+        }
+    }
+
+    run_raw_json_tests(
+        include_str!("css-parsing-tests/stylesheet_bytes.json"),
+        |input, expected| {
+            let map = match input {
+                Value::Object(map) => map,
+                _ => panic!("Unexpected JSON"),
+            };
+
+            let result = {
+                // The test fixture stores bytes as a string of code points
+                // U+0000..=U+00FF; convert each char back to its byte value.
+                let css = get_string(&map, "css_bytes")
+                    .unwrap()
+                    .chars()
+                    .map(|c| {
+                        assert!(c as u32 <= 0xFF);
+                        c as u8
+                    })
+                    .collect::<Vec<u8>>();
+                let protocol_encoding_label =
+                    get_string(&map, "protocol_encoding").map(|s| s.as_bytes());
+                let environment_encoding = get_string(&map, "environment_encoding")
+                    .map(|s| s.as_bytes())
+                    .and_then(EncodingRs::from_label);
+
+                let encoding = stylesheet_encoding::<EncodingRs>(
+                    &css,
+                    protocol_encoding_label,
+                    environment_encoding,
+                );
+                let (css_unicode, used_encoding, _) = encoding.decode(&css);
+                let mut input = ParserInput::new(&css_unicode);
+                let input = &mut Parser::new(&mut input);
+                let rules = StyleSheetParser::new(input, &mut JsonParser)
+                    .map(|result| result.unwrap_or(JArray!["error", "invalid"]))
+                    .collect::<Vec<_>>();
+                JArray![rules, used_encoding.name().to_lowercase()]
+            };
+            assert_json_eq(result, expected, &Value::Object(map).to_string());
+        },
+    );
+
+    // Fetch an optional string field; JSON null and a missing key both map to None.
+    fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> Option<&'a str> {
+        match map.get(key) {
+            Some(&Value::String(ref s)) => Some(s),
+            Some(&Value::Null) => None,
+            None => None,
+            _ => panic!("Unexpected JSON"),
+        }
+    }
+}
+
+// `expect_no_error_token` must accept well-formed input (including unclosed
+// blocks) and reject error tokens: stray closers, bad strings, and bad URLs.
+#[test]
+fn expect_no_error_token() {
+    let mut input = ParserInput::new("foo 4px ( / { !bar }");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_ok());
+    // Unmatched `)` at the top level is an error token.
+    let mut input = ParserInput::new(")");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    let mut input = ParserInput::new("}");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    // Error tokens nested inside blocks must also be detected.
+    let mut input = ParserInput::new("(a){]");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    // Bad string (unescaped newline) and various bad-url forms.
+    let mut input = ParserInput::new("'\n'");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    let mut input = ParserInput::new("url('\n'");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    let mut input = ParserInput::new("url(a b)");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+    let mut input = ParserInput::new("url(\u{7F}))");
+    assert!(Parser::new(&mut input).expect_no_error_token().is_err());
+}
+
+/// https://github.com/servo/rust-cssparser/issues/71
+// After parsing a nested block, the closing `)` of the outer block must be
+// consumed so the outer parser sees end-of-input.
+#[test]
+fn outer_block_end_consumed() {
+    let mut input = ParserInput::new("(calc(true))");
+    let mut input = Parser::new(&mut input);
+    assert!(input.expect_parenthesis_block().is_ok());
+    assert!(input
+        .parse_nested_block(|input| input
+            .expect_function_matching("calc")
+            .map_err(Into::<ParseError<()>>::into))
+        .is_ok());
+    println!("{:?}", input.position());
+    assert!(input.next().is_err());
+}
+
+/// https://github.com/servo/rust-cssparser/issues/174
+#[test]
+fn bad_url_slice_out_of_bounds() {
+    let mut input = ParserInput::new("url(\u{1}\\");
+    let mut parser = Parser::new(&mut input);
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("\u{1}\\".into())));
+}
+
+/// https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
+// The bad-url slice must not be cut inside a multi-byte UTF-8 sequence.
+#[test]
+fn bad_url_slice_not_at_char_boundary() {
+    let mut input = ParserInput::new("url(9\n۰");
+    let mut parser = Parser::new(&mut input);
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("9\n۰".into())));
+}
+
+// Serializing an unquoted url() must escape every byte that would otherwise
+// terminate or corrupt the token, and the result must re-tokenize to the
+// identical token (round-trip check at the end).
+#[test]
+fn unquoted_url_escaping() {
+    let token = Token::UnquotedUrl(
+        "\
+         \x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
+         \x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
+         !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
+         ^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
+         "
+        .into(),
+    );
+    let serialized = token.to_css_string();
+    assert_eq!(
+        serialized,
+        "\
+         url(\
+         \\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\a \\b \\c \\d \\e \\f \\10 \
+         \\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1a \\1b \\1c \\1d \\1e \\1f \\20 \
+         !\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
+         ^_`abcdefghijklmnopqrstuvwxyz{|}~\\7f é\
+         )\
+         "
+    );
+    let mut input = ParserInput::new(&serialized);
+    assert_eq!(Parser::new(&mut input).next(), Ok(&token));
+}
+
+// `expect_url` accepts both unquoted and quoted url() forms, trims
+// whitespace, and rejects extra tokens before the closing paren.
+#[test]
+fn test_expect_url() {
+    fn parse<'a>(s: &mut ParserInput<'a>) -> Result<CowRcStr<'a>, BasicParseError<'a>> {
+        Parser::new(s).expect_url()
+    }
+    let mut input = ParserInput::new("url()");
+    assert_eq!(parse(&mut input).unwrap(), "");
+    let mut input = ParserInput::new("url( ");
+    assert_eq!(parse(&mut input).unwrap(), "");
+    let mut input = ParserInput::new("url( abc");
+    assert_eq!(parse(&mut input).unwrap(), "abc");
+    let mut input = ParserInput::new("url( abc \t)");
+    assert_eq!(parse(&mut input).unwrap(), "abc");
+    let mut input = ParserInput::new("url( 'abc' \t)");
+    assert_eq!(parse(&mut input).unwrap(), "abc");
+    let mut input = ParserInput::new("url(abc more stuff)");
+    assert!(parse(&mut input).is_err());
+    // The grammar at https://drafts.csswg.org/css-values/#urls plans for `<url-modifier>*`
+    // at the position of "more stuff", but no such modifier is defined yet.
+    let mut input = ParserInput::new("url('abc' more stuff)");
+    assert!(parse(&mut input).is_err());
+}
+
+// An+B microsyntax: each test expects either [a, b] coefficients or null.
+#[test]
+fn nth() {
+    run_json_tests(include_str!("css-parsing-tests/An+B.json"), |input| {
+        input
+            .parse_entirely(|i| {
+                let result: Result<_, ParseError<()>> = parse_nth(i).map_err(Into::into);
+                result
+            })
+            .ok()
+            .map(|(v0, v1)| json!([v0, v1]))
+            .unwrap_or(Value::Null)
+    });
+}
+
+// Invalid list items ("green something", "whatever") are skipped; only the
+// three valid named colors survive.
+#[test]
+fn parse_comma_separated_ignoring_errors() {
+    let input = "red, green something, yellow, whatever, blue";
+    let mut input = ParserInput::new(input);
+    let mut input = Parser::new(&mut input);
+    let result = input.parse_comma_separated_ignoring_errors(|input| {
+        let loc = input.current_source_location();
+        let ident = input.expect_ident()?;
+        crate::color::parse_named_color(ident).map_err(|()| {
+            loc.new_unexpected_token_error::<ParseError<()>>(Token::Ident(ident.clone()))
+        })
+    });
+    assert_eq!(result.len(), 3);
+    assert_eq!(result[0], (255, 0, 0));
+    assert_eq!(result[1], (255, 255, 0));
+    assert_eq!(result[2], (0, 0, 255));
+}
+
+// unicode-range (urange) parsing; a range with trailing junk maps to null
+// after the remaining tokens of that list item are drained.
+#[test]
+fn unicode_range() {
+    run_json_tests(include_str!("css-parsing-tests/urange.json"), |input| {
+        let result: Result<_, ParseError<()>> = input.parse_comma_separated(|input| {
+            let result = UnicodeRange::parse(input).ok().map(|r| (r.start, r.end));
+            if input.is_exhausted() {
+                Ok(result)
+            } else {
+                while let Ok(_) = input.next() {}
+                Ok(None)
+            }
+        });
+        result
+            .unwrap()
+            .iter()
+            .map(|v| {
+                if let Some((v0, v1)) = v {
+                    json!([v0, v1])
+                } else {
+                    Value::Null
+                }
+            })
+            .collect::<Vec<_>>()
+            .to_json()
+    });
+}
+
+#[test]
+fn serializer_not_preserving_comments() {
+    serializer(false)
+}
+
+#[test]
+fn serializer_preserving_comments() {
+    serializer(true)
+}
+
+// Round-trip check: tokenize each test input, re-serialize it (inserting
+// `/**/` where `needs_separator_when_before` requires), then re-parse the
+// serialization and compare against the expected component-value JSON.
+fn serializer(preserve_comments: bool) {
+    run_json_tests(
+        include_str!("css-parsing-tests/component_value_list.json"),
+        |input| {
+            // Recursively serialize the token stream into `string`, descending
+            // into nested blocks and re-emitting their closing tokens.
+            fn write_to(
+                mut previous_token: TokenSerializationType,
+                input: &mut Parser,
+                string: &mut String,
+                preserve_comments: bool,
+            ) {
+                while let Ok(token) = if preserve_comments {
+                    input
+                        .next_including_whitespace_and_comments()
+                        .map(|t| t.clone())
+                } else {
+                    input.next_including_whitespace().map(|t| t.clone())
+                } {
+                    let token_type = token.serialization_type();
+                    // When comments are stripped, an explicit `/**/` may be
+                    // needed so adjacent tokens do not merge on re-parse.
+                    if !preserve_comments && previous_token.needs_separator_when_before(token_type)
+                    {
+                        string.push_str("/**/")
+                    }
+                    previous_token = token_type;
+                    token.to_css(string).unwrap();
+                    let closing_token = match token {
+                        Token::Function(_) | Token::ParenthesisBlock => {
+                            Some(Token::CloseParenthesis)
+                        }
+                        Token::SquareBracketBlock => Some(Token::CloseSquareBracket),
+                        Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket),
+                        _ => None,
+                    };
+                    if let Some(closing_token) = closing_token {
+                        let result: Result<_, ParseError<()>> = input.parse_nested_block(|input| {
+                            write_to(previous_token, input, string, preserve_comments);
+                            Ok(())
+                        });
+                        result.unwrap();
+                        closing_token.to_css(string).unwrap();
+                    }
+                }
+            }
+            let mut serialized = String::new();
+            write_to(
+                TokenSerializationType::Nothing,
+                input,
+                &mut serialized,
+                preserve_comments,
+            );
+            let mut input = ParserInput::new(&serialized);
+            let parser = &mut Parser::new(&mut input);
+            Value::Array(component_values_to_json(parser))
+        },
+    );
+}
+
+// BadUrl and BadString tokens must still serialize to something sensible, and
+// tokenizing must recover afterwards (the trailing `4` is parsed normally).
+#[test]
+fn serialize_bad_tokens() {
+    let mut input = ParserInput::new("url(foo\\) b\\)ar)'ba\\'\"z\n4");
+    let mut parser = Parser::new(&mut input);
+
+    let token = parser.next().unwrap().clone();
+    assert!(matches!(token, Token::BadUrl(_)));
+    assert_eq!(token.to_css_string(), "url(foo\\) b\\)ar)");
+
+    let token = parser.next().unwrap().clone();
+    assert!(matches!(token, Token::BadString(_)));
+    assert_eq!(token.to_css_string(), "\"ba'\\\"z");
+
+    let token = parser.next().unwrap().clone();
+    assert!(matches!(token, Token::Number { .. }));
+    assert_eq!(token.to_css_string(), "4");
+
+    assert!(parser.next().is_err());
+}
+
+// Source-location tracking across escapes, CRLF/CR/LF newlines, comments,
+// url() tokens, and escaped newlines inside strings. Also checks that a saved
+// `ParserState` remembers its location after further parsing.
+#[test]
+fn line_numbers() {
+    let mut input = ParserInput::new(concat!(
+        "fo\\30\r\n",
+        "0o bar/*\n",
+        "*/baz\r\n",
+        "\n",
+        "url(\r\n",
+        "  u \r\n",
+        ")\"a\\\r\n",
+        "b\""
+    ));
+    let mut input = Parser::new(&mut input);
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 0, column: 1 }
+    );
+    // `fo\30\r\n0o` is one ident: the escape consumes the newline.
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::Ident("fo00o".into()))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 1, column: 3 }
+    );
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::WhiteSpace(" "))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 1, column: 4 }
+    );
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::Ident("bar".into()))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 1, column: 7 }
+    );
+    assert_eq!(
+        input.next_including_whitespace_and_comments(),
+        Ok(&Token::Comment("\n"))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 2, column: 3 }
+    );
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::Ident("baz".into()))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 2, column: 6 }
+    );
+    // Save the state here; its recorded location must survive further parsing.
+    let state = input.state();
+
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::WhiteSpace("\r\n\n"))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 4, column: 1 }
+    );
+
+    assert_eq!(
+        state.source_location(),
+        SourceLocation { line: 2, column: 6 }
+    );
+
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::UnquotedUrl("u".into()))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 6, column: 2 }
+    );
+
+    // Escaped CRLF inside a quoted string is removed from the value.
+    assert_eq!(
+        input.next_including_whitespace(),
+        Ok(&Token::QuotedString("ab".into()))
+    );
+    assert_eq!(
+        input.current_source_location(),
+        SourceLocation { line: 7, column: 3 }
+    );
+    assert!(input.next_including_whitespace().is_err());
+}
+
+// Integers clamp to i32::MIN/i32::MAX on overflow; floats saturate to
+// f32::MAX/f32::MIN and then to +/- infinity as magnitudes grow.
+#[test]
+fn overflow() {
+    use std::iter::repeat;
+
+    let css = r"
+         2147483646
+         2147483647
+         2147483648
+         10000000000000
+         1000000000000000000000000000000000000000
+         1{309 zeros}
+
+         -2147483647
+         -2147483648
+         -2147483649
+         -10000000000000
+         -1000000000000000000000000000000000000000
+         -1{309 zeros}
+
+         3.30282347e+38
+         3.40282347e+38
+         3.402824e+38
+
+         -3.30282347e+38
+         -3.40282347e+38
+         -3.402824e+38
+
+    "
+    .replace("{309 zeros}", &repeat('0').take(309).collect::<String>());
+    let mut input = ParserInput::new(&css);
+    let mut input = Parser::new(&mut input);
+
+    assert_eq!(input.expect_integer(), Ok(2147483646));
+    assert_eq!(input.expect_integer(), Ok(2147483647));
+    assert_eq!(input.expect_integer(), Ok(2147483647)); // Clamp on overflow
+    assert_eq!(input.expect_integer(), Ok(2147483647));
+    assert_eq!(input.expect_integer(), Ok(2147483647));
+    assert_eq!(input.expect_integer(), Ok(2147483647));
+
+    assert_eq!(input.expect_integer(), Ok(-2147483647));
+    assert_eq!(input.expect_integer(), Ok(-2147483648));
+    assert_eq!(input.expect_integer(), Ok(-2147483648)); // Clamp on overflow
+    assert_eq!(input.expect_integer(), Ok(-2147483648));
+    assert_eq!(input.expect_integer(), Ok(-2147483648));
+    assert_eq!(input.expect_integer(), Ok(-2147483648));
+
+    assert_eq!(input.expect_number(), Ok(3.30282347e+38));
+    assert_eq!(input.expect_number(), Ok(f32::MAX));
+    assert_eq!(input.expect_number(), Ok(f32::INFINITY));
+    assert!(f32::MAX != f32::INFINITY);
+
+    assert_eq!(input.expect_number(), Ok(-3.30282347e+38));
+    assert_eq!(input.expect_number(), Ok(f32::MIN));
+    assert_eq!(input.expect_number(), Ok(f32::NEG_INFINITY));
+    assert!(f32::MIN != f32::NEG_INFINITY);
+}
+
+// `parse_until_after(Semicolon, ...)` must stop at (and consume through) the
+// delimiter even when the closure fails; parsing resumes at the comma.
+#[test]
+fn line_delimited() {
+    let mut input = ParserInput::new(" { foo ; bar } baz;,");
+    let mut input = Parser::new(&mut input);
+    assert_eq!(input.next(), Ok(&Token::CurlyBracketBlock));
+    assert!({
+        let result: Result<_, ParseError<()>> =
+            input.parse_until_after(Delimiter::Semicolon, |_| Ok(42));
+        result
+    }
+    .is_err());
+    assert_eq!(input.next(), Ok(&Token::Comma));
+    assert!(input.next().is_err());
+}
+
+// Ident serialization escaping rules (mirrors the CSSOM `serialize an
+// identifier` algorithm test vectors): NUL replacement, leading-digit hex
+// escapes, control-character escapes, and pass-through of non-ASCII.
+#[test]
+fn identifier_serialization() {
+    // Null bytes
+    assert_eq!(Token::Ident("\0".into()).to_css_string(), "\u{FFFD}");
+    assert_eq!(Token::Ident("a\0".into()).to_css_string(), "a\u{FFFD}");
+    assert_eq!(Token::Ident("\0b".into()).to_css_string(), "\u{FFFD}b");
+    assert_eq!(Token::Ident("a\0b".into()).to_css_string(), "a\u{FFFD}b");
+
+    // Replacement character
+    assert_eq!(Token::Ident("\u{FFFD}".into()).to_css_string(), "\u{FFFD}");
+    assert_eq!(
+        Token::Ident("a\u{FFFD}".into()).to_css_string(),
+        "a\u{FFFD}"
+    );
+    assert_eq!(
+        Token::Ident("\u{FFFD}b".into()).to_css_string(),
+        "\u{FFFD}b"
+    );
+    assert_eq!(
+        Token::Ident("a\u{FFFD}b".into()).to_css_string(),
+        "a\u{FFFD}b"
+    );
+
+    // Number prefix
+    assert_eq!(Token::Ident("0a".into()).to_css_string(), "\\30 a");
+    assert_eq!(Token::Ident("1a".into()).to_css_string(), "\\31 a");
+    assert_eq!(Token::Ident("2a".into()).to_css_string(), "\\32 a");
+    assert_eq!(Token::Ident("3a".into()).to_css_string(), "\\33 a");
+    assert_eq!(Token::Ident("4a".into()).to_css_string(), "\\34 a");
+    assert_eq!(Token::Ident("5a".into()).to_css_string(), "\\35 a");
+    assert_eq!(Token::Ident("6a".into()).to_css_string(), "\\36 a");
+    assert_eq!(Token::Ident("7a".into()).to_css_string(), "\\37 a");
+    assert_eq!(Token::Ident("8a".into()).to_css_string(), "\\38 a");
+    assert_eq!(Token::Ident("9a".into()).to_css_string(), "\\39 a");
+
+    // Letter number prefix
+    assert_eq!(Token::Ident("a0b".into()).to_css_string(), "a0b");
+    assert_eq!(Token::Ident("a1b".into()).to_css_string(), "a1b");
+    assert_eq!(Token::Ident("a2b".into()).to_css_string(), "a2b");
+    assert_eq!(Token::Ident("a3b".into()).to_css_string(), "a3b");
+    assert_eq!(Token::Ident("a4b".into()).to_css_string(), "a4b");
+    assert_eq!(Token::Ident("a5b".into()).to_css_string(), "a5b");
+    assert_eq!(Token::Ident("a6b".into()).to_css_string(), "a6b");
+    assert_eq!(Token::Ident("a7b".into()).to_css_string(), "a7b");
+    assert_eq!(Token::Ident("a8b".into()).to_css_string(), "a8b");
+    assert_eq!(Token::Ident("a9b".into()).to_css_string(), "a9b");
+
+    // Dash number prefix
+    assert_eq!(Token::Ident("-0a".into()).to_css_string(), "-\\30 a");
+    assert_eq!(Token::Ident("-1a".into()).to_css_string(), "-\\31 a");
+    assert_eq!(Token::Ident("-2a".into()).to_css_string(), "-\\32 a");
+    assert_eq!(Token::Ident("-3a".into()).to_css_string(), "-\\33 a");
+    assert_eq!(Token::Ident("-4a".into()).to_css_string(), "-\\34 a");
+    assert_eq!(Token::Ident("-5a".into()).to_css_string(), "-\\35 a");
+    assert_eq!(Token::Ident("-6a".into()).to_css_string(), "-\\36 a");
+    assert_eq!(Token::Ident("-7a".into()).to_css_string(), "-\\37 a");
+    assert_eq!(Token::Ident("-8a".into()).to_css_string(), "-\\38 a");
+    assert_eq!(Token::Ident("-9a".into()).to_css_string(), "-\\39 a");
+
+    // Double dash prefix
+    assert_eq!(Token::Ident("--a".into()).to_css_string(), "--a");
+
+    // Various tests
+    assert_eq!(
+        Token::Ident("\x01\x02\x1E\x1F".into()).to_css_string(),
+        "\\1 \\2 \\1e \\1f "
+    );
+    assert_eq!(
+        Token::Ident("\u{0080}\x2D\x5F\u{00A9}".into()).to_css_string(),
+        "\u{0080}\x2D\x5F\u{00A9}"
+    );
+    assert_eq!(Token::Ident("\x7F\u{0080}\u{0081}\u{0082}\u{0083}\u{0084}\u{0085}\u{0086}\u{0087}\u{0088}\u{0089}\
+        \u{008A}\u{008B}\u{008C}\u{008D}\u{008E}\u{008F}\u{0090}\u{0091}\u{0092}\u{0093}\u{0094}\u{0095}\u{0096}\
+        \u{0097}\u{0098}\u{0099}\u{009A}\u{009B}\u{009C}\u{009D}\u{009E}\u{009F}".into()).to_css_string(),
+        "\\7f \u{0080}\u{0081}\u{0082}\u{0083}\u{0084}\u{0085}\u{0086}\u{0087}\u{0088}\u{0089}\u{008A}\u{008B}\u{008C}\
+        \u{008D}\u{008E}\u{008F}\u{0090}\u{0091}\u{0092}\u{0093}\u{0094}\u{0095}\u{0096}\u{0097}\u{0098}\u{0099}\
+        \u{009A}\u{009B}\u{009C}\u{009D}\u{009E}\u{009F}");
+    assert_eq!(
+        Token::Ident("\u{00A0}\u{00A1}\u{00A2}".into()).to_css_string(),
+        "\u{00A0}\u{00A1}\u{00A2}"
+    );
+    assert_eq!(
+        Token::Ident("a0123456789b".into()).to_css_string(),
+        "a0123456789b"
+    );
+    assert_eq!(
+        Token::Ident("abcdefghijklmnopqrstuvwxyz".into()).to_css_string(),
+        "abcdefghijklmnopqrstuvwxyz"
+    );
+    assert_eq!(
+        Token::Ident("ABCDEFGHIJKLMNOPQRSTUVWXYZ".into()).to_css_string(),
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    );
+    assert_eq!(
+        Token::Ident("\x20\x21\x78\x79".into()).to_css_string(),
+        "\\ \\!xy"
+    );
+
+    // astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
+    assert_eq!(
+        Token::Ident("\u{1D306}".into()).to_css_string(),
+        "\u{1D306}"
+    );
+}
+
+// Local conversion trait used by the `JArray!` macro to turn parser output
+// into `serde_json::Value`s for comparison with the test fixtures.
+trait ToJson {
+    fn to_json(&self) -> Value;
+}
+
+// Blanket impl: anything `Value: From<T>` already knows how to convert.
+impl<T> ToJson for T
+where
+    T: Clone,
+    Value: From<T>,
+{
+    fn to_json(&self) -> Value {
+        Value::from(self.clone())
+    }
+}
+
+// `CowRcStr` converts via its `&str` view (no `From<CowRcStr>` for `Value`).
+impl<'a> ToJson for CowRcStr<'a> {
+    fn to_json(&self) -> Value {
+        let s: &str = &*self;
+        s.to_json()
+    }
+}
+
+#[bench]
+#[cfg(feature = "bench")]
+fn delimiter_from_byte(b: &mut Bencher) {
+ use crate::Delimiters;
+ b.iter(|| {
+ for _ in 0..1000 {
+ for i in 0..256 {
+ std::hint::black_box(Delimiters::from_byte(Some(i as u8)));
+ }
+ }
+ })
+}
+
+#[cfg(feature = "bench")]
+const BACKGROUND_IMAGE: &'static str = include_str!("big-data-url.css");
+
/// Benchmark tokenizing a huge unquoted `url(...)` token with
/// `var()`/`env()` tracking enabled.
#[cfg(feature = "bench")]
#[bench]
fn unquoted_url(b: &mut Bencher) {
    b.iter(|| {
        let mut input = ParserInput::new(BACKGROUND_IMAGE);
        let mut input = Parser::new(&mut input);
        input.look_for_var_or_env_functions();

        let result = input.try_parse(|input| input.expect_url());

        assert!(result.is_ok());

        // `seen_var_or_env_functions()` consumes (resets) the "seen" flag,
        // so it must be called exactly once per iteration. The previous
        // code called it twice, discarding the first result, which made
        // the second call always return `false`.
        (result.is_ok(), input.seen_var_or_env_functions())
    })
}
+
/// Benchmark tokenizing a small dimension token repeatedly.
#[cfg_attr(all(miri, feature = "skip_long_tests"), ignore)]
#[cfg(feature = "bench")]
#[bench]
fn numeric(b: &mut Bencher) {
    b.iter(|| {
        for _ in 0..1000000 {
            let mut input = ParserInput::new("10px");
            let mut input = Parser::new(&mut input);
            // Stable `std::hint::black_box`, consistent with
            // `delimiter_from_byte`, instead of unstable `test::black_box`.
            let _ = std::hint::black_box(input.next());
        }
    })
}
+
/// Unit-struct parser that lowers rules/declarations into the JSON shapes
/// expected by the css-parsing-tests fixtures (see the trait impls below).
struct JsonParser;
+
// Deeply nested `{` blocks must not blow the stack while tokenizing.
#[cfg_attr(all(miri, feature = "skip_long_tests"), ignore)]
#[test]
fn no_stack_overflow_multiple_nested_blocks() {
    // "{{" doubled 20 times == "{{" repeated 2^20 times (2^21 open braces).
    let input = "{{".repeat(1 << 20);
    let mut input = ParserInput::new(&input);
    let mut input = Parser::new(&mut input);
    while input.next().is_ok() {}
}
+
impl<'i> DeclarationParser<'i> for JsonParser {
    type Declaration = Value;
    type Error = ();

    /// Collect every component value of the declaration into JSON, and
    /// detect a trailing `!important`.
    fn parse_value<'t>(
        &mut self,
        name: CowRcStr<'i>,
        input: &mut Parser<'i, 't>,
    ) -> Result<Value, ParseError<'i, ()>> {
        let mut value = vec![];
        let mut important = false;
        loop {
            // Snapshot so we can rewind after probing for `!important`.
            let start = input.state();
            if let Ok(mut token) = input.next_including_whitespace().map(|t| t.clone()) {
                // Hack to deal with css-parsing-tests assuming that
                // `!important` in the middle of a declaration value is OK.
                // This can never happen per spec
                // (even CSS Variables forbid top-level `!`)
                if token == Token::Delim('!') {
                    input.reset(&start);
                    if parse_important(input).is_ok() {
                        if input.is_exhausted() {
                            // `!important` at the very end: record and stop.
                            important = true;
                            break;
                        }
                    }
                    // Not a terminal `!important`: rewind and re-consume the
                    // `!` as an ordinary component value.
                    input.reset(&start);
                    token = input.next_including_whitespace().unwrap().clone();
                }
                value.push(one_component_value_to_json(token, input));
            } else {
                break;
            }
        }
        Ok(JArray!["declaration", name, value, important,])
    }
}
+
impl<'i> AtRuleParser<'i> for JsonParser {
    type Prelude = Vec<Value>;
    type AtRule = Value;
    type Error = ();

    /// Build the `["at-rule", name, prelude-values]` JSON prefix.
    /// `@charset` is rejected here because the fixtures treat it as invalid
    /// when it reaches the generic at-rule path.
    fn parse_prelude<'t>(
        &mut self,
        name: CowRcStr<'i>,
        input: &mut Parser<'i, 't>,
    ) -> Result<Vec<Value>, ParseError<'i, ()>> {
        let prelude = vec![
            "at-rule".to_json(),
            name.to_json(),
            Value::Array(component_values_to_json(input)),
        ];
        match_ignore_ascii_case! { &*name,
            "charset" => {
                Err(input.new_error(BasicParseErrorKind::AtRuleInvalid(name.clone()).into()))
            },
            _ => Ok(prelude),
        }
    }

    /// At-rule terminated by `;`/EOF: the fixtures encode "no block" as null.
    fn rule_without_block(
        &mut self,
        mut prelude: Vec<Value>,
        _: &ParserState,
    ) -> Result<Value, ()> {
        prelude.push(Value::Null);
        Ok(Value::Array(prelude))
    }

    /// At-rule with a `{}` block: append the block's component values.
    fn parse_block<'t>(
        &mut self,
        mut prelude: Vec<Value>,
        _: &ParserState,
        input: &mut Parser<'i, 't>,
    ) -> Result<Value, ParseError<'i, ()>> {
        prelude.push(Value::Array(component_values_to_json(input)));
        Ok(Value::Array(prelude))
    }
}
+
impl<'i> QualifiedRuleParser<'i> for JsonParser {
    type Prelude = Vec<Value>;
    type QualifiedRule = Value;
    type Error = ();

    /// The prelude is kept verbatim as a list of component values.
    fn parse_prelude<'t>(
        &mut self,
        input: &mut Parser<'i, 't>,
    ) -> Result<Vec<Value>, ParseError<'i, ()>> {
        Ok(component_values_to_json(input))
    }

    /// Emit `["qualified rule", prelude, block-contents]`.
    fn parse_block<'t>(
        &mut self,
        prelude: Vec<Value>,
        _: &ParserState,
        input: &mut Parser<'i, 't>,
    ) -> Result<Value, ParseError<'i, ()>> {
        Ok(JArray![
            "qualified rule",
            prelude,
            component_values_to_json(input),
        ])
    }
}
+
// Inside rule bodies, the fixtures accept both nested qualified rules and
// declarations, so both toggles return true.
impl<'i> RuleBodyItemParser<'i, Value, ()> for JsonParser {
    fn parse_qualified(&self) -> bool {
        true
    }
    fn parse_declarations(&self) -> bool {
        true
    }
}
+
+fn component_values_to_json(input: &mut Parser) -> Vec<Value> {
+ let mut values = vec![];
+ while let Ok(token) = input.next_including_whitespace().map(|t| t.clone()) {
+ values.push(one_component_value_to_json(token, input));
+ }
+ values
+}
+
/// Convert one component value to the JSON shape used by css-parsing-tests,
/// recursing into function arguments and bracketed blocks.
fn one_component_value_to_json(token: Token, input: &mut Parser) -> Value {
    // Common JSON tail for numeric tokens:
    // [serialized text, numeric value, "integer" | "number"].
    fn numeric(value: f32, int_value: Option<i32>, has_sign: bool) -> Vec<Value> {
        vec![
            Token::Number {
                value: value,
                int_value: int_value,
                has_sign: has_sign,
            }
            .to_css_string()
            .to_json(),
            match int_value {
                Some(i) => i.to_json(),
                None => value.to_json(),
            },
            match int_value {
                Some(_) => "integer",
                None => "number",
            }
            .to_json(),
        ]
    }

    // Recurse into the block/function that the current token opened.
    fn nested(input: &mut Parser) -> Vec<Value> {
        let result: Result<_, ParseError<()>> =
            input.parse_nested_block(|input| Ok(component_values_to_json(input)));
        result.unwrap()
    }

    match token {
        Token::Ident(value) => JArray!["ident", value],
        Token::AtKeyword(value) => JArray!["at-keyword", value],
        Token::Hash(value) => JArray!["hash", value, "unrestricted"],
        Token::IDHash(value) => JArray!["hash", value, "id"],
        Token::QuotedString(value) => JArray!["string", value],
        Token::UnquotedUrl(value) => JArray!["url", value],
        Token::Delim('\\') => "\\".to_json(),
        Token::Delim(value) => value.to_string().to_json(),

        Token::Number {
            value,
            int_value,
            has_sign,
        } => Value::Array({
            let mut v = vec!["number".to_json()];
            v.extend(numeric(value, int_value, has_sign));
            v
        }),
        Token::Percentage {
            unit_value,
            int_value,
            has_sign,
        } => Value::Array({
            // `unit_value` is stored divided by 100; the fixtures expect
            // the nominal percentage number.
            let mut v = vec!["percentage".to_json()];
            v.extend(numeric(unit_value * 100., int_value, has_sign));
            v
        }),
        Token::Dimension {
            value,
            int_value,
            has_sign,
            unit,
        } => Value::Array({
            let mut v = vec!["dimension".to_json()];
            v.extend(numeric(value, int_value, has_sign));
            v.push(unit.to_json());
            v
        }),

        Token::WhiteSpace(_) => " ".to_json(),
        Token::Comment(_) => "/**/".to_json(),
        Token::Colon => ":".to_json(),
        Token::Semicolon => ";".to_json(),
        Token::Comma => ",".to_json(),
        Token::IncludeMatch => "~=".to_json(),
        Token::DashMatch => "|=".to_json(),
        Token::PrefixMatch => "^=".to_json(),
        Token::SuffixMatch => "$=".to_json(),
        Token::SubstringMatch => "*=".to_json(),
        Token::CDO => "<!--".to_json(),
        Token::CDC => "-->".to_json(),

        Token::Function(name) => Value::Array({
            let mut v = vec!["function".to_json(), name.to_json()];
            v.extend(nested(input));
            v
        }),
        Token::ParenthesisBlock => Value::Array({
            let mut v = vec!["()".to_json()];
            v.extend(nested(input));
            v
        }),
        Token::SquareBracketBlock => Value::Array({
            let mut v = vec!["[]".to_json()];
            v.extend(nested(input));
            v
        }),
        Token::CurlyBracketBlock => Value::Array({
            let mut v = vec!["{}".to_json()];
            v.extend(nested(input));
            v
        }),
        Token::BadUrl(_) => JArray!["error", "bad-url"],
        Token::BadString(_) => JArray!["error", "bad-string"],
        Token::CloseParenthesis => JArray!["error", ")"],
        Token::CloseSquareBracket => JArray!["error", "]"],
        Token::CloseCurlyBracket => JArray!["error", "}"],
    }
}
+
/// A previous version of procedural-masquerade had a bug where it
/// would normalize consecutive whitespace to a single space,
/// including in string literals.
#[test]
fn procedural_masquerade_whitespace() {
    // The map key and the match arms below must keep their exact
    // whitespace (" \t\n"); collapsing it is precisely the regression
    // this test guards against.
    ascii_case_insensitive_phf_map! {
        map -> () = {
            " \t\n" => ()
        }
    }
    assert_eq!(map::get(" \t\n"), Some(&()));
    assert_eq!(map::get(" "), None);

    match_ignore_ascii_case! { " \t\n",
        " " => panic!("1"),
        " \t\n" => {},
        _ => panic!("2"),
    }

    match_ignore_ascii_case! { " ",
        " \t\n" => panic!("3"),
        " " => {},
        _ => panic!("4"),
    }
}
+
// A delimiter must behave exactly like end-of-input inside
// `parse_until_before`: every string in a group below must yield the same
// token stream as every other string in that group.
#[test]
fn parse_until_before_stops_at_delimiter_or_end_of_input() {
    // For all j and k, inputs[i].1[j] should parse the same as inputs[i].1[k]
    // when we use delimiters inputs[i].0.
    let inputs = vec![
        (
            Delimiter::Bang | Delimiter::Semicolon,
            // Note that the ';extra' is fine, because the ';' acts the same as
            // the end of input.
            vec!["token stream;extra", "token stream!", "token stream"],
        ),
        (Delimiter::Bang | Delimiter::Semicolon, vec![";", "!", ""]),
    ];
    for equivalent in inputs {
        // Compare every pair (x, y) within the group.
        for (j, x) in equivalent.1.iter().enumerate() {
            for y in equivalent.1[j + 1..].iter() {
                let mut ix = ParserInput::new(x);
                let mut ix = Parser::new(&mut ix);

                let mut iy = ParserInput::new(y);
                let mut iy = Parser::new(&mut iy);

                let _ = ix.parse_until_before::<_, _, ()>(equivalent.0, |ix| {
                    iy.parse_until_before::<_, _, ()>(equivalent.0, |iy| {
                        loop {
                            // Both streams must produce identical tokens and
                            // fail (reach their delimiter/EOF) together.
                            let ox = ix.next();
                            let oy = iy.next();
                            assert_eq!(ox, oy);
                            // `is_err()` instead of `if let Err(_)`
                            // (clippy::redundant_pattern_matching).
                            if ox.is_err() {
                                break;
                            }
                        }
                        Ok(())
                    })
                });
            }
        }
    }
}
+
// `current_line()` must track the line containing the parser's current
// position as tokens (including newline-separated ones) are consumed.
#[test]
fn parser_maintains_current_line() {
    let mut input = ParserInput::new("ident ident;\nident ident ident;\nident");
    let mut parser = Parser::new(&mut input);
    // Before consuming anything, the current line is the first line.
    assert_eq!(parser.current_line(), "ident ident;");
    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.next(), Ok(&Token::Semicolon));

    // Consuming the first token past the newline moves current_line().
    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.current_line(), "ident ident ident;");
    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.next(), Ok(&Token::Semicolon));

    assert_eq!(parser.next(), Ok(&Token::Ident("ident".into())));
    assert_eq!(parser.current_line(), "ident")
}
+
// Regression test: `skip_cdc_and_cdo` must consume a leading `-->` as a
// single unit and leave the tokenizer correctly positioned at `x`
// (EOF column 5 confirms no bytes were skipped or double-counted).
#[test]
fn cdc_regression_test() {
    let mut input = ParserInput::new("-->x");
    let mut parser = Parser::new(&mut input);
    parser.skip_cdc_and_cdo();
    assert_eq!(parser.next(), Ok(&Token::Ident("x".into())));
    assert_eq!(
        parser.next(),
        Err(BasicParseError {
            kind: BasicParseErrorKind::EndOfInput,
            location: SourceLocation { line: 0, column: 5 }
        })
    );
}
+
// When the closure itself fails, `parse_entirely` must surface that error
// (at the closure's location) rather than a later exhaustion error.
#[test]
fn parse_entirely_reports_first_error() {
    #[derive(PartialEq, Debug)]
    enum E {
        Foo,
    }
    let mut input = ParserInput::new("ident");
    let mut parser = Parser::new(&mut input);
    let result: Result<(), _> = parser.parse_entirely(|p| Err(p.new_custom_error(E::Foo)));
    assert_eq!(
        result,
        Err(ParseError {
            kind: ParseErrorKind::Custom(E::Foo),
            location: SourceLocation { line: 0, column: 1 },
        })
    );
}
+
// `/*# sourceMappingURL=... */` (and the legacy `/*@` form) should set the
// parser's source-map URL; malformed variants should leave it unset, and
// the last valid comment wins.
#[test]
fn parse_sourcemapping_comments() {
    let tests = vec![
        ("/*# sourceMappingURL=here*/", Some("here")),
        ("/*# sourceMappingURL=here */", Some("here")),
        ("/*@ sourceMappingURL=here*/", Some("here")),
        (
            "/*@ sourceMappingURL=there*/ /*# sourceMappingURL=here*/",
            Some("here"),
        ),
        ("/*# sourceMappingURL=here there */", Some("here")),
        ("/*# sourceMappingURL= here */", Some("")),
        ("/*# sourceMappingURL=*/", Some("")),
        ("/*# sourceMappingUR=here */", None),
        ("/*! sourceMappingURL=here */", None),
        ("/*# sourceMappingURL = here */", None),
        ("/* # sourceMappingURL=here */", None),
    ];

    for (css, expected) in tests {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);
        // Tokenize everything (comments are only seen with whitespace).
        while parser.next_including_whitespace().is_ok() {}
        assert_eq!(parser.current_source_map_url(), expected);
    }
}
+
// Same as `parse_sourcemapping_comments`, but for `/*# sourceURL=... */`.
#[test]
fn parse_sourceurl_comments() {
    let tests = vec![
        ("/*# sourceURL=here*/", Some("here")),
        ("/*# sourceURL=here */", Some("here")),
        ("/*@ sourceURL=here*/", Some("here")),
        ("/*@ sourceURL=there*/ /*# sourceURL=here*/", Some("here")),
        ("/*# sourceURL=here there */", Some("here")),
        ("/*# sourceURL= here */", Some("")),
        ("/*# sourceURL=*/", Some("")),
        ("/*# sourceMappingUR=here */", None),
        ("/*! sourceURL=here */", None),
        ("/*# sourceURL = here */", None),
        ("/* # sourceURL=here */", None),
    ];

    for (css, expected) in tests {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);
        // Tokenize everything (comments are only seen with whitespace).
        while parser.next_including_whitespace().is_ok() {}
        assert_eq!(parser.current_source_url(), expected);
    }
}
+
// Serializing a freshly-parsed `<percentage-token>` must reproduce the
// original source text exactly, for 0-100% with 0-2 decimal digits.
#[cfg_attr(all(miri, feature = "skip_long_tests"), ignore)]
#[test]
fn roundtrip_percentage_token() {
    fn assert_roundtrips(css: &str) {
        let mut input = ParserInput::new(css);
        let mut parser = Parser::new(&mut input);
        let token = parser.next().unwrap();
        assert_eq!(token.to_css_string(), css);
    }
    // Test simple number serialization
    for integer in 0..=100 {
        assert_roundtrips(&format!("{}%", integer));
        for tenth in 0..10 {
            if tenth != 0 {
                // One decimal digit (a trailing ".0" would not round-trip).
                assert_roundtrips(&format!("{}.{}%", integer, tenth));
            }
            for hundredth in 1..10 {
                // Two decimal digits, last digit non-zero.
                assert_roundtrips(&format!("{}.{}{}%", integer, tenth, hundredth));
            }
        }
    }
}
+
#[test]
fn utf16_columns() {
    // This particular test serves two purposes. First, it checks
    // that the column number computations are correct. Second, it
    // checks that tokenizer code paths correctly differentiate
    // between the different UTF-8 encoding bytes. In particular
    // different leader bytes and continuation bytes are treated
    // differently, so we make sure to include all lengths in the
    // tests, using the string "QΡ✈🆒". Also, remember that because
    // the column is in units of UTF-16, the 4-byte sequence results
    // in two columns.
    let tests = vec![
        ("", 1),
        ("ascii", 6),
        ("/*QΡ✈🆒*/", 10),
        ("'QΡ✈🆒*'", 9),
        ("\"\\\"'QΡ✈🆒*'", 12),
        ("\\Q\\Ρ\\✈\\🆒", 10),
        ("QΡ✈🆒", 6),
        ("QΡ✈🆒\\Q\\Ρ\\✈\\🆒", 15),
        ("newline\r\nQΡ✈🆒", 6),
        ("url(QΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 20),
        ("url(QΡ✈🆒)", 11),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 16),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒", 15),
        ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒 x", 17),
        ("QΡ✈🆒()", 8),
        // Test that under/over-flow of current_line_start_position is
        // handled properly; see the special case in consume_4byte_intro.
        ("🆒", 3),
    ];

    for test in tests {
        let mut input = ParserInput::new(test.0);
        let mut parser = Parser::new(&mut input);

        // Read all tokens.
        loop {
            match parser.next() {
                Err(BasicParseError {
                    kind: BasicParseErrorKind::EndOfInput,
                    ..
                }) => {
                    break;
                }
                // `panic!` with the actual error instead of `assert!(false)`
                // (clippy::assertions_on_constants; better failure output).
                Err(e) => panic!("unexpected parse error: {:?}", e),
                Ok(_) => {}
            };
        }

        // Check the resulting column.
        assert_eq!(parser.current_source_location().column, test.1);
    }
}
+
// Checks that `match_ignore_ascii_case!` composes with a macro-generated
// keyword enum, as Servo's `define_css_keyword_enum` does.
#[test]
fn servo_define_css_keyword_enum() {
    macro_rules! define_css_keyword_enum {
        (pub enum $name:ident { $($variant:ident = $css:pat,)+ }) => {
            #[derive(PartialEq, Debug)]
            pub enum $name {
                $($variant),+
            }

            impl $name {
                // Map a CSS keyword (ASCII case-insensitively) to a variant.
                pub fn from_ident(ident: &str) -> Result<$name, ()> {
                    match_ignore_ascii_case! { ident,
                        $($css => Ok($name::$variant),)+
                        _ => Err(())
                    }
                }
            }
        }
    }
    define_css_keyword_enum! {
        pub enum UserZoom {
            Zoom = "zoom",
            Fixed = "fixed",
        }
    }

    assert_eq!(UserZoom::from_ident("fixed"), Ok(UserZoom::Fixed));
}
diff --git a/third_party/rust/cssparser/src/tokenizer.rs b/third_party/rust/cssparser/src/tokenizer.rs
new file mode 100644
index 0000000000..a3b700632d
--- /dev/null
+++ b/third_party/rust/cssparser/src/tokenizer.rs
@@ -0,0 +1,1403 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// https://drafts.csswg.org/css-syntax/#tokenization
+
+use self::Token::*;
+use crate::cow_rc_str::CowRcStr;
+use crate::parser::ParserState;
+use std::char;
+use std::ops::Range;
+
+#[cfg(not(feature = "dummy_match_byte"))]
+use cssparser_macros::match_byte;
+
// Fallback when the `match_byte!` procedural macro (cssparser_macros) is
// disabled: expand to a plain `match`, trading the generated jump table
// for simplicity.
#[cfg(feature = "dummy_match_byte")]
macro_rules! match_byte {
    ($value:expr, $($rest:tt)* ) => {
        match $value {
            $(
                $rest
            )+
        }
    };
}
+
/// One of the pieces the CSS input is broken into.
///
/// Some components use `Cow` in order to borrow from the original input string
/// and avoid allocating/copying when possible.
#[derive(PartialEq, Debug, Clone)]
pub enum Token<'a> {
    /// A [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram)
    Ident(CowRcStr<'a>),

    /// A [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram)
    ///
    /// The value does not include the `@` marker.
    AtKeyword(CowRcStr<'a>),

    /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "unrestricted"
    ///
    /// The value does not include the `#` marker.
    Hash(CowRcStr<'a>),

    /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "id"
    ///
    /// The value does not include the `#` marker.
    IDHash(CowRcStr<'a>), // Hash that is a valid ID selector.

    /// A [`<string-token>`](https://drafts.csswg.org/css-syntax/#string-token-diagram)
    ///
    /// The value does not include the quotes.
    QuotedString(CowRcStr<'a>),

    /// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram)
    ///
    /// The value does not include the `url(` `)` markers. Note that `url( <string-token> )` is represented by a
    /// `Function` token.
    UnquotedUrl(CowRcStr<'a>),

    /// A `<delim-token>`
    Delim(char),

    /// A [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram)
    Number {
        /// Whether the number had a `+` or `-` sign.
        ///
        /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.)
        has_sign: bool,

        /// The value as a float
        value: f32,

        /// If the original source did not include a fractional part, the value as an integer.
        int_value: Option<i32>,
    },

    /// A [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram)
    Percentage {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,

        /// The value as a float, divided by 100 so that the nominal range is 0.0 to 1.0.
        unit_value: f32,

        /// If the original source did not include a fractional part, the value as an integer.
        /// It is **not** divided by 100.
        int_value: Option<i32>,
    },

    /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram)
    Dimension {
        /// Whether the number had a `+` or `-` sign.
        ///
        /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.)
        has_sign: bool,

        /// The value as a float
        value: f32,

        /// If the original source did not include a fractional part, the value as an integer.
        int_value: Option<i32>,

        /// The unit, e.g. "px" in `12px`
        unit: CowRcStr<'a>,
    },

    /// A [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram)
    WhiteSpace(&'a str),

    /// A comment.
    ///
    /// The CSS Syntax spec does not generate tokens for comments,
    /// But we do, because we can (borrowed &str makes it cheap).
    ///
    /// The value does not include the `/*` `*/` markers.
    Comment(&'a str),

    /// A `:` `<colon-token>`
    Colon, // :

    /// A `;` `<semicolon-token>`
    Semicolon, // ;

    /// A `,` `<comma-token>`
    Comma, // ,

    /// A `~=` [`<include-match-token>`](https://drafts.csswg.org/css-syntax/#include-match-token-diagram)
    IncludeMatch,

    /// A `|=` [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram)
    DashMatch,

    /// A `^=` [`<prefix-match-token>`](https://drafts.csswg.org/css-syntax/#prefix-match-token-diagram)
    PrefixMatch,

    /// A `$=` [`<suffix-match-token>`](https://drafts.csswg.org/css-syntax/#suffix-match-token-diagram)
    SuffixMatch,

    /// A `*=` [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram)
    SubstringMatch,

    /// A `<!--` [`<CDO-token>`](https://drafts.csswg.org/css-syntax/#CDO-token-diagram)
    CDO,

    /// A `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram)
    CDC,

    /// A [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram)
    ///
    /// The value (name) does not include the `(` marker.
    Function(CowRcStr<'a>),

    /// A `<(-token>`
    ParenthesisBlock,

    /// A `<[-token>`
    SquareBracketBlock,

    /// A `<{-token>`
    CurlyBracketBlock,

    /// A `<bad-url-token>`
    ///
    /// This token always indicates a parse error.
    BadUrl(CowRcStr<'a>),

    /// A `<bad-string-token>`
    ///
    /// This token always indicates a parse error.
    BadString(CowRcStr<'a>),

    /// A `<)-token>`
    ///
    /// When obtained from one of the `Parser::next*` methods,
    /// this token is always unmatched and indicates a parse error.
    CloseParenthesis,

    /// A `<]-token>`
    ///
    /// When obtained from one of the `Parser::next*` methods,
    /// this token is always unmatched and indicates a parse error.
    CloseSquareBracket,

    /// A `<}-token>`
    ///
    /// When obtained from one of the `Parser::next*` methods,
    /// this token is always unmatched and indicates a parse error.
    CloseCurlyBracket,
}
+
impl<'a> Token<'a> {
    /// Return whether this token represents a parse error.
    ///
    /// `BadUrl` and `BadString` are tokenizer-level parse errors.
    ///
    /// `CloseParenthesis`, `CloseSquareBracket`, and `CloseCurlyBracket` are *unmatched*
    /// and therefore parse errors when returned by one of the `Parser::next*` methods.
    pub fn is_parse_error(&self) -> bool {
        // Variant names are in scope via `use self::Token::*` at the top
        // of this file.
        matches!(
            *self,
            BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
        )
    }
}
+
/// Streaming CSS tokenizer over a borrowed input string.
#[derive(Clone)]
pub struct Tokenizer<'a> {
    input: &'a str,
    /// Counted in bytes, not code points. From 0.
    position: usize,
    /// The position at the start of the current line; but adjusted to
    /// ensure that computing the column will give the result in units
    /// of UTF-16 characters.
    current_line_start_position: usize,
    /// 0-based line number of the current position.
    current_line_number: u32,
    /// Tracks whether `var()`/`env()` functions have been tokenized
    /// (see `look_for_var_or_env_functions` / `see_function`).
    var_or_env_functions: SeenStatus,
    /// URL from a `sourceMappingURL` comment; the tests show the last
    /// valid comment wins.
    source_map_url: Option<&'a str>,
    /// URL from a `sourceURL` comment; same last-one-wins behavior.
    source_url: Option<&'a str>,
}
+
/// Tri-state tracker for `var()`/`env()` observation.
#[derive(Copy, Clone, PartialEq, Eq)]
enum SeenStatus {
    /// Not tracking; `see_function` is a no-op.
    DontCare,
    /// Tracking was requested via `look_for_var_or_env_functions`.
    LookingForThem,
    /// At least one `var()`/`env()` was seen while tracking.
    SeenAtLeastOne,
}
+
impl<'a> Tokenizer<'a> {
    /// Create a tokenizer positioned at the start of `input`.
    #[inline]
    pub fn new(input: &str) -> Tokenizer {
        Tokenizer {
            input,
            position: 0,
            current_line_start_position: 0,
            current_line_number: 0,
            var_or_env_functions: SeenStatus::DontCare,
            source_map_url: None,
            source_url: None,
        }
    }

    /// Start tracking whether `var()`/`env()` functions are tokenized.
    #[inline]
    pub fn look_for_var_or_env_functions(&mut self) {
        self.var_or_env_functions = SeenStatus::LookingForThem;
    }

    /// Return whether a `var()`/`env()` was seen since tracking started,
    /// and stop tracking. Note this *consumes* the flag: a second call
    /// returns `false`.
    #[inline]
    pub fn seen_var_or_env_functions(&mut self) -> bool {
        let seen = self.var_or_env_functions == SeenStatus::SeenAtLeastOne;
        self.var_or_env_functions = SeenStatus::DontCare;
        seen
    }

    /// Record that a function named `name` was tokenized; sets the "seen"
    /// flag for `var`/`env` (ASCII case-insensitively) when tracking.
    #[inline]
    pub fn see_function(&mut self, name: &str) {
        if self.var_or_env_functions == SeenStatus::LookingForThem {
            if name.eq_ignore_ascii_case("var") || name.eq_ignore_ascii_case("env") {
                self.var_or_env_functions = SeenStatus::SeenAtLeastOne;
            }
        }
    }

    /// Tokenize and return the next token, or `Err(())` at end of input.
    #[inline]
    pub fn next(&mut self) -> Result<Token<'a>, ()> {
        next_token(self)
    }

    /// Current byte offset into the input (always a char boundary).
    #[inline]
    pub fn position(&self) -> SourcePosition {
        debug_assert!(self.input.is_char_boundary(self.position));
        SourcePosition(self.position)
    }

    /// Current (line, column); the column is 1-based and counted in
    /// UTF-16 code units (see `current_line_start_position`).
    #[inline]
    pub fn current_source_location(&self) -> SourceLocation {
        SourceLocation {
            line: self.current_line_number,
            column: (self.position - self.current_line_start_position + 1) as u32,
        }
    }

    #[inline]
    pub fn current_source_map_url(&self) -> Option<&'a str> {
        self.source_map_url
    }

    #[inline]
    pub fn current_source_url(&self) -> Option<&'a str> {
        self.source_url
    }

    /// Snapshot the current position for a later `reset`.
    #[inline]
    pub fn state(&self) -> ParserState {
        ParserState {
            position: self.position,
            current_line_start_position: self.current_line_start_position,
            current_line_number: self.current_line_number,
            // Block-nesting information is tracked by the Parser, not here.
            at_start_of: None,
        }
    }

    /// Rewind to a previously captured `state`.
    #[inline]
    pub fn reset(&mut self, state: &ParserState) {
        self.position = state.position;
        self.current_line_start_position = state.current_line_start_position;
        self.current_line_number = state.current_line_number;
    }

    /// The input text from `start_pos` up to the current position.
    #[inline]
    pub(crate) fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
        self.slice(start_pos..self.position())
    }

    #[inline]
    pub(crate) fn slice(&self, range: Range<SourcePosition>) -> &'a str {
        debug_assert!(self.input.is_char_boundary(range.start.0));
        debug_assert!(self.input.is_char_boundary(range.end.0));
        // SAFETY: `SourcePosition`s originate from this tokenizer, which
        // only produces in-bounds char-boundary offsets (checked above in
        // debug builds).
        unsafe { self.input.get_unchecked(range.start.0..range.end.0) }
    }

    /// The full text of the line containing the current position,
    /// delimited by `\r`, `\n` or form feed (exclusive).
    pub fn current_source_line(&self) -> &'a str {
        let current = self.position();
        // Last newline before the position (or start of input)...
        let start = self.slice(SourcePosition(0)..current)
            .rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
            .map_or(0, |start| start + 1);
        // ...up to the next newline after it (or end of input).
        let end = self.slice(current..SourcePosition(self.input.len()))
            .find(|c| matches!(c, '\r' | '\n' | '\x0C'))
            .map_or(self.input.len(), |end| current.0 + end);
        self.slice(SourcePosition(start)..SourcePosition(end))
    }

    /// The byte at the current position, or `None` at end of input.
    #[inline]
    pub fn next_byte(&self) -> Option<u8> {
        if self.is_eof() {
            None
        } else {
            Some(self.input.as_bytes()[self.position])
        }
    }

    // If false, `tokenizer.next_char()` will not panic.
    #[inline]
    fn is_eof(&self) -> bool {
        !self.has_at_least(0)
    }

    // If true, the input has at least `n` bytes left *after* the current one.
    // That is, `tokenizer.char_at(n)` will not panic.
    #[inline]
    fn has_at_least(&self, n: usize) -> bool {
        self.position + n < self.input.len()
    }

    // Advance over N bytes in the input. This function can advance
    // over ASCII bytes (excluding newlines), or UTF-8 sequence
    // leaders (excluding leaders for 4-byte sequences).
    #[inline]
    pub fn advance(&mut self, n: usize) {
        if cfg!(debug_assertions) {
            // Each byte must either be an ASCII byte or a sequence
            // leader, but not a 4-byte leader; also newlines are
            // rejected.
            for i in 0..n {
                let b = self.byte_at(i);
                debug_assert!(b.is_ascii() || (b & 0xF0 != 0xF0 && b & 0xC0 != 0x80));
                debug_assert!(b != b'\r' && b != b'\n' && b != b'\x0C');
            }
        }
        self.position += n
    }

    // Assumes non-EOF
    #[inline]
    fn next_byte_unchecked(&self) -> u8 {
        self.byte_at(0)
    }

    // The byte `offset` bytes past the current position; panics past EOF.
    #[inline]
    fn byte_at(&self, offset: usize) -> u8 {
        self.input.as_bytes()[self.position + offset]
    }

    // Advance over a single byte; the byte must be a UTF-8 sequence
    // leader for a 4-byte sequence.
    #[inline]
    fn consume_4byte_intro(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xF0 == 0xF0);
        // This takes two UTF-16 characters to represent, so we
        // actually have an undercount.
        self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        self.position += 1;
    }

    // Advance over a single byte; the byte must be a UTF-8
    // continuation byte.
    #[inline]
    fn consume_continuation_byte(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xC0 == 0x80);
        // Continuation bytes contribute to column overcount. Note
        // that due to the special case for the 4-byte sequence intro,
        // we must use wrapping add here.
        self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        self.position += 1;
    }

    // Advance over any kind of byte, excluding newlines.
    #[inline(never)]
    fn consume_known_byte(&mut self, byte: u8) {
        debug_assert!(byte != b'\r' && byte != b'\n' && byte != b'\x0C');
        self.position += 1;
        // Continuation bytes contribute to column overcount.
        if byte & 0xF0 == 0xF0 {
            // This takes two UTF-16 characters to represent, so we
            // actually have an undercount.
            self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        } else if byte & 0xC0 == 0x80 {
            // Note that due to the special case for the 4-byte
            // sequence intro, we must use wrapping add here.
            self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        }
    }

    // The char at the current position; panics at EOF (see `is_eof`).
    #[inline]
    fn next_char(&self) -> char {
        // SAFETY: `position()` asserts (in debug builds) that the current
        // position is a char boundary within `input`.
        unsafe { self.input.get_unchecked(self.position().0..) }.chars().next().unwrap()
    }

    // Given that a newline has been seen, advance over the newline
    // and update the state.
    #[inline]
    fn consume_newline(&mut self) {
        let byte = self.next_byte_unchecked();
        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
        self.position += 1;
        // Treat CRLF as a single newline.
        if byte == b'\r' && self.next_byte() == Some(b'\n') {
            self.position += 1;
        }
        self.current_line_start_position = self.position;
        self.current_line_number += 1;
    }

    // Whether the byte `offset` past the current position is a newline.
    #[inline]
    fn has_newline_at(&self, offset: usize) -> bool {
        self.position + offset < self.input.len()
            && matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
    }

    // Advance over a full (non-newline) char, keeping the UTF-16 column
    // adjustment in `current_line_start_position` up to date.
    #[inline]
    fn consume_char(&mut self) -> char {
        let c = self.next_char();
        let len_utf8 = c.len_utf8();
        self.position += len_utf8;
        // Note that due to the special case for the 4-byte sequence
        // intro, we must use wrapping add here.
        self.current_line_start_position = self
            .current_line_start_position
            .wrapping_add(len_utf8 - c.len_utf16());
        c
    }

    // Whether the input at the current position starts with `needle`.
    #[inline]
    fn starts_with(&self, needle: &[u8]) -> bool {
        self.input.as_bytes()[self.position..].starts_with(needle)
    }

    /// Skip whitespace and comments without producing tokens.
    pub fn skip_whitespace(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                _ => return,
            }
        }
    }

    /// Skip whitespace, comments and `<!--`/`-->` markers without
    /// producing tokens (used at the top level of a stylesheet).
    pub fn skip_cdc_and_cdo(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                b'<' => {
                    if self.starts_with(b"<!--") {
                        self.advance(4)
                    } else {
                        return
                    }
                }
                b'-' => {
                    if self.starts_with(b"-->") {
                        self.advance(3)
                    } else {
                        return
                    }
                }
                _ => {
                    return
                }
            }
        }
    }
}
+
/// A position from the start of the input, counted in UTF-8 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct SourcePosition(pub(crate) usize);

impl SourcePosition {
    /// Returns the current byte index in the original input.
    #[inline]
    pub fn byte_index(&self) -> usize {
        // Destructure the newtype rather than touching `.0` directly.
        let SourcePosition(index) = *self;
        index
    }
}
+
/// The line and column number for a given position within the input.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct SourceLocation {
    /// The line number, starting at 0 for the first line.
    pub line: u32,

    /// The column number within a line, starting at 1 for the first character of the line.
    /// Column numbers are counted in UTF-16 code units.
    pub column: u32,
}
+
/// Consume and return the next token, dispatching on the first byte
/// (see the "consume a token" algorithm in css-syntax, linked at the top
/// of this file). Returns `Err(())` at end of input.
fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    if tokenizer.is_eof() {
        return Err(());
    }
    let b = tokenizer.next_byte_unchecked();
    let token = match_byte! { b,
        b' ' | b'\t' => {
            consume_whitespace(tokenizer, false)
        },
        b'\n' | b'\x0C' | b'\r' => consume_whitespace(tokenizer, true),
        b'"' => consume_string(tokenizer, false),
        b'#' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
            else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
                // Any other valid case here already resulted in IDHash.
                b'0'..=b'9' | b'-' => true,
                _ => false,
            } { Hash(consume_name(tokenizer)) }
            else { Delim('#') }
        },
        b'$' => {
            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
            else { tokenizer.advance(1); Delim('$') }
        },
        b'\'' => consume_string(tokenizer, true),
        b'(' => { tokenizer.advance(1); ParenthesisBlock },
        b')' => { tokenizer.advance(1); CloseParenthesis },
        b'*' => {
            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
            else { tokenizer.advance(1); Delim('*') }
        },
        b'+' => {
            // `+` starts a number only when followed by a digit or `.digit`.
            if (
                tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('+')
            }
        },
        b',' => { tokenizer.advance(1); Comma },
        b'-' => {
            // `-` may start a number, the CDC marker `-->`, or an ident.
            if (
                tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
            ) {
                consume_numeric(tokenizer)
            } else if tokenizer.starts_with(b"-->") {
                tokenizer.advance(3);
                CDC
            } else if is_ident_start(tokenizer) {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('-')
            }
        },
        b'.' => {
            if tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9'
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('.')
            }
        }
        b'/' => {
            if tokenizer.starts_with(b"/*") {
                Comment(consume_comment(tokenizer))
            } else {
                tokenizer.advance(1);
                Delim('/')
            }
        }
        b'0'..=b'9' => consume_numeric(tokenizer),
        b':' => { tokenizer.advance(1); Colon },
        b';' => { tokenizer.advance(1); Semicolon },
        b'<' => {
            if tokenizer.starts_with(b"<!--") {
                tokenizer.advance(4);
                CDO
            } else {
                tokenizer.advance(1);
                Delim('<')
            }
        },
        b'@' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
            else { Delim('@') }
        },
        // NUL is included because css-syntax maps U+0000 to U+FFFD,
        // which is an ident code point.
        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
        b'[' => { tokenizer.advance(1); SquareBracketBlock },
        b'\\' => {
            // A backslash starts an escape (hence an ident) unless it is
            // immediately followed by a newline.
            if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
            else { tokenizer.advance(1); Delim('\\') }
        },
        b']' => { tokenizer.advance(1); CloseSquareBracket },
        b'^' => {
            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
            else { tokenizer.advance(1); Delim('^') }
        },
        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
        b'|' => {
            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
            else { tokenizer.advance(1); Delim('|') }
        },
        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
        b'~' => {
            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
            else { tokenizer.advance(1); Delim('~') }
        },
        _ => {
            // All non-ASCII code points are ident code points.
            if !b.is_ascii() {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim(b as char)
            }
        },
    };
    Ok(token)
}
+
+fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
+ let start_position = tokenizer.position();
+ if newline {
+ tokenizer.consume_newline();
+ } else {
+ tokenizer.advance(1);
+ }
+ while !tokenizer.is_eof() {
+ let b = tokenizer.next_byte_unchecked();
+ match_byte! { b,
+ b' ' | b'\t' => {
+ tokenizer.advance(1);
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ _ => {
+ break
+ }
+ }
+ }
+ WhiteSpace(tokenizer.slice_from(start_position))
+}
+
+// Check for sourceMappingURL or sourceURL comments and update the
+// tokenizer appropriately.
+fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
+ let directive = "# sourceMappingURL=";
+ let directive_old = "@ sourceMappingURL=";
+
+ // If there is a source map directive, extract the URL.
+ if contents.starts_with(directive) || contents.starts_with(directive_old) {
+ let contents = &contents[directive.len()..];
+ tokenizer.source_map_url = contents
+ .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
+ .next()
+ }
+
+ let directive = "# sourceURL=";
+ let directive_old = "@ sourceURL=";
+
+ // If there is a source map directive, extract the URL.
+ if contents.starts_with(directive) || contents.starts_with(directive_old) {
+ let contents = &contents[directive.len()..];
+ tokenizer.source_url = contents
+ .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
+ .next()
+ }
+}
+
/// Consume a `/* … */` comment, assuming the tokenizer sits at the opening
/// `/`. Returns the comment contents without the delimiters; an unterminated
/// comment runs to end of input. The contents are also scanned for source-map
/// directives, which update the tokenizer state.
fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
    tokenizer.advance(2); // consume "/*"
    let start_position = tokenizer.position();
    while !tokenizer.is_eof() {
        match_byte! { tokenizer.next_byte_unchecked(),
            b'*' => {
                let end_position = tokenizer.position();
                tokenizer.advance(1);
                // '*' only terminates the comment when followed by '/'.
                if tokenizer.next_byte() == Some(b'/') {
                    tokenizer.advance(1);
                    let contents = tokenizer.slice(start_position..end_position);
                    check_for_source_map(tokenizer, contents);
                    return contents
                }
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            // Keep UTF-8 position bookkeeping exact for multi-byte sequences.
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }
    // EOF before "*/": the comment extends to the end of the input.
    let contents = tokenizer.slice_from(start_position);
    check_for_source_map(tokenizer, contents);
    contents
}
+
+fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
+ match consume_quoted_string(tokenizer, single_quote) {
+ Ok(value) => QuotedString(value),
+ Err(value) => BadString(value),
+ }
+}
+
/// Consume the contents of a quoted string, assuming the tokenizer sits at
/// the opening quote. Returns `Ok(contents)` for a well-formed string, or
/// `Err(partial_contents)` on syntax error (an unescaped newline, which is
/// left unconsumed).
///
/// Runs in two phases: a borrowing fast path that just slices the input, and
/// an owned-buffer slow path entered the first time an escape or NUL byte
/// requires rewriting the contents.
fn consume_quoted_string<'a>(
    tokenizer: &mut Tokenizer<'a>,
    single_quote: bool,
) -> Result<CowRcStr<'a>, CowRcStr<'a>> {
    tokenizer.advance(1); // Skip the initial quote
    // start_pos is at code point boundary, after " or '
    let start_pos = tokenizer.position();
    let mut string_bytes;
    // Phase 1: borrow from the input until an escape or NUL forces a copy.
    loop {
        if tokenizer.is_eof() {
            return Ok(tokenizer.slice_from(start_pos).into());
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'"' => {
                if !single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\'' => {
                if single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\\' | b'\0' => {
                // * The tokenizer’s input is UTF-8 since it’s `&str`.
                // * start_pos is at a code point boundary
                // * so is the current position (which is before '\\' or '\0'
                //
                // So `string_bytes` is well-formed UTF-8.
                string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\n' | b'\r' | b'\x0C' => {
                // Unescaped newline: syntax error. The newline stays unconsumed.
                return Err(tokenizer.slice_from(start_pos).into())
            },
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }

    // Phase 2: keep building the owned buffer. Arms that fall out of the
    // match append the current byte via the `push` at the bottom of the loop.
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'\n' | b'\r' | b'\x0C' => {
                return Err(
                    // string_bytes is well-formed UTF-8, see other comments.
                    unsafe {
                        from_utf8_release_unchecked(string_bytes)
                    }.into()
                );
            }
            b'"' => {
                tokenizer.advance(1);
                if !single_quote {
                    break;
                }
            }
            b'\'' => {
                tokenizer.advance(1);
                if single_quote {
                    break;
                }
            }
            b'\\' => {
                tokenizer.advance(1);
                if !tokenizer.is_eof() {
                    match tokenizer.next_byte_unchecked() {
                        // Escaped newline
                        b'\n' | b'\x0C' | b'\r' => {
                            tokenizer.consume_newline();
                        }
                        // This pushes one well-formed code point
                        _ => consume_escape_and_write(tokenizer, &mut string_bytes)
                    }
                }
                // else: escaped EOF, do nothing.
                continue;
            }
            b'\0' => {
                // NUL is replaced with U+FFFD per CSS Syntax preprocessing.
                tokenizer.advance(1);
                string_bytes.extend("\u{FFFD}".as_bytes());
                continue;
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            },
        }

        // If this byte is part of a multi-byte code point,
        // we’ll end up copying the whole code point before this loop does something else.
        string_bytes.push(b)
    }

    Ok(
        // string_bytes is well-formed UTF-8, see other comments.
        unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
    )
}
+
+#[inline]
+fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
+ !tokenizer.is_eof()
+ && match_byte! { tokenizer.next_byte_unchecked(),
+ b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => true,
+ b'-' => {
+ tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
+ b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
+ true
+ }
+ b'\\' => !tokenizer.has_newline_at(1),
+ b => !b.is_ascii(),
+ }
+ },
+ b'\\' => !tokenizer.has_newline_at(1),
+ b => !b.is_ascii(),
+ }
+}
+
+fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+ let value = consume_name(tokenizer);
+ if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'(' {
+ tokenizer.advance(1);
+ if value.eq_ignore_ascii_case("url") {
+ consume_unquoted_url(tokenizer).unwrap_or(Function(value))
+ } else {
+ tokenizer.see_function(&value);
+ Function(value)
+ }
+ } else {
+ Ident(value)
+ }
+}
+
/// Consume an ident sequence and return its (escape-resolved) value.
///
/// Like `consume_quoted_string`, this borrows from the input until the first
/// escape or NUL byte forces a copy into an owned buffer.
fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
    // start_pos is the end of the previous token, therefore at a code point boundary
    let start_pos = tokenizer.position();
    let mut value_bytes;
    // Phase 1: zero-copy scan over plain name bytes.
    loop {
        if tokenizer.is_eof() {
            return tokenizer.slice_from(start_pos).into();
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => tokenizer.advance(1),
            b'\\' | b'\0' => {
                // * The tokenizer’s input is UTF-8 since it’s `&str`.
                // * start_pos is at a code point boundary
                // * so is the current position (which is before '\\' or '\0'
                //
                // So `value_bytes` is well-formed UTF-8.
                value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _b => {
                // Any other ASCII byte ends the name.
                return tokenizer.slice_from(start_pos).into();
            }
        }
    }

    // Phase 2: append bytes (and resolved escapes) to the owned buffer.
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
                tokenizer.advance(1);
                value_bytes.push(b) // ASCII
            }
            b'\\' => {
                // A backslash escaping a newline is not part of a name.
                if tokenizer.has_newline_at(1) { break }
                tokenizer.advance(1);
                // This pushes one well-formed code point
                consume_escape_and_write(tokenizer, &mut value_bytes)
            }
            b'\0' => {
                // NUL is replaced with U+FFFD per CSS Syntax preprocessing.
                tokenizer.advance(1);
                value_bytes.extend("\u{FFFD}".as_bytes());
            },
            b'\x80'..=b'\xBF' => {
                // This byte *is* part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                tokenizer.consume_continuation_byte();
                value_bytes.push(b)
            }
            b'\xC0'..=b'\xEF' => {
                // This byte *is* part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                tokenizer.advance(1);
                value_bytes.push(b)
            }
            b'\xF0'..=b'\xFF' => {
                tokenizer.consume_4byte_intro();
                value_bytes.push(b)
            }
            _ => {
                // ASCII
                break;
            }
        }
    }
    // string_bytes is well-formed UTF-8, see other comments.
    unsafe { from_utf8_release_unchecked(value_bytes) }.into()
}
+
/// Map an ASCII hexadecimal digit byte (either case) to its numeric value,
/// or `None` for any other byte.
fn byte_to_hex_digit(b: u8) -> Option<u32> {
    let digit = match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - b'a' + 10,
        b'A'..=b'F' => b - b'A' + 10,
        _ => return None,
    };
    Some(u32::from(digit))
}
+
/// Map an ASCII decimal digit byte to its numeric value, or `None` for any
/// other byte.
fn byte_to_decimal_digit(b: u8) -> Option<u32> {
    // `is_ascii_digit` replaces the manual `b'0' <= b <= b'9'` range check
    // (same behavior, idiomatic form).
    if b.is_ascii_digit() {
        Some(u32::from(b - b'0'))
    } else {
        None
    }
}
+
/// Consume a numeric token: a `Number`, `Percentage` (trailing '%'), or
/// `Dimension` (trailing ident unit). The caller guarantees the input starts
/// a number (optional sign, then at least one digit before or after '.').
fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
    // But this is always called so that there is at least one digit in \d*(\.\d+)?

    // Do all the math in f64 so that large numbers overflow to +/-inf
    // and i32::{MIN, MAX} are within range.

    let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
        b'-' => (true, -1.),
        b'+' => (true, 1.),
        _ => (false, 1.),
    };
    if has_sign {
        tokenizer.advance(1);
    }

    let mut integral_part: f64 = 0.;
    while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
        integral_part = integral_part * 10. + digit as f64;
        tokenizer.advance(1);
        if tokenizer.is_eof() {
            break;
        }
    }

    let mut is_integer = true;

    // Fractional part: only entered for '.' followed by a digit, so a plain
    // trailing '.' stays unconsumed.
    let mut fractional_part: f64 = 0.;
    if tokenizer.has_at_least(1)
        && tokenizer.next_byte_unchecked() == b'.'
        && matches!(tokenizer.byte_at(1), b'0'..=b'9')
    {
        is_integer = false;
        tokenizer.advance(1); // Consume '.'
        let mut factor = 0.1;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            fractional_part += digit as f64 * factor;
            factor *= 0.1;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
    }

    let mut value = sign * (integral_part + fractional_part);

    // Exponent: 'e'/'E' only counts when followed by digits (optionally signed);
    // otherwise it is left for the next token (e.g. a unit like "em").
    if tokenizer.has_at_least(1) && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E') {
        if matches!(tokenizer.byte_at(1), b'0'..=b'9')
            || (tokenizer.has_at_least(2)
                && matches!(tokenizer.byte_at(1), b'+' | b'-')
                && matches!(tokenizer.byte_at(2), b'0'..=b'9'))
        {
            is_integer = false;
            tokenizer.advance(1);
            let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
                b'-' => (true, -1.),
                b'+' => (true, 1.),
                _ => (false, 1.),
            };
            if has_sign {
                tokenizer.advance(1);
            }
            let mut exponent: f64 = 0.;
            while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
                exponent = exponent * 10. + digit as f64;
                tokenizer.advance(1);
                if tokenizer.is_eof() {
                    break;
                }
            }
            value *= f64::powf(10., sign * exponent);
        }
    }

    // Integer values saturate at the i32 range instead of wrapping.
    let int_value = if is_integer {
        Some(if value >= i32::MAX as f64 {
            i32::MAX
        } else if value <= i32::MIN as f64 {
            i32::MIN
        } else {
            value as i32
        })
    } else {
        None
    };

    // A trailing '%' makes this a Percentage (stored as a unit fraction).
    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' {
        tokenizer.advance(1);
        return Percentage {
            unit_value: (value / 100.) as f32,
            int_value,
            has_sign,
        };
    }
    let value = value as f32;
    // A trailing ident is the unit of a Dimension; otherwise a plain Number.
    if is_ident_start(tokenizer) {
        let unit = consume_name(tokenizer);
        Dimension {
            value,
            int_value,
            has_sign,
            unit,
        }
    } else {
        Number {
            value,
            int_value,
            has_sign,
        }
    }
}
+
/// Convert `string_bytes` to a `String`, skipping UTF-8 validation in
/// release builds.
///
/// # Safety
///
/// `string_bytes` must be well-formed UTF-8. Debug builds still run the
/// checked conversion (and panic on a violation of this invariant); release
/// builds use `from_utf8_unchecked` and skip the check entirely.
#[inline]
unsafe fn from_utf8_release_unchecked(string_bytes: Vec<u8>) -> String {
    if cfg!(debug_assertions) {
        String::from_utf8(string_bytes).unwrap()
    } else {
        String::from_utf8_unchecked(string_bytes)
    }
}
+
/// Consume the argument of `url(` as an unquoted URL token.
///
/// Returns `Err(())` *without advancing* when the first non-whitespace byte
/// is a quote, so the caller can emit a `Function` token and let the quoted
/// string be tokenized normally. Otherwise returns `UnquotedUrl` or (on
/// invalid contents) `BadUrl`.
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    // This is only called after "url(", so the current position is a code point boundary.
    let start_position = tokenizer.position;
    let from_start = &tokenizer.input[tokenizer.position..];
    let mut newlines = 0;
    let mut last_newline = 0;
    let mut found_printable_char = false;
    let mut iter = from_start.bytes().enumerate();
    // Skip leading whitespace manually (newline accounting is patched up
    // afterwards, in one step, below).
    loop {
        let (offset, b) = match iter.next() {
            Some(item) => item,
            None => {
                tokenizer.position = tokenizer.input.len();
                break;
            }
        };
        match_byte! { b,
            b' ' | b'\t' => {},
            b'\n' | b'\x0C' => {
                newlines += 1;
                last_newline = offset;
            }
            b'\r' => {
                // CRLF counts as a single newline; a bare CR counts on its own.
                if from_start.as_bytes().get(offset + 1) != Some(&b'\n') {
                    newlines += 1;
                    last_newline = offset;
                }
            }
            b'"' | b'\'' => return Err(()), // Do not advance
            b')' => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset + 1;
                break
            }
            _ => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset;
                found_printable_char = true;
                break
            }
        }
    }

    // Apply the deferred line bookkeeping for any newlines skipped above.
    if newlines > 0 {
        tokenizer.current_line_number += newlines;
        // No need for wrapping_add here, because there's no possible
        // way to wrap.
        tokenizer.current_line_start_position = start_position + last_newline + 1;
    }

    if found_printable_char {
        // This function only consumed ASCII (whitespace) bytes,
        // so the current position is a code point boundary.
        return Ok(consume_unquoted_url_internal(tokenizer));
    } else {
        return Ok(UnquotedUrl("".into()));
    }

    /// Consume the URL value proper; borrows from the input until an escape
    /// or NUL byte forces a copy into an owned buffer.
    fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
        // This function is only called with start_pos at a code point boundary.
        let start_pos = tokenizer.position();
        let mut string_bytes: Vec<u8>;
        loop {
            if tokenizer.is_eof() {
                return UnquotedUrl(tokenizer.slice_from(start_pos).into());
            }
            match_byte! { tokenizer.next_byte_unchecked(),
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    let value = tokenizer.slice_from(start_pos);
                    return consume_url_end(tokenizer, start_pos, value.into())
                }
                b')' => {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return UnquotedUrl(value.into())
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
                | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos)
                },
                b'\\' | b'\0' => {
                    // * The tokenizer’s input is UTF-8 since it’s `&str`.
                    // * start_pos is at a code point boundary
                    // * so is the current position (which is before '\\' or '\0'
                    //
                    // So `string_bytes` is well-formed UTF-8.
                    string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                    break
                }
                b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
                b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
                _ => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                }
            }
        }
        // Owned-buffer slow path after the first escape or NUL.
        while !tokenizer.is_eof() {
            let b = tokenizer.next_byte_unchecked();
            match_byte! { b,
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    // string_bytes is well-formed UTF-8, see other comments.
                    let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
                    return consume_url_end(tokenizer, start_pos, string)
                }
                b')' => {
                    tokenizer.advance(1);
                    break;
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
                | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos);
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if tokenizer.has_newline_at(0) {
                        return consume_bad_url(tokenizer, start_pos)
                    }

                    // This pushes one well-formed code point to string_bytes
                    consume_escape_and_write(tokenizer, &mut string_bytes)
                },
                b'\0' => {
                    // NUL is replaced with U+FFFD per CSS Syntax preprocessing.
                    tokenizer.advance(1);
                    string_bytes.extend("\u{FFFD}".as_bytes());
                }
                b'\x80'..=b'\xBF' => {
                    // We’ll end up copying the whole code point
                    // before this loop does something else.
                    tokenizer.consume_continuation_byte();
                    string_bytes.push(b);
                }
                b'\xF0'..=b'\xFF' => {
                    // We’ll end up copying the whole code point
                    // before this loop does something else.
                    tokenizer.consume_4byte_intro();
                    string_bytes.push(b);
                }
                // If this byte is part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                b => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                    string_bytes.push(b)
                }
            }
        }
        UnquotedUrl(
            // string_bytes is well-formed UTF-8, see other comments.
            unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
        )
    }

    /// Consume trailing whitespace and the closing ')' after a complete URL
    /// value; any other byte makes the whole token a `BadUrl`.
    fn consume_url_end<'a>(
        tokenizer: &mut Tokenizer<'a>,
        start_pos: SourcePosition,
        string: CowRcStr<'a>,
    ) -> Token<'a> {
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    tokenizer.advance(1);
                    break
                }
                b' ' | b'\t' => { tokenizer.advance(1); }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                    return consume_bad_url(tokenizer, start_pos);
                }
            }
        }
        UnquotedUrl(string)
    }

    /// Error recovery: skip to the closing ')' (honoring escaped ')' and
    /// '\\') and return a `BadUrl` carrying everything consumed.
    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
        // Consume up to the closing )
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    let contents = tokenizer.slice_from(start_pos).into();
                    tokenizer.advance(1);
                    return BadUrl(contents)
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
                        tokenizer.advance(1); // Skip an escaped ')' or '\'
                    }
                }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                }
            }
        }
        BadUrl(tokenizer.slice_from(start_pos).into())
    }
}
+
+// (value, number of digits up to 6)
+fn consume_hex_digits<'a>(tokenizer: &mut Tokenizer<'a>) -> (u32, u32) {
+ let mut value = 0;
+ let mut digits = 0;
+ while digits < 6 && !tokenizer.is_eof() {
+ match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
+ Some(digit) => {
+ value = value * 16 + digit;
+ digits += 1;
+ tokenizer.advance(1);
+ }
+ None => break,
+ }
+ }
+ (value, digits)
+}
+
+// Same constraints as consume_escape except it writes into `bytes` the result
+// instead of returning it.
+fn consume_escape_and_write(tokenizer: &mut Tokenizer, bytes: &mut Vec<u8>) {
+ bytes.extend(
+ consume_escape(tokenizer)
+ .encode_utf8(&mut [0; 4])
+ .as_bytes(),
+ )
+}
+
+// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed
+// and that the next input character has already been verified
+// to not be a newline.
+fn consume_escape(tokenizer: &mut Tokenizer) -> char {
+ if tokenizer.is_eof() {
+ return '\u{FFFD}';
+ } // Escaped EOF
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
+ let (c, _) = consume_hex_digits(tokenizer);
+ if !tokenizer.is_eof() {
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b' ' | b'\t' => {
+ tokenizer.advance(1)
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ _ => {}
+ }
+ }
+ static REPLACEMENT_CHAR: char = '\u{FFFD}';
+ if c != 0 {
+ let c = char::from_u32(c);
+ c.unwrap_or(REPLACEMENT_CHAR)
+ } else {
+ REPLACEMENT_CHAR
+ }
+ },
+ b'\0' => {
+ tokenizer.advance(1);
+ '\u{FFFD}'
+ }
+ _ => tokenizer.consume_char(),
+ }
+}
diff --git a/third_party/rust/cssparser/src/unicode_range.rs b/third_party/rust/cssparser/src/unicode_range.rs
new file mode 100644
index 0000000000..b0a2017cbf
--- /dev/null
+++ b/third_party/rust/cssparser/src/unicode_range.rs
@@ -0,0 +1,181 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! https://drafts.csswg.org/css-syntax/#urange
+
+use crate::tokenizer::Token;
+use crate::{BasicParseError, Parser, ToCss};
+use std::char;
+use std::fmt;
+
/// One contiguous range of code points, as produced by the CSS `<urange>`
/// production (e.g. `U+26`, `U+0-7F`, `U+4??`).
///
/// Can not be empty. Can represent a single code point when start == end.
#[derive(PartialEq, Eq, Clone, Hash)]
#[repr(C)]
pub struct UnicodeRange {
    /// Inclusive start of the range. In [0, end].
    pub start: u32,

    /// Inclusive end of the range. In [0, 0x10FFFF].
    pub end: u32,
}
+
+impl UnicodeRange {
+ /// https://drafts.csswg.org/css-syntax/#urange-syntax
+ pub fn parse<'i, 't>(input: &mut Parser<'i, 't>) -> Result<Self, BasicParseError<'i>> {
+ // <urange> =
+ // u '+' <ident-token> '?'* |
+ // u <dimension-token> '?'* |
+ // u <number-token> '?'* |
+ // u <number-token> <dimension-token> |
+ // u <number-token> <number-token> |
+ // u '+' '?'+
+
+ input.expect_ident_matching("u")?;
+ let after_u = input.position();
+ parse_tokens(input)?;
+
+ // This deviates from the spec in case there are CSS comments
+ // between tokens in the middle of one <unicode-range>,
+ // but oh well…
+ let concatenated_tokens = input.slice_from(after_u);
+
+ let range = match parse_concatenated(concatenated_tokens.as_bytes()) {
+ Ok(range) => range,
+ Err(()) => {
+ return Err(input
+ .new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
+ }
+ };
+ if range.end > char::MAX as u32 || range.start > range.end {
+ Err(input.new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
+ } else {
+ Ok(range)
+ }
+ }
+}
+
/// Consume the tokens that may make up a `<urange>` after the leading `u`
/// ident: `+` followed by an ident or `?`s, a dimension, or a number
/// optionally followed by `?`s, a dimension, or a second number.
fn parse_tokens<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseError<'i>> {
    match input.next_including_whitespace()?.clone() {
        Token::Delim('+') => {
            match *input.next_including_whitespace()? {
                Token::Ident(_) => {}
                Token::Delim('?') => {}
                ref t => {
                    let t = t.clone();
                    return Err(input.new_basic_unexpected_token_error(t));
                }
            }
            parse_question_marks(input)
        }
        Token::Dimension { .. } => parse_question_marks(input),
        Token::Number { .. } => {
            // A lone number is valid on its own; rewind if what follows is
            // not part of this range.
            let after_number = input.state();
            match input.next_including_whitespace() {
                Ok(&Token::Delim('?')) => parse_question_marks(input),
                Ok(&Token::Dimension { .. }) => {}
                Ok(&Token::Number { .. }) => {}
                _ => input.reset(&after_number),
            }
        }
        t => return Err(input.new_basic_unexpected_token_error(t)),
    }
    Ok(())
}
+
+/// Consume as many '?' as possible
+fn parse_question_marks(input: &mut Parser) {
+ loop {
+ let start = input.state();
+ match input.next_including_whitespace() {
+ Ok(&Token::Delim('?')) => {}
+ _ => {
+ input.reset(&start);
+ return;
+ }
+ }
+ }
+}
+
+fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
+ let mut text = match text.split_first() {
+ Some((&b'+', text)) => text,
+ _ => return Err(()),
+ };
+ let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
+ let question_marks = consume_question_marks(&mut text);
+ let consumed = hex_digit_count + question_marks;
+ if consumed == 0 || consumed > 6 {
+ return Err(());
+ }
+
+ if question_marks > 0 {
+ if text.is_empty() {
+ return Ok(UnicodeRange {
+ start: first_hex_value << (question_marks * 4),
+ end: ((first_hex_value + 1) << (question_marks * 4)) - 1,
+ });
+ }
+ } else if text.is_empty() {
+ return Ok(UnicodeRange {
+ start: first_hex_value,
+ end: first_hex_value,
+ });
+ } else {
+ if let Some((&b'-', mut text)) = text.split_first() {
+ let (second_hex_value, hex_digit_count) = consume_hex(&mut text);
+ if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
+ return Ok(UnicodeRange {
+ start: first_hex_value,
+ end: second_hex_value,
+ });
+ }
+ }
+ }
+ Err(())
+}
+
/// Consume leading ASCII hex digits from `text`, advancing the slice past
/// them. Returns the accumulated value and the number of digits consumed.
///
/// Accumulation saturates at `u32::MAX`: a run of more than eight digits
/// would otherwise overflow the multiply and panic in debug builds (e.g. on
/// input like `u+FFFFFFFFF`). Callers reject anything longer than six digits
/// via the returned count, so saturation never affects a valid value.
fn consume_hex(text: &mut &[u8]) -> (u32, usize) {
    let mut value: u32 = 0;
    let mut digits = 0;
    while let Some((&byte, rest)) = text.split_first() {
        if let Some(digit_value) = (byte as char).to_digit(16) {
            value = value.saturating_mul(0x10).saturating_add(digit_value);
            digits += 1;
            *text = rest
        } else {
            break;
        }
    }
    (value, digits)
}
+
/// Strip leading '?' bytes from `text` and return how many were removed.
fn consume_question_marks(text: &mut &[u8]) -> usize {
    let question_marks = text.iter().take_while(|&&b| b == b'?').count();
    *text = &text[question_marks..];
    question_marks
}
+
impl fmt::Debug for UnicodeRange {
    // Debug output reuses the CSS serialization (e.g. "U+20-7F").
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        self.to_css(formatter)
    }
}
+
+impl ToCss for UnicodeRange {
+ fn to_css<W>(&self, dest: &mut W) -> fmt::Result
+ where
+ W: fmt::Write,
+ {
+ write!(dest, "U+{:X}", self.start)?;
+ if self.end != self.start {
+ write!(dest, "-{:X}", self.end)?;
+ }
+ Ok(())
+ }
+}