diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/ammonia | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ammonia')
-rw-r--r-- | vendor/ammonia/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | vendor/ammonia/CHANGELOG.md | 136 | ||||
-rw-r--r-- | vendor/ammonia/CODE_OF_CONDUCT.md | 46 | ||||
-rw-r--r-- | vendor/ammonia/Cargo.lock | 637 | ||||
-rw-r--r-- | vendor/ammonia/Cargo.toml | 42 | ||||
-rw-r--r-- | vendor/ammonia/LICENSE-APACHE | 202 | ||||
-rw-r--r-- | vendor/ammonia/LICENSE-MIT | 19 | ||||
-rw-r--r-- | vendor/ammonia/README.md | 107 | ||||
-rw-r--r-- | vendor/ammonia/RELEASE_PROCESS.md | 16 | ||||
-rw-r--r-- | vendor/ammonia/SECURITY.md | 16 | ||||
-rw-r--r-- | vendor/ammonia/bors.toml | 2 | ||||
-rw-r--r-- | vendor/ammonia/examples/ammonia-cat.rs | 35 | ||||
-rw-r--r-- | vendor/ammonia/src/lib.rs | 3626 | ||||
-rw-r--r-- | vendor/ammonia/src/rcdom.rs | 512 | ||||
-rw-r--r-- | vendor/ammonia/tests/version-numbers.rs | 6 |
15 files changed, 5403 insertions, 0 deletions
diff --git a/vendor/ammonia/.cargo-checksum.json b/vendor/ammonia/.cargo-checksum.json new file mode 100644 index 000000000..c2a6cb27d --- /dev/null +++ b/vendor/ammonia/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"424accda5fd8607826f81db9f88debda74caf9cb2bbc8f20bea25aeaccf7d515","CODE_OF_CONDUCT.md":"58043b883e086791ab688b87fd7f692ae0176727fb02957781ba2f25a5a199a1","Cargo.lock":"c377d4b4c8e2f560378070a4db592904dbec5ce0a5fce64dca6817e25e505840","Cargo.toml":"774628570e94a319e55eb51564698daf4c8c914a1966a429d52a58727647594d","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"565c516359bc84cc469a1b51dd59df60071bc23587ec5b59397ee71bb8e5cff8","README.md":"ce3b5e1a04fc604ce343e5f27ce3201610ca76e85b3a565143c0f228406668a0","RELEASE_PROCESS.md":"c06c0322b7f0d766a2f5dce08c53f9e9704b45f46b445521abca4b4c819d21b9","SECURITY.md":"206ccf40ec6aa34055f6da99366829a330f3b3189dbd2b79feace15f8524edc3","bors.toml":"542f9ce988ed5553cae00ab93d36cba550de73762e54b7865263cb24ba42af2e","examples/ammonia-cat.rs":"94c3fa92816a58ef9f8f7d5b0babb3778bedb5fba02e538c208eae0b1bf0b681","src/lib.rs":"75d544ac15fddd962ebd8667bf5e2709d2bc38782115ffc42f9203d83efdd53f","src/rcdom.rs":"e80bd9aca18875dfac99af94a4edaac4606ea8c57c90400592399974ddb86c26","tests/version-numbers.rs":"7711bb7de089a67efd19134f7af6a86c3422ed314da6ddbcab71c987eac2ad8b"},"package":"d5ed2509ee88cc023cccee37a6fab35826830fe8b748b3869790e7720c2c4a74"}
\ No newline at end of file diff --git a/vendor/ammonia/CHANGELOG.md b/vendor/ammonia/CHANGELOG.md new file mode 100644 index 000000000..b4d5c0cad --- /dev/null +++ b/vendor/ammonia/CHANGELOG.md @@ -0,0 +1,136 @@ +# Unreleased + +# 3.2.0 + +* chore: bump MSRV to 1.49.0 +* chore: update to html5ever 0.26 +* chore: switch from lazy_static to once_cell +* feature: add `Builder::empty()` constructor + +# 3.1.4 + +* chore: use `#[non_exhaustive]` instead of hidden variant for `UrlRelative` policy +* chore: remove unnecessary dependency on third-party `matches!` macro + +# 3.1.3 + +* fix: incorrect FF/CR handling in `clean_text` +* fix: split class name attribute by all ASCII whitespace, not just SP 0x20 +* docs: fix incorrect English in `is_html` documentation + +# 3.1.2 + +* fix: unexpected namespace switches can allow XSS via svg/mathml parsing + +# 3.1.1 + +* fix: Crash on invalid URLs in some configurations ([issue #136](https://github.com/rust-ammonia/ammonia/issues/136)) + +# 3.1.0 + +* feature: Whitelist generic attribute prefix +* chore: Update html5ever to 0.25 +* feature: support adding attribute values to elements + +# 3.0.0 + +* Add [`clean_text`] function. +* Updated to [rust-url 2.0]. +* Updated to the 2018 edition. + +[`clean_text`]: https://docs.rs/ammonia/3.0.0/ammonia/fn.clean_text.html +[rust-url 2.0]: https://docs.rs/url/2.0.0/url/ + +# 2.1.4 + +* fix: split class name attribute by all ASCII whitespace, not just SP 0x20 (backported from 3.1.3) + +# 2.1.3 + +* fix: unexpected namespace switches can allow XSS via svg/mathml parsing (backported from 3.1.2) + +# 2.1.2 + +* Fix a memory leak caused by certain node types. + +# 2.1.1 + +* Update dependencies + +# 2.1.0 + +* Bump minimum supported Rust version to 1.30. +* Fix a potential DoS attack from pathologically nested input. 
+ +# 2.0.0 + +* Add "script" and "style" to the default set of [clean content tags] +* Make all iterator-accepting APIs use `IntoIterator` and `Borrow`, so that you can pass slices directly to them. + +[clean content tags]: https://docs.rs/ammonia/2.0.0/ammonia/struct.Builder.html#method.clean_content_tags + +# 1.2.0 + +* Recognize `action`, `formaction` and `ping` as [URL attributes] for scheme and origin filtering +* Add [`Builder::url_filter_map`] which allows URLs, both relative and absolute, to be pre-filtered + +[URL attributes]: https://html.spec.whatwg.org/multipage/indices.html#attributes-3 +[`Builder::url_filter_map`]: https://docs.rs/ammonia/1.2/ammonia/struct.Builder.html#method.url_filter_map + +# 1.1.0 + +* Add [`Builder::clean_content_tags`] which allows elements to be removed entirely instead of just having the tags removed + +[`Builder::clean_content_tags`]: https://docs.rs/ammonia/1.1/ammonia/struct.Builder.html#method.clean_content_tags + +# 1.0.1 + +* Update dependencies + +# 1.0.0 + +* Breaking change: The `Ammonia` struct is now called `Builder` and uses that pattern for better forward compatibility +* Breaking change: The `Builder::clean()` method now returns a `Document` struct instead of a `String`. You can use the `Document::to_string` method to obtain a `String`. +* Breaking change: `keep_cleaned_elements` has changed from being an off-by-default option to the only supported behavior +* Breaking change: Using a tag with `allowed_classes` means that the class attribute is banned from `tag_attributes` (it used to be required) +* Breaking change: The default set of allowed elements and attributes was expanded +* Added support for reading the input from a stream +* Added `UrlRelative::Custom`, allowing you to write your own relative URL resolver +* Changed `UrlRelative::RewriteWithBase` to take a custom URL. This made the `url` crate a public dependency. 
+* Added `id_prefix`, which can be used to avoid element `id` collisions with the rest of the page +* Added property getters to `Builder`, to see what everything is currently set to +* Added property modifiers, to change the existing whitelist (instead of completely replacing it) + +# 0.7.0 + +* Add `allowed_classes`, allowing the user to set only specific items that can go in the class attribute + +# 0.6.1 + +* Fix a bug in the traversal code + +# 0.6.0 + +* Resolve relative URLs with a given base (off by default, you need to specify that base URL) +* Add `rel="noreferrer noopener"` to links, as a security measure +* Avoid closing void tags, such as turning `<br>` into `<br></br>` +* Bump the html5ever version +* Switch to using docs.rs to host docs + +# 0.5.0 + +* Bump html5ever to 0.18 (this updates serde from 0.9 to 1.0) + +# 0.4.0 + +* Upgrade to html5ever 0.17 + +# 0.3.0 + +* Add an option to keep elements that had attributes removed + +# 0.2.0 + +* Removed the strip option. Not a security problem, but it was wrong and looked stupid. I'm not going to reintroduce this until html5ever allows me to preserve the original text enough to have non-stripped tags come out exactly like they go in. +* Treat the data attribute of object as a URL. In non-default configurations, this could have been a leak. +* Update to the newest html5ever. 
diff --git a/vendor/ammonia/CODE_OF_CONDUCT.md b/vendor/ammonia/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..8a6e01007 --- /dev/null +++ b/vendor/ammonia/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at michael@notriddle.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/vendor/ammonia/Cargo.lock b/vendor/ammonia/Cargo.lock new file mode 100644 index 000000000..78f0e3a1e --- /dev/null +++ b/vendor/ammonia/Cargo.lock @@ -0,0 +1,637 @@ +# This file is automatically @generated by Cargo. 
+# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ammonia" +version = "3.2.0" +dependencies = [ + "env_logger", + "html5ever", + "maplit", + "once_cell", + "tendril", + "url", + "version-sync", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "libc" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", 
+] + +[[package]] +name = "pulldown-cmark" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "quote" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "scopeguard" +version = 
"1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65bd28f48be7196d222d95b9243287f48d27aca604e08497513019ff0502cc4" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "string_cache" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704df27628939572cd88d33f171cd6f896f4eaca85252c6e0a72d8d8287ee86f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "tinyvec" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "url" +version = "2.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "version-sync" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d0801cec07737d88cb900e6419f6f68733867f90b3faaa837e84692e101bf0" +dependencies = [ + "proc-macro2", + "pulldown-cmark", + "regex", + "semver", + "syn", + "toml", + "url", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" + +[[package]] +name = "windows_i686_gnu" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" + +[[package]] +name = "windows_i686_msvc" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" diff --git a/vendor/ammonia/Cargo.toml b/vendor/ammonia/Cargo.toml new file mode 100644 index 000000000..f54f76166 --- /dev/null +++ b/vendor/ammonia/Cargo.toml @@ -0,0 +1,42 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. 
+# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "ammonia" +version = "3.2.0" +authors = ["Michael Howell <michael@notriddle.com>"] +description = "HTML Sanitization" +documentation = "https://docs.rs/ammonia/" +readme = "README.md" +keywords = ["sanitization", "html", "security", "xss"] +categories = ["web-programming", "text-processing"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-ammonia/ammonia" +[dependencies.html5ever] +version = "0.26" + +[dependencies.maplit] +version = "1.0" + +[dependencies.once_cell] +version = "1.10" + +[dependencies.tendril] +version = "0.4" + +[dependencies.url] +version = "2" +[dev-dependencies.env_logger] +version = "0.9" + +[dev-dependencies.version-sync] +version = "0.9" diff --git a/vendor/ammonia/LICENSE-APACHE b/vendor/ammonia/LICENSE-APACHE new file mode 100644 index 000000000..8f71f43fe --- /dev/null +++ b/vendor/ammonia/LICENSE-APACHE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/vendor/ammonia/LICENSE-MIT b/vendor/ammonia/LICENSE-MIT new file mode 100644 index 000000000..2af1161b7 --- /dev/null +++ b/vendor/ammonia/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright (c) 2015-2021 The ammonia Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/ammonia/README.md b/vendor/ammonia/README.md new file mode 100644 index 000000000..d7e22541e --- /dev/null +++ b/vendor/ammonia/README.md @@ -0,0 +1,107 @@ +HTML Sanitization +================= + +[![Crates.IO](https://img.shields.io/crates/v/ammonia.svg)](https://crates.rs/crates/ammonia) +![Requires rustc 1.49.0](https://img.shields.io/badge/rustc-1.49.0+-green.svg) + +Ammonia is a whitelist-based HTML sanitization library. It is designed to +prevent cross-site scripting, layout breaking, and clickjacking caused +by untrusted user-provided HTML being mixed into a larger web page. + +Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do, +so it is extremely resilient to syntactic obfuscation. + +Ammonia parses its input exactly according to the HTML5 specification; +it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©. +If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark]. + +[html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo" +[pulldown-cmark]: https://github.com/google/pulldown-cmark + + +Installation +----------- + +To use `ammonia`, add it to your project's `Cargo.toml` file: + +```toml +[dependencies] +ammonia = "3" +``` + + +Changes +----------- +Please see the [CHANGELOG](CHANGELOG.md) for a release history. + + +Example +------- + +Using [pulldown-cmark] together with Ammonia for a friendly user-facing comment +site. 
+ +```rust +use ammonia::clean; +use pulldown_cmark::{Parser, Options, html::push_html}; + +let text = "[a link](http://www.notriddle.com/)"; + +let mut options = Options::empty(); +options.insert(Options::ENABLE_TABLES); + +let mut md_parse = Parser::new_ext(text, options); +let mut unsafe_html = String::new(); +push_html(&mut unsafe_html, md_parse); + +let safe_html = clean(&*unsafe_html); +assert_eq!(safe_html, "<a href=\"http://www.notriddle.com/\">a link</a>"); +``` + + +Performance +----------- + +Ammonia builds a DOM, traverses it (replacing unwanted nodes along the way), +and serializes it again. It could be faster for what it does, and if you don't +want to allow any HTML it is possible to be even faster than that. + +However, it takes about fifteen times longer to sanitize an HTML string using +[bleach]-2.0.0 with html5lib-0.999999999 than it does using Ammonia 1.0. + + $ cd benchmarks + $ cargo run --release + Running `target/release/ammonia_bench` + 87539 nanoseconds to clean up the intro to the Ammonia docs. + $ python bleach_bench.py + (1498800.015449524, 'nanoseconds to clean up the intro to the Ammonia docs.') + + +License +------ + +Licensed under either of these: + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + + +Thanks +------ + +Thanks to the other sanitizer libraries, particularly [Bleach] for Python and [sanitize-html] for Node, +which we blatantly copied most of our API from. 
+ +Thanks to ChALkeR, whose [Improper Markup Sanitization] document helped us find high-level semantic holes in Ammonia, +to [ssokolow](https://github.com/ssokolow), whose review and experience were also very helpful, to [securityMB](https://github.com/securityMB), +for finding a very obscure [namespace-related injection bug](https://github.com/rust-ammonia/ammonia/pull/142), and [xfix](https://github.com/xfix) for finding a [DoS bug in a recursive destructor](https://github.com/rust-ammonia/ammonia/pull/113). + +And finally, thanks to [the contributors]. + + +[sanitize-html]: https://www.npmjs.com/package/sanitize-html +[Bleach]: https://bleach.readthedocs.io/ +[Improper Markup Sanitization]: https://github.com/ChALkeR/notes/blob/master/Improper-markup-sanitization.md +[the contributors]: https://github.com/notriddle/ammonia/graphs/contributors diff --git a/vendor/ammonia/RELEASE_PROCESS.md b/vendor/ammonia/RELEASE_PROCESS.md new file mode 100644 index 000000000..12b01deea --- /dev/null +++ b/vendor/ammonia/RELEASE_PROCESS.md @@ -0,0 +1,16 @@ +How to make a release of ammonia +================================ + +* Make a pull request with all these changes, wait until it's approved: + + * Bump the version in Cargo.toml + + * Check if all the dependencies are up-to-date + + * Put all the Unreleased stuff in CHANGELOG.md under the new version + +* Check out and pull down `master` + +* Copy the CHANGELOG into a GitHub release: + +* Run `cargo publish` diff --git a/vendor/ammonia/SECURITY.md b/vendor/ammonia/SECURITY.md new file mode 100644 index 000000000..71c80c95e --- /dev/null +++ b/vendor/ammonia/SECURITY.md @@ -0,0 +1,16 @@ +# Security Policy + +## Supported Versions + +Use this section to tell people about which versions of your project are +currently being supported with security updates. 
+ +| Version | Supported | +| ------- | ------------------ | +| 3.0 | :white_check_mark: | +| 2.0 | :white_check_mark: | +| <= 1.0 | :x: | + +## Reporting a Vulnerability + +You can report security vulnerabilities to @notriddle's personal email, listed on his GitHub profile. diff --git a/vendor/ammonia/bors.toml b/vendor/ammonia/bors.toml new file mode 100644 index 000000000..c44762b2f --- /dev/null +++ b/vendor/ammonia/bors.toml @@ -0,0 +1,2 @@ +status = [ "ci" ] +cut_body_after = "<details>" diff --git a/vendor/ammonia/examples/ammonia-cat.rs b/vendor/ammonia/examples/ammonia-cat.rs new file mode 100644 index 000000000..95a83807d --- /dev/null +++ b/vendor/ammonia/examples/ammonia-cat.rs @@ -0,0 +1,35 @@ +use ammonia::Builder; +use std::env; +use std::fs::File; +use std::io::{self, Read, Write}; +use std::process; + +fn run() -> io::Result<()> { + let input = env::args().nth(1).unwrap_or_else(|| String::from("-")); + let output = env::args().nth(2).unwrap_or_else(|| String::from("-")); + + let mut rdr: Box<dyn Read> = if input == "-" { + Box::new(io::stdin()) + } else { + Box::new(File::open(input)?) + }; + + let mut wrt: Box<dyn Write> = if output == "-" { + Box::new(io::stdout()) + } else { + Box::new(File::create(output)?) + }; + + Builder::new() + .clean_from_reader(&mut rdr)? + .write_to(&mut wrt)?; + Ok(()) +} + +fn main() { + env_logger::init(); + if let Err(ref e) = run() { + println!("error: {}", e); + process::exit(1); + } +} diff --git a/vendor/ammonia/src/lib.rs b/vendor/ammonia/src/lib.rs new file mode 100644 index 000000000..d80d66ac7 --- /dev/null +++ b/vendor/ammonia/src/lib.rs @@ -0,0 +1,3626 @@ +// Copyright (C) Michael Howell and others +// this library is released under the same terms as Rust itself. + +#![deny(unsafe_code)] +#![deny(missing_docs)] + +//! Ammonia is a whitelist-based HTML sanitization library. It is designed to +//! prevent cross-site scripting, layout breaking, and clickjacking caused +//! 
by untrusted user-provided HTML being mixed into a larger web page. +//! +//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do, +//! so it is extremely resilient to syntactic obfuscation. +//! +//! Ammonia parses its input exactly according to the HTML5 specification; +//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©. +//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark]. +//! +//! # Examples +//! +//! ``` +//! let result = ammonia::clean( +//! "<b><img src='' onerror='alert(\\'hax\\')'>I'm not trying to XSS you</b>" +//! ); +//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>"); +//! ``` +//! +//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo" +//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser" + + +#[cfg(ammonia_unstable)] +pub mod rcdom; + +#[cfg(not(ammonia_unstable))] +mod rcdom; + +use html5ever::interface::Attribute; +use html5ever::serialize::{serialize, SerializeOpts}; +use html5ever::tree_builder::{NodeOrText, TreeSink}; +use html5ever::{driver as html, local_name, namespace_url, ns, QualName}; +use maplit::{hashmap, hashset}; +use once_cell::sync::Lazy; +use rcdom::{Handle, NodeData, RcDom, SerializableHandle}; +use std::borrow::{Borrow, Cow}; +use std::cmp::max; +use std::collections::{HashMap, HashSet}; +use std::fmt; +use std::io; +use std::iter::IntoIterator as IntoIter; +use std::mem::replace; +use std::rc::Rc; +use std::str::FromStr; +use tendril::stream::TendrilSink; +use tendril::StrTendril; +use tendril::{format_tendril, ByteTendril}; +pub use url::Url; + +use html5ever::buffer_queue::BufferQueue; +use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer}; +pub use url; + +static AMMONIA: Lazy<Builder<'static>> = Lazy::new(|| Builder::default()); + +/// Clean HTML with a conservative set of defaults. 
///
/// * [tags](struct.Builder.html#defaults)
/// * [attributes on specific tags](struct.Builder.html#defaults-1)
/// * [attributes on all tags](struct.Builder.html#defaults-2)
/// * [url schemes](struct.Builder.html#defaults-3)
/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-4)
/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-5)
/// * [all `class=""` settings are blocked by default](struct.Builder.html#defaults-6)
/// * [comments are stripped by default](struct.Builder.html#defaults-7)
///
/// [opener]: https://mathiasbynens.github.io/rel-noopener/
/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
///
/// The sanitized markup is returned as an owned `String`.
///
/// # Examples
///
///     assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
pub fn clean(src: &str) -> String {
    // `AMMONIA` is the lazily-initialised `Builder::default()` shared by every
    // call, so the default configuration is only constructed once.
    AMMONIA.clean(src).to_string()
}

/// Turn an arbitrary string into unformatted HTML.
///
/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
/// It is as strict as possible, encoding every character that has special meaning to the
/// HTML parser.
///
/// # Warnings
///
/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
/// you need a JavaScript or CSS escaper to do that.
///
///     // DO NOT DO THIS
///     # use ammonia::clean_text;
///     let untrusted = "Robert\"); abuse();//";
///     let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
///
/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
+/// If you want to build an editor that works the way most folks expect them to, you should put a +/// newline at the beginning of the tag, like this: +/// +/// # use ammonia::{Builder, clean_text}; +/// let untrusted = "\n\nhi!"; +/// let mut b = Builder::new(); +/// b.add_tags(&["textarea"]); +/// // This is the bad version +/// // The user put two newlines at the beginning, but the first one was removed +/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string(); +/// assert_eq!("<textarea>\nhi!</textarea>", sanitized); +/// // This is a good version +/// // The user put two newlines at the beginning, and we add a third one, +/// // so the result still has two +/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string(); +/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized); +/// // This version is also often considered good +/// // For many applications, leading and trailing whitespace is probably unwanted +/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string(); +/// assert_eq!("<textarea>hi!</textarea>", sanitized); +/// +/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`. +/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes +/// and paragraph contents. 
+pub fn clean_text(src: &str) -> String { + let mut ret_val = String::with_capacity(max(4, src.len())); + for c in src.chars() { + let replacement = match c { + // this character, when confronted, will start a tag + '<' => "<", + // in an unquoted attribute, will end the attribute value + '>' => ">", + // in an attribute surrounded by double quotes, this character will end the attribute value + '\"' => """, + // in an attribute surrounded by single quotes, this character will end the attribute value + '\'' => "'", + // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes + '`' => "`", + // in an unquoted attribute, this character will end the attribute + '/' => "/", + // starts an entity reference + '&' => "&", + // if at the beginning of an unquoted attribute, will get ignored + '=' => "=", + // will end an unquoted attribute + ' ' => " ", + '\t' => "	", + '\n' => " ", + '\x0c' => "", + '\r' => " ", + // a spec-compliant browser will perform this replacement anyway, but the middleware might not + '\0' => "�", + // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM + _ => { + ret_val.push(c); + continue; + } + }; + ret_val.push_str(replacement); + } + ret_val +} + +/// Determine if a given string contains HTML +/// +/// This function is parses the full string into HTML and checks if the input contained any +/// HTML syntax. +/// +/// # Note +/// This function will return positively for strings that contain invalid HTML syntax like +/// `<g>` and even `Vec::<u8>::new()`. 
pub fn is_html(input: &str) -> bool {
    let santok = SanitizationTokenizer::new();
    // Copy the input into a byte tendril, then reinterpret it as a string
    // tendril so it can be queued for the tokenizer.
    let mut chunk = ByteTendril::new();
    chunk.push_slice(input.as_bytes());
    let mut input = BufferQueue::new();
    // NOTE(review): presumably this `unwrap` cannot fail because the bytes come
    // from a `&str` and are therefore valid UTF-8 — confirm against tendril's
    // `try_reinterpret` contract.
    input.push_back(chunk.try_reinterpret().unwrap());

    let mut tok = Tokenizer::new(santok, Default::default());
    // The tokenizer's own result is deliberately discarded; only the flag
    // recorded by the sink matters.
    let _ = tok.feed(&mut input);
    tok.end();
    tok.sink.was_sanitized
}

/// Token sink used by `is_html` to record whether the tokenizer produced
/// anything other than plain character data.
#[derive(Copy, Clone)]
struct SanitizationTokenizer {
    // Set to `true` by `process_token` the first time a token other than
    // character data, EOF, or a parse error is seen.
    was_sanitized: bool,
}

impl SanitizationTokenizer {
    /// Creates a sink whose `was_sanitized` flag starts out `false`.
    pub fn new() -> SanitizationTokenizer {
        SanitizationTokenizer {
            was_sanitized: false,
        }
    }
}

impl TokenSink for SanitizationTokenizer {
    type Handle = ();
    /// Flags the input as containing HTML for every token that is not
    /// character data, EOF, or a parse error, and always lets tokenization continue.
    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
        match token {
            // These tokens alone do not count as HTML syntax.
            Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
            _ => {
                self.was_sanitized = true;
            }
        }
        TokenSinkResult::Continue
    }
    // Nothing to finalize: the flag is read directly by `is_html`.
    fn end(&mut self) {}
}

/// An HTML sanitizer.
///
/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
/// parsing algorithm and sanitize any disallowed tags or attributes. This
/// algorithm also takes care of things like unclosed and (some) misnested
/// tags.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///
///     let a = Builder::default()
///         .link_rel(None)
///         .url_relative(UrlRelative::PassThrough)
///         .clean("<a href=/>test")
///         .to_string();
///     assert_eq!(
///         a,
///         "<a href=\"/\">test</a>");
///
/// # Panics
///
/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
/// configured with any of these (contradictory) settings:
///
///  * The `rel` attribute is added to [`generic_attributes`] or the
///    [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
///
///    For example, this is going to panic, since [`link_rel`] is set to
///    `Some("noopener noreferrer")` by default,
///    and it makes no sense to simultaneously say that the user is allowed to
///    set their own `rel` attribute while saying that every link shall be set to
///    a particular value:
///
///    ```should_panic
///    use ammonia::Builder;
///    use maplit::hashset;
///
///    # fn main() {
///    Builder::default()
///        .generic_attributes(hashset!["rel"])
///        .clean("");
///    # }
///    ```
///
///    This, however, is perfectly valid:
///
///    ```
///    use ammonia::Builder;
///    use maplit::hashset;
///
///    # fn main() {
///    Builder::default()
///        .generic_attributes(hashset!["rel"])
///        .link_rel(None)
///        .clean("");
///    # }
///    ```
///
///  * The `class` attribute is in [`allowed_classes`] and is in the
///    corresponding [`tag_attributes`] or in [`generic_attributes`].
///
///    This is done both to line up with the treatment of `rel`,
///    and to prevent people from accidentally allowing arbitrary
///    classes on a particular element.
///
///    This will panic:
///
///    ```should_panic
///    use ammonia::Builder;
///    use maplit::{hashmap, hashset};
///
///    # fn main() {
///    Builder::default()
///        .generic_attributes(hashset!["class"])
///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
///        .clean("");
///    # }
///    ```
///
///    This, however, is perfectly valid:
///
///    ```
///    use ammonia::Builder;
///    use maplit::{hashmap, hashset};
///
///    # fn main() {
///    Builder::default()
///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
///        .clean("");
///    # }
///    ```
///
///  * A tag is in either [`tags`] or [`tag_attributes`] while also
///    being in [`clean_content_tags`].
///
///    Both [`tags`] and [`tag_attributes`] are whitelists but
///    [`clean_content_tags`] is a blacklist, so it doesn't make sense
///    to have the same tag in both.
///
///    For example, this will panic, since the `aside` tag is in
///    [`tags`] by default:
///
///    ```should_panic
///    use ammonia::Builder;
///    use maplit::hashset;
///
///    # fn main() {
///    Builder::default()
///        .clean_content_tags(hashset!["aside"])
///        .clean("");
///    # }
///    ```
///
///    This, however, is valid:
///
///    ```
///    use ammonia::Builder;
///    use maplit::hashset;
///
///    # fn main() {
///    Builder::default()
///        .rm_tags(&["aside"])
///        .clean_content_tags(hashset!["aside"])
///        .clean("");
///    # }
///    ```
///
/// [`clean`]: #method.clean
/// [`clean_from_reader`]: #method.clean_from_reader
/// [`generic_attributes`]: #method.generic_attributes
/// [`tag_attributes`]: #method.tag_attributes
/// [`link_rel`]: #method.link_rel
/// [`allowed_classes`]: #method.allowed_classes
/// [`id_prefix`]: #method.id_prefix
/// [`tags`]: #method.tags
/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    // Tags that are allowed in the output.
    tags: HashSet<&'a str>,
    // Tags whose contents are removed from the output entirely.
    clean_content_tags: HashSet<&'a str>,
    // Attributes allowed on specific tags, keyed by tag name.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    generic_attributes: HashSet<&'a str>,
    url_schemes: HashSet<&'a str>,
    url_relative: UrlRelative,
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    link_rel: Option<&'a str>,
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    strip_comments: bool,
    id_prefix: Option<&'a str>,
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
}

impl<'a> Default for Builder<'a> {
    /// Builds the default configuration: the tag, attribute and URL-scheme
    /// whitelists listed below, `script`/`style` content removal, relative URLs
    /// passed through, `rel="noopener noreferrer"` on links, and comments stripped.
    fn default() -> Self {
        // NOTE(review): "kbd" is listed twice below; the set collapses the
        // duplicate, so it looks harmless — confirm it is not a typo for another tag.
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let tags = hashset![
            "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
            "bdo", "blockquote", "br", "caption", "center", "cite", "code",
            "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
            "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
            "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
            "ins", "kbd", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
            "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
            "strike", "strong", "sub", "summary", "sup", "table", "tbody",
            "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
        ];
        let clean_content_tags = hashset!["script", "style"];
        let generic_attributes = hashset!["lang", "title"];
        // NOTE(review): `tfoot` has an entry here but is not in the default
        // `tags` set above — confirm whether that omission is intentional.
        let tag_attributes = hashmap![
            "a" => hashset![
                "href", "hreflang"
            ],
            "bdo" => hashset![
                "dir"
            ],
            "blockquote" => hashset![
                "cite"
            ],
            "col" => hashset![
                "align", "char", "charoff", "span"
            ],
            "colgroup" => hashset![
                "align", "char", "charoff", "span"
            ],
            "del" => hashset![
                "cite", "datetime"
            ],
            "hr" => hashset![
                "align", "size", "width"
            ],
            "img" => hashset![
                "align", "alt", "height", "src", "width"
            ],
            "ins" => hashset![
                "cite", "datetime"
            ],
            "ol" => hashset![
                "start"
            ],
            "q" => hashset![
                "cite"
            ],
            "table" => hashset![
                "align", "char", "charoff", "summary"
            ],
            "tbody" => hashset![
                "align", "char", "charoff"
            ],
            "td" => hashset![
                "align", "char", "charoff", "colspan", "headers", "rowspan"
            ],
            "tfoot" => hashset![
                "align", "char", "charoff"
            ],
            "th" => hashset![
                "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
            ],
            "thead" => hashset![
                "align", "char", "charoff"
            ],
            "tr" => hashset![
                "align", "char", "charoff"
            ],
        ];
        let tag_attribute_values = hashmap![];
        let set_tag_attribute_values = hashmap![];
        let url_schemes = hashset![
            "bitcoin",
            "ftp",
            "ftps",
            "geo",
            "http",
            "https",
            "im",
            "irc",
            "ircs",
            "magnet",
            "mailto",
            "mms",
            "mx",
            "news",
            "nntp",
            "openpgp4fpr",
            "sip",
            "sms",
            "smsto",
            "ssh",
            "tel",
            "url",
            "webcal",
            "wtai",
            "xmpp"
        ];
        let allowed_classes = hashmap![];

        Builder {
            tags,
            clean_content_tags,
            tag_attributes,
            tag_attribute_values,
            set_tag_attribute_values,
            generic_attributes,
            url_schemes,
            url_relative: UrlRelative::PassThrough,
            attribute_filter: None,
            link_rel: Some("noopener noreferrer"),
            allowed_classes,
            strip_comments: true,
            id_prefix: None,
            generic_attribute_prefixes: None,
        }
    }
}

impl<'a> Builder<'a> {
    /// Sets the tags that are allowed.
    ///
    /// This replaces the whole set; the previous contents are discarded.
    ///
    /// # Examples
    ///
    ///     use ammonia::Builder;
    ///     use maplit::hashset;
    ///
    ///     # fn main() {
    ///     let tags = hashset!["my-tag"];
    ///     let a = Builder::new()
    ///         .tags(tags)
    ///         .clean("<my-tag>")
    ///         .to_string();
    ///     assert_eq!(a, "<my-tag></my-tag>");
    ///     # }
    ///
    /// # Defaults
    ///
    /// ```notest
    /// a, abbr, acronym, area, article, aside, b, bdi,
    /// bdo, blockquote, br, caption, center, cite, code,
    /// col, colgroup, data, dd, del, details, dfn, div,
    /// dl, dt, em, figcaption, figure, footer, h1, h2,
    /// h3, h4, h5, h6, header, hgroup, hr, i, img,
    /// ins, kbd, kbd, li, map, mark, nav, ol, p, pre,
    /// q, rp, rt, rtc, ruby, s, samp, small, span,
    /// strike, strong, sub, summary, sup, table, tbody,
    /// td, th, thead, time, tr, tt, u, ul, var, wbr
    /// ```
    pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        self.tags = value;
        self
    }

    /// Add additional whitelisted tags without overwriting old ones.
    ///
    /// Does nothing if the tag is already there.
    ///
    /// # Examples
    ///
    ///     let a = ammonia::Builder::default()
    ///         .add_tags(&["my-tag"])
    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
    ///     assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
    pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
        &mut self,
        it: I,
    ) -> &mut Self {
        // Each item is borrowed as `&'a str`, so it can be stored in the set
        // without allocating.
        self.tags.extend(it.into_iter().map(Borrow::borrow));
        self
    }

    /// Remove already-whitelisted tags.
    ///
    /// Does nothing if the tags are already gone.
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .rm_tags(&["span"]) + /// .clean("<span></span>").to_string(); + /// assert_eq!("", a); + pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( + &mut self, + it: I, + ) -> &mut Self { + for i in it { + self.tags.remove(i.borrow()); + } + self + } + + /// Returns a copy of the set of whitelisted tags. + /// + /// # Examples + /// + /// use maplit::hashset; + /// + /// let tags = hashset!["my-tag-1", "my-tag-2"]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.tags(Clone::clone(&tags)); + /// assert_eq!(tags, b.clone_tags()); + pub fn clone_tags(&self) -> HashSet<&'a str> { + self.tags.clone() + } + + /// Sets the tags whose contents will be completely removed from the output. + /// + /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause + /// a panic. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let tag_blacklist = hashset!["script", "style"]; + /// let a = Builder::new() + /// .clean_content_tags(tag_blacklist) + /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>") + /// .to_string(); + /// assert_eq!(a, ""); + /// # } + /// + /// # Defaults + /// + /// No tags have content removed by default. + pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self { + self.clean_content_tags = value; + self + } + + /// Add additonal blacklisted clean-content tags without overwriting old ones. + /// + /// Does nothing if the tag is already there. + /// + /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause + /// a panic. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_clean_content_tags(&["my-tag"]) + /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string(); + /// assert_eq!("<span>mess</span>", a); + pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( + &mut self, + it: I, + ) -> &mut Self { + self.clean_content_tags + .extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-blacklisted clean-content tags. + /// + /// Does nothing if the tags aren't blacklisted. + /// + /// # Examples + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let tag_blacklist = hashset!["script"]; + /// let a = ammonia::Builder::default() + /// .clean_content_tags(tag_blacklist) + /// .rm_clean_content_tags(&["script"]) + /// .clean("<script>XSS</script>").to_string(); + /// assert_eq!("XSS", a); + /// # } + pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( + &mut self, + it: I, + ) -> &mut Self { + for i in it { + self.clean_content_tags.remove(i.borrow()); + } + self + } + + /// Returns a copy of the set of blacklisted clean-content tags. + /// + /// # Examples + /// # use maplit::hashset; + /// + /// let tags = hashset!["my-tag-1", "my-tag-2"]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.clean_content_tags(Clone::clone(&tags)); + /// assert_eq!(tags, b.clone_clean_content_tags()); + pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> { + self.clean_content_tags.clone() + } + + /// Sets the HTML attributes that are allowed on specific tags. + /// + /// The value is structured as a map from tag names to a set of attribute names. + /// + /// If a tag is not itself whitelisted, adding entries to this map will do nothing. 
+ /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::{hashmap, hashset}; + /// + /// # fn main() { + /// let tags = hashset!["my-tag"]; + /// let tag_attributes = hashmap![ + /// "my-tag" => hashset!["val"] + /// ]; + /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes) + /// .clean("<my-tag val=1>") + /// .to_string(); + /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>"); + /// # } + /// + /// # Defaults + /// + /// ```notest + /// a => + /// href, hreflang + /// bdo => + /// dir + /// blockquote => + /// cite + /// col => + /// align, char, charoff, span + /// colgroup => + /// align, char, charoff, span + /// del => + /// cite, datetime + /// hr => + /// align, size, width + /// img => + /// align, alt, height, src, width + /// ins => + /// cite, datetime + /// ol => + /// start + /// q => + /// cite + /// table => + /// align, char, charoff, summary + /// tbody => + /// align, char, charoff + /// td => + /// align, char, charoff, colspan, headers, rowspan + /// tfoot => + /// align, char, charoff + /// th => + /// align, char, charoff, colspan, headers, rowspan, scope + /// thead => + /// align, char, charoff + /// tr => + /// align, char, charoff + /// ``` + pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { + self.tag_attributes = value; + self + } + + /// Add additonal whitelisted tag-specific attributes without overwriting old ones. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_tags(&["my-tag"]) + /// .add_tag_attributes("my-tag", &["my-attr"]) + /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); + /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); + pub fn add_tag_attributes< + T: 'a + ?Sized + Borrow<str>, + U: 'a + ?Sized + Borrow<str>, + I: IntoIter<Item = &'a T>, + >( + &mut self, + tag: &'a U, + it: I, + ) -> &mut Self { + self.tag_attributes + .entry(tag.borrow()) + .or_insert_with(|| HashSet::new()) + .extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-whitelisted tag-specific attributes. + /// + /// Does nothing if the attribute is already gone. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .rm_tag_attributes("a", &["href"]) + /// .clean("<a href=\"/\"></a>").to_string(); + /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); + pub fn rm_tag_attributes< + 'b, + 'c, + T: 'b + ?Sized + Borrow<str>, + U: 'c + ?Sized + Borrow<str>, + I: IntoIter<Item = &'b T>, + >( + &mut self, + tag: &'c U, + it: I, + ) -> &mut Self { + if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) { + for i in it { + tag.remove(i.borrow()); + } + } + self + } + + /// Returns a copy of the set of whitelisted tag-specific attributes. + /// + /// # Examples + /// use maplit::{hashmap, hashset}; + /// + /// let tag_attributes = hashmap![ + /// "my-tag" => hashset!["my-attr-1", "my-attr-2"] + /// ]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.tag_attributes(Clone::clone(&tag_attributes)); + /// assert_eq!(tag_attributes, b.clone_tag_attributes()); + pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> { + self.tag_attributes.clone() + } + + /// Sets the values of HTML attributes that are allowed on specific tags. 
+ /// + /// The value is structured as a map from tag names to a map from attribute names to a set of + /// attribute values. + /// + /// If a tag is not itself whitelisted, adding entries to this map will do nothing. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::{hashmap, hashset}; + /// + /// # fn main() { + /// let tags = hashset!["my-tag"]; + /// let tag_attribute_values = hashmap![ + /// "my-tag" => hashmap![ + /// "my-attr" => hashset!["val"], + /// ], + /// ]; + /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values) + /// .clean("<my-tag my-attr=val>") + /// .to_string(); + /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); + /// # } + /// + /// # Defaults + /// + /// None. + pub fn tag_attribute_values( + &mut self, + value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>, + ) -> &mut Self { + self.tag_attribute_values = value; + self + } + + /// Add additonal whitelisted tag-specific attribute values without overwriting old ones. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_tags(&["my-tag"]) + /// .add_tag_attribute_values("my-tag", "my-attr", &[""]) + /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); + /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); + pub fn add_tag_attribute_values< + T: 'a + ?Sized + Borrow<str>, + U: 'a + ?Sized + Borrow<str>, + V: 'a + ?Sized + Borrow<str>, + I: IntoIter<Item = &'a T>, + >( + &mut self, + tag: &'a U, + attribute: &'a V, + it: I, + ) -> &mut Self { + self.tag_attribute_values + .entry(tag.borrow()) + .or_insert_with(HashMap::new) + .entry(attribute.borrow()) + .or_insert_with(HashSet::new) + .extend(it.into_iter().map(Borrow::borrow)); + + self + } + + /// Remove already-whitelisted tag-specific attribute values. + /// + /// Does nothing if the attribute or the value is already gone. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .rm_tag_attributes("a", &["href"]) + /// .add_tag_attribute_values("a", "href", &["/"]) + /// .rm_tag_attribute_values("a", "href", &["/"]) + /// .clean("<a href=\"/\"></a>").to_string(); + /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); + pub fn rm_tag_attribute_values< + 'b, + 'c, + T: 'b + ?Sized + Borrow<str>, + U: 'c + ?Sized + Borrow<str>, + V: 'c + ?Sized + Borrow<str>, + I: IntoIter<Item = &'b T>, + >( + &mut self, + tag: &'c U, + attribute: &'c V, + it: I, + ) -> &mut Self { + if let Some(attrs) = self + .tag_attribute_values + .get_mut(tag.borrow()) + .and_then(|map| map.get_mut(attribute.borrow())) + { + for i in it { + attrs.remove(i.borrow()); + } + } + self + } + + /// Returns a copy of the set of whitelisted tag-specific attribute values. + /// + /// # Examples + /// + /// use maplit::{hashmap, hashset}; + /// + /// let attribute_values = hashmap![ + /// "my-attr-1" => hashset!["foo"], + /// "my-attr-2" => hashset!["baz", "bar"], + /// ]; + /// let tag_attribute_values = hashmap![ + /// "my-tag" => attribute_values + /// ]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.tag_attribute_values(Clone::clone(&tag_attribute_values)); + /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values()); + pub fn clone_tag_attribute_values( + &self, + ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> { + self.tag_attribute_values.clone() + } + + /// Sets the values of HTML attributes that are to be set on specific tags. + /// + /// The value is structured as a map from tag names to a map from attribute names to an + /// attribute value. + /// + /// If a tag is not itself whitelisted, adding entries to this map will do nothing. 
+ /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::{hashmap, hashset}; + /// + /// # fn main() { + /// let tags = hashset!["my-tag"]; + /// let set_tag_attribute_values = hashmap![ + /// "my-tag" => hashmap![ + /// "my-attr" => "val", + /// ], + /// ]; + /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values) + /// .clean("<my-tag>") + /// .to_string(); + /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); + /// # } + /// + /// # Defaults + /// + /// None. + pub fn set_tag_attribute_values( + &mut self, + value: HashMap<&'a str, HashMap<&'a str, &'a str>>, + ) -> &mut Self { + self.set_tag_attribute_values = value; + self + } + + /// Add an attribute value to set on a specific element. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_tags(&["my-tag"]) + /// .set_tag_attribute_value("my-tag", "my-attr", "val") + /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); + /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a); + pub fn set_tag_attribute_value< + T: 'a + ?Sized + Borrow<str>, + A: 'a + ?Sized + Borrow<str>, + V: 'a + ?Sized + Borrow<str>, + >( + &mut self, + tag: &'a T, + attribute: &'a A, + value: &'a V, + ) -> &mut Self { + self.set_tag_attribute_values + .entry(tag.borrow()) + .or_insert_with(HashMap::new) + .insert(attribute.borrow(), value.borrow()); + self + } + + /// Remove existing tag-specific attribute values to be set. + /// + /// Does nothing if the attribute is already gone. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// // this does nothing, since no value is set for this tag attribute yet + /// .rm_set_tag_attribute_value("a", "target") + /// .set_tag_attribute_value("a", "target", "_blank") + /// .rm_set_tag_attribute_value("a", "target") + /// .clean("<a href=\"/\"></a>").to_string(); + /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a); + pub fn rm_set_tag_attribute_value< + T: 'a + ?Sized + Borrow<str>, + A: 'a + ?Sized + Borrow<str>, + >( + &mut self, + tag: &'a T, + attribute: &'a A, + ) -> &mut Self { + if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) { + attributes.remove(attribute.borrow()); + } + self + } + + /// Returns the value that will be set for the attribute on the element, if any. + /// + /// # Examples + /// + /// let mut b = ammonia::Builder::default(); + /// b.set_tag_attribute_value("a", "target", "_blank"); + /// let value = b.get_set_tag_attribute_value("a", "target"); + /// assert_eq!(value, Some("_blank")); + pub fn get_set_tag_attribute_value< + T: 'a + ?Sized + Borrow<str>, + A: 'a + ?Sized + Borrow<str>, + >( + &self, + tag: &'a T, + attribute: &'a A, + ) -> Option<&'a str> { + self.set_tag_attribute_values + .get(tag.borrow()) + .and_then(|map| map.get(attribute.borrow())) + .copied() + } + + /// Returns a copy of the set of tag-specific attribute values to be set. 
+ /// + /// # Examples + /// + /// use maplit::{hashmap, hashset}; + /// + /// let attribute_values = hashmap![ + /// "my-attr-1" => "foo", + /// "my-attr-2" => "bar", + /// ]; + /// let set_tag_attribute_values = hashmap![ + /// "my-tag" => attribute_values, + /// ]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values)); + /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values()); + pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> { + self.set_tag_attribute_values.clone() + } + + /// Sets the prefix of attributes that are allowed on any tag. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let prefixes = hashset!["data-"]; + /// let a = Builder::new() + /// .generic_attribute_prefixes(prefixes) + /// .clean("<b data-val=1>") + /// .to_string(); + /// assert_eq!(a, "<b data-val=\"1\"></b>"); + /// # } + /// + /// # Defaults + /// + /// ```notest + /// lang, title + /// ``` + pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self { + self.generic_attribute_prefixes = Some(value); + self + } + + /// Add additional whitelisted attribute prefix without overwriting old ones. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_generic_attribute_prefixes(&["my-"]) + /// .clean("<span my-attr>mess</span>").to_string(); + /// assert_eq!("<span my-attr=\"\">mess</span>", a); + pub fn add_generic_attribute_prefixes< + T: 'a + ?Sized + Borrow<str>, + I: IntoIter<Item = &'a T>, + >( + &mut self, + it: I, + ) -> &mut Self { + self.generic_attribute_prefixes + .get_or_insert_with(HashSet::new) + .extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-whitelisted attribute prefixes. + /// + /// Does nothing if the attribute prefix is already gone. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_generic_attribute_prefixes(&["data-", "code-"]) + /// .rm_generic_attribute_prefixes(&["data-"]) + /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string(); + /// assert_eq!("<span code-test=\"foo\"></span>", a); + pub fn rm_generic_attribute_prefixes< + 'b, + T: 'b + ?Sized + Borrow<str>, + I: IntoIter<Item = &'b T>, + >( + &mut self, + it: I, + ) -> &mut Self { + if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| { + for i in it { + let _ = prefixes.remove(i.borrow()); + } + prefixes.is_empty() + }) { + self.generic_attribute_prefixes = None; + } + self + } + + /// Returns a copy of the set of whitelisted attribute prefixes. + /// + /// # Examples + /// + /// use maplit::hashset; + /// + /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes)); + /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes()); + pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> { + self.generic_attribute_prefixes.clone() + } + + /// Sets the attributes that are allowed on any tag. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let attributes = hashset!["data-val"]; + /// let a = Builder::new() + /// .generic_attributes(attributes) + /// .clean("<b data-val=1>") + /// .to_string(); + /// assert_eq!(a, "<b data-val=\"1\"></b>"); + /// # } + /// + /// # Defaults + /// + /// ```notest + /// lang, title + /// ``` + pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self { + self.generic_attributes = value; + self + } + + /// Add additonal whitelisted attributes without overwriting old ones. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_generic_attributes(&["my-attr"]) + /// .clean("<span my-attr>mess</span>").to_string(); + /// assert_eq!("<span my-attr=\"\">mess</span>", a); + pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( + &mut self, + it: I, + ) -> &mut Self { + self.generic_attributes + .extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-whitelisted attributes. + /// + /// Does nothing if the attribute is already gone. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .rm_generic_attributes(&["title"]) + /// .clean("<span title=\"cool\"></span>").to_string(); + /// assert_eq!("<span></span>", a); + pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( + &mut self, + it: I, + ) -> &mut Self { + for i in it { + self.generic_attributes.remove(i.borrow()); + } + self + } + + /// Returns a copy of the set of whitelisted attributes. + /// + /// # Examples + /// + /// use maplit::hashset; + /// + /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.generic_attributes(Clone::clone(&generic_attributes)); + /// assert_eq!(generic_attributes, b.clone_generic_attributes()); + pub fn clone_generic_attributes(&self) -> HashSet<&'a str> { + self.generic_attributes.clone() + } + + /// Sets the URL schemes permitted on `href` and `src` attributes. 
+ /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let url_schemes = hashset![ + /// "http", "https", "mailto", "magnet" + /// ]; + /// let a = Builder::new().url_schemes(url_schemes) + /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>") + /// .to_string(); + /// + /// // See `link_rel` for information on the rel="noopener noreferrer" attribute + /// // in the cleaned HTML. + /// assert_eq!(a, + /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>"); + /// # } + /// + /// # Defaults + /// + /// ```notest + /// bitcoin, ftp, ftps, geo, http, https, im, irc, + /// ircs, magnet, mailto, mms, mx, news, nntp, + /// openpgp4fpr, sip, sms, smsto, ssh, tel, url, + /// webcal, wtai, xmpp + /// ``` + pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self { + self.url_schemes = value; + self + } + + /// Add additonal whitelisted URL schemes without overwriting old ones. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_url_schemes(&["my-scheme"]) + /// .clean("<a href=my-scheme:home>mess</span>").to_string(); + /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a); + pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( + &mut self, + it: I, + ) -> &mut Self { + self.url_schemes.extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-whitelisted attributes. + /// + /// Does nothing if the attribute is already gone. 
+ /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .rm_url_schemes(&["ftp"]) + /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string(); + /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); + pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( + &mut self, + it: I, + ) -> &mut Self { + for i in it { + self.url_schemes.remove(i.borrow()); + } + self + } + + /// Returns a copy of the set of whitelisted URL schemes. + /// + /// # Examples + /// use maplit::hashset; + /// + /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.url_schemes(Clone::clone(&url_schemes)); + /// assert_eq!(url_schemes, b.clone_url_schemes()); + pub fn clone_url_schemes(&self) -> HashSet<&'a str> { + self.url_schemes.clone() + } + + /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny. + /// + /// # Examples + /// + /// use ammonia::{Builder, UrlRelative}; + /// + /// let a = Builder::new().url_relative(UrlRelative::PassThrough) + /// .clean("<a href=/>Home</a>") + /// .to_string(); + /// + /// // See `link_rel` for information on the rel="noopener noreferrer" attribute + /// // in the cleaned HTML. + /// assert_eq!( + /// a, + /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>"); + /// + /// # Defaults + /// + /// ```notest + /// UrlRelative::PassThrough + /// ``` + pub fn url_relative(&mut self, value: UrlRelative) -> &mut Self { + self.url_relative = value; + self + } + + /// Allows rewriting of all attributes using a callback. + /// + /// The callback takes name of the element, attribute and its value. + /// Returns `None` to remove the attribute, or a value to use. + /// + /// Rewriting of attributes with URLs is done before `url_relative()`. + /// + /// # Panics + /// + /// If more than one callback is set. 
+ /// + /// # Examples + /// + /// ```rust + /// use ammonia::Builder; + /// let a = Builder::new() + /// .attribute_filter(|element, attribute, value| { + /// match (element, attribute) { + /// ("img", "src") => None, + /// _ => Some(value.into()) + /// } + /// }) + /// .link_rel(None) + /// .clean("<a href=/><img alt=Home src=foo></a>") + /// .to_string(); + /// assert_eq!(a, + /// r#"<a href="/"><img alt="Home"></a>"#); + /// ``` + pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self + where + CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static, + { + assert!( + self.attribute_filter.is_none(), + "attribute_filter can be set only once" + ); + self.attribute_filter = Some(Box::new(callback)); + self + } + + /// Returns `true` if the relative URL resolver is set to `Deny`. + /// + /// # Examples + /// + /// use ammonia::{Builder, UrlRelative}; + /// let mut a = Builder::default(); + /// a.url_relative(UrlRelative::Deny); + /// assert!(a.is_url_relative_deny()); + /// a.url_relative(UrlRelative::PassThrough); + /// assert!(!a.is_url_relative_deny()); + pub fn is_url_relative_deny(&self) -> bool { + matches!(self.url_relative, UrlRelative::Deny) + } + + /// Returns `true` if the relative URL resolver is set to `PassThrough`. + /// + /// # Examples + /// + /// use ammonia::{Builder, UrlRelative}; + /// let mut a = Builder::default(); + /// a.url_relative(UrlRelative::Deny); + /// assert!(!a.is_url_relative_pass_through()); + /// a.url_relative(UrlRelative::PassThrough); + /// assert!(a.is_url_relative_pass_through()); + pub fn is_url_relative_pass_through(&self) -> bool { + matches!(self.url_relative, UrlRelative::PassThrough) + } + + /// Returns `true` if the relative URL resolver is set to `Custom`. 
+ /// + /// # Examples + /// + /// use ammonia::{Builder, UrlRelative}; + /// use std::borrow::Cow; + /// fn test(a: &str) -> Option<Cow<str>> { None } + /// # fn main() { + /// let mut a = Builder::default(); + /// a.url_relative(UrlRelative::Custom(Box::new(test))); + /// assert!(a.is_url_relative_custom()); + /// a.url_relative(UrlRelative::PassThrough); + /// assert!(!a.is_url_relative_custom()); + /// a.url_relative(UrlRelative::Deny); + /// assert!(!a.is_url_relative_custom()); + /// # } + pub fn is_url_relative_custom(&self) -> bool { + matches!(self.url_relative, UrlRelative::Custom(_)) + } + + /// Configures a `rel` attribute that will be added on links. + /// + /// If `rel` is in the generic or tag attributes, this must be set to `None`. + /// Common `rel` values to include: + /// + /// * `noopener`: This prevents [a particular type of XSS attack], + /// and should usually be turned on for untrusted HTML. + /// * `noreferrer`: This prevents the browser from [sending the source URL] + /// to the website that is linked to. + /// * `nofollow`: This prevents search engines from [using this link for + /// ranking], which disincentivizes spammers. + /// + /// To turn on rel-insertion, call this function with a space-separated list. + /// Ammonia does not parse rel-attributes; + /// it just puts the given string into the attribute directly. 
+ /// + /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/ + /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer + /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow + /// + /// # Examples + /// + /// use ammonia::Builder; + /// + /// let a = Builder::new().link_rel(None) + /// .clean("<a href=https://rust-lang.org/>Rust</a>") + /// .to_string(); + /// assert_eq!( + /// a, + /// "<a href=\"https://rust-lang.org/\">Rust</a>"); + /// + /// # Defaults + /// + /// ```notest + /// Some("noopener noreferrer") + /// ``` + pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self { + self.link_rel = value; + self + } + + /// Returns the settings for links' `rel` attribute, if one is set. + /// + /// # Examples + /// + /// use ammonia::{Builder, UrlRelative}; + /// let mut a = Builder::default(); + /// a.link_rel(Some("a b")); + /// assert_eq!(a.get_link_rel(), Some("a b")); + pub fn get_link_rel(&self) -> Option<&str> { + self.link_rel.clone() + } + + /// Sets the CSS classes that are allowed on specific tags. + /// + /// The values is structured as a map from tag names to a set of class names. + /// + /// If the `class` attribute is itself whitelisted for a tag, then adding entries to + /// this map will cause a panic. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::{hashmap, hashset}; + /// + /// # fn main() { + /// let allowed_classes = hashmap![ + /// "code" => hashset!["rs", "ex", "c", "cxx", "js"] + /// ]; + /// let a = Builder::new() + /// .allowed_classes(allowed_classes) + /// .clean("<code class=rs>fn main() {}</code>") + /// .to_string(); + /// assert_eq!( + /// a, + /// "<code class=\"rs\">fn main() {}</code>"); + /// # } + /// + /// # Defaults + /// + /// The set of allowed classes is empty by default. 
+ pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { + self.allowed_classes = value; + self + } + + /// Add additonal whitelisted classes without overwriting old ones. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_allowed_classes("a", &["onebox"]) + /// .clean("<a href=/ class=onebox>mess</span>").to_string(); + /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a); + pub fn add_allowed_classes< + T: 'a + ?Sized + Borrow<str>, + U: 'a + ?Sized + Borrow<str>, + I: IntoIter<Item = &'a T>, + >( + &mut self, + tag: &'a U, + it: I, + ) -> &mut Self { + self.allowed_classes + .entry(tag.borrow()) + .or_insert_with(|| HashSet::new()) + .extend(it.into_iter().map(Borrow::borrow)); + self + } + + /// Remove already-whitelisted attributes. + /// + /// Does nothing if the attribute is already gone. + /// + /// # Examples + /// + /// let a = ammonia::Builder::default() + /// .add_allowed_classes("span", &["active"]) + /// .rm_allowed_classes("span", &["active"]) + /// .clean("<span class=active>").to_string(); + /// assert_eq!("<span class=\"\"></span>", a); + pub fn rm_allowed_classes< + 'b, + 'c, + T: 'b + ?Sized + Borrow<str>, + U: 'c + ?Sized + Borrow<str>, + I: IntoIter<Item = &'b T>, + >( + &mut self, + tag: &'c U, + it: I, + ) -> &mut Self { + if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) { + for i in it { + tag.remove(i.borrow()); + } + } + self + } + + /// Returns a copy of the set of whitelisted class attributes. 
+ /// + /// # Examples + /// + /// use maplit::{hashmap, hashset}; + /// + /// let allowed_classes = hashmap![ + /// "my-tag" => hashset!["my-class-1", "my-class-2"] + /// ]; + /// + /// let mut b = ammonia::Builder::default(); + /// b.allowed_classes(Clone::clone(&allowed_classes)); + /// assert_eq!(allowed_classes, b.clone_allowed_classes()); + pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> { + self.allowed_classes.clone() + } + + /// Configures the handling of HTML comments. + /// + /// If this option is false, comments will be preserved. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// + /// let a = Builder::new().strip_comments(false) + /// .clean("<!-- yes -->") + /// .to_string(); + /// assert_eq!( + /// a, + /// "<!-- yes -->"); + /// + /// # Defaults + /// + /// `true` + pub fn strip_comments(&mut self, value: bool) -> &mut Self { + self.strip_comments = value; + self + } + + /// Returns `true` if comment stripping is turned on. + /// + /// # Examples + /// + /// let mut a = ammonia::Builder::new(); + /// a.strip_comments(true); + /// assert!(a.will_strip_comments()); + /// a.strip_comments(false); + /// assert!(!a.will_strip_comments()); + pub fn will_strip_comments(&self) -> bool { + self.strip_comments + } + + /// Prefixes all "id" attribute values with a given string. Note that the tag and + /// attribute themselves must still be whitelisted. 
+ /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// + /// # fn main() { + /// let attributes = hashset!["id"]; + /// let a = Builder::new() + /// .generic_attributes(attributes) + /// .id_prefix(Some("safe-")) + /// .clean("<b id=42>") + /// .to_string(); + /// assert_eq!(a, "<b id=\"safe-42\"></b>"); + /// # } + + /// + /// # Defaults + /// + /// `None` + pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self { + self.id_prefix = value; + self + } + + /// Constructs a [`Builder`] instance configured with the [default options]. + /// + /// # Examples + /// + /// use ammonia::{Builder, Url, UrlRelative}; + /// # use std::error::Error; + /// + /// # fn do_main() -> Result<(), Box<Error>> { + /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; + /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; + /// + /// let result = Builder::new() // <-- + /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) + /// .clean(input) + /// .to_string(); + /// assert_eq!(result, output); + /// # Ok(()) + /// # } + /// # fn main() { do_main().unwrap() } + /// + /// [default options]: fn.clean.html + /// [`Builder`]: struct.Builder.html + pub fn new() -> Self { + Self::default() + } + + /// Constructs a [`Builder`] instance configured with no allowed tags. 
+ /// + /// # Examples + /// + /// use ammonia::{Builder, Url, UrlRelative}; + /// # use std::error::Error; + /// + /// # fn do_main() -> Result<(), Box<Error>> { + /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>."; + /// let output = "This is an Ammonia example using the empty() function."; + /// + /// let result = Builder::empty() // <-- + /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) + /// .clean(input) + /// .to_string(); + /// assert_eq!(result, output); + /// # Ok(()) + /// # } + /// # fn main() { do_main().unwrap() } + /// + /// [default options]: fn.clean.html + /// [`Builder`]: struct.Builder.html + pub fn empty() -> Self { + Self { + tags: hashset![], + ..Self::default() + } + } + + /// Sanitizes an HTML fragment in a string according to the configured options. + /// + /// # Examples + /// + /// use ammonia::{Builder, Url, UrlRelative}; + /// # use std::error::Error; + /// + /// # fn do_main() -> Result<(), Box<Error>> { + /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; + /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; + /// + /// let result = Builder::new() + /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) + /// .clean(input) + /// .to_string(); // <-- + /// assert_eq!(result, output); + /// # Ok(()) + /// # } + /// # fn main() { do_main().unwrap() } + pub fn clean(&self, src: &str) -> Document { + let parser = 
Self::make_parser(); + let dom = parser.one(src); + self.clean_dom(dom) + } + + /// Sanitizes an HTML fragment from a reader according to the configured options. + /// + /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just + /// like when using [`String::from_utf8_lossy`]. + /// + /// To avoid consuming the reader, a mutable reference can be passed to this method. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// # use std::error::Error; + /// + /// # fn do_main() -> Result<(), Box<Error>> { + /// let a = Builder::new() + /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b` + /// .to_string(); + /// assert_eq!(a, ""); + /// # Ok(()) } + /// # fn main() { do_main().unwrap() } + /// + /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy + pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document> + where + R: io::Read, + { + let parser = Self::make_parser().from_utf8(); + let dom = parser.read_from(&mut src)?; + Ok(self.clean_dom(dom)) + } + + /// Clean a post-parsing DOM. + /// + /// This is not a public API because RcDom isn't really stable. + /// We want to be able to take breaking changes to html5ever itself + /// without having to break Ammonia's API. 
///
/// # Panics
///
/// Panics when the builder configuration is contradictory:
///
/// * `link_rel` is set while `rel` is allowed through `generic_attributes`
///   or through the `tag_attributes` of `a`
/// * `allowed_classes` is non-empty while `class` is in `generic_attributes`
/// * a tag listed in `allowed_classes` also allows `class` through `tag_attributes`
/// * a tag listed in `clean_content_tags` is also in `tags` or has an entry
///   in `tag_attributes`
fn clean_dom(&self, mut dom: RcDom) -> Document {
    // `stack` is the work list of nodes still to be examined; `removed` collects
    // rejected nodes so that they can be torn down iteratively at the end.
    let mut stack = Vec::new();
    let mut removed = Vec::new();
    let link_rel = self
        .link_rel
        .map(|link_rel| format_tendril!("{}", link_rel));
    // Configuration sanity checks: each special-cased attribute must be managed
    // by exactly one mechanism.
    if link_rel.is_some() {
        assert!(self.generic_attributes.get("rel").is_none());
        assert!(self
            .tag_attributes
            .get("a")
            .and_then(|a| a.get("rel"))
            .is_none());
    }
    assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
    for (tag_name, _classes) in &self.allowed_classes {
        assert!(self
            .tag_attributes
            .get(tag_name)
            .and_then(|a| a.get("class"))
            .is_none());
    }
    for tag_name in &self.clean_content_tags {
        assert!(!self.tags.contains(tag_name));
        assert!(!self.tag_attributes.contains_key(tag_name));
    }
    // A base URL is only available in `RewriteWithBase` mode.
    let url_base = if let UrlRelative::RewriteWithBase(ref base) = self.url_relative {
        Some(base)
    } else {
        None
    };
    // The first child of the document is the root that holds the parsed fragment.
    let body = {
        let children = dom.document.children.borrow();
        children[0].clone()
    };
    // Detach the root's children and queue them; `rev()` makes `pop()` yield
    // them in document order.
    stack.extend(
        replace(&mut *body.children.borrow_mut(), Vec::new())
            .into_iter()
            .rev(),
    );
    // This design approach is used to prevent pathological content from producing
    // a stack overflow. The `stack` contains to-be-cleaned nodes, while `removed`,
    // of course, contains nodes that need to be dropped (we can't just drop them,
    // because they could have a very deep child tree).
    while let Some(mut node) = stack.pop() {
        // Take the parent link out of the node; it is set again when the node is
        // re-attached through `dom.append` below.
        let parent = node.parent
            .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
            .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
        // Tags in `clean_content_tags` are discarded together with their content.
        if self.clean_node_content(&node) {
            removed.push(node);
            continue;
        }
        let pass_clean = self.clean_child(&mut node, url_base);
        let pass = pass_clean && self.check_expected_namespace(&parent, &node);
        if pass {
            self.adjust_node_attributes(&mut node, &link_rel, url_base, self.id_prefix);
            dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
        } else {
            // The node itself is dropped, but its children are kept: point them
            // at the rejected node's parent so they are hoisted one level up.
            for sub in node.children.borrow_mut().iter_mut() {
                sub.parent.replace(Some(Rc::downgrade(&parent)));
            }
        }
        // Queue the node's children in either case.
        stack.extend(
            replace(&mut *node.children.borrow_mut(), Vec::new())
                .into_iter()
                .rev(),
        );
        if !pass {
            removed.push(node);
        }
    }
    // Now, imperatively clean up all of the child nodes.
    // Otherwise, we could wind up with a DoS, either caused by a memory leak,
    // or caused by a stack overflow.
    while let Some(node) = removed.pop() {
        removed.extend_from_slice(&replace(&mut *node.children.borrow_mut(), Vec::new())[..]);
    }
    Document(dom)
}

/// Returns `true` if a node and all its content should be removed.
///
/// Only elements can match: the element's local name is looked up in
/// `clean_content_tags`. Every other node kind yields `false`.
fn clean_node_content(&self, node: &Handle) -> bool {
    match node.data {
        NodeData::Text { .. }
        | NodeData::Comment { .. }
        | NodeData::Doctype { .. }
        | NodeData::Document
        | NodeData::ProcessingInstruction { .. } => false,
        NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
    }
}

/// Remove unwanted attributes, and check if the node should be kept or not.
///
/// The root node doesn't need cleaning because we create the root node ourselves,
/// and it doesn't get serialized, and ... it just exists to give the parser
/// a context (in this case, a div-like block context).
+ fn clean_child(&self, child: &mut Handle, url_base: Option<&Url>) -> bool { + match child.data { + NodeData::Text { .. } => true, + NodeData::Comment { .. } => !self.strip_comments, + NodeData::Doctype { .. } + | NodeData::Document + | NodeData::ProcessingInstruction { .. } => false, + NodeData::Element { + ref name, + ref attrs, + .. + } => { + if self.tags.contains(&*name.local) { + let attr_filter = |attr: &html5ever::Attribute| { + let whitelisted = self.generic_attributes.contains(&*attr.name.local) + || self.generic_attribute_prefixes.as_ref().map(|prefixes| { + prefixes.iter().any(|&p| attr.name.local.starts_with(p)) + }) == Some(true) + || self + .tag_attributes + .get(&*name.local) + .map(|ta| ta.contains(&*attr.name.local)) + == Some(true) + || self + .tag_attribute_values + .get(&*name.local) + .and_then(|tav| tav.get(&*attr.name.local)) + .map(|vs| { + let attr_val = attr.value.to_lowercase(); + vs.iter().any(|v| v.to_lowercase() == attr_val) + }) + == Some(true); + if !whitelisted { + // If the class attribute is not whitelisted, + // but there is a whitelisted set of allowed_classes, + // do not strip out the class attribute. + // Banned classes will be filtered later. + &*attr.name.local == "class" + && self.allowed_classes.contains_key(&*name.local) + } else if is_url_attr(&*name.local, &*attr.name.local) { + let url = Url::parse(&*attr.value); + if let Ok(url) = url { + self.url_schemes.contains(url.scheme()) + } else if url == Err(url::ParseError::RelativeUrlWithoutBase) { + if matches!(self.url_relative, UrlRelative::Deny) { + false + } else if let Some(url_base) = url_base { + url_base.join(&*attr.value).is_ok() + } else { + true + } + } else { + false + } + } else { + true + } + }; + attrs.borrow_mut().retain(attr_filter); + true + } else { + false + } + } + } + } + + // Check for unexpected namespace changes. 
//
// The issue happens if developers added to the list of allowed tags any
// tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
// that is:
//
// * title
// * textarea
// * xmp
// * iframe
// * noembed
// * noframes
// * plaintext
// * noscript
// * style
// * script
//
// An example in the wild is Plume, that allows iframe [1].  So in next
// examples I'll assume the following policy:
//
//     Builder::new()
//        .add_tags(&["iframe"])
//
// In HTML namespace `<iframe>` is parsed specially; that is, its content is
// treated as text. For instance, the following html:
//
//     <iframe><a>test
//
// Is parsed into the following DOM tree:
//
//     iframe
//     └─ #text: <a>test
//
// So iframe cannot have any children other than a text node.
//
// The same is not true, though, in "foreign content"; that is, within
// <svg> or <math> tags. The following html:
//
//     <svg><iframe><a>test
//
// is parsed differently:
//
//    svg
//    └─ iframe
//       └─ a
//          └─ #text: test
//
// So in SVG namespace iframe can have children.
//
// Ammonia disallows <svg> but it keeps its content after deleting it. And
// the parser internally keeps track of the namespace of the element. So
// assume we have the following snippet:
//
//     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
//
// It is parsed into:
//
//     svg
//     └─ iframe
//        └─ a title="</iframe><img src onerror=alert(1)>"
//           └─ #text: test
//
// This DOM tree is harmless from ammonia point of view because the piece
// of code that looks like XSS is in a title attribute. Hence, the
// resulting "safe" HTML from ammonia would be:
//
//     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
// noreferrer">test</a></iframe>
//
// However, at this point, the information about namespace is lost, which
// means that the browser will parse this snippet into:
//
//     ├─ iframe
//     │  └─ #text: <a title="
//     ├─ img src="" onerror="alert(1)"
//     └─ #text: " rel="noopener noreferrer">test
//
// Leading to XSS.
//
// To solve this issue, check for unexpected namespace switches after cleanup.
// Elements which change namespace at an unexpected point are removed.
// This function returns `true` if `child` should be kept, and `false` if it
// should be removed.
//
// [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
    // Only element-inside-element pairs can change namespace; anything else is kept.
    let (parent, child) = match (&parent.data, &child.data) {
        (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
        _ => return true,
    };
    // The only way to switch from html to svg is with the <svg> tag
    if parent.ns == ns!(html) && child.ns == ns!(svg) {
        child.local == local_name!("svg")
    // The only way to switch from html to mathml is with the <math> tag
    } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
        child.local == local_name!("math")
    // The only way to switch from mathml to svg/html is with a text integration point
    } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
        // https://html.spec.whatwg.org/#mathml
        matches!(
            &*parent.local,
            "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
        )
    // The only way to switch from svg to mathml/html is with an html integration point
    } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
        // https://html.spec.whatwg.org/#svg-0
        matches!(&*parent.local, "foreignObject")
    // No namespace switch from here on: the child's name must be a known
    // element of the namespace it claims to be in.
    } else if child.ns == ns!(svg) {
        is_svg_tag(&*child.local)
    } else if child.ns == ns!(mathml) {
        is_mathml_tag(&*child.local)
    } else if child.ns == ns!(html) {
        // An HTML element may not carry an SVG/MathML-only name, except for the
        // names listed explicitly below.
        (!is_svg_tag(&*child.local) && !is_mathml_tag(&*child.local))
            || matches!(
                &*child.local,
                "title" | "style" | "font" | "a" | "script" | "span"
            )
    } else {
        // There are no other supported ways to switch namespace
        parent.ns == child.ns
    }
}

/// Add and transform special-cased attributes and elements.
///
/// This function handles, in this order:
///
/// * forcing the attribute values configured in `set_tag_attribute_values`
/// * adding `<a rel>` attributes
/// * prefixing `id` values with `id_prefix`
/// * applying the user-supplied `attribute_filter`
/// * relative URL rewriting
/// * filtering out banned classes
///
/// Nodes that are not elements are left untouched.
///
/// # Panics
///
/// Panics if a URL attribute cannot be joined onto `url_base`.
// NOTE(review): values written by `set_tag_attribute_values` or returned by
// `attribute_filter` earlier in this function are not re-validated before the
// `join` below -- confirm that they cannot trigger the `expect`.
fn adjust_node_attributes(
    &self,
    child: &mut Handle,
    link_rel: &Option<StrTendril>,
    url_base: Option<&Url>,
    id_prefix: Option<&'a str>,
) {
    if let NodeData::Element {
        ref name,
        ref attrs,
        ..
    } = child.data
    {
        if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
            let mut attrs = attrs.borrow_mut();
            for (&set_name, &set_value) in set_attrs {
                // set the value of the attribute if the attribute is already present
                if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
                {
                    if &*attr.value != set_value {
                        attr.value = set_value.into();
                    }
                } else {
                    // otherwise, add the attribute
                    let attr = Attribute {
                        name: QualName::new(None, ns!(), set_name.into()),
                        value: set_value.into(),
                    };
                    attrs.push(attr);
                }
            }
        }
        // Every kept `<a>` gets the configured `rel` value appended.
        if let Some(ref link_rel) = *link_rel {
            if &*name.local == "a" {
                attrs.borrow_mut().push(Attribute {
                    name: QualName::new(None, ns!(), local_name!("rel")),
                    value: link_rel.clone(),
                })
            }
        }
        if let Some(ref id_prefix) = id_prefix {
            for attr in &mut *attrs.borrow_mut() {
                if &attr.name.local == "id" {
                    // Skip values that already carry the prefix.
                    if !attr.value.starts_with(id_prefix) {
                        attr.value = format_tendril!("{}{}", id_prefix, attr.value);
                    }
                }
            }
        }
        if let Some(ref attr_filter) = self.attribute_filter {
            // Indices of attributes the filter asked to drop, in ascending order.
            let mut drop_attrs = Vec::new();
            let mut attrs = attrs.borrow_mut();
            for (i, attr) in &mut attrs.iter_mut().enumerate() {
                let replace_with = if let Some(new) =
                    attr_filter.filter(&*name.local, &*attr.name.local, &*attr.value)
                {
                    if *new != *attr.value {
                        Some(format_tendril!("{}", new))
                    } else {
                        None // no need to replace the attr if filter returned the same value
                    }
                } else {
                    drop_attrs.push(i);
                    None
                };
                if let Some(replace_with) = replace_with {
                    attr.value = replace_with;
                }
            }
            // Remove from the back so that `swap_remove` never disturbs an index
            // that is still pending.
            for i in drop_attrs.into_iter().rev() {
                attrs.swap_remove(i);
            }
        }
        if let Some(ref base) = url_base {
            // `RewriteWithBase`: every URL attribute is resolved against the base.
            for attr in &mut *attrs.borrow_mut() {
                if is_url_attr(&*name.local, &*attr.name.local) {
                    let url = base
                        .join(&*attr.value)
                        .expect("invalid URLs should be stripped earlier");
                    attr.value = format_tendril!("{}", url);
                }
            }
        } else if let UrlRelative::Custom(ref evaluate) = self.url_relative {
            let mut drop_attrs = Vec::new();
            let mut attrs = attrs.borrow_mut();
            for (i, attr) in attrs.iter_mut().enumerate() {
                // The custom evaluator only ever sees relative URLs.
                if is_url_attr(&*name.local, &*attr.name.local) && is_url_relative(&*attr.value)
                {
                    let new_value = evaluate
                        .evaluate(&*attr.value)
                        .as_ref()
                        .map(Cow::as_ref)
                        .map(StrTendril::from_str)
                        .and_then(Result::ok);
                    if let Some(new_value) = new_value {
                        attr.value = new_value;
                    } else {
                        drop_attrs.push(i);
                    }
                }
            }
            // Swap remove scrambles the vector after the current point.
            // We will not do anything except with items before the current point.
            // The `rev()` is, as such, necessary for correctness.
            // We could use regular `remove(usize)` and a forward iterator,
            // but that's slower.
            for i in drop_attrs.into_iter().rev() {
                attrs.swap_remove(i);
            }
        }
        if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
            for attr in &mut *attrs.borrow_mut() {
                if &attr.name.local == "class" {
                    let mut classes = vec![];
                    // https://html.spec.whatwg.org/#global-attributes:classes-2
                    for class in attr.value.split_ascii_whitespace() {
                        if allowed_values.contains(class) {
                            classes.push(class.to_owned());
                        }
                    }
                    // Surviving classes are re-joined with single spaces.
                    attr.value = format_tendril!("{}", classes.join(" "));
                }
            }
        }
    }
}

/// Initializes an HTML fragment parser.
///
/// Ammonia conforms to the HTML5 fragment parsing rules,
/// by parsing the given fragment as if it were included in a <div> tag.
fn make_parser() -> html::Parser<RcDom> {
    html::parse_fragment(
        RcDom::default(),
        html::ParseOpts::default(),
        QualName::new(None, ns!(html), local_name!("div")),
        vec![],
    )
}
}

/// Given an element name and attribute name, determine if the given attribute contains a URL.
+fn is_url_attr(element: &str, attr: &str) -> bool { + attr == "href" + || attr == "src" + || (element == "form" && attr == "action") + || (element == "object" && attr == "data") + || ((element == "button" || element == "input") && attr == "formaction") + || (element == "a" && attr == "ping") + || (element == "video" && attr == "poster") +} + +/// Given an element name, check if it's SVG +fn is_svg_tag(element: &str) -> bool { + // https://svgwg.org/svg2-draft/eltindex.html + match element { + "a" + | "animate" + | "animateMotion" + | "animateTransform" + | "circle" + | "clipPath" + | "defs" + | "desc" + | "discard" + | "ellipse" + | "feBlend" + | "feColorMatrix" + | "feComponentTransfer" + | "feComposite" + | "feConvolveMatrix" + | "feDiffuseLighting" + | "feDisplacementMap" + | "feDistantLight" + | "feDropShadow" + | "feFlood" + | "feFuncA" + | "feFuncB" + | "feFuncG" + | "feFuncR" + | "feGaussianBlur" + | "feImage" + | "feMerge" + | "feMergeNode" + | "feMorphology" + | "feOffset" + | "fePointLight" + | "feSpecularLighting" + | "feSpotLight" + | "feTile" + | "feTurbulence" + | "filter" + | "foreignObject" + | "g" + | "image" + | "line" + | "linearGradient" + | "marker" + | "mask" + | "metadata" + | "mpath" + | "path" + | "pattern" + | "polygon" + | "polyline" + | "radialGradient" + | "rect" + | "script" + | "set" + | "stop" + | "style" + | "svg" + | "switch" + | "symbol" + | "text" + | "textPath" + | "title" + | "tspan" + | "use" + | "view" => true, + _ => false, + } +} + +/// Given an element name, check if it's Math +fn is_mathml_tag(element: &str) -> bool { + // https://svgwg.org/svg2-draft/eltindex.html + match element { + "abs" + | "and" + | "annotation" + | "annotation-xml" + | "apply" + | "approx" + | "arccos" + | "arccosh" + | "arccot" + | "arccoth" + | "arccsc" + | "arccsch" + | "arcsec" + | "arcsech" + | "arcsin" + | "arcsinh" + | "arctan" + | "arctanh" + | "arg" + | "bind" + | "bvar" + | "card" + | "cartesianproduct" + | "cbytes" + | "ceiling" + | 
"cerror" + | "ci" + | "cn" + | "codomain" + | "complexes" + | "compose" + | "condition" + | "conjugate" + | "cos" + | "cosh" + | "cot" + | "coth" + | "cs" + | "csc" + | "csch" + | "csymbol" + | "curl" + | "declare" + | "degree" + | "determinant" + | "diff" + | "divergence" + | "divide" + | "domain" + | "domainofapplication" + | "emptyset" + | "eq" + | "equivalent" + | "eulergamma" + | "exists" + | "exp" + | "exponentiale" + | "factorial" + | "factorof" + | "false" + | "floor" + | "fn" + | "forall" + | "gcd" + | "geq" + | "grad" + | "gt" + | "ident" + | "image" + | "imaginary" + | "imaginaryi" + | "implies" + | "in" + | "infinity" + | "int" + | "integers" + | "intersect" + | "interval" + | "inverse" + | "lambda" + | "laplacian" + | "lcm" + | "leq" + | "limit" + | "list" + | "ln" + | "log" + | "logbase" + | "lowlimit" + | "lt" + | "maction" + | "maligngroup" + | "malignmark" + | "math" + | "matrix" + | "matrixrow" + | "max" + | "mean" + | "median" + | "menclose" + | "merror" + | "mfenced" + | "mfrac" + | "mglyph" + | "mi" + | "min" + | "minus" + | "mlabeledtr" + | "mlongdiv" + | "mmultiscripts" + | "mn" + | "mo" + | "mode" + | "moment" + | "momentabout" + | "mover" + | "mpadded" + | "mphantom" + | "mprescripts" + | "mroot" + | "mrow" + | "ms" + | "mscarries" + | "mscarry" + | "msgroup" + | "msline" + | "mspace" + | "msqrt" + | "msrow" + | "mstack" + | "mstyle" + | "msub" + | "msubsup" + | "msup" + | "mtable" + | "mtd" + | "mtext" + | "mtr" + | "munder" + | "munderover" + | "naturalnumbers" + | "neq" + | "none" + | "not" + | "notanumber" + | "notin" + | "notprsubset" + | "notsubset" + | "or" + | "otherwise" + | "outerproduct" + | "partialdiff" + | "pi" + | "piece" + | "piecewise" + | "plus" + | "power" + | "primes" + | "product" + | "prsubset" + | "quotient" + | "rationals" + | "real" + | "reals" + | "reln" + | "rem" + | "root" + | "scalarproduct" + | "sdev" + | "sec" + | "sech" + | "selector" + | "semantics" + | "sep" + | "set" + | "setdiff" + | "share" + | "sin" + | 
"sinh" + | "span" + | "subset" + | "sum" + | "tan" + | "tanh" + | "tendsto" + | "times" + | "transpose" + | "true" + | "union" + | "uplimit" + | "variance" + | "vector" + | "vectorproduct" + | "xor" => true, + _ => false, + } +} + +fn is_url_relative(url: &str) -> bool { + matches!( + Url::parse(url), + Err(url::ParseError::RelativeUrlWithoutBase) + ) +} + +/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full. +/// +/// This policy kicks in, if set, for any attribute named `src` or `href`, +/// as well as the `data` attribute of an `object` tag. +/// +/// [relative URLs]: struct.Builder.html#method.url_relative +/// +/// # Examples +/// +/// ## `Deny` +/// +/// * `<a href="test">` is a file-relative URL, and will be removed +/// * `<a href="/test">` is a domain-relative URL, and will be removed +/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed +/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept +/// +/// ## `PassThrough` +/// +/// No changes will be made to any URLs, except if a disallowed scheme is used. +/// +/// ## `RewriteWithBase` +/// +/// If the base is set to `http://notriddle.com/some-directory/some-file` +/// +/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">` +/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">` +/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">` +/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is +/// +/// ## `Custom` +/// +/// Pass the relative URL to a function. +/// If it returns `Some(string)`, then that one gets used. +/// Otherwise, it will remove the attribute (like `Deny` does). 
+/// +/// use std::borrow::Cow; +/// fn is_absolute_path(url: &str) -> bool { +/// let u = url.as_bytes(); +/// // `//a/b/c` is "protocol-relative", meaning "a" is a hostname +/// // `/a/b/c` is an absolute path, and what we want to do stuff to. +/// u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/') +/// } +/// fn evaluate(url: &str) -> Option<Cow<str>> { +/// if is_absolute_path(url) { +/// Some(Cow::Owned(String::from("/root") + url)) +/// } else { +/// Some(Cow::Borrowed(url)) +/// } +/// } +/// fn main() { +/// let a = ammonia::Builder::new() +/// .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate))) +/// .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>") +/// .to_string(); +/// assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>"); +/// } +/// +/// This function is only applied to relative URLs. +/// To filter all of the URLs, +/// use the not-yet-implemented Content Security Policy. +#[non_exhaustive] +pub enum UrlRelative { + /// Relative URLs will be completely stripped from the document. + Deny, + /// Relative URLs will be passed through unchanged. + PassThrough, + /// Relative URLs will be changed into absolute URLs, based on this base URL. + RewriteWithBase(Url), + /// Rewrite URLs with a custom function. 
+ Custom(Box<dyn UrlRelativeEvaluate>), +} + +impl fmt::Debug for UrlRelative { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + UrlRelative::Deny => write!(f, "UrlRelative::Deny"), + UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"), + UrlRelative::RewriteWithBase(ref base) => { + write!(f, "UrlRelative::RewriteWithBase({})", base) + } + UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"), + } + } +} + +/// Types that implement this trait can be used to convert a relative URL into an absolute URL. +/// +/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated. +/// +/// See [`url_relative`][url_relative] for more details. +/// +/// [url_relative]: struct.Builder.html#method.url_relative +pub trait UrlRelativeEvaluate: Send + Sync { + /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string. + fn evaluate<'a>(&self, _: &'a str) -> Option<Cow<'a, str>>; +} +impl<T> UrlRelativeEvaluate for T +where + T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync, +{ + fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> { + self(url) + } +} + +impl fmt::Debug for dyn AttributeFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("AttributeFilter") + } +} + +/// Types that implement this trait can be used to remove or rewrite arbitrary attributes. +/// +/// See [`attribute_filter`][attribute_filter] for more details. +/// +/// [attribute_filter]: struct.Builder.html#method.attribute_filter +pub trait AttributeFilter: Send + Sync { + /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string. 
+ fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>; +} + +impl<T> AttributeFilter for T +where + T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static, +{ + fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> { + self(element, attribute, value) + } +} + +/// A sanitized HTML document. +/// +/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by +/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows +/// users to avoid buffering the serialized representation to a [`String`] when desired. +/// +/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface. +/// +/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so +/// the complete fragment needs to be stored in memory during processing. +/// +/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html +/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html +/// +/// # Examples +/// +/// use ammonia::Builder; +/// +/// let input = "<!-- comments will be stripped -->This is an Ammonia example."; +/// let output = "This is an Ammonia example."; +/// +/// let document = Builder::new() +/// .clean(input); +/// assert_eq!(document.to_string(), output); +pub struct Document(RcDom); + +impl Document { + /// Serializes a `Document` instance to a `String`. + /// + /// This method returns a [`String`] with the sanitized HTML. This is the simplest way to use + /// `ammonia`. 
+ /// + /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html + /// + /// # Examples + /// + /// use ammonia::Builder; + /// + /// let input = "Some <style></style>HTML here"; + /// let output = "Some HTML here"; + /// + /// let document = Builder::new() + /// .clean(input); + /// assert_eq!(document.to_string(), output); + pub fn to_string(&self) -> String { + let opts = Self::serialize_opts(); + let mut ret_val = Vec::new(); + let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); + serialize(&mut ret_val, &inner, opts) + .expect("Writing to a string shouldn't fail (expect on OOM)"); + String::from_utf8(ret_val).expect("html5ever only supports UTF8") + } + + /// Serializes a `Document` instance to a writer. + /// + /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step. + /// + /// To avoid consuming the writer, a mutable reference can be passed, like in the example below. + /// + /// Note that the in-memory representation of `Document` is larger than the serialized + /// `String`. + /// + /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html + /// + /// # Examples + /// + /// use ammonia::Builder; + /// + /// let input = "Some <style></style>HTML here"; + /// let expected = b"Some HTML here"; + /// + /// let document = Builder::new() + /// .clean(input); + /// + /// let mut sanitized = Vec::new(); + /// document.write_to(&mut sanitized) + /// .expect("Writing to a string should not fail (except on OOM)"); + /// assert_eq!(sanitized, expected); + pub fn write_to<W>(&self, writer: W) -> io::Result<()> + where + W: io::Write, + { + let opts = Self::serialize_opts(); + let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); + serialize(writer, &inner, opts) + } + + /// Exposes the `Document` instance as an [`rcdom::Handle`]. + /// + /// This method returns the inner object backing the `Document` instance. 
This allows + /// making further changes to the DOM without introducing redundant serialization and + /// parsing. + /// + /// Note that this method should be considered unstable and sits outside of the semver + /// stability guarantees. It may change, break, or go away at any time, either because + /// of `html5ever` changes or `ammonia` implementation changes. + /// + /// For this method to be accessible, a `cfg` flag is required. The easiest way is to + /// use the `RUSTFLAGS` environment variable: + /// + /// ```text + /// RUSTFLAGS='--cfg ammonia_unstable' cargo build + /// ``` + /// + /// on Unix-like platforms, or + /// + /// ```text + /// set RUSTFLAGS=--cfg ammonia_unstable + /// cargo build + /// ``` + /// + /// on Windows. + /// + /// This requirement also applies to crates that transitively depend on crates that use + /// this flag. + /// + /// # Examples + /// + /// use ammonia::Builder; + /// use maplit::hashset; + /// use html5ever::serialize::{serialize, SerializeOpts}; + /// + /// # use std::error::Error; + /// # fn do_main() -> Result<(), Box<Error>> { + /// let input = "<a>one link</a> and <a>one more</a>"; + /// let expected = "<a>one more</a> and <a>one link</a>"; + /// + /// let document = Builder::new() + /// .link_rel(None) + /// .clean(input); + /// + /// let mut node = document.to_dom_node(); + /// node.children.borrow_mut().reverse(); + /// + /// let mut buf = Vec::new(); + /// serialize(&mut buf, &node, SerializeOpts::default())?; + /// let output = String::from_utf8(buf)?; + /// + /// assert_eq!(output, expected); + /// # Ok(()) + /// # } + /// # fn main() { do_main().unwrap() } + #[cfg(ammonia_unstable)] + pub fn to_dom_node(&self) -> Handle { + self.0.document.children.borrow()[0].clone() + } + + fn serialize_opts() -> SerializeOpts { + SerializeOpts::default() + } +} + +impl Clone for Document { + fn clone(&self) -> Self { + let parser = Builder::make_parser(); + let dom = parser.one(&self.to_string()[..]); + Document(dom) + } +} + 
+impl fmt::Display for Document { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.to_string()) + } +} + +impl fmt::Debug for Document { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Document({})", self.to_string()) + } +} + +impl From<Document> for String { + fn from(document: Document) -> Self { + document.to_string() + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn deeply_nested_whitelisted() { + clean(&"<b>".repeat(60_000)); + } + #[test] + fn deeply_nested_blacklisted() { + clean(&"<b-b>".repeat(60_000)); + } + #[test] + fn deeply_nested_alternating() { + clean(&"<b-b>".repeat(35_000)); + } + #[test] + fn included_angles() { + let fragment = "1 < 2"; + let result = clean(fragment); + assert_eq!(result, "1 < 2"); + } + #[test] + fn remove_script() { + let fragment = "an <script>evil()</script> example"; + let result = clean(fragment); + assert_eq!(result, "an example"); + } + #[test] + fn ignore_link() { + let fragment = "a <a href=\"http://www.google.com\">good</a> example"; + let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\ + good</a> example"; + let result = clean(fragment); + assert_eq!(result, expected); + } + #[test] + fn remove_unsafe_link() { + let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example"; + let result = clean(fragment); + assert_eq!( + result, + "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example" + ); + } + #[test] + fn remove_js_link() { + let fragment = "an <a href=\"javascript:evil()\">evil</a> example"; + let result = clean(fragment); + assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example"); + } + #[test] + fn tag_rebalance() { + let fragment = "<b>AWESOME!"; + let result = clean(fragment); + assert_eq!(result, "<b>AWESOME!</b>"); + } + #[test] + fn allow_url_relative() { + let fragment = "<a href=test>Test</a>"; + let result = Builder::new() + 
.url_relative(UrlRelative::PassThrough) + .clean(fragment) + .to_string(); + assert_eq!( + result, + "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" + ); + } + #[test] + fn rewrite_url_relative() { + let fragment = "<a href=test>Test</a>"; + let result = Builder::new() + .url_relative(UrlRelative::RewriteWithBase( + Url::parse("http://example.com/").unwrap(), + )) + .clean(fragment) + .to_string(); + assert_eq!( + result, + "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>" + ); + } + #[test] + fn rewrite_url_relative_with_invalid_url() { + // Reduced from https://github.com/Bauke/ammonia-crash-test + let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##; + let result = Builder::new() + .url_relative(UrlRelative::RewriteWithBase( + Url::parse("http://example.com/").unwrap(), + )) + .clean(fragment) + .to_string(); + assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##); + } + #[test] + fn attribute_filter_nop() { + let fragment = "<a href=test>Test</a>"; + let result = Builder::new() + .attribute_filter(|elem, attr, value| { + assert_eq!("a", elem); + assert!( + match (attr, value) { + ("href", "test") => true, + ("rel", "noopener noreferrer") => true, + _ => false, + }, + "{}", + value.to_string() + ); + Some(value.into()) + }) + .clean(fragment) + .to_string(); + assert_eq!( + result, + "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" + ); + } + + #[test] + fn attribute_filter_drop() { + let fragment = "Test<img alt=test src=imgtest>"; + let result = Builder::new() + .attribute_filter(|elem, attr, value| { + assert_eq!("img", elem); + match (attr, value) { + ("src", "imgtest") => None, + ("alt", "test") => Some(value.into()), + _ => panic!("unexpected"), + } + }) + .clean(fragment) + .to_string(); + assert_eq!(result, r#"Test<img alt="test">"#); + } + + #[test] + fn url_filter_absolute() { + let fragment = "Test<img alt=test src=imgtest>"; + let result = Builder::new() + .attribute_filter(|elem, attr, 
value| { + assert_eq!("img", elem); + match (attr, value) { + ("src", "imgtest") => { + Some(format!("https://example.com/images/{}", value).into()) + } + ("alt", "test") => None, + _ => panic!("unexpected"), + } + }) + .url_relative(UrlRelative::RewriteWithBase( + Url::parse("http://wrong.invalid/").unwrap(), + )) + .clean(fragment) + .to_string(); + assert_eq!( + result, + r#"Test<img src="https://example.com/images/imgtest">"# + ); + } + + #[test] + fn url_filter_relative() { + let fragment = "Test<img alt=test src=imgtest>"; + let result = Builder::new() + .attribute_filter(|elem, attr, value| { + assert_eq!("img", elem); + match (attr, value) { + ("src", "imgtest") => Some("rewrite".into()), + ("alt", "test") => Some("altalt".into()), + _ => panic!("unexpected"), + } + }) + .url_relative(UrlRelative::RewriteWithBase( + Url::parse("https://example.com/base/#").unwrap(), + )) + .clean(fragment) + .to_string(); + assert_eq!( + result, + r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"# + ); + } + + #[test] + fn rewrite_url_relative_no_rel() { + let fragment = "<a href=test>Test</a>"; + let result = Builder::new() + .url_relative(UrlRelative::RewriteWithBase( + Url::parse("http://example.com/").unwrap(), + )) + .link_rel(None) + .clean(fragment) + .to_string(); + assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>"); + } + #[test] + fn deny_url_relative() { + let fragment = "<a href=test>Test</a>"; + let result = Builder::new() + .url_relative(UrlRelative::Deny) + .clean(fragment) + .to_string(); + assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>"); + } + #[test] + fn replace_rel() { + let fragment = "<a href=test rel=\"garbage\">Test</a>"; + let result = Builder::new() + .url_relative(UrlRelative::PassThrough) + .clean(fragment) + .to_string(); + assert_eq!( + result, + "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" + ); + } + #[test] + fn consider_rel_still_banned() { + let fragment = "<a href=test 
rel=\"garbage\">Test</a>"; + let result = Builder::new() + .url_relative(UrlRelative::PassThrough) + .link_rel(None) + .clean(fragment) + .to_string(); + assert_eq!(result, "<a href=\"test\">Test</a>"); + } + #[test] + fn object_data() { + let fragment = "<span data=\"javascript:evil()\">Test</span>\ + <object data=\"javascript:evil()\"></object>M"; + let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#; + let result = Builder::new() + .tags(hashset!["span", "object"]) + .generic_attributes(hashset!["data"]) + .clean(fragment) + .to_string(); + assert_eq!(result, expected); + } + #[test] + fn remove_attributes() { + let fragment = "<table border=\"1\"><tr></tr></table>"; + let result = Builder::new().clean(fragment); + assert_eq!( + result.to_string(), + "<table><tbody><tr></tr></tbody></table>" + ); + } + #[test] + fn quotes_in_attrs() { + let fragment = "<b title='\"'>contents</b>"; + let result = clean(fragment); + assert_eq!(result, "<b title=\""\">contents</b>"); + } + #[test] + #[should_panic] + fn panic_if_rel_is_allowed_and_replaced_generic() { + Builder::new() + .link_rel(Some("noopener noreferrer")) + .generic_attributes(hashset!["rel"]) + .clean("something"); + } + #[test] + #[should_panic] + fn panic_if_rel_is_allowed_and_replaced_a() { + Builder::new() + .link_rel(Some("noopener noreferrer")) + .tag_attributes(hashmap![ + "a" => hashset!["rel"], + ]) + .clean("something"); + } + #[test] + fn no_panic_if_rel_is_allowed_and_replaced_span() { + Builder::new() + .link_rel(Some("noopener noreferrer")) + .tag_attributes(hashmap![ + "span" => hashset!["rel"], + ]) + .clean("<span rel=\"what\">s</span>"); + } + #[test] + fn no_panic_if_rel_is_allowed_and_not_replaced_generic() { + Builder::new() + .link_rel(None) + .generic_attributes(hashset!["rel"]) + .clean("<a rel=\"what\">s</a>"); + } + #[test] + fn no_panic_if_rel_is_allowed_and_not_replaced_a() { + Builder::new() + .link_rel(None) + .tag_attributes(hashmap![ + "a" => 
hashset!["rel"], + ]) + .clean("<a rel=\"what\">s</a>"); + } + #[test] + fn dont_close_void_elements() { + let fragment = "<br>"; + let result = clean(fragment); + assert_eq!(result.to_string(), "<br>"); + } + #[should_panic] + #[test] + fn panic_on_allowed_classes_tag_attributes() { + let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; + Builder::new() + .link_rel(None) + .tag_attributes(hashmap![ + "p" => hashset!["class"], + "a" => hashset!["class"], + ]) + .allowed_classes(hashmap![ + "p" => hashset!["foo", "bar"], + "a" => hashset!["baz"], + ]) + .clean(fragment); + } + #[should_panic] + #[test] + fn panic_on_allowed_classes_generic_attributes() { + let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; + Builder::new() + .link_rel(None) + .generic_attributes(hashset!["class", "href", "some-foo"]) + .allowed_classes(hashmap![ + "p" => hashset!["foo", "bar"], + "a" => hashset!["baz"], + ]) + .clean(fragment); + } + #[test] + fn remove_non_allowed_classes() { + let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; + let result = Builder::new() + .link_rel(None) + .allowed_classes(hashmap![ + "p" => hashset!["foo", "bar"], + "a" => hashset!["baz"], + ]) + .clean(fragment); + assert_eq!( + result.to_string(), + "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" + ); + } + #[test] + fn remove_non_allowed_classes_with_tag_class() { + let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; + let result = Builder::new() + .link_rel(None) + .tag_attributes(hashmap![ + "div" => hashset!["class"], + ]) + .allowed_classes(hashmap![ + "p" => hashset!["foo", "bar"], + "a" => hashset!["baz"], + ]) + .clean(fragment); + assert_eq!( + result.to_string(), + "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" + ); + } + #[test] + fn allowed_classes_ascii_whitespace() { + // According to https://infra.spec.whatwg.org/#ascii-whitespace, + // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are + // 
considered to be ASCII whitespace. Unicode whitespace characters + // and VT (\x0B) aren't ASCII whitespace. + let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">"; + let result = Builder::new() + .allowed_classes(hashmap![ + "p" => hashset!["a", "b", "c", "d", "e", "f", "g"], + ]) + .clean(fragment); + assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#); + } + #[test] + fn remove_non_allowed_attributes_with_tag_attribute_values() { + let fragment = "<p data-label=\"baz\" name=\"foo\"></p>"; + let result = Builder::new() + .tag_attribute_values(hashmap![ + "p" => hashmap![ + "data-label" => hashset!["bar"], + ], + ]) + .tag_attributes(hashmap![ + "p" => hashset!["name"], + ]) + .clean(fragment); + assert_eq!(result.to_string(), "<p name=\"foo\"></p>",); + } + #[test] + fn keep_allowed_attributes_with_tag_attribute_values() { + let fragment = "<p data-label=\"bar\" name=\"foo\"></p>"; + let result = Builder::new() + .tag_attribute_values(hashmap![ + "p" => hashmap![ + "data-label" => hashset!["bar"], + ], + ]) + .tag_attributes(hashmap![ + "p" => hashset!["name"], + ]) + .clean(fragment); + assert_eq!( + result.to_string(), + "<p data-label=\"bar\" name=\"foo\"></p>", + ); + } + #[test] + fn tag_attribute_values_case_insensitive() { + let fragment = "<input type=\"CHECKBOX\" name=\"foo\">"; + let result = Builder::new() + .tags(hashset!["input"]) + .tag_attribute_values(hashmap![ + "input" => hashmap![ + "type" => hashset!["checkbox"], + ], + ]) + .tag_attributes(hashmap![ + "input" => hashset!["name"], + ]) + .clean(fragment); + assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",); + } + #[test] + fn set_tag_attribute_values() { + let fragment = "<a href=\"https://example.com/\">Link</a>"; + let result = Builder::new() + .link_rel(None) + .add_tag_attributes("a", &["target"]) + .set_tag_attribute_value("a", "target", "_blank") + .clean(fragment); + assert_eq!( + result.to_string(), + "<a href=\"https://example.com/\" 
target=\"_blank\">Link</a>", + ); + } + #[test] + fn update_existing_set_tag_attribute_values() { + let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>"; + let result = Builder::new() + .link_rel(None) + .add_tag_attributes("a", &["target"]) + .set_tag_attribute_value("a", "target", "_blank") + .clean(fragment); + assert_eq!( + result.to_string(), + "<a target=\"_blank\" href=\"https://example.com/\">Link</a>", + ); + } + #[test] + fn unwhitelisted_set_tag_attribute_values() { + let fragment = "<span>hi</span><my-elem>"; + let result = Builder::new() + .set_tag_attribute_value("my-elem", "my-attr", "val") + .clean(fragment); + assert_eq!(result.to_string(), "<span>hi</span>",); + } + #[test] + fn remove_entity_link() { + let fragment = "<a href=\"javascript:a\ + lert('XSS')\">Click me!</a>"; + let result = clean(fragment); + assert_eq!( + result.to_string(), + "<a rel=\"noopener noreferrer\">Click me!</a>" + ); + } + #[test] + fn remove_relative_url_evaluate() { + fn is_absolute_path(url: &str) -> bool { + let u = url.as_bytes(); + // `//a/b/c` is "protocol-relative", meaning "a" is a hostname + // `/a/b/c` is an absolute path, and what we want to do stuff to. 
+ u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/') + } + fn is_banned(url: &str) -> bool { + let u = url.as_bytes(); + u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a') + } + fn evaluate(url: &str) -> Option<Cow<'_, str>> { + if is_absolute_path(url) { + Some(Cow::Owned(String::from("/root") + url)) + } else if is_banned(url) { + None + } else { + Some(Cow::Borrowed(url)) + } + } + let a = Builder::new() + .url_relative(UrlRelative::Custom(Box::new(evaluate))) + .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>") + .to_string(); + assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>"); + } + #[test] + fn remove_relative_url_evaluate_b() { + fn is_absolute_path(url: &str) -> bool { + let u = url.as_bytes(); + // `//a/b/c` is "protocol-relative", meaning "a" is a hostname + // `/a/b/c` is an absolute path, and what we want to do stuff to. + u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/') + } + fn is_banned(url: &str) -> bool { + let u = url.as_bytes(); + u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a') + } + fn evaluate(url: &str) -> Option<Cow<'_, str>> { + if is_absolute_path(url) { + Some(Cow::Owned(String::from("/root") + url)) + } else if is_banned(url) { + None + } else { + Some(Cow::Borrowed(url)) + } + } + let a = Builder::new() + .url_relative(UrlRelative::Custom(Box::new(evaluate))) + .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>") + .to_string(); + assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>"); + } + #[test] + fn remove_relative_url_evaluate_c() { + // Don't run on absolute URLs. 
+ fn evaluate(_: &str) -> Option<Cow<'_, str>> { + return Some(Cow::Owned(String::from("invalid"))); + } + let a = Builder::new() + .url_relative(UrlRelative::Custom(Box::new(evaluate))) + .clean("<a href=\"https://www.google.com/\">google</a>") + .to_string(); + assert_eq!( + a, + "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>" + ); + } + #[test] + fn clean_children_of_bad_element() { + let fragment = "<bad><evil>a</evil>b</bad>"; + let result = Builder::new().clean(fragment); + assert_eq!(result.to_string(), "ab"); + } + #[test] + fn reader_input() { + let fragment = b"an <script>evil()</script> example"; + let result = Builder::new().clean_from_reader(&fragment[..]); + assert!(result.is_ok()); + assert_eq!(result.unwrap().to_string(), "an example"); + } + #[test] + fn reader_non_utf8() { + let fragment = b"non-utf8 \xF0\x90\x80string"; + let result = Builder::new().clean_from_reader(&fragment[..]); + assert!(result.is_ok()); + assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string"); + } + #[test] + fn display_impl() { + let fragment = r#"a <a>link</a>"#; + let result = Builder::new().link_rel(None).clean(fragment); + assert_eq!(format!("{}", result), "a <a>link</a>"); + } + #[test] + fn debug_impl() { + let fragment = r#"a <a>link</a>"#; + let result = Builder::new().link_rel(None).clean(fragment); + assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)"); + } + #[cfg(ammonia_unstable)] + #[test] + fn to_dom_node() { + let fragment = r#"a <a>link</a>"#; + let result = Builder::new().link_rel(None).clean(fragment); + let _node = result.to_dom_node(); + } + #[test] + fn string_from_document() { + let fragment = r#"a <a>link"#; + let result = String::from(Builder::new().link_rel(None).clean(fragment)); + assert_eq!(format!("{}", result), "a <a>link</a>"); + } + fn require_sync<T: Sync>(_: T) {} + fn require_send<T: Send>(_: T) {} + #[test] + fn require_sync_and_send() { + require_sync(Builder::new()); + 
require_send(Builder::new()); + } + #[test] + fn id_prefixed() { + let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>"; + let result = String::from( + Builder::new() + .tag_attributes(hashmap![ + "a" => hashset!["id"], + ]) + .id_prefix(Some("prefix-")) + .clean(fragment), + ); + assert_eq!( + result.to_string(), + "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>" + ); + } + #[test] + fn id_already_prefixed() { + let fragment = "<a id=\"prefix-hello\"></a>"; + let result = String::from( + Builder::new() + .tag_attributes(hashmap![ + "a" => hashset!["id"], + ]) + .id_prefix(Some("prefix-")) + .clean(fragment), + ); + assert_eq!( + result.to_string(), + "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>" + ); + } + #[test] + fn clean_content_tags() { + let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>"; + let result = String::from( + Builder::new() + .clean_content_tags(hashset!["script"]) + .clean(fragment), + ); + assert_eq!(result.to_string(), ""); + } + #[test] + fn only_clean_content_tags() { + let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"; + let result = String::from( + Builder::new() + .clean_content_tags(hashset!["script"]) + .clean(fragment), + ); + assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>"); + } + #[test] + fn clean_removed_default_tag() { + let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"; + let result = String::from( + Builder::new() + .rm_tags(hashset!["a"]) + .rm_tag_attributes("a", hashset!["href", "hreflang"]) + .clean_content_tags(hashset!["script"]) + .clean(fragment), + ); + assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>"); + } + #[test] + #[should_panic] + fn panic_on_clean_content_tag_attribute() { + Builder::new() + .rm_tags(std::iter::once("a")) + .clean_content_tags(hashset!["a"]) + .clean(""); + } + #[test] + #[should_panic] + fn panic_on_clean_content_tag() { + 
Builder::new().clean_content_tags(hashset!["a"]).clean(""); + } + + #[test] + fn clean_text_test() { + assert_eq!( + clean_text("<this> is <a test function"), + "<this> is <a test function" + ); + } + + #[test] + fn clean_text_spaces_test() { + assert_eq!(clean_text("\x09\x0a\x0c\x20"), "	  "); + } + + #[test] + fn ns_svg() { + // https://github.com/cure53/DOMPurify/pull/495 + let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##; + let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); + assert_eq!(result.to_string(), "test"); + + let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>"; + let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); + assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>"); + + let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>"; + let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); + assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>"); + + let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>"; + let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment)); + assert_eq!( + result.to_string(), + "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>" + ); + } + + #[test] + fn ns_mathml() { + // https://github.com/cure53/DOMPurify/pull/495 + let fragment = "<mglyph></mglyph>"; + let result = String::from( + Builder::new() + .add_tags(&["math", "mtext", "mglyph"]) + .clean(fragment), + ); + assert_eq!(result.to_string(), ""); + let fragment = "<math><mtext><div><mglyph>"; + let result = String::from( + Builder::new() + .add_tags(&["math", "mtext", "mglyph"]) + .clean(fragment), + ); + assert_eq!( + result.to_string(), + "<math><mtext><div></div></mtext></math>" + ); + let fragment = "<math><mtext><mglyph>"; + let result = String::from( + Builder::new() + .add_tags(&["math", "mtext", "mglyph"]) + 
.clean(fragment), + ); + assert_eq!( + result.to_string(), + "<math><mtext><mglyph></mglyph></mtext></math>" + ); + } + + #[test] + fn generic_attribute_prefixes() { + let prefix_data = ["data-"]; + let prefix_code = ["code-"]; + let mut b = Builder::new(); + let mut hs: HashSet<&'_ str> = HashSet::new(); + hs.insert("data-"); + assert_eq!(b.generic_attribute_prefixes.is_none(), true); + b.generic_attribute_prefixes(hs); + assert_eq!(b.generic_attribute_prefixes.is_some(), true); + assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); + b.add_generic_attribute_prefixes(&prefix_data); + assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); + b.add_generic_attribute_prefixes(&prefix_code); + assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2); + b.rm_generic_attribute_prefixes(&prefix_code); + assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); + b.rm_generic_attribute_prefixes(&prefix_code); + assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); + b.rm_generic_attribute_prefixes(&prefix_data); + assert_eq!(b.generic_attribute_prefixes.is_none(), true); + } + + #[test] + fn generic_attribute_prefixes_clean() { + let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#; + let result_cleaned = String::from( + Builder::new() + .add_tag_attributes("a", &["data-1"]) + .clean(fragment), + ); + assert_eq!( + result_cleaned, + r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# + ); + let result_allowed = String::from( + Builder::new() + .add_tag_attributes("a", &["data-1"]) + .add_generic_attribute_prefixes(&["data-"]) + .clean(fragment), + ); + assert_eq!( + result_allowed, + r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# + ); + let result_allowed = String::from( + Builder::new() + .add_tag_attributes("a", &["data-1", "code-1"]) + .add_generic_attribute_prefixes(&["data-", "code-"]) + 
.clean(fragment), + ); + assert_eq!( + result_allowed, + r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# + ); + } + #[test] + fn lesser_than_isnt_html() { + let fragment = "1 < 2"; + assert!(!is_html(fragment)); + } + #[test] + fn dense_lesser_than_isnt_html() { + let fragment = "1<2"; + assert!(!is_html(fragment)); + } + #[test] + fn what_about_number_elements() { + let fragment = "foo<2>bar"; + assert!(!is_html(fragment)); + } + #[test] + fn turbofish_is_html_sadly() { + let fragment = "Vec::<u8>::new()"; + assert!(is_html(fragment)); + } + #[test] + fn stop_grinning() { + let fragment = "did you really believe me? <g>"; + assert!(is_html(fragment)); + } + #[test] + fn dont_be_bold() { + let fragment = "<b>"; + assert!(is_html(fragment)); + } +} diff --git a/vendor/ammonia/src/rcdom.rs b/vendor/ammonia/src/rcdom.rs new file mode 100644 index 000000000..2d3917e55 --- /dev/null +++ b/vendor/ammonia/src/rcdom.rs @@ -0,0 +1,512 @@ +// Copyright 2014-2017 The html5ever Project Developers. +// Copyright Michael Howell and others. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(missing_docs)] + +//! A simple reference-counted DOM. +//! +//! This is sufficient as a static parse tree, but don't build a +//! web browser using it. :) +//! +//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like +//! format. For example, the following graph +//! +//! ```text +//! div +//! +- "text node" +//! +- span +//! ``` +//! in HTML would be serialized as +//! +//! ```html +//! <div>text node<span></span></div> +//! ``` +//! +//! See the [document object model article on wikipedia][dom wiki] for more information. 
+//! +//! This implementation stores the information associated with each node once, and then hands out +//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create +//! a new ref and then a node will outlive the document. Nodes own their children, but only have +//! weak references to their parents. +//! +//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) +//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model + +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::collections::{HashSet, VecDeque}; +use std::default::Default; +use std::fmt; +use std::io; +use std::mem; +use std::rc::{Rc, Weak}; + +use tendril::StrTendril; + +use html5ever::interface::tree_builder; +use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::serialize::TraversalScope; +use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode}; +use html5ever::serialize::{Serialize, Serializer}; +use html5ever::Attribute; +use html5ever::ExpandedName; +use html5ever::QualName; + +/// The different kinds of nodes in the DOM. +#[derive(Debug)] +pub enum NodeData { + /// The `Document` itself - the root node of a HTML document. + Document, + + /// A `DOCTYPE` with name, public id, and system id. See + /// [document type declaration on wikipedia][dtd wiki]. + /// + /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration + Doctype { + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + }, + + /// A text node. + Text { contents: RefCell<StrTendril> }, + + /// A comment. + Comment { contents: StrTendril }, + + /// An element with attributes. + Element { + name: QualName, + attrs: RefCell<Vec<Attribute>>, + + /// For HTML \<template\> elements, the [template contents]. 
+ /// + /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents + template_contents: RefCell<Option<Handle>>, + + /// Whether the node is a [HTML integration point]. + /// + /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point + mathml_annotation_xml_integration_point: bool, + }, + + /// A Processing instruction. + ProcessingInstruction { + target: StrTendril, + contents: StrTendril, + }, +} + +/// A DOM node. +pub struct Node { + /// Parent node. + pub parent: Cell<Option<WeakHandle>>, + /// Child nodes of this node. + pub children: RefCell<Vec<Handle>>, + /// Represents this node's data. + pub data: NodeData, +} + +impl Node { + /// Create a new node from its contents + pub fn new(data: NodeData) -> Rc<Self> { + Rc::new(Node { + data, + parent: Cell::new(None), + children: RefCell::new(Vec::new()), + }) + } +} + +impl Drop for Node { + fn drop(&mut self) { + let mut nodes = mem::replace(&mut *self.children.borrow_mut(), vec![]); + while let Some(node) = nodes.pop() { + let children = mem::replace(&mut *node.children.borrow_mut(), vec![]); + nodes.extend(children.into_iter()); + if let NodeData::Element { ref template_contents, .. } = node.data { + if let Some(template_contents) = template_contents.borrow_mut().take() { + nodes.push(template_contents); + } + } + } + } +} + +impl fmt::Debug for Node { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Node") + .field("data", &self.data) + .field("children", &self.children) + .finish() + } +} + +/// Reference to a DOM node. +pub type Handle = Rc<Node>; + +/// Weak reference to a DOM node, used for parent pointers. 
+pub type WeakHandle = Weak<Node>; + +/// Append a parentless node to another nodes' children +fn append(new_parent: &Handle, child: Handle) { + let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); + // Invariant: child cannot have existing parent + assert!(previous_parent.is_none()); + new_parent.children.borrow_mut().push(child); +} + +/// If the node has a parent, get it and this node's position in its children +fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { + if let Some(weak) = target.parent.take() { + let parent = weak.upgrade().expect("dangling weak pointer"); + target.parent.set(Some(weak)); + let i = match parent + .children + .borrow() + .iter() + .enumerate() + .find(|&(_, child)| Rc::ptr_eq(&child, &target)) + { + Some((i, _)) => i, + None => panic!("have parent but couldn't find in parent's children!"), + }; + Some((parent, i)) + } else { + None + } +} + +fn append_to_existing_text(prev: &Handle, text: &str) -> bool { + match prev.data { + NodeData::Text { ref contents } => { + contents.borrow_mut().push_slice(text); + true + }, + _ => false, + } +} + +fn remove_from_parent(target: &Handle) { + if let Some((parent, i)) = get_parent_and_index(target) { + parent.children.borrow_mut().remove(i); + target.parent.set(None); + } +} + +/// The DOM itself; the result of parsing. +pub struct RcDom { + /// The `Document` itself. + pub document: Handle, + + /// Errors that occurred during parsing. + pub errors: Vec<Cow<'static, str>>, + + /// The document's quirks mode. + pub quirks_mode: QuirksMode, +} + +impl TreeSink for RcDom { + type Output = Self; + fn finish(self) -> Self { + self + } + + type Handle = Handle; + + fn parse_error(&mut self, msg: Cow<'static, str>) { + self.errors.push(msg); + } + + fn get_document(&mut self) -> Handle { + self.document.clone() + } + + fn get_template_contents(&mut self, target: &Handle) -> Handle { + if let NodeData::Element { + ref template_contents, + .. 
+ } = target.data + { + template_contents.borrow().as_ref().expect("not a template element!").clone() + } else { + panic!("not a template element!") + } + } + + fn set_quirks_mode(&mut self, mode: QuirksMode) { + self.quirks_mode = mode; + } + + fn same_node(&self, x: &Handle, y: &Handle) -> bool { + Rc::ptr_eq(x, y) + } + + fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> { + return match target.data { + NodeData::Element { ref name, .. } => name.expanded(), + _ => panic!("not an element!"), + }; + } + + fn create_element( + &mut self, + name: QualName, + attrs: Vec<Attribute>, + flags: ElementFlags, + ) -> Handle { + Node::new(NodeData::Element { + name, + attrs: RefCell::new(attrs), + template_contents: RefCell::new(if flags.template { + Some(Node::new(NodeData::Document)) + } else { + None + }), + mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, + }) + } + + fn create_comment(&mut self, text: StrTendril) -> Handle { + Node::new(NodeData::Comment { contents: text }) + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle { + Node::new(NodeData::ProcessingInstruction { + target, + contents: data, + }) + } + + fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) { + // Append to an existing Text node if we have one. + match child { + NodeOrText::AppendText(ref text) => match parent.children.borrow().last() { + Some(h) => { + if append_to_existing_text(h, &text) { + return; + } + }, + _ => (), + }, + _ => (), + } + + append( + &parent, + match child { + NodeOrText::AppendText(text) => Node::new(NodeData::Text { + contents: RefCell::new(text), + }), + NodeOrText::AppendNode(node) => node, + }, + ); + } + + fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) { + let (parent, i) = get_parent_and_index(&sibling) + .expect("append_before_sibling called on node without parent"); + + let child = match (child, i) { + // No previous node. 
+ (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text { + contents: RefCell::new(text), + }), + + // Look for a text node before the insertion point. + (NodeOrText::AppendText(text), i) => { + let children = parent.children.borrow(); + let prev = &children[i - 1]; + if append_to_existing_text(prev, &text) { + return; + } + Node::new(NodeData::Text { + contents: RefCell::new(text), + }) + }, + + // The tree builder promises we won't have a text node after + // the insertion point. + + // Any other kind of node. + (NodeOrText::AppendNode(node), _) => node, + }; + + remove_from_parent(&child); + + child.parent.set(Some(Rc::downgrade(&parent))); + parent.children.borrow_mut().insert(i, child); + } + + fn append_based_on_parent_node( + &mut self, + element: &Self::Handle, + prev_element: &Self::Handle, + child: NodeOrText<Self::Handle>, + ) { + let parent = element.parent.take(); + let has_parent = parent.is_some(); + element.parent.set(parent); + + if has_parent { + self.append_before_sibling(element, child); + } else { + self.append(prev_element, child); + } + } + + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { + append( + &self.document, + Node::new(NodeData::Doctype { + name, + public_id, + system_id, + }), + ); + } + + fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) { + let mut existing = if let NodeData::Element { ref attrs, .. 
} = target.data { + attrs.borrow_mut() + } else { + panic!("not an element") + }; + + let existing_names = existing + .iter() + .map(|e| e.name.clone()) + .collect::<HashSet<_>>(); + existing.extend( + attrs + .into_iter() + .filter(|attr| !existing_names.contains(&attr.name)), + ); + } + + fn remove_from_parent(&mut self, target: &Handle) { + remove_from_parent(&target); + } + + fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) { + let mut children = node.children.borrow_mut(); + let mut new_children = new_parent.children.borrow_mut(); + for child in children.iter() { + let previous_parent = child.parent.replace(Some(Rc::downgrade(&new_parent))); + assert!(Rc::ptr_eq( + &node, + &previous_parent.unwrap().upgrade().expect("dangling weak") + )) + } + new_children.extend(mem::replace(&mut *children, Vec::new())); + } + + fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool { + if let NodeData::Element { + mathml_annotation_xml_integration_point, + .. 
+ } = target.data + { + mathml_annotation_xml_integration_point + } else { + panic!("not an element!") + } + } +} + +impl Default for RcDom { + fn default() -> RcDom { + RcDom { + document: Node::new(NodeData::Document), + errors: vec![], + quirks_mode: tree_builder::NoQuirks, + } + } +} + +enum SerializeOp { + Open(Handle), + Close(QualName), +} + +pub struct SerializableHandle(Handle); + +impl From<Handle> for SerializableHandle { + fn from(h: Handle) -> SerializableHandle { + SerializableHandle(h) + } +} + +impl Serialize for SerializableHandle { + fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()> + where + S: Serializer, + { + let mut ops = VecDeque::new(); + match traversal_scope { + IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())), + ChildrenOnly(_) => ops.extend(self + .0 + .children + .borrow() + .iter() + .map(|h| SerializeOp::Open(h.clone()))) + } + + while let Some(op) = ops.pop_front() { + match op { + SerializeOp::Open(handle) => match handle.data { + NodeData::Element { + ref name, + ref attrs, + .. + } => { + serializer.start_elem( + name.clone(), + attrs.borrow().iter().map(|at| (&at.name, &at.value[..])), + )?; + + ops.reserve(1 + handle.children.borrow().len()); + ops.push_front(SerializeOp::Close(name.clone())); + + for child in handle.children.borrow().iter().rev() { + ops.push_front(SerializeOp::Open(child.clone())); + } + }, + + NodeData::Doctype { ref name, .. } => serializer.write_doctype(&name)?, + + NodeData::Text { ref contents } => { + serializer.write_text(&contents.borrow())? 
+ }, + + NodeData::Comment { ref contents } => serializer.write_comment(&contents)?, + + NodeData::ProcessingInstruction { + ref target, + ref contents, + } => serializer.write_processing_instruction(target, contents)?, + + NodeData::Document => panic!("Can't serialize Document node itself"), + }, + + SerializeOp::Close(name) => { + serializer.end_elem(name)?; + }, + } + } + + Ok(()) + } +} diff --git a/vendor/ammonia/tests/version-numbers.rs b/vendor/ammonia/tests/version-numbers.rs new file mode 100644 index 000000000..0f903a63c --- /dev/null +++ b/vendor/ammonia/tests/version-numbers.rs @@ -0,0 +1,6 @@ +use version_sync::assert_markdown_deps_updated; + +#[test] +fn test_readme_deps() { + assert_markdown_deps_updated!("README.md"); +} |