summaryrefslogtreecommitdiffstats
path: root/vendor/ammonia/src/lib.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/ammonia/src/lib.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ammonia/src/lib.rs')
-rw-r--r--vendor/ammonia/src/lib.rs3626
1 files changed, 3626 insertions, 0 deletions
diff --git a/vendor/ammonia/src/lib.rs b/vendor/ammonia/src/lib.rs
new file mode 100644
index 000000000..d80d66ac7
--- /dev/null
+++ b/vendor/ammonia/src/lib.rs
@@ -0,0 +1,3626 @@
+// Copyright (C) Michael Howell and others
+// this library is released under the same terms as Rust itself.
+
+#![deny(unsafe_code)]
+#![deny(missing_docs)]
+
+//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
+//! prevent cross-site scripting, layout breaking, and clickjacking caused
+//! by untrusted user-provided HTML being mixed into a larger web page.
+//!
+//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
+//! so it is extremely resilient to syntactic obfuscation.
+//!
+//! Ammonia parses its input exactly according to the HTML5 specification;
+//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into &copy;.
+//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
+//!
+//! # Examples
+//!
+//! ```
+//! let result = ammonia::clean(
+//! "<b><img src='' onerror='alert(\\'hax\\')'>I'm not trying to XSS you</b>"
+//! );
+//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
+//! ```
+//!
+//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
+//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
+
+
+#[cfg(ammonia_unstable)]
+pub mod rcdom;
+
+#[cfg(not(ammonia_unstable))]
+mod rcdom;
+
+use html5ever::interface::Attribute;
+use html5ever::serialize::{serialize, SerializeOpts};
+use html5ever::tree_builder::{NodeOrText, TreeSink};
+use html5ever::{driver as html, local_name, namespace_url, ns, QualName};
+use maplit::{hashmap, hashset};
+use once_cell::sync::Lazy;
+use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
+use std::borrow::{Borrow, Cow};
+use std::cmp::max;
+use std::collections::{HashMap, HashSet};
+use std::fmt;
+use std::io;
+use std::iter::IntoIterator as IntoIter;
+use std::mem::replace;
+use std::rc::Rc;
+use std::str::FromStr;
+use tendril::stream::TendrilSink;
+use tendril::StrTendril;
+use tendril::{format_tendril, ByteTendril};
+pub use url::Url;
+
+use html5ever::buffer_queue::BufferQueue;
+use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
+pub use url;
+
+static AMMONIA: Lazy<Builder<'static>> = Lazy::new(|| Builder::default());
+
+/// Clean HTML with a conservative set of defaults.
+///
+/// * [tags](struct.Builder.html#defaults)
+/// * [attributes on specific tags](struct.Builder.html#defaults-1)
+/// * [attributes on all tags](struct.Builder.html#defaults-2)
+/// * [url schemes](struct.Builder.html#defaults-3)
+/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-4)
+/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-5)
+/// * [all `class=""` settings are blocked by default](struct.Builder.html#defaults-6)
+/// * [comments are stripped by default](struct.Builder.html#defaults-7)
+///
+/// [opener]: https://mathiasbynens.github.io/rel-noopener/
+/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
+///
+/// # Examples
+///
+/// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
+pub fn clean(src: &str) -> String {
+ AMMONIA.clean(src).to_string()
+}
+
+/// Turn an arbitrary string into unformatted HTML.
+///
+/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
+/// It is as strict as possible, encoding every character that has special meaning to the
+/// HTML parser.
+///
+/// # Warnings
+///
+/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
+/// you need a JavaScript or CSS escaper to do that.
+///
+/// // DO NOT DO THIS
+/// # use ammonia::clean_text;
+/// let untrusted = "Robert\"); abuse();//";
+/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
+///
+/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
+/// If you want to build an editor that works the way most folks expect them to, you should put a
+/// newline at the beginning of the tag, like this:
+///
+/// # use ammonia::{Builder, clean_text};
+/// let untrusted = "\n\nhi!";
+/// let mut b = Builder::new();
+/// b.add_tags(&["textarea"]);
+/// // This is the bad version
+/// // The user put two newlines at the beginning, but the first one was removed
+/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
+/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
+/// // This is a good version
+/// // The user put two newlines at the beginning, and we add a third one,
+/// // so the result still has two
+/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
+/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
+/// // This version is also often considered good
+/// // For many applications, leading and trailing whitespace is probably unwanted
+/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
+/// assert_eq!("<textarea>hi!</textarea>", sanitized);
+///
+/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
+/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
+/// and paragraph contents.
+pub fn clean_text(src: &str) -> String {
+ let mut ret_val = String::with_capacity(max(4, src.len()));
+ for c in src.chars() {
+ let replacement = match c {
+ // this character, when confronted, will start a tag
+ '<' => "&lt;",
+ // in an unquoted attribute, will end the attribute value
+ '>' => "&gt;",
+ // in an attribute surrounded by double quotes, this character will end the attribute value
+ '\"' => "&quot;",
+ // in an attribute surrounded by single quotes, this character will end the attribute value
+ '\'' => "&apos;",
+ // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes
+ '`' => "&grave;",
+ // in an unquoted attribute, this character will end the attribute
+ '/' => "&#47;",
+ // starts an entity reference
+ '&' => "&amp;",
+ // if at the beginning of an unquoted attribute, will get ignored
+ '=' => "&#61;",
+ // will end an unquoted attribute
+ ' ' => "&#32;",
+ '\t' => "&#9;",
+ '\n' => "&#10;",
+ '\x0c' => "&#12;",
+ '\r' => "&#13;",
+ // a spec-compliant browser will perform this replacement anyway, but the middleware might not
+ '\0' => "&#65533;",
+ // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM
+ _ => {
+ ret_val.push(c);
+ continue;
+ }
+ };
+ ret_val.push_str(replacement);
+ }
+ ret_val
+}
+
+/// Determine if a given string contains HTML
+///
+/// This function is parses the full string into HTML and checks if the input contained any
+/// HTML syntax.
+///
+/// # Note
+/// This function will return positively for strings that contain invalid HTML syntax like
+/// `<g>` and even `Vec::<u8>::new()`.
+pub fn is_html(input: &str) -> bool {
+ let santok = SanitizationTokenizer::new();
+ let mut chunk = ByteTendril::new();
+ chunk.push_slice(input.as_bytes());
+ let mut input = BufferQueue::new();
+ input.push_back(chunk.try_reinterpret().unwrap());
+
+ let mut tok = Tokenizer::new(santok, Default::default());
+ let _ = tok.feed(&mut input);
+ tok.end();
+ tok.sink.was_sanitized
+}
+
+#[derive(Copy, Clone)]
+struct SanitizationTokenizer {
+ was_sanitized: bool,
+}
+
+impl SanitizationTokenizer {
+ pub fn new() -> SanitizationTokenizer {
+ SanitizationTokenizer {
+ was_sanitized: false,
+ }
+ }
+}
+
+impl TokenSink for SanitizationTokenizer {
+ type Handle = ();
+ fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+ match token {
+ Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
+ _ => {
+ self.was_sanitized = true;
+ }
+ }
+ TokenSinkResult::Continue
+ }
+ fn end(&mut self) {}
+}
+
+/// An HTML sanitizer.
+///
+/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
+/// parsing algorithm and sanitize any disallowed tags or attributes. This
+/// algorithm also takes care of things like unclosed and (some) misnested
+/// tags.
+///
+/// # Examples
+///
+/// use ammonia::{Builder, UrlRelative};
+///
+/// let a = Builder::default()
+/// .link_rel(None)
+/// .url_relative(UrlRelative::PassThrough)
+/// .clean("<a href=/>test")
+/// .to_string();
+/// assert_eq!(
+/// a,
+/// "<a href=\"/\">test</a>");
+///
+/// # Panics
+///
+/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
+/// configured with any of these (contradictory) settings:
+///
+/// * The `rel` attribute is added to [`generic_attributes`] or the
+/// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
+///
+/// For example, this is going to panic, since [`link_rel`] is set to
+/// `Some("noopener noreferrer")` by default,
+/// and it makes no sense to simultaneously say that the user is allowed to
+/// set their own `rel` attribute while saying that every link shall be set to
+/// a particular value:
+///
+/// ```should_panic
+/// use ammonia::Builder;
+/// use maplit::hashset;
+///
+/// # fn main() {
+/// Builder::default()
+/// .generic_attributes(hashset!["rel"])
+/// .clean("");
+/// # }
+/// ```
+///
+/// This, however, is perfectly valid:
+///
+/// ```
+/// use ammonia::Builder;
+/// use maplit::hashset;
+///
+/// # fn main() {
+/// Builder::default()
+/// .generic_attributes(hashset!["rel"])
+/// .link_rel(None)
+/// .clean("");
+/// # }
+/// ```
+///
+/// * The `class` attribute is in [`allowed_classes`] and is in the
+/// corresponding [`tag_attributes`] or in [`generic_attributes`].
+///
+/// This is done both to line up with the treatment of `rel`,
+/// and to prevent people from accidentally allowing arbitrary
+/// classes on a particular element.
+///
+/// This will panic:
+///
+/// ```should_panic
+/// use ammonia::Builder;
+/// use maplit::{hashmap, hashset};
+///
+/// # fn main() {
+/// Builder::default()
+/// .generic_attributes(hashset!["class"])
+/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
+/// .clean("");
+/// # }
+/// ```
+///
+/// This, however, is perfectly valid:
+///
+/// ```
+/// use ammonia::Builder;
+/// use maplit::{hashmap, hashset};
+///
+/// # fn main() {
+/// Builder::default()
+/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
+/// .clean("");
+/// # }
+/// ```
+///
+/// * A tag is in either [`tags`] or [`tag_attributes`] while also
+/// being in [`clean_content_tags`].
+///
+/// Both [`tags`] and [`tag_attributes`] are whitelists but
+/// [`clean_content_tags`] is a blacklist, so it doesn't make sense
+/// to have the same tag in both.
+///
+/// For example, this will panic, since the `aside` tag is in
+/// [`tags`] by default:
+///
+/// ```should_panic
+/// use ammonia::Builder;
+/// use maplit::hashset;
+///
+/// # fn main() {
+/// Builder::default()
+/// .clean_content_tags(hashset!["aside"])
+/// .clean("");
+/// # }
+/// ```
+///
+/// This, however, is valid:
+///
+/// ```
+/// use ammonia::Builder;
+/// use maplit::hashset;
+///
+/// # fn main() {
+/// Builder::default()
+/// .rm_tags(&["aside"])
+/// .clean_content_tags(hashset!["aside"])
+/// .clean("");
+/// # }
+/// ```
+///
+/// [`clean`]: #method.clean
+/// [`clean_from_reader`]: #method.clean_from_reader
+/// [`generic_attributes`]: #method.generic_attributes
+/// [`tag_attributes`]: #method.tag_attributes
+/// [`generic_attributes`]: #method.generic_attributes
+/// [`link_rel`]: #method.link_rel
+/// [`allowed_classes`]: #method.allowed_classes
+/// [`id_prefix`]: #method.id_prefix
+/// [`tags`]: #method.tags
+/// [`clean_content_tags`]: #method.clean_content_tags
+#[derive(Debug)]
+pub struct Builder<'a> {
+ tags: HashSet<&'a str>,
+ clean_content_tags: HashSet<&'a str>,
+ tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
+ tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
+ set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
+ generic_attributes: HashSet<&'a str>,
+ url_schemes: HashSet<&'a str>,
+ url_relative: UrlRelative,
+ attribute_filter: Option<Box<dyn AttributeFilter>>,
+ link_rel: Option<&'a str>,
+ allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
+ strip_comments: bool,
+ id_prefix: Option<&'a str>,
+ generic_attribute_prefixes: Option<HashSet<&'a str>>,
+}
+
+impl<'a> Default for Builder<'a> {
+ fn default() -> Self {
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let tags = hashset![
+ "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
+ "bdo", "blockquote", "br", "caption", "center", "cite", "code",
+ "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
+ "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
+ "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
+ "ins", "kbd", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
+ "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
+ "strike", "strong", "sub", "summary", "sup", "table", "tbody",
+ "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
+ ];
+ let clean_content_tags = hashset!["script", "style"];
+ let generic_attributes = hashset!["lang", "title"];
+ let tag_attributes = hashmap![
+ "a" => hashset![
+ "href", "hreflang"
+ ],
+ "bdo" => hashset![
+ "dir"
+ ],
+ "blockquote" => hashset![
+ "cite"
+ ],
+ "col" => hashset![
+ "align", "char", "charoff", "span"
+ ],
+ "colgroup" => hashset![
+ "align", "char", "charoff", "span"
+ ],
+ "del" => hashset![
+ "cite", "datetime"
+ ],
+ "hr" => hashset![
+ "align", "size", "width"
+ ],
+ "img" => hashset![
+ "align", "alt", "height", "src", "width"
+ ],
+ "ins" => hashset![
+ "cite", "datetime"
+ ],
+ "ol" => hashset![
+ "start"
+ ],
+ "q" => hashset![
+ "cite"
+ ],
+ "table" => hashset![
+ "align", "char", "charoff", "summary"
+ ],
+ "tbody" => hashset![
+ "align", "char", "charoff"
+ ],
+ "td" => hashset![
+ "align", "char", "charoff", "colspan", "headers", "rowspan"
+ ],
+ "tfoot" => hashset![
+ "align", "char", "charoff"
+ ],
+ "th" => hashset![
+ "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
+ ],
+ "thead" => hashset![
+ "align", "char", "charoff"
+ ],
+ "tr" => hashset![
+ "align", "char", "charoff"
+ ],
+ ];
+ let tag_attribute_values = hashmap![];
+ let set_tag_attribute_values = hashmap![];
+ let url_schemes = hashset![
+ "bitcoin",
+ "ftp",
+ "ftps",
+ "geo",
+ "http",
+ "https",
+ "im",
+ "irc",
+ "ircs",
+ "magnet",
+ "mailto",
+ "mms",
+ "mx",
+ "news",
+ "nntp",
+ "openpgp4fpr",
+ "sip",
+ "sms",
+ "smsto",
+ "ssh",
+ "tel",
+ "url",
+ "webcal",
+ "wtai",
+ "xmpp"
+ ];
+ let allowed_classes = hashmap![];
+
+ Builder {
+ tags,
+ clean_content_tags,
+ tag_attributes,
+ tag_attribute_values,
+ set_tag_attribute_values,
+ generic_attributes,
+ url_schemes,
+ url_relative: UrlRelative::PassThrough,
+ attribute_filter: None,
+ link_rel: Some("noopener noreferrer"),
+ allowed_classes,
+ strip_comments: true,
+ id_prefix: None,
+ generic_attribute_prefixes: None,
+ }
+ }
+}
+
+impl<'a> Builder<'a> {
+ /// Sets the tags that are allowed.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let tags = hashset!["my-tag"];
+ /// let a = Builder::new()
+ /// .tags(tags)
+ /// .clean("<my-tag>")
+ /// .to_string();
+ /// assert_eq!(a, "<my-tag></my-tag>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// a, abbr, acronym, area, article, aside, b, bdi,
+ /// bdo, blockquote, br, caption, center, cite, code,
+ /// col, colgroup, data, dd, del, details, dfn, div,
+ /// dl, dt, em, figcaption, figure, footer, h1, h2,
+ /// h3, h4, h5, h6, header, hgroup, hr, i, img,
+ /// ins, kbd, kbd, li, map, mark, nav, ol, p, pre,
+ /// q, rp, rt, rtc, ruby, s, samp, small, span,
+ /// strike, strong, sub, summary, sup, table, tbody,
+ /// td, th, thead, time, tr, tt, u, ul, var, wbr
+ /// ```
+ pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
+ self.tags = value;
+ self
+ }
+
+ /// Add additonal whitelisted tags without overwriting old ones.
+ ///
+ /// Does nothing if the tag is already there.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_tags(&["my-tag"])
+ /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
+ /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
+ pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ self.tags.extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted tags.
+ ///
+ /// Does nothing if the tags is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .rm_tags(&["span"])
+ /// .clean("<span></span>").to_string();
+ /// assert_eq!("", a);
+ pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ for i in it {
+ self.tags.remove(i.borrow());
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted tags.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::hashset;
+ ///
+ /// let tags = hashset!["my-tag-1", "my-tag-2"];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.tags(Clone::clone(&tags));
+ /// assert_eq!(tags, b.clone_tags());
+ pub fn clone_tags(&self) -> HashSet<&'a str> {
+ self.tags.clone()
+ }
+
+ /// Sets the tags whose contents will be completely removed from the output.
+ ///
+ /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
+ /// a panic.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let tag_blacklist = hashset!["script", "style"];
+ /// let a = Builder::new()
+ /// .clean_content_tags(tag_blacklist)
+ /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
+ /// .to_string();
+ /// assert_eq!(a, "");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// No tags have content removed by default.
+ pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
+ self.clean_content_tags = value;
+ self
+ }
+
+ /// Add additonal blacklisted clean-content tags without overwriting old ones.
+ ///
+ /// Does nothing if the tag is already there.
+ ///
+ /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
+ /// a panic.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_clean_content_tags(&["my-tag"])
+ /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
+ /// assert_eq!("<span>mess</span>", a);
+ pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ self.clean_content_tags
+ .extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-blacklisted clean-content tags.
+ ///
+ /// Does nothing if the tags aren't blacklisted.
+ ///
+ /// # Examples
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let tag_blacklist = hashset!["script"];
+ /// let a = ammonia::Builder::default()
+ /// .clean_content_tags(tag_blacklist)
+ /// .rm_clean_content_tags(&["script"])
+ /// .clean("<script>XSS</script>").to_string();
+ /// assert_eq!("XSS", a);
+ /// # }
+ pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ for i in it {
+ self.clean_content_tags.remove(i.borrow());
+ }
+ self
+ }
+
+ /// Returns a copy of the set of blacklisted clean-content tags.
+ ///
+ /// # Examples
+ /// # use maplit::hashset;
+ ///
+ /// let tags = hashset!["my-tag-1", "my-tag-2"];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.clean_content_tags(Clone::clone(&tags));
+ /// assert_eq!(tags, b.clone_clean_content_tags());
+ pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
+ self.clean_content_tags.clone()
+ }
+
+ /// Sets the HTML attributes that are allowed on specific tags.
+ ///
+ /// The value is structured as a map from tag names to a set of attribute names.
+ ///
+ /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// # fn main() {
+ /// let tags = hashset!["my-tag"];
+ /// let tag_attributes = hashmap![
+ /// "my-tag" => hashset!["val"]
+ /// ];
+ /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
+ /// .clean("<my-tag val=1>")
+ /// .to_string();
+ /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// a =>
+ /// href, hreflang
+ /// bdo =>
+ /// dir
+ /// blockquote =>
+ /// cite
+ /// col =>
+ /// align, char, charoff, span
+ /// colgroup =>
+ /// align, char, charoff, span
+ /// del =>
+ /// cite, datetime
+ /// hr =>
+ /// align, size, width
+ /// img =>
+ /// align, alt, height, src, width
+ /// ins =>
+ /// cite, datetime
+ /// ol =>
+ /// start
+ /// q =>
+ /// cite
+ /// table =>
+ /// align, char, charoff, summary
+ /// tbody =>
+ /// align, char, charoff
+ /// td =>
+ /// align, char, charoff, colspan, headers, rowspan
+ /// tfoot =>
+ /// align, char, charoff
+ /// th =>
+ /// align, char, charoff, colspan, headers, rowspan, scope
+ /// thead =>
+ /// align, char, charoff
+ /// tr =>
+ /// align, char, charoff
+ /// ```
+ pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
+ self.tag_attributes = value;
+ self
+ }
+
+ /// Add additonal whitelisted tag-specific attributes without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_tags(&["my-tag"])
+ /// .add_tag_attributes("my-tag", &["my-attr"])
+ /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
+ /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
+ pub fn add_tag_attributes<
+ T: 'a + ?Sized + Borrow<str>,
+ U: 'a + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'a T>,
+ >(
+ &mut self,
+ tag: &'a U,
+ it: I,
+ ) -> &mut Self {
+ self.tag_attributes
+ .entry(tag.borrow())
+ .or_insert_with(|| HashSet::new())
+ .extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted tag-specific attributes.
+ ///
+ /// Does nothing if the attribute is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .rm_tag_attributes("a", &["href"])
+ /// .clean("<a href=\"/\"></a>").to_string();
+ /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
+ pub fn rm_tag_attributes<
+ 'b,
+ 'c,
+ T: 'b + ?Sized + Borrow<str>,
+ U: 'c + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'b T>,
+ >(
+ &mut self,
+ tag: &'c U,
+ it: I,
+ ) -> &mut Self {
+ if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
+ for i in it {
+ tag.remove(i.borrow());
+ }
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted tag-specific attributes.
+ ///
+ /// # Examples
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// let tag_attributes = hashmap![
+ /// "my-tag" => hashset!["my-attr-1", "my-attr-2"]
+ /// ];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.tag_attributes(Clone::clone(&tag_attributes));
+ /// assert_eq!(tag_attributes, b.clone_tag_attributes());
+ pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
+ self.tag_attributes.clone()
+ }
+
+ /// Sets the values of HTML attributes that are allowed on specific tags.
+ ///
+ /// The value is structured as a map from tag names to a map from attribute names to a set of
+ /// attribute values.
+ ///
+ /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// # fn main() {
+ /// let tags = hashset!["my-tag"];
+ /// let tag_attribute_values = hashmap![
+ /// "my-tag" => hashmap![
+ /// "my-attr" => hashset!["val"],
+ /// ],
+ /// ];
+ /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
+ /// .clean("<my-tag my-attr=val>")
+ /// .to_string();
+ /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// None.
+ pub fn tag_attribute_values(
+ &mut self,
+ value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
+ ) -> &mut Self {
+ self.tag_attribute_values = value;
+ self
+ }
+
+ /// Add additonal whitelisted tag-specific attribute values without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_tags(&["my-tag"])
+ /// .add_tag_attribute_values("my-tag", "my-attr", &[""])
+ /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
+ /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
+ pub fn add_tag_attribute_values<
+ T: 'a + ?Sized + Borrow<str>,
+ U: 'a + ?Sized + Borrow<str>,
+ V: 'a + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'a T>,
+ >(
+ &mut self,
+ tag: &'a U,
+ attribute: &'a V,
+ it: I,
+ ) -> &mut Self {
+ self.tag_attribute_values
+ .entry(tag.borrow())
+ .or_insert_with(HashMap::new)
+ .entry(attribute.borrow())
+ .or_insert_with(HashSet::new)
+ .extend(it.into_iter().map(Borrow::borrow));
+
+ self
+ }
+
+ /// Remove already-whitelisted tag-specific attribute values.
+ ///
+ /// Does nothing if the attribute or the value is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .rm_tag_attributes("a", &["href"])
+ /// .add_tag_attribute_values("a", "href", &["/"])
+ /// .rm_tag_attribute_values("a", "href", &["/"])
+ /// .clean("<a href=\"/\"></a>").to_string();
+ /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
+ pub fn rm_tag_attribute_values<
+ 'b,
+ 'c,
+ T: 'b + ?Sized + Borrow<str>,
+ U: 'c + ?Sized + Borrow<str>,
+ V: 'c + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'b T>,
+ >(
+ &mut self,
+ tag: &'c U,
+ attribute: &'c V,
+ it: I,
+ ) -> &mut Self {
+ if let Some(attrs) = self
+ .tag_attribute_values
+ .get_mut(tag.borrow())
+ .and_then(|map| map.get_mut(attribute.borrow()))
+ {
+ for i in it {
+ attrs.remove(i.borrow());
+ }
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted tag-specific attribute values.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// let attribute_values = hashmap![
+ /// "my-attr-1" => hashset!["foo"],
+ /// "my-attr-2" => hashset!["baz", "bar"],
+ /// ];
+ /// let tag_attribute_values = hashmap![
+ /// "my-tag" => attribute_values
+ /// ];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.tag_attribute_values(Clone::clone(&tag_attribute_values));
+ /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
+ pub fn clone_tag_attribute_values(
+ &self,
+ ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
+ self.tag_attribute_values.clone()
+ }
+
+ /// Sets the values of HTML attributes that are to be set on specific tags.
+ ///
+ /// The value is structured as a map from tag names to a map from attribute names to an
+ /// attribute value.
+ ///
+ /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// # fn main() {
+ /// let tags = hashset!["my-tag"];
+ /// let set_tag_attribute_values = hashmap![
+ /// "my-tag" => hashmap![
+ /// "my-attr" => "val",
+ /// ],
+ /// ];
+ /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
+ /// .clean("<my-tag>")
+ /// .to_string();
+ /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// None.
+ pub fn set_tag_attribute_values(
+ &mut self,
+ value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
+ ) -> &mut Self {
+ self.set_tag_attribute_values = value;
+ self
+ }
+
+ /// Add an attribute value to set on a specific element.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_tags(&["my-tag"])
+ /// .set_tag_attribute_value("my-tag", "my-attr", "val")
+ /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
+ /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
+ pub fn set_tag_attribute_value<
+ T: 'a + ?Sized + Borrow<str>,
+ A: 'a + ?Sized + Borrow<str>,
+ V: 'a + ?Sized + Borrow<str>,
+ >(
+ &mut self,
+ tag: &'a T,
+ attribute: &'a A,
+ value: &'a V,
+ ) -> &mut Self {
+ self.set_tag_attribute_values
+ .entry(tag.borrow())
+ .or_insert_with(HashMap::new)
+ .insert(attribute.borrow(), value.borrow());
+ self
+ }
+
+ /// Remove existing tag-specific attribute values to be set.
+ ///
+ /// Does nothing if the attribute is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// // this does nothing, since no value is set for this tag attribute yet
+ /// .rm_set_tag_attribute_value("a", "target")
+ /// .set_tag_attribute_value("a", "target", "_blank")
+ /// .rm_set_tag_attribute_value("a", "target")
+ /// .clean("<a href=\"/\"></a>").to_string();
+ /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
+ pub fn rm_set_tag_attribute_value<
+ T: 'a + ?Sized + Borrow<str>,
+ A: 'a + ?Sized + Borrow<str>,
+ >(
+ &mut self,
+ tag: &'a T,
+ attribute: &'a A,
+ ) -> &mut Self {
+ if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
+ attributes.remove(attribute.borrow());
+ }
+ self
+ }
+
+ /// Returns the value that will be set for the attribute on the element, if any.
+ ///
+ /// # Examples
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.set_tag_attribute_value("a", "target", "_blank");
+ /// let value = b.get_set_tag_attribute_value("a", "target");
+ /// assert_eq!(value, Some("_blank"));
+ pub fn get_set_tag_attribute_value<
+ T: 'a + ?Sized + Borrow<str>,
+ A: 'a + ?Sized + Borrow<str>,
+ >(
+ &self,
+ tag: &'a T,
+ attribute: &'a A,
+ ) -> Option<&'a str> {
+ self.set_tag_attribute_values
+ .get(tag.borrow())
+ .and_then(|map| map.get(attribute.borrow()))
+ .copied()
+ }
+
+ /// Returns a copy of the set of tag-specific attribute values to be set.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// let attribute_values = hashmap![
+ /// "my-attr-1" => "foo",
+ /// "my-attr-2" => "bar",
+ /// ];
+ /// let set_tag_attribute_values = hashmap![
+ /// "my-tag" => attribute_values,
+ /// ];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
+ /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
+ pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
+ self.set_tag_attribute_values.clone()
+ }
+
+ /// Sets the prefix of attributes that are allowed on any tag.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let prefixes = hashset!["data-"];
+ /// let a = Builder::new()
+ /// .generic_attribute_prefixes(prefixes)
+ /// .clean("<b data-val=1>")
+ /// .to_string();
+ /// assert_eq!(a, "<b data-val=\"1\"></b>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// lang, title
+ /// ```
+ pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
+ self.generic_attribute_prefixes = Some(value);
+ self
+ }
+
+ /// Add additional whitelisted attribute prefix without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_generic_attribute_prefixes(&["my-"])
+ /// .clean("<span my-attr>mess</span>").to_string();
+ /// assert_eq!("<span my-attr=\"\">mess</span>", a);
+ pub fn add_generic_attribute_prefixes<
+ T: 'a + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'a T>,
+ >(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ self.generic_attribute_prefixes
+ .get_or_insert_with(HashSet::new)
+ .extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted attribute prefixes.
+ ///
+ /// Does nothing if the attribute prefix is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_generic_attribute_prefixes(&["data-", "code-"])
+ /// .rm_generic_attribute_prefixes(&["data-"])
+ /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
+ /// assert_eq!("<span code-test=\"foo\"></span>", a);
+ pub fn rm_generic_attribute_prefixes<
+ 'b,
+ T: 'b + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'b T>,
+ >(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
+ for i in it {
+ let _ = prefixes.remove(i.borrow());
+ }
+ prefixes.is_empty()
+ }) {
+ self.generic_attribute_prefixes = None;
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted attribute prefixes.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::hashset;
+ ///
+ /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
+ /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
+ pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
+ self.generic_attribute_prefixes.clone()
+ }
+
+ /// Sets the attributes that are allowed on any tag.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let attributes = hashset!["data-val"];
+ /// let a = Builder::new()
+ /// .generic_attributes(attributes)
+ /// .clean("<b data-val=1>")
+ /// .to_string();
+ /// assert_eq!(a, "<b data-val=\"1\"></b>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// lang, title
+ /// ```
+ pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
+ self.generic_attributes = value;
+ self
+ }
+
+ /// Add additonal whitelisted attributes without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_generic_attributes(&["my-attr"])
+ /// .clean("<span my-attr>mess</span>").to_string();
+ /// assert_eq!("<span my-attr=\"\">mess</span>", a);
+ pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ self.generic_attributes
+ .extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted attributes.
+ ///
+ /// Does nothing if the attribute is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .rm_generic_attributes(&["title"])
+ /// .clean("<span title=\"cool\"></span>").to_string();
+ /// assert_eq!("<span></span>", a);
+ pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ for i in it {
+ self.generic_attributes.remove(i.borrow());
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted attributes.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::hashset;
+ ///
+ /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.generic_attributes(Clone::clone(&generic_attributes));
+ /// assert_eq!(generic_attributes, b.clone_generic_attributes());
+ pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
+ self.generic_attributes.clone()
+ }
+
+ /// Sets the URL schemes permitted on `href` and `src` attributes.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let url_schemes = hashset![
+ /// "http", "https", "mailto", "magnet"
+ /// ];
+ /// let a = Builder::new().url_schemes(url_schemes)
+ /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
+ /// .to_string();
+ ///
+ /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
+ /// // in the cleaned HTML.
+ /// assert_eq!(a,
+ /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&amp;xl=0&amp;dn=zero_len.fil&amp;xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&amp;xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// bitcoin, ftp, ftps, geo, http, https, im, irc,
+ /// ircs, magnet, mailto, mms, mx, news, nntp,
+ /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
+ /// webcal, wtai, xmpp
+ /// ```
+ pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
+ self.url_schemes = value;
+ self
+ }
+
+ /// Add additonal whitelisted URL schemes without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_url_schemes(&["my-scheme"])
+ /// .clean("<a href=my-scheme:home>mess</span>").to_string();
+ /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
+ pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted attributes.
+ ///
+ /// Does nothing if the attribute is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .rm_url_schemes(&["ftp"])
+ /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
+ /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
+ pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
+ &mut self,
+ it: I,
+ ) -> &mut Self {
+ for i in it {
+ self.url_schemes.remove(i.borrow());
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted URL schemes.
+ ///
+ /// # Examples
+ /// use maplit::hashset;
+ ///
+ /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.url_schemes(Clone::clone(&url_schemes));
+ /// assert_eq!(url_schemes, b.clone_url_schemes());
+ pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
+ self.url_schemes.clone()
+ }
+
+ /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, UrlRelative};
+ ///
+ /// let a = Builder::new().url_relative(UrlRelative::PassThrough)
+ /// .clean("<a href=/>Home</a>")
+ /// .to_string();
+ ///
+ /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
+ /// // in the cleaned HTML.
+ /// assert_eq!(
+ /// a,
+ /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// UrlRelative::PassThrough
+ /// ```
+ pub fn url_relative(&mut self, value: UrlRelative) -> &mut Self {
+ self.url_relative = value;
+ self
+ }
+
+ /// Allows rewriting of all attributes using a callback.
+ ///
+ /// The callback takes name of the element, attribute and its value.
+ /// Returns `None` to remove the attribute, or a value to use.
+ ///
+ /// Rewriting of attributes with URLs is done before `url_relative()`.
+ ///
+ /// # Panics
+ ///
+ /// If more than one callback is set.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use ammonia::Builder;
+ /// let a = Builder::new()
+ /// .attribute_filter(|element, attribute, value| {
+ /// match (element, attribute) {
+ /// ("img", "src") => None,
+ /// _ => Some(value.into())
+ /// }
+ /// })
+ /// .link_rel(None)
+ /// .clean("<a href=/><img alt=Home src=foo></a>")
+ /// .to_string();
+ /// assert_eq!(a,
+ /// r#"<a href="/"><img alt="Home"></a>"#);
+ /// ```
+ pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
+ where
+ CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
+ {
+ assert!(
+ self.attribute_filter.is_none(),
+ "attribute_filter can be set only once"
+ );
+ self.attribute_filter = Some(Box::new(callback));
+ self
+ }
+
+ /// Returns `true` if the relative URL resolver is set to `Deny`.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, UrlRelative};
+ /// let mut a = Builder::default();
+ /// a.url_relative(UrlRelative::Deny);
+ /// assert!(a.is_url_relative_deny());
+ /// a.url_relative(UrlRelative::PassThrough);
+ /// assert!(!a.is_url_relative_deny());
+ pub fn is_url_relative_deny(&self) -> bool {
+ matches!(self.url_relative, UrlRelative::Deny)
+ }
+
+ /// Returns `true` if the relative URL resolver is set to `PassThrough`.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, UrlRelative};
+ /// let mut a = Builder::default();
+ /// a.url_relative(UrlRelative::Deny);
+ /// assert!(!a.is_url_relative_pass_through());
+ /// a.url_relative(UrlRelative::PassThrough);
+ /// assert!(a.is_url_relative_pass_through());
+ pub fn is_url_relative_pass_through(&self) -> bool {
+ matches!(self.url_relative, UrlRelative::PassThrough)
+ }
+
+ /// Returns `true` if the relative URL resolver is set to `Custom`.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, UrlRelative};
+ /// use std::borrow::Cow;
+ /// fn test(a: &str) -> Option<Cow<str>> { None }
+ /// # fn main() {
+ /// let mut a = Builder::default();
+ /// a.url_relative(UrlRelative::Custom(Box::new(test)));
+ /// assert!(a.is_url_relative_custom());
+ /// a.url_relative(UrlRelative::PassThrough);
+ /// assert!(!a.is_url_relative_custom());
+ /// a.url_relative(UrlRelative::Deny);
+ /// assert!(!a.is_url_relative_custom());
+ /// # }
+ pub fn is_url_relative_custom(&self) -> bool {
+ matches!(self.url_relative, UrlRelative::Custom(_))
+ }
+
+ /// Configures a `rel` attribute that will be added on links.
+ ///
+ /// If `rel` is in the generic or tag attributes, this must be set to `None`.
+ /// Common `rel` values to include:
+ ///
+ /// * `noopener`: This prevents [a particular type of XSS attack],
+ /// and should usually be turned on for untrusted HTML.
+ /// * `noreferrer`: This prevents the browser from [sending the source URL]
+ /// to the website that is linked to.
+ /// * `nofollow`: This prevents search engines from [using this link for
+ /// ranking], which disincentivizes spammers.
+ ///
+ /// To turn on rel-insertion, call this function with a space-separated list.
+ /// Ammonia does not parse rel-attributes;
+ /// it just puts the given string into the attribute directly.
+ ///
+ /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
+ /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
+ /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ ///
+ /// let a = Builder::new().link_rel(None)
+ /// .clean("<a href=https://rust-lang.org/>Rust</a>")
+ /// .to_string();
+ /// assert_eq!(
+ /// a,
+ /// "<a href=\"https://rust-lang.org/\">Rust</a>");
+ ///
+ /// # Defaults
+ ///
+ /// ```notest
+ /// Some("noopener noreferrer")
+ /// ```
+ pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
+ self.link_rel = value;
+ self
+ }
+
+ /// Returns the settings for links' `rel` attribute, if one is set.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, UrlRelative};
+ /// let mut a = Builder::default();
+ /// a.link_rel(Some("a b"));
+ /// assert_eq!(a.get_link_rel(), Some("a b"));
+ pub fn get_link_rel(&self) -> Option<&str> {
+ self.link_rel.clone()
+ }
+
+ /// Sets the CSS classes that are allowed on specific tags.
+ ///
+ /// The values is structured as a map from tag names to a set of class names.
+ ///
+ /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
+ /// this map will cause a panic.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// # fn main() {
+ /// let allowed_classes = hashmap![
+ /// "code" => hashset!["rs", "ex", "c", "cxx", "js"]
+ /// ];
+ /// let a = Builder::new()
+ /// .allowed_classes(allowed_classes)
+ /// .clean("<code class=rs>fn main() {}</code>")
+ /// .to_string();
+ /// assert_eq!(
+ /// a,
+ /// "<code class=\"rs\">fn main() {}</code>");
+ /// # }
+ ///
+ /// # Defaults
+ ///
+ /// The set of allowed classes is empty by default.
+ pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
+ self.allowed_classes = value;
+ self
+ }
+
+ /// Add additonal whitelisted classes without overwriting old ones.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_allowed_classes("a", &["onebox"])
+ /// .clean("<a href=/ class=onebox>mess</span>").to_string();
+ /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
+ pub fn add_allowed_classes<
+ T: 'a + ?Sized + Borrow<str>,
+ U: 'a + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'a T>,
+ >(
+ &mut self,
+ tag: &'a U,
+ it: I,
+ ) -> &mut Self {
+ self.allowed_classes
+ .entry(tag.borrow())
+ .or_insert_with(|| HashSet::new())
+ .extend(it.into_iter().map(Borrow::borrow));
+ self
+ }
+
+ /// Remove already-whitelisted attributes.
+ ///
+ /// Does nothing if the attribute is already gone.
+ ///
+ /// # Examples
+ ///
+ /// let a = ammonia::Builder::default()
+ /// .add_allowed_classes("span", &["active"])
+ /// .rm_allowed_classes("span", &["active"])
+ /// .clean("<span class=active>").to_string();
+ /// assert_eq!("<span class=\"\"></span>", a);
+ pub fn rm_allowed_classes<
+ 'b,
+ 'c,
+ T: 'b + ?Sized + Borrow<str>,
+ U: 'c + ?Sized + Borrow<str>,
+ I: IntoIter<Item = &'b T>,
+ >(
+ &mut self,
+ tag: &'c U,
+ it: I,
+ ) -> &mut Self {
+ if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
+ for i in it {
+ tag.remove(i.borrow());
+ }
+ }
+ self
+ }
+
+ /// Returns a copy of the set of whitelisted class attributes.
+ ///
+ /// # Examples
+ ///
+ /// use maplit::{hashmap, hashset};
+ ///
+ /// let allowed_classes = hashmap![
+ /// "my-tag" => hashset!["my-class-1", "my-class-2"]
+ /// ];
+ ///
+ /// let mut b = ammonia::Builder::default();
+ /// b.allowed_classes(Clone::clone(&allowed_classes));
+ /// assert_eq!(allowed_classes, b.clone_allowed_classes());
+ pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
+ self.allowed_classes.clone()
+ }
+
+ /// Configures the handling of HTML comments.
+ ///
+ /// If this option is false, comments will be preserved.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ ///
+ /// let a = Builder::new().strip_comments(false)
+ /// .clean("<!-- yes -->")
+ /// .to_string();
+ /// assert_eq!(
+ /// a,
+ /// "<!-- yes -->");
+ ///
+ /// # Defaults
+ ///
+ /// `true`
+ pub fn strip_comments(&mut self, value: bool) -> &mut Self {
+ self.strip_comments = value;
+ self
+ }
+
+ /// Returns `true` if comment stripping is turned on.
+ ///
+ /// # Examples
+ ///
+ /// let mut a = ammonia::Builder::new();
+ /// a.strip_comments(true);
+ /// assert!(a.will_strip_comments());
+ /// a.strip_comments(false);
+ /// assert!(!a.will_strip_comments());
+ pub fn will_strip_comments(&self) -> bool {
+ self.strip_comments
+ }
+
+ /// Prefixes all "id" attribute values with a given string. Note that the tag and
+ /// attribute themselves must still be whitelisted.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ ///
+ /// # fn main() {
+ /// let attributes = hashset!["id"];
+ /// let a = Builder::new()
+ /// .generic_attributes(attributes)
+ /// .id_prefix(Some("safe-"))
+ /// .clean("<b id=42>")
+ /// .to_string();
+ /// assert_eq!(a, "<b id=\"safe-42\"></b>");
+ /// # }
+
+ ///
+ /// # Defaults
+ ///
+ /// `None`
+ pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
+ self.id_prefix = value;
+ self
+ }
+
+ /// Constructs a [`Builder`] instance configured with the [default options].
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, Url, UrlRelative};
+ /// # use std::error::Error;
+ ///
+ /// # fn do_main() -> Result<(), Box<Error>> {
+ /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
+ /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
+ ///
+ /// let result = Builder::new() // <--
+ /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
+ /// .clean(input)
+ /// .to_string();
+ /// assert_eq!(result, output);
+ /// # Ok(())
+ /// # }
+ /// # fn main() { do_main().unwrap() }
+ ///
+ /// [default options]: fn.clean.html
+ /// [`Builder`]: struct.Builder.html
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Constructs a [`Builder`] instance configured with no allowed tags.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, Url, UrlRelative};
+ /// # use std::error::Error;
+ ///
+ /// # fn do_main() -> Result<(), Box<Error>> {
+ /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
+ /// let output = "This is an Ammonia example using the empty() function.";
+ ///
+ /// let result = Builder::empty() // <--
+ /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
+ /// .clean(input)
+ /// .to_string();
+ /// assert_eq!(result, output);
+ /// # Ok(())
+ /// # }
+ /// # fn main() { do_main().unwrap() }
+ ///
+ /// [default options]: fn.clean.html
+ /// [`Builder`]: struct.Builder.html
+ pub fn empty() -> Self {
+ Self {
+ tags: hashset![],
+ ..Self::default()
+ }
+ }
+
+ /// Sanitizes an HTML fragment in a string according to the configured options.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::{Builder, Url, UrlRelative};
+ /// # use std::error::Error;
+ ///
+ /// # fn do_main() -> Result<(), Box<Error>> {
+ /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
+ /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
+ ///
+ /// let result = Builder::new()
+ /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
+ /// .clean(input)
+ /// .to_string(); // <--
+ /// assert_eq!(result, output);
+ /// # Ok(())
+ /// # }
+ /// # fn main() { do_main().unwrap() }
+ pub fn clean(&self, src: &str) -> Document {
+ let parser = Self::make_parser();
+ let dom = parser.one(src);
+ self.clean_dom(dom)
+ }
+
+ /// Sanitizes an HTML fragment from a reader according to the configured options.
+ ///
+ /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
+ /// like when using [`String::from_utf8_lossy`].
+ ///
+ /// To avoid consuming the reader, a mutable reference can be passed to this method.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// # use std::error::Error;
+ ///
+ /// # fn do_main() -> Result<(), Box<Error>> {
+ /// let a = Builder::new()
+ /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
+ /// .to_string();
+ /// assert_eq!(a, "");
+ /// # Ok(()) }
+ /// # fn main() { do_main().unwrap() }
+ ///
+ /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
+ pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
+ where
+ R: io::Read,
+ {
+ let parser = Self::make_parser().from_utf8();
+ let dom = parser.read_from(&mut src)?;
+ Ok(self.clean_dom(dom))
+ }
+
+ /// Clean a post-parsing DOM.
+ ///
+ /// This is not a public API because RcDom isn't really stable.
+ /// We want to be able to take breaking changes to html5ever itself
+ /// without having to break Ammonia's API.
+ fn clean_dom(&self, mut dom: RcDom) -> Document {
+ let mut stack = Vec::new();
+ let mut removed = Vec::new();
+ let link_rel = self
+ .link_rel
+ .map(|link_rel| format_tendril!("{}", link_rel));
+ if link_rel.is_some() {
+ assert!(self.generic_attributes.get("rel").is_none());
+ assert!(self
+ .tag_attributes
+ .get("a")
+ .and_then(|a| a.get("rel"))
+ .is_none());
+ }
+ assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
+ for (tag_name, _classes) in &self.allowed_classes {
+ assert!(self
+ .tag_attributes
+ .get(tag_name)
+ .and_then(|a| a.get("class"))
+ .is_none());
+ }
+ for tag_name in &self.clean_content_tags {
+ assert!(!self.tags.contains(tag_name));
+ assert!(!self.tag_attributes.contains_key(tag_name));
+ }
+ let url_base = if let UrlRelative::RewriteWithBase(ref base) = self.url_relative {
+ Some(base)
+ } else {
+ None
+ };
+ let body = {
+ let children = dom.document.children.borrow();
+ children[0].clone()
+ };
+ stack.extend(
+ replace(&mut *body.children.borrow_mut(), Vec::new())
+ .into_iter()
+ .rev(),
+ );
+ // This design approach is used to prevent pathological content from producing
+ // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
+ // of course, contains nodes that need to be dropped (we can't just drop them,
+ // because they could have a very deep child tree).
+ while let Some(mut node) = stack.pop() {
+ let parent = node.parent
+ .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
+ .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
+ if self.clean_node_content(&node) {
+ removed.push(node);
+ continue;
+ }
+ let pass_clean = self.clean_child(&mut node, url_base);
+ let pass = pass_clean && self.check_expected_namespace(&parent, &node);
+ if pass {
+ self.adjust_node_attributes(&mut node, &link_rel, url_base, self.id_prefix);
+ dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
+ } else {
+ for sub in node.children.borrow_mut().iter_mut() {
+ sub.parent.replace(Some(Rc::downgrade(&parent)));
+ }
+ }
+ stack.extend(
+ replace(&mut *node.children.borrow_mut(), Vec::new())
+ .into_iter()
+ .rev(),
+ );
+ if !pass {
+ removed.push(node);
+ }
+ }
+ // Now, imperatively clean up all of the child nodes.
+ // Otherwise, we could wind up with a DoS, either caused by a memory leak,
+ // or caused by a stack overflow.
+ while let Some(node) = removed.pop() {
+ removed.extend_from_slice(&replace(&mut *node.children.borrow_mut(), Vec::new())[..]);
+ }
+ Document(dom)
+ }
+
+ /// Returns `true` if a node and all its content should be removed.
+ fn clean_node_content(&self, node: &Handle) -> bool {
+ match node.data {
+ NodeData::Text { .. }
+ | NodeData::Comment { .. }
+ | NodeData::Doctype { .. }
+ | NodeData::Document
+ | NodeData::ProcessingInstruction { .. } => false,
+ NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
+ }
+ }
+
+ /// Remove unwanted attributes, and check if the node should be kept or not.
+ ///
+ /// The root node doesn't need cleaning because we create the root node ourselves,
+ /// and it doesn't get serialized, and ... it just exists to give the parser
+ /// a context (in this case, a div-like block context).
+ fn clean_child(&self, child: &mut Handle, url_base: Option<&Url>) -> bool {
+ match child.data {
+ NodeData::Text { .. } => true,
+ NodeData::Comment { .. } => !self.strip_comments,
+ NodeData::Doctype { .. }
+ | NodeData::Document
+ | NodeData::ProcessingInstruction { .. } => false,
+ NodeData::Element {
+ ref name,
+ ref attrs,
+ ..
+ } => {
+ if self.tags.contains(&*name.local) {
+ let attr_filter = |attr: &html5ever::Attribute| {
+ let whitelisted = self.generic_attributes.contains(&*attr.name.local)
+ || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
+ prefixes.iter().any(|&p| attr.name.local.starts_with(p))
+ }) == Some(true)
+ || self
+ .tag_attributes
+ .get(&*name.local)
+ .map(|ta| ta.contains(&*attr.name.local))
+ == Some(true)
+ || self
+ .tag_attribute_values
+ .get(&*name.local)
+ .and_then(|tav| tav.get(&*attr.name.local))
+ .map(|vs| {
+ let attr_val = attr.value.to_lowercase();
+ vs.iter().any(|v| v.to_lowercase() == attr_val)
+ })
+ == Some(true);
+ if !whitelisted {
+ // If the class attribute is not whitelisted,
+ // but there is a whitelisted set of allowed_classes,
+ // do not strip out the class attribute.
+ // Banned classes will be filtered later.
+ &*attr.name.local == "class"
+ && self.allowed_classes.contains_key(&*name.local)
+ } else if is_url_attr(&*name.local, &*attr.name.local) {
+ let url = Url::parse(&*attr.value);
+ if let Ok(url) = url {
+ self.url_schemes.contains(url.scheme())
+ } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
+ if matches!(self.url_relative, UrlRelative::Deny) {
+ false
+ } else if let Some(url_base) = url_base {
+ url_base.join(&*attr.value).is_ok()
+ } else {
+ true
+ }
+ } else {
+ false
+ }
+ } else {
+ true
+ }
+ };
+ attrs.borrow_mut().retain(attr_filter);
+ true
+ } else {
+ false
+ }
+ }
+ }
+ }
+
+ // Check for unexpected namespace changes.
+ //
+ // The issue happens if developers added to the list of allowed tags any
+ // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
+ // that is:
+ //
+ // * title
+ // * textarea
+ // * xmp
+ // * iframe
+ // * noembed
+ // * noframes
+ // * plaintext
+ // * noscript
+ // * style
+ // * script
+ //
+ // An example in the wild is Plume, that allows iframe [1]. So in next
+ // examples I'll assume the following policy:
+ //
+ // Builder::new()
+ // .add_tags(&["iframe"])
+ //
+ // In HTML namespace `<iframe>` is parsed specially; that is, its content is
+ // treated as text. For instance, the following html:
+ //
+ // <iframe><a>test
+ //
+ // Is parsed into the following DOM tree:
+ //
+ // iframe
+ // └─ #text: <a>test
+ //
+ // So iframe cannot have any children other than a text node.
+ //
+ // The same is not true, though, in "foreign content"; that is, within
+ // <svg> or <math> tags. The following html:
+ //
+ // <svg><iframe><a>test
+ //
+ // is parsed differently:
+ //
+ // svg
+ // └─ iframe
+ // └─ a
+ // └─ #text: test
+ //
+ // So in SVG namespace iframe can have children.
+ //
+ // Ammonia disallows <svg> but it keeps its content after deleting it. And
+ // the parser internally keeps track of the namespace of the element. So
+ // assume we have the following snippet:
+ //
+ // <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
+ //
+ // It is parsed into:
+ //
+ // svg
+ // └─ iframe
+ // └─ a title="</iframe><img src onerror=alert(1)>"
+ // └─ #text: test
+ //
+ // This DOM tree is harmless from ammonia point of view because the piece
+ // of code that looks like XSS is in a title attribute. Hence, the
+ // resulting "safe" HTML from ammonia would be:
+ //
+ // <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
+ // noreferrer">test</a></iframe>
+ //
+ // However, at this point, the information about namespace is lost, which
+ // means that the browser will parse this snippet into:
+ //
+ // ├─ iframe
+ // │ └─ #text: <a title="
+ // ├─ img src="" onerror="alert(1)"
+ // └─ #text: " rel="noopener noreferrer">test
+ //
+ // Leading to XSS.
+ //
+ // To solve this issue, check for unexpected namespace switches after cleanup.
+ // Elements which change namespace at an unexpected point are removed.
+ // This function returns `true` if `child` should be kept, and `false` if it
+ // should be removed.
+ //
+ // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
+ fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
+ let (parent, child) = match (&parent.data, &child.data) {
+ (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
+ _ => return true,
+ };
+ // The only way to switch from html to svg is with the <svg> tag
+ if parent.ns == ns!(html) && child.ns == ns!(svg) {
+ child.local == local_name!("svg")
+ // The only way to switch from html to mathml is with the <math> tag
+ } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
+ child.local == local_name!("math")
+ // The only way to switch from mathml to svg/html is with a text integration point
+ } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
+ // https://html.spec.whatwg.org/#mathml
+ matches!(
+ &*parent.local,
+ "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
+ )
+ // The only way to switch from svg to mathml/html is with an html integration point
+ } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
+ // https://html.spec.whatwg.org/#svg-0
+ matches!(&*parent.local, "foreignObject")
+ } else if child.ns == ns!(svg) {
+ is_svg_tag(&*child.local)
+ } else if child.ns == ns!(mathml) {
+ is_mathml_tag(&*child.local)
+ } else if child.ns == ns!(html) {
+ (!is_svg_tag(&*child.local) && !is_mathml_tag(&*child.local))
+ || matches!(
+ &*child.local,
+ "title" | "style" | "font" | "a" | "script" | "span"
+ )
+ } else {
+ // There are no other supported ways to switch namespace
+ parent.ns == child.ns
+ }
+ }
+
+ /// Add and transform special-cased attributes and elements.
+ ///
+ /// This function handles:
+ ///
+ /// * relative URL rewriting
+ /// * adding `<a rel>` attributes
+ /// * filtering out banned classes
+ fn adjust_node_attributes(
+ &self,
+ child: &mut Handle,
+ link_rel: &Option<StrTendril>,
+ url_base: Option<&Url>,
+ id_prefix: Option<&'a str>,
+ ) {
+ if let NodeData::Element {
+ ref name,
+ ref attrs,
+ ..
+ } = child.data
+ {
+ if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
+ let mut attrs = attrs.borrow_mut();
+ for (&set_name, &set_value) in set_attrs {
+ // set the value of the attribute if the attribute is already present
+ if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
+ {
+ if &*attr.value != set_value {
+ attr.value = set_value.into();
+ }
+ } else {
+ // otherwise, add the attribute
+ let attr = Attribute {
+ name: QualName::new(None, ns!(), set_name.into()),
+ value: set_value.into(),
+ };
+ attrs.push(attr);
+ }
+ }
+ }
+ if let Some(ref link_rel) = *link_rel {
+ if &*name.local == "a" {
+ attrs.borrow_mut().push(Attribute {
+ name: QualName::new(None, ns!(), local_name!("rel")),
+ value: link_rel.clone(),
+ })
+ }
+ }
+ if let Some(ref id_prefix) = id_prefix {
+ for attr in &mut *attrs.borrow_mut() {
+ if &attr.name.local == "id" {
+ if !attr.value.starts_with(id_prefix) {
+ attr.value = format_tendril!("{}{}", id_prefix, attr.value);
+ }
+ }
+ }
+ }
+ if let Some(ref attr_filter) = self.attribute_filter {
+ let mut drop_attrs = Vec::new();
+ let mut attrs = attrs.borrow_mut();
+ for (i, attr) in &mut attrs.iter_mut().enumerate() {
+ let replace_with = if let Some(new) =
+ attr_filter.filter(&*name.local, &*attr.name.local, &*attr.value)
+ {
+ if *new != *attr.value {
+ Some(format_tendril!("{}", new))
+ } else {
+ None // no need to replace the attr if filter returned the same value
+ }
+ } else {
+ drop_attrs.push(i);
+ None
+ };
+ if let Some(replace_with) = replace_with {
+ attr.value = replace_with;
+ }
+ }
+ for i in drop_attrs.into_iter().rev() {
+ attrs.swap_remove(i);
+ }
+ }
+ if let Some(ref base) = url_base {
+ for attr in &mut *attrs.borrow_mut() {
+ if is_url_attr(&*name.local, &*attr.name.local) {
+ let url = base
+ .join(&*attr.value)
+ .expect("invalid URLs should be stripped earlier");
+ attr.value = format_tendril!("{}", url);
+ }
+ }
+ } else if let UrlRelative::Custom(ref evaluate) = self.url_relative {
+ let mut drop_attrs = Vec::new();
+ let mut attrs = attrs.borrow_mut();
+ for (i, attr) in attrs.iter_mut().enumerate() {
+ if is_url_attr(&*name.local, &*attr.name.local) && is_url_relative(&*attr.value)
+ {
+ let new_value = evaluate
+ .evaluate(&*attr.value)
+ .as_ref()
+ .map(Cow::as_ref)
+ .map(StrTendril::from_str)
+ .and_then(Result::ok);
+ if let Some(new_value) = new_value {
+ attr.value = new_value;
+ } else {
+ drop_attrs.push(i);
+ }
+ }
+ }
+ // Swap remove scrambles the vector after the current point.
+ // We will not do anything except with items before the current point.
+ // The `rev()` is, as such, necessary for correctness.
+ // We could use regular `remove(usize)` and a forward iterator,
+ // but that's slower.
+ for i in drop_attrs.into_iter().rev() {
+ attrs.swap_remove(i);
+ }
+ }
+ if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
+ for attr in &mut *attrs.borrow_mut() {
+ if &attr.name.local == "class" {
+ let mut classes = vec![];
+ // https://html.spec.whatwg.org/#global-attributes:classes-2
+ for class in attr.value.split_ascii_whitespace() {
+ if allowed_values.contains(class) {
+ classes.push(class.to_owned());
+ }
+ }
+ attr.value = format_tendril!("{}", classes.join(" "));
+ }
+ }
+ }
+ }
+ }
+
+ /// Initializes an HTML fragment parser.
+ ///
+ /// Ammonia conforms to the HTML5 fragment parsing rules,
+ /// by parsing the given fragment as if it were included in a <div> tag.
+ fn make_parser() -> html::Parser<RcDom> {
+ html::parse_fragment(
+ RcDom::default(),
+ html::ParseOpts::default(),
+ QualName::new(None, ns!(html), local_name!("div")),
+ vec![],
+ )
+ }
+}
+
+/// Given an element name and attribute name, determine if the given attribute contains a URL.
+fn is_url_attr(element: &str, attr: &str) -> bool {
+ attr == "href"
+ || attr == "src"
+ || (element == "form" && attr == "action")
+ || (element == "object" && attr == "data")
+ || ((element == "button" || element == "input") && attr == "formaction")
+ || (element == "a" && attr == "ping")
+ || (element == "video" && attr == "poster")
+}
+
+/// Given an element name, check if it's SVG
+fn is_svg_tag(element: &str) -> bool {
+ // https://svgwg.org/svg2-draft/eltindex.html
+ match element {
+ "a"
+ | "animate"
+ | "animateMotion"
+ | "animateTransform"
+ | "circle"
+ | "clipPath"
+ | "defs"
+ | "desc"
+ | "discard"
+ | "ellipse"
+ | "feBlend"
+ | "feColorMatrix"
+ | "feComponentTransfer"
+ | "feComposite"
+ | "feConvolveMatrix"
+ | "feDiffuseLighting"
+ | "feDisplacementMap"
+ | "feDistantLight"
+ | "feDropShadow"
+ | "feFlood"
+ | "feFuncA"
+ | "feFuncB"
+ | "feFuncG"
+ | "feFuncR"
+ | "feGaussianBlur"
+ | "feImage"
+ | "feMerge"
+ | "feMergeNode"
+ | "feMorphology"
+ | "feOffset"
+ | "fePointLight"
+ | "feSpecularLighting"
+ | "feSpotLight"
+ | "feTile"
+ | "feTurbulence"
+ | "filter"
+ | "foreignObject"
+ | "g"
+ | "image"
+ | "line"
+ | "linearGradient"
+ | "marker"
+ | "mask"
+ | "metadata"
+ | "mpath"
+ | "path"
+ | "pattern"
+ | "polygon"
+ | "polyline"
+ | "radialGradient"
+ | "rect"
+ | "script"
+ | "set"
+ | "stop"
+ | "style"
+ | "svg"
+ | "switch"
+ | "symbol"
+ | "text"
+ | "textPath"
+ | "title"
+ | "tspan"
+ | "use"
+ | "view" => true,
+ _ => false,
+ }
+}
+
+/// Given an element name, check if it's Math
+fn is_mathml_tag(element: &str) -> bool {
+ // https://svgwg.org/svg2-draft/eltindex.html
+ match element {
+ "abs"
+ | "and"
+ | "annotation"
+ | "annotation-xml"
+ | "apply"
+ | "approx"
+ | "arccos"
+ | "arccosh"
+ | "arccot"
+ | "arccoth"
+ | "arccsc"
+ | "arccsch"
+ | "arcsec"
+ | "arcsech"
+ | "arcsin"
+ | "arcsinh"
+ | "arctan"
+ | "arctanh"
+ | "arg"
+ | "bind"
+ | "bvar"
+ | "card"
+ | "cartesianproduct"
+ | "cbytes"
+ | "ceiling"
+ | "cerror"
+ | "ci"
+ | "cn"
+ | "codomain"
+ | "complexes"
+ | "compose"
+ | "condition"
+ | "conjugate"
+ | "cos"
+ | "cosh"
+ | "cot"
+ | "coth"
+ | "cs"
+ | "csc"
+ | "csch"
+ | "csymbol"
+ | "curl"
+ | "declare"
+ | "degree"
+ | "determinant"
+ | "diff"
+ | "divergence"
+ | "divide"
+ | "domain"
+ | "domainofapplication"
+ | "emptyset"
+ | "eq"
+ | "equivalent"
+ | "eulergamma"
+ | "exists"
+ | "exp"
+ | "exponentiale"
+ | "factorial"
+ | "factorof"
+ | "false"
+ | "floor"
+ | "fn"
+ | "forall"
+ | "gcd"
+ | "geq"
+ | "grad"
+ | "gt"
+ | "ident"
+ | "image"
+ | "imaginary"
+ | "imaginaryi"
+ | "implies"
+ | "in"
+ | "infinity"
+ | "int"
+ | "integers"
+ | "intersect"
+ | "interval"
+ | "inverse"
+ | "lambda"
+ | "laplacian"
+ | "lcm"
+ | "leq"
+ | "limit"
+ | "list"
+ | "ln"
+ | "log"
+ | "logbase"
+ | "lowlimit"
+ | "lt"
+ | "maction"
+ | "maligngroup"
+ | "malignmark"
+ | "math"
+ | "matrix"
+ | "matrixrow"
+ | "max"
+ | "mean"
+ | "median"
+ | "menclose"
+ | "merror"
+ | "mfenced"
+ | "mfrac"
+ | "mglyph"
+ | "mi"
+ | "min"
+ | "minus"
+ | "mlabeledtr"
+ | "mlongdiv"
+ | "mmultiscripts"
+ | "mn"
+ | "mo"
+ | "mode"
+ | "moment"
+ | "momentabout"
+ | "mover"
+ | "mpadded"
+ | "mphantom"
+ | "mprescripts"
+ | "mroot"
+ | "mrow"
+ | "ms"
+ | "mscarries"
+ | "mscarry"
+ | "msgroup"
+ | "msline"
+ | "mspace"
+ | "msqrt"
+ | "msrow"
+ | "mstack"
+ | "mstyle"
+ | "msub"
+ | "msubsup"
+ | "msup"
+ | "mtable"
+ | "mtd"
+ | "mtext"
+ | "mtr"
+ | "munder"
+ | "munderover"
+ | "naturalnumbers"
+ | "neq"
+ | "none"
+ | "not"
+ | "notanumber"
+ | "notin"
+ | "notprsubset"
+ | "notsubset"
+ | "or"
+ | "otherwise"
+ | "outerproduct"
+ | "partialdiff"
+ | "pi"
+ | "piece"
+ | "piecewise"
+ | "plus"
+ | "power"
+ | "primes"
+ | "product"
+ | "prsubset"
+ | "quotient"
+ | "rationals"
+ | "real"
+ | "reals"
+ | "reln"
+ | "rem"
+ | "root"
+ | "scalarproduct"
+ | "sdev"
+ | "sec"
+ | "sech"
+ | "selector"
+ | "semantics"
+ | "sep"
+ | "set"
+ | "setdiff"
+ | "share"
+ | "sin"
+ | "sinh"
+ | "span"
+ | "subset"
+ | "sum"
+ | "tan"
+ | "tanh"
+ | "tendsto"
+ | "times"
+ | "transpose"
+ | "true"
+ | "union"
+ | "uplimit"
+ | "variance"
+ | "vector"
+ | "vectorproduct"
+ | "xor" => true,
+ _ => false,
+ }
+}
+
+fn is_url_relative(url: &str) -> bool {
+ matches!(
+ Url::parse(url),
+ Err(url::ParseError::RelativeUrlWithoutBase)
+ )
+}
+
+/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
+///
+/// This policy kicks in, if set, for any attribute named `src` or `href`,
+/// as well as the `data` attribute of an `object` tag.
+///
+/// [relative URLs]: struct.Builder.html#method.url_relative
+///
+/// # Examples
+///
+/// ## `Deny`
+///
+/// * `<a href="test">` is a file-relative URL, and will be removed
+/// * `<a href="/test">` is a domain-relative URL, and will be removed
+/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
+/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
+///
+/// ## `PassThrough`
+///
+/// No changes will be made to any URLs, except if a disallowed scheme is used.
+///
+/// ## `RewriteWithBase`
+///
+/// If the base is set to `http://notriddle.com/some-directory/some-file`
+///
+/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
+/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
+/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
+/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
+///
+/// ## `Custom`
+///
+/// Pass the relative URL to a function.
+/// If it returns `Some(string)`, then that one gets used.
+/// Otherwise, it will remove the attribute (like `Deny` does).
+///
+/// use std::borrow::Cow;
+/// fn is_absolute_path(url: &str) -> bool {
+/// let u = url.as_bytes();
+/// // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
+/// // `/a/b/c` is an absolute path, and what we want to do stuff to.
+/// u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
+/// }
+/// fn evaluate(url: &str) -> Option<Cow<str>> {
+/// if is_absolute_path(url) {
+/// Some(Cow::Owned(String::from("/root") + url))
+/// } else {
+/// Some(Cow::Borrowed(url))
+/// }
+/// }
+/// fn main() {
+/// let a = ammonia::Builder::new()
+/// .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
+/// .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
+/// .to_string();
+/// assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
+/// }
+///
+/// This function is only applied to relative URLs.
+/// To filter all of the URLs,
+/// use the not-yet-implemented Content Security Policy.
+#[non_exhaustive]
+pub enum UrlRelative {
+ /// Relative URLs will be completely stripped from the document.
+ Deny,
+ /// Relative URLs will be passed through unchanged.
+ PassThrough,
+ /// Relative URLs will be changed into absolute URLs, based on this base URL.
+ RewriteWithBase(Url),
+ /// Rewrite URLs with a custom function.
+ Custom(Box<dyn UrlRelativeEvaluate>),
+}
+
+impl fmt::Debug for UrlRelative {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match *self {
+ UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
+ UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
+ UrlRelative::RewriteWithBase(ref base) => {
+ write!(f, "UrlRelative::RewriteWithBase({})", base)
+ }
+ UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
+ }
+ }
+}
+
+/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
+///
+/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
+///
+/// See [`url_relative`][url_relative] for more details.
+///
+/// [url_relative]: struct.Builder.html#method.url_relative
+pub trait UrlRelativeEvaluate: Send + Sync {
+ /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
+ fn evaluate<'a>(&self, _: &'a str) -> Option<Cow<'a, str>>;
+}
+impl<T> UrlRelativeEvaluate for T
+where
+ T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync,
+{
+ fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
+ self(url)
+ }
+}
+
+impl fmt::Debug for dyn AttributeFilter {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_str("AttributeFilter")
+ }
+}
+
+/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
+///
+/// See [`attribute_filter`][attribute_filter] for more details.
+///
+/// [attribute_filter]: struct.Builder.html#method.attribute_filter
+pub trait AttributeFilter: Send + Sync {
+ /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
+ fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
+}
+
+impl<T> AttributeFilter for T
+where
+ T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
+{
+ fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
+ self(element, attribute, value)
+ }
+}
+
+/// A sanitized HTML document.
+///
+/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
+/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
+/// users to avoid buffering the serialized representation to a [`String`] when desired.
+///
+/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
+///
+/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
+/// the complete fragment needs to be stored in memory during processing.
+///
+/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
+/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
+///
+/// # Examples
+///
+/// use ammonia::Builder;
+///
+/// let input = "<!-- comments will be stripped -->This is an Ammonia example.";
+/// let output = "This is an Ammonia example.";
+///
+/// let document = Builder::new()
+/// .clean(input);
+/// assert_eq!(document.to_string(), output);
+pub struct Document(RcDom);
+
+impl Document {
+ /// Serializes a `Document` instance to a `String`.
+ ///
+ /// This method returns a [`String`] with the sanitized HTML. This is the simplest way to use
+ /// `ammonia`.
+ ///
+ /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ ///
+ /// let input = "Some <style></style>HTML here";
+ /// let output = "Some HTML here";
+ ///
+ /// let document = Builder::new()
+ /// .clean(input);
+ /// assert_eq!(document.to_string(), output);
+ pub fn to_string(&self) -> String {
+ let opts = Self::serialize_opts();
+ let mut ret_val = Vec::new();
+ let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
+ serialize(&mut ret_val, &inner, opts)
+ .expect("Writing to a string shouldn't fail (expect on OOM)");
+ String::from_utf8(ret_val).expect("html5ever only supports UTF8")
+ }
+
+ /// Serializes a `Document` instance to a writer.
+ ///
+ /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
+ ///
+ /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
+ ///
+ /// Note that the in-memory representation of `Document` is larger than the serialized
+ /// `String`.
+ ///
+ /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ ///
+ /// let input = "Some <style></style>HTML here";
+ /// let expected = b"Some HTML here";
+ ///
+ /// let document = Builder::new()
+ /// .clean(input);
+ ///
+ /// let mut sanitized = Vec::new();
+ /// document.write_to(&mut sanitized)
+ /// .expect("Writing to a string should not fail (except on OOM)");
+ /// assert_eq!(sanitized, expected);
+ pub fn write_to<W>(&self, writer: W) -> io::Result<()>
+ where
+ W: io::Write,
+ {
+ let opts = Self::serialize_opts();
+ let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
+ serialize(writer, &inner, opts)
+ }
+
+ /// Exposes the `Document` instance as an [`rcdom::Handle`].
+ ///
+ /// This method returns the inner object backing the `Document` instance. This allows
+ /// making further changes to the DOM without introducing redundant serialization and
+ /// parsing.
+ ///
+ /// Note that this method should be considered unstable and sits outside of the semver
+ /// stability guarantees. It may change, break, or go away at any time, either because
+ /// of `html5ever` changes or `ammonia` implementation changes.
+ ///
+ /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
+ /// use the `RUSTFLAGS` environment variable:
+ ///
+ /// ```text
+ /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
+ /// ```
+ ///
+ /// on Unix-like platforms, or
+ ///
+ /// ```text
+ /// set RUSTFLAGS=--cfg ammonia_unstable
+ /// cargo build
+ /// ```
+ ///
+ /// on Windows.
+ ///
+ /// This requirement also applies to crates that transitively depend on crates that use
+ /// this flag.
+ ///
+ /// # Examples
+ ///
+ /// use ammonia::Builder;
+ /// use maplit::hashset;
+ /// use html5ever::serialize::{serialize, SerializeOpts};
+ ///
+ /// # use std::error::Error;
+ /// # fn do_main() -> Result<(), Box<Error>> {
+ /// let input = "<a>one link</a> and <a>one more</a>";
+ /// let expected = "<a>one more</a> and <a>one link</a>";
+ ///
+ /// let document = Builder::new()
+ /// .link_rel(None)
+ /// .clean(input);
+ ///
+ /// let mut node = document.to_dom_node();
+ /// node.children.borrow_mut().reverse();
+ ///
+ /// let mut buf = Vec::new();
+ /// serialize(&mut buf, &node, SerializeOpts::default())?;
+ /// let output = String::from_utf8(buf)?;
+ ///
+ /// assert_eq!(output, expected);
+ /// # Ok(())
+ /// # }
+ /// # fn main() { do_main().unwrap() }
+ #[cfg(ammonia_unstable)]
+ pub fn to_dom_node(&self) -> Handle {
+ self.0.document.children.borrow()[0].clone()
+ }
+
+ fn serialize_opts() -> SerializeOpts {
+ SerializeOpts::default()
+ }
+}
+
+impl Clone for Document {
+ fn clone(&self) -> Self {
+ let parser = Builder::make_parser();
+ let dom = parser.one(&self.to_string()[..]);
+ Document(dom)
+ }
+}
+
+impl fmt::Display for Document {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{}", self.to_string())
+ }
+}
+
+impl fmt::Debug for Document {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "Document({})", self.to_string())
+ }
+}
+
+impl From<Document> for String {
+ fn from(document: Document) -> Self {
+ document.to_string()
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ #[test]
+ fn deeply_nested_whitelisted() {
+ clean(&"<b>".repeat(60_000));
+ }
+ #[test]
+ fn deeply_nested_blacklisted() {
+ clean(&"<b-b>".repeat(60_000));
+ }
+ #[test]
+ fn deeply_nested_alternating() {
+ clean(&"<b-b>".repeat(35_000));
+ }
+ #[test]
+ fn included_angles() {
+ let fragment = "1 < 2";
+ let result = clean(fragment);
+ assert_eq!(result, "1 &lt; 2");
+ }
+ #[test]
+ fn remove_script() {
+ let fragment = "an <script>evil()</script> example";
+ let result = clean(fragment);
+ assert_eq!(result, "an example");
+ }
+ #[test]
+ fn ignore_link() {
+ let fragment = "a <a href=\"http://www.google.com\">good</a> example";
+ let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
+ good</a> example";
+ let result = clean(fragment);
+ assert_eq!(result, expected);
+ }
+ #[test]
+ fn remove_unsafe_link() {
+ let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
+ let result = clean(fragment);
+ assert_eq!(
+ result,
+ "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
+ );
+ }
+ #[test]
+ fn remove_js_link() {
+ let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
+ let result = clean(fragment);
+ assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
+ }
+ #[test]
+ fn tag_rebalance() {
+ let fragment = "<b>AWESOME!";
+ let result = clean(fragment);
+ assert_eq!(result, "<b>AWESOME!</b>");
+ }
+ #[test]
+ fn allow_url_relative() {
+ let fragment = "<a href=test>Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::PassThrough)
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
+ );
+ }
+ #[test]
+ fn rewrite_url_relative() {
+ let fragment = "<a href=test>Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::RewriteWithBase(
+ Url::parse("http://example.com/").unwrap(),
+ ))
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
+ );
+ }
+ #[test]
+ fn rewrite_url_relative_with_invalid_url() {
+ // Reduced from https://github.com/Bauke/ammonia-crash-test
+ let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
+ let result = Builder::new()
+ .url_relative(UrlRelative::RewriteWithBase(
+ Url::parse("http://example.com/").unwrap(),
+ ))
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
+ }
+ #[test]
+ fn attribute_filter_nop() {
+ let fragment = "<a href=test>Test</a>";
+ let result = Builder::new()
+ .attribute_filter(|elem, attr, value| {
+ assert_eq!("a", elem);
+ assert!(
+ match (attr, value) {
+ ("href", "test") => true,
+ ("rel", "noopener noreferrer") => true,
+ _ => false,
+ },
+ "{}",
+ value.to_string()
+ );
+ Some(value.into())
+ })
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
+ );
+ }
+
+ #[test]
+ fn attribute_filter_drop() {
+ let fragment = "Test<img alt=test src=imgtest>";
+ let result = Builder::new()
+ .attribute_filter(|elem, attr, value| {
+ assert_eq!("img", elem);
+ match (attr, value) {
+ ("src", "imgtest") => None,
+ ("alt", "test") => Some(value.into()),
+ _ => panic!("unexpected"),
+ }
+ })
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, r#"Test<img alt="test">"#);
+ }
+
+ #[test]
+ fn url_filter_absolute() {
+ let fragment = "Test<img alt=test src=imgtest>";
+ let result = Builder::new()
+ .attribute_filter(|elem, attr, value| {
+ assert_eq!("img", elem);
+ match (attr, value) {
+ ("src", "imgtest") => {
+ Some(format!("https://example.com/images/{}", value).into())
+ }
+ ("alt", "test") => None,
+ _ => panic!("unexpected"),
+ }
+ })
+ .url_relative(UrlRelative::RewriteWithBase(
+ Url::parse("http://wrong.invalid/").unwrap(),
+ ))
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ r#"Test<img src="https://example.com/images/imgtest">"#
+ );
+ }
+
+ #[test]
+ fn url_filter_relative() {
+ let fragment = "Test<img alt=test src=imgtest>";
+ let result = Builder::new()
+ .attribute_filter(|elem, attr, value| {
+ assert_eq!("img", elem);
+ match (attr, value) {
+ ("src", "imgtest") => Some("rewrite".into()),
+ ("alt", "test") => Some("altalt".into()),
+ _ => panic!("unexpected"),
+ }
+ })
+ .url_relative(UrlRelative::RewriteWithBase(
+ Url::parse("https://example.com/base/#").unwrap(),
+ ))
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
+ );
+ }
+
+ #[test]
+ fn rewrite_url_relative_no_rel() {
+ let fragment = "<a href=test>Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::RewriteWithBase(
+ Url::parse("http://example.com/").unwrap(),
+ ))
+ .link_rel(None)
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>");
+ }
+ #[test]
+ fn deny_url_relative() {
+ let fragment = "<a href=test>Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::Deny)
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>");
+ }
+ #[test]
+ fn replace_rel() {
+ let fragment = "<a href=test rel=\"garbage\">Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::PassThrough)
+ .clean(fragment)
+ .to_string();
+ assert_eq!(
+ result,
+ "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
+ );
+ }
+ #[test]
+ fn consider_rel_still_banned() {
+ let fragment = "<a href=test rel=\"garbage\">Test</a>";
+ let result = Builder::new()
+ .url_relative(UrlRelative::PassThrough)
+ .link_rel(None)
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, "<a href=\"test\">Test</a>");
+ }
+ #[test]
+ fn object_data() {
+ let fragment = "<span data=\"javascript:evil()\">Test</span>\
+ <object data=\"javascript:evil()\"></object>M";
+ let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
+ let result = Builder::new()
+ .tags(hashset!["span", "object"])
+ .generic_attributes(hashset!["data"])
+ .clean(fragment)
+ .to_string();
+ assert_eq!(result, expected);
+ }
+ #[test]
+ fn remove_attributes() {
+ let fragment = "<table border=\"1\"><tr></tr></table>";
+ let result = Builder::new().clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<table><tbody><tr></tr></tbody></table>"
+ );
+ }
+ #[test]
+ fn quotes_in_attrs() {
+ let fragment = "<b title='\"'>contents</b>";
+ let result = clean(fragment);
+ assert_eq!(result, "<b title=\"&quot;\">contents</b>");
+ }
+ #[test]
+ #[should_panic]
+ fn panic_if_rel_is_allowed_and_replaced_generic() {
+ Builder::new()
+ .link_rel(Some("noopener noreferrer"))
+ .generic_attributes(hashset!["rel"])
+ .clean("something");
+ }
+ #[test]
+ #[should_panic]
+ fn panic_if_rel_is_allowed_and_replaced_a() {
+ Builder::new()
+ .link_rel(Some("noopener noreferrer"))
+ .tag_attributes(hashmap![
+ "a" => hashset!["rel"],
+ ])
+ .clean("something");
+ }
+ #[test]
+ fn no_panic_if_rel_is_allowed_and_replaced_span() {
+ Builder::new()
+ .link_rel(Some("noopener noreferrer"))
+ .tag_attributes(hashmap![
+ "span" => hashset!["rel"],
+ ])
+ .clean("<span rel=\"what\">s</span>");
+ }
+ #[test]
+ fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
+ Builder::new()
+ .link_rel(None)
+ .generic_attributes(hashset!["rel"])
+ .clean("<a rel=\"what\">s</a>");
+ }
+ #[test]
+ fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
+ Builder::new()
+ .link_rel(None)
+ .tag_attributes(hashmap![
+ "a" => hashset!["rel"],
+ ])
+ .clean("<a rel=\"what\">s</a>");
+ }
+ #[test]
+ fn dont_close_void_elements() {
+ let fragment = "<br>";
+ let result = clean(fragment);
+ assert_eq!(result.to_string(), "<br>");
+ }
+ #[should_panic]
+ #[test]
+ fn panic_on_allowed_classes_tag_attributes() {
+ let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
+ Builder::new()
+ .link_rel(None)
+ .tag_attributes(hashmap![
+ "p" => hashset!["class"],
+ "a" => hashset!["class"],
+ ])
+ .allowed_classes(hashmap![
+ "p" => hashset!["foo", "bar"],
+ "a" => hashset!["baz"],
+ ])
+ .clean(fragment);
+ }
+ #[should_panic]
+ #[test]
+ fn panic_on_allowed_classes_generic_attributes() {
+ let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
+ Builder::new()
+ .link_rel(None)
+ .generic_attributes(hashset!["class", "href", "some-foo"])
+ .allowed_classes(hashmap![
+ "p" => hashset!["foo", "bar"],
+ "a" => hashset!["baz"],
+ ])
+ .clean(fragment);
+ }
+ #[test]
+ fn remove_non_allowed_classes() {
+ let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
+ let result = Builder::new()
+ .link_rel(None)
+ .allowed_classes(hashmap![
+ "p" => hashset!["foo", "bar"],
+ "a" => hashset!["baz"],
+ ])
+ .clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
+ );
+ }
+ #[test]
+ fn remove_non_allowed_classes_with_tag_class() {
+ let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
+ let result = Builder::new()
+ .link_rel(None)
+ .tag_attributes(hashmap![
+ "div" => hashset!["class"],
+ ])
+ .allowed_classes(hashmap![
+ "p" => hashset!["foo", "bar"],
+ "a" => hashset!["baz"],
+ ])
+ .clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
+ );
+ }
+ #[test]
+ fn allowed_classes_ascii_whitespace() {
+ // According to https://infra.spec.whatwg.org/#ascii-whitespace,
+ // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are
+ // considered to be ASCII whitespace. Unicode whitespace characters
+ // and VT (\x0B) aren't ASCII whitespace.
+ let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
+ let result = Builder::new()
+ .allowed_classes(hashmap![
+ "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
+ ])
+ .clean(fragment);
+ assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#);
+ }
+ #[test]
+ fn remove_non_allowed_attributes_with_tag_attribute_values() {
+ let fragment = "<p data-label=\"baz\" name=\"foo\"></p>";
+ let result = Builder::new()
+ .tag_attribute_values(hashmap![
+ "p" => hashmap![
+ "data-label" => hashset!["bar"],
+ ],
+ ])
+ .tag_attributes(hashmap![
+ "p" => hashset!["name"],
+ ])
+ .clean(fragment);
+ assert_eq!(result.to_string(), "<p name=\"foo\"></p>",);
+ }
+ #[test]
+ fn keep_allowed_attributes_with_tag_attribute_values() {
+ let fragment = "<p data-label=\"bar\" name=\"foo\"></p>";
+ let result = Builder::new()
+ .tag_attribute_values(hashmap![
+ "p" => hashmap![
+ "data-label" => hashset!["bar"],
+ ],
+ ])
+ .tag_attributes(hashmap![
+ "p" => hashset!["name"],
+ ])
+ .clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<p data-label=\"bar\" name=\"foo\"></p>",
+ );
+ }
+ #[test]
+ fn tag_attribute_values_case_insensitive() {
+ let fragment = "<input type=\"CHECKBOX\" name=\"foo\">";
+ let result = Builder::new()
+ .tags(hashset!["input"])
+ .tag_attribute_values(hashmap![
+ "input" => hashmap![
+ "type" => hashset!["checkbox"],
+ ],
+ ])
+ .tag_attributes(hashmap![
+ "input" => hashset!["name"],
+ ])
+ .clean(fragment);
+ assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",);
+ }
+ #[test]
+ fn set_tag_attribute_values() {
+ let fragment = "<a href=\"https://example.com/\">Link</a>";
+ let result = Builder::new()
+ .link_rel(None)
+ .add_tag_attributes("a", &["target"])
+ .set_tag_attribute_value("a", "target", "_blank")
+ .clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
+ );
+ }
+ #[test]
+ fn update_existing_set_tag_attribute_values() {
+ let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>";
+ let result = Builder::new()
+ .link_rel(None)
+ .add_tag_attributes("a", &["target"])
+ .set_tag_attribute_value("a", "target", "_blank")
+ .clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
+ );
+ }
+ #[test]
+ fn unwhitelisted_set_tag_attribute_values() {
+ let fragment = "<span>hi</span><my-elem>";
+ let result = Builder::new()
+ .set_tag_attribute_value("my-elem", "my-attr", "val")
+ .clean(fragment);
+ assert_eq!(result.to_string(), "<span>hi</span>",);
+ }
+ #[test]
+ fn remove_entity_link() {
+ let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
+ &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
+ let result = clean(fragment);
+ assert_eq!(
+ result.to_string(),
+ "<a rel=\"noopener noreferrer\">Click me!</a>"
+ );
+ }
+ #[test]
+ fn remove_relative_url_evaluate() {
+ fn is_absolute_path(url: &str) -> bool {
+ let u = url.as_bytes();
+ // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
+ // `/a/b/c` is an absolute path, and what we want to do stuff to.
+ u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
+ }
+ fn is_banned(url: &str) -> bool {
+ let u = url.as_bytes();
+ u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a')
+ }
+ fn evaluate(url: &str) -> Option<Cow<'_, str>> {
+ if is_absolute_path(url) {
+ Some(Cow::Owned(String::from("/root") + url))
+ } else if is_banned(url) {
+ None
+ } else {
+ Some(Cow::Borrowed(url))
+ }
+ }
+ let a = Builder::new()
+ .url_relative(UrlRelative::Custom(Box::new(evaluate)))
+ .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
+ .to_string();
+ assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
+ }
+ #[test]
+ fn remove_relative_url_evaluate_b() {
+ fn is_absolute_path(url: &str) -> bool {
+ let u = url.as_bytes();
+ // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
+ // `/a/b/c` is an absolute path, and what we want to do stuff to.
+ u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
+ }
+ fn is_banned(url: &str) -> bool {
+ let u = url.as_bytes();
+ u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a')
+ }
+ fn evaluate(url: &str) -> Option<Cow<'_, str>> {
+ if is_absolute_path(url) {
+ Some(Cow::Owned(String::from("/root") + url))
+ } else if is_banned(url) {
+ None
+ } else {
+ Some(Cow::Borrowed(url))
+ }
+ }
+ let a = Builder::new()
+ .url_relative(UrlRelative::Custom(Box::new(evaluate)))
+ .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
+ .to_string();
+ assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
+ }
+ #[test]
+ fn remove_relative_url_evaluate_c() {
+ // Don't run on absolute URLs.
+ fn evaluate(_: &str) -> Option<Cow<'_, str>> {
+ return Some(Cow::Owned(String::from("invalid")));
+ }
+ let a = Builder::new()
+ .url_relative(UrlRelative::Custom(Box::new(evaluate)))
+ .clean("<a href=\"https://www.google.com/\">google</a>")
+ .to_string();
+ assert_eq!(
+ a,
+ "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
+ );
+ }
+ #[test]
+ fn clean_children_of_bad_element() {
+ let fragment = "<bad><evil>a</evil>b</bad>";
+ let result = Builder::new().clean(fragment);
+ assert_eq!(result.to_string(), "ab");
+ }
+ #[test]
+ fn reader_input() {
+ let fragment = b"an <script>evil()</script> example";
+ let result = Builder::new().clean_from_reader(&fragment[..]);
+ assert!(result.is_ok());
+ assert_eq!(result.unwrap().to_string(), "an example");
+ }
+ #[test]
+ fn reader_non_utf8() {
+ let fragment = b"non-utf8 \xF0\x90\x80string";
+ let result = Builder::new().clean_from_reader(&fragment[..]);
+ assert!(result.is_ok());
+ assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string");
+ }
+ #[test]
+ fn display_impl() {
+ let fragment = r#"a <a>link</a>"#;
+ let result = Builder::new().link_rel(None).clean(fragment);
+ assert_eq!(format!("{}", result), "a <a>link</a>");
+ }
+ #[test]
+ fn debug_impl() {
+ let fragment = r#"a <a>link</a>"#;
+ let result = Builder::new().link_rel(None).clean(fragment);
+ assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)");
+ }
+ #[cfg(ammonia_unstable)]
+ #[test]
+ fn to_dom_node() {
+ let fragment = r#"a <a>link</a>"#;
+ let result = Builder::new().link_rel(None).clean(fragment);
+ let _node = result.to_dom_node();
+ }
+ #[test]
+ fn string_from_document() {
+ let fragment = r#"a <a>link"#;
+ let result = String::from(Builder::new().link_rel(None).clean(fragment));
+ assert_eq!(format!("{}", result), "a <a>link</a>");
+ }
+ fn require_sync<T: Sync>(_: T) {}
+ fn require_send<T: Send>(_: T) {}
+ #[test]
+ fn require_sync_and_send() {
+ require_sync(Builder::new());
+ require_send(Builder::new());
+ }
+ #[test]
+ fn id_prefixed() {
+ let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>";
+ let result = String::from(
+ Builder::new()
+ .tag_attributes(hashmap![
+ "a" => hashset!["id"],
+ ])
+ .id_prefix(Some("prefix-"))
+ .clean(fragment),
+ );
+ assert_eq!(
+ result.to_string(),
+ "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
+ );
+ }
+ #[test]
+ fn id_already_prefixed() {
+ let fragment = "<a id=\"prefix-hello\"></a>";
+ let result = String::from(
+ Builder::new()
+ .tag_attributes(hashmap![
+ "a" => hashset!["id"],
+ ])
+ .id_prefix(Some("prefix-"))
+ .clean(fragment),
+ );
+ assert_eq!(
+ result.to_string(),
+ "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
+ );
+ }
+ #[test]
+ fn clean_content_tags() {
+ let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>";
+ let result = String::from(
+ Builder::new()
+ .clean_content_tags(hashset!["script"])
+ .clean(fragment),
+ );
+ assert_eq!(result.to_string(), "");
+ }
+ #[test]
+ fn only_clean_content_tags() {
+ let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
+ let result = String::from(
+ Builder::new()
+ .clean_content_tags(hashset!["script"])
+ .clean(fragment),
+ );
+ assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
+ }
+ #[test]
+ fn clean_removed_default_tag() {
+ let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
+ let result = String::from(
+ Builder::new()
+ .rm_tags(hashset!["a"])
+ .rm_tag_attributes("a", hashset!["href", "hreflang"])
+ .clean_content_tags(hashset!["script"])
+ .clean(fragment),
+ );
+ assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
+ }
+ #[test]
+ #[should_panic]
+ fn panic_on_clean_content_tag_attribute() {
+ Builder::new()
+ .rm_tags(std::iter::once("a"))
+ .clean_content_tags(hashset!["a"])
+ .clean("");
+ }
+ #[test]
+ #[should_panic]
+ fn panic_on_clean_content_tag() {
+ Builder::new().clean_content_tags(hashset!["a"]).clean("");
+ }
+
+ #[test]
+ fn clean_text_test() {
+ assert_eq!(
+ clean_text("<this> is <a test function"),
+ "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
+ );
+ }
+
+ #[test]
+ fn clean_text_spaces_test() {
+ assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
+ }
+
+ #[test]
+ fn ns_svg() {
+ // https://github.com/cure53/DOMPurify/pull/495
+ let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
+ let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
+ assert_eq!(result.to_string(), "test");
+
+ let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
+ let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
+ assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>");
+
+ let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
+ let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
+ assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>");
+
+ let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
+ let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment));
+ assert_eq!(
+ result.to_string(),
+ "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
+ );
+ }
+
+ #[test]
+ fn ns_mathml() {
+ // https://github.com/cure53/DOMPurify/pull/495
+ let fragment = "<mglyph></mglyph>";
+ let result = String::from(
+ Builder::new()
+ .add_tags(&["math", "mtext", "mglyph"])
+ .clean(fragment),
+ );
+ assert_eq!(result.to_string(), "");
+ let fragment = "<math><mtext><div><mglyph>";
+ let result = String::from(
+ Builder::new()
+ .add_tags(&["math", "mtext", "mglyph"])
+ .clean(fragment),
+ );
+ assert_eq!(
+ result.to_string(),
+ "<math><mtext><div></div></mtext></math>"
+ );
+ let fragment = "<math><mtext><mglyph>";
+ let result = String::from(
+ Builder::new()
+ .add_tags(&["math", "mtext", "mglyph"])
+ .clean(fragment),
+ );
+ assert_eq!(
+ result.to_string(),
+ "<math><mtext><mglyph></mglyph></mtext></math>"
+ );
+ }
+
+ #[test]
+ fn generic_attribute_prefixes() {
+ let prefix_data = ["data-"];
+ let prefix_code = ["code-"];
+ let mut b = Builder::new();
+ let mut hs: HashSet<&'_ str> = HashSet::new();
+ hs.insert("data-");
+ assert_eq!(b.generic_attribute_prefixes.is_none(), true);
+ b.generic_attribute_prefixes(hs);
+ assert_eq!(b.generic_attribute_prefixes.is_some(), true);
+ assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
+ b.add_generic_attribute_prefixes(&prefix_data);
+ assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
+ b.add_generic_attribute_prefixes(&prefix_code);
+ assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2);
+ b.rm_generic_attribute_prefixes(&prefix_code);
+ assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
+ b.rm_generic_attribute_prefixes(&prefix_code);
+ assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
+ b.rm_generic_attribute_prefixes(&prefix_data);
+ assert_eq!(b.generic_attribute_prefixes.is_none(), true);
+ }
+
+ #[test]
+ fn generic_attribute_prefixes_clean() {
+ let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;
+ let result_cleaned = String::from(
+ Builder::new()
+ .add_tag_attributes("a", &["data-1"])
+ .clean(fragment),
+ );
+ assert_eq!(
+ result_cleaned,
+ r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
+ );
+ let result_allowed = String::from(
+ Builder::new()
+ .add_tag_attributes("a", &["data-1"])
+ .add_generic_attribute_prefixes(&["data-"])
+ .clean(fragment),
+ );
+ assert_eq!(
+ result_allowed,
+ r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
+ );
+ let result_allowed = String::from(
+ Builder::new()
+ .add_tag_attributes("a", &["data-1", "code-1"])
+ .add_generic_attribute_prefixes(&["data-", "code-"])
+ .clean(fragment),
+ );
+ assert_eq!(
+ result_allowed,
+ r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
+ );
+ }
+ #[test]
+ fn lesser_than_isnt_html() {
+ let fragment = "1 < 2";
+ assert!(!is_html(fragment));
+ }
+ #[test]
+ fn dense_lesser_than_isnt_html() {
+ let fragment = "1<2";
+ assert!(!is_html(fragment));
+ }
+ #[test]
+ fn what_about_number_elements() {
+ let fragment = "foo<2>bar";
+ assert!(!is_html(fragment));
+ }
+ #[test]
+ fn turbofish_is_html_sadly() {
+ let fragment = "Vec::<u8>::new()";
+ assert!(is_html(fragment));
+ }
+ #[test]
+ fn stop_grinning() {
+ let fragment = "did you really believe me? <g>";
+ assert!(is_html(fragment));
+ }
+ #[test]
+ fn dont_be_bold() {
+ let fragment = "<b>";
+ assert!(is_html(fragment));
+ }
+}