1 files changed, 599 insertions, 0 deletions
diff --git a/dom/base/fragmentdirectives/test.rs b/dom/base/fragmentdirectives/test.rs
new file mode 100644
index 0000000000..d4509cb033
--- /dev/null
+++ b/dom/base/fragmentdirectives/test.rs
@@ -0,0 +1,599 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#[cfg(test)]
+mod test {
+    use crate::fragment_directive_impl::{
+        create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
+        TextDirective,
+    };
+
+    /// This test verifies that valid combinations of [prefix-,]start[,end][,-suffix] are parsed correctly.
+    #[test]
+    fn test_parse_fragment_directive_with_one_text_directive() {
+        let test_cases = vec![
+            ("#:~:text=start", (None, Some("start"), None, None)),
+            (
+                "#:~:text=start,end",
+                (None, Some("start"), Some("end"), None),
+            ),
+            (
+                "#:~:text=prefix-,start",
+                (Some("prefix"), Some("start"), None, None),
+            ),
+            (
+                "#:~:text=prefix-,start,end",
+                (Some("prefix"), Some("start"), Some("end"), None),
+            ),
+            (
+                "#:~:text=prefix-,start,end,-suffix",
+                (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
+            ),
+            (
+                "#:~:text=start,-suffix",
+                (None, Some("start"), None, Some("suffix")),
+            ),
+            (
+                "#:~:text=start,end,-suffix",
+                (None, Some("start"), Some("end"), Some("suffix")),
+            ),
+            ("#:~:text=text=", (None, Some("text="), None, None)),
+        ];
+        for (url, (prefix, start, end, suffix)) in test_cases {
+            let (stripped_url, fragment_directive, result) =
+                parse_fragment_directive_and_remove_it_from_hash(&url)
+                    .expect("The parser must find a result.");
+            assert_eq!(
+                fragment_directive,
+                &url[4..],
+                "The extracted fragment directive string
+                should be unsanitized and therefore match the input string."
+            );
+            assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
+            assert_eq!(
+                stripped_url, "",
+                "The fragment directive must be removed from the url hash."
+            );
+            let text_directive = result.first().unwrap();
+            if prefix.is_none() {
+                assert!(
+                    text_directive.prefix().is_none(),
+                    "There must be no `prefix` token (test case `{}`).",
+                    url
+                );
+            } else {
+                assert!(
+                    text_directive
+                        .prefix()
+                        .as_ref()
+                        .expect("There must be a `prefix` token.")
+                        .value()
+                        == prefix.unwrap(),
+                    "Wrong value for `prefix` (test case `{}`).",
+                    url
+                );
+            }
+            if start.is_none() {
+                assert!(
+                    text_directive.start().is_none(),
+                    "There must be no `start` token (test case `{}`).",
+                    url
+                );
+            } else {
+                assert!(
+                    text_directive
+                        .start()
+                        .as_ref()
+                        .expect("There must be a `start` token.")
+                        .value()
+                        == start.unwrap(),
+                    "Wrong value for `start` (test case `{}`).",
+                    url
+                );
+            }
+            if end.is_none() {
+                assert!(
+                    text_directive.end().is_none(),
+                    "There must be no `end` token (test case `{}`).",
+                    url
+                );
+            } else {
+                assert!(
+                    text_directive
+                        .end()
+                        .as_ref()
+                        .expect("There must be a `end` token.")
+                        .value()
+                        == end.unwrap(),
+                    "Wrong value for `end` (test case `{}`).",
+                    url
+                );
+            }
+            if suffix.is_none() {
+                assert!(
+                    text_directive.suffix().is_none(),
+                    "There must be no `suffix` token (test case `{}`).",
+                    url
+                );
+            } else {
+                assert!(
+                    text_directive
+                        .suffix()
+                        .as_ref()
+                        .expect("There must be a `suffix` token.")
+                        .value()
+                        == suffix.unwrap(),
+                    "Wrong value for `suffix` (test case `{}`).",
+                    url
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_parse_full_url() {
+        for (url, stripped_url_ref) in [
+            ("https://example.com#:~:text=foo", "https://example.com"),
+            (
+                "https://example.com/some/page.html?query=answer#:~:text=foo",
+                "https://example.com/some/page.html?query=answer",
+            ),
+            (
+                "https://example.com/some/page.html?query=answer#fragment:~:text=foo",
+                "https://example.com/some/page.html?query=answer#fragment",
+            ),
+            (
+                "http://example.com/page.html?query=irrelevant:~:#bar:~:text=foo",
+                "http://example.com/page.html?query=irrelevant:~:#bar"
+            )
+        ] {
+            let (stripped_url, fragment_directive, _) =
+                parse_fragment_directive_and_remove_it_from_hash(&url)
+                    .expect("The parser must find a result");
+            assert_eq!(stripped_url, stripped_url_ref, "The stripped url is not correct.");
+            assert_eq!(fragment_directive, "text=foo");
+        }
+    }
+
+    /// This test verifies that a text fragment is parsed correctly if it is preceded
+    /// or followed by a fragment (i.e. `#foo:~:text=bar`).
+    #[test]
+    fn test_parse_text_fragment_after_fragments() {
+        let url = "#foo:~:text=start";
+        let (stripped_url, fragment_directive, result) =
+            parse_fragment_directive_and_remove_it_from_hash(&url)
+                .expect("The parser must find a result.");
+        assert_eq!(
+            result.len(),
+            1,
+            "There must be exactly one parsed text fragment."
+        );
+        assert_eq!(
+            stripped_url, "#foo",
+            "The fragment directive was not removed correctly."
+        );
+        assert_eq!(
+            fragment_directive, "text=start",
+            "The fragment directive was not extracted correctly."
+        );
+        let fragment = result.first().unwrap();
+        assert!(fragment.prefix().is_none(), "There is no `prefix` token.");
+        assert_eq!(
+            fragment
+                .start()
+                .as_ref()
+                .expect("There must be a `start` token.")
+                .value(),
+            "start"
+        );
+        assert!(fragment.end().is_none(), "There is no `end` token.");
+        assert!(fragment.suffix().is_none(), "There is no `suffix` token.");
+    }
+
+    /// Ensure that multiple text fragments are parsed correctly.
+    #[test]
+    fn test_parse_multiple_text_fragments() {
+        let url = "#:~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
+        let (_, _, text_directives) =
+            parse_fragment_directive_and_remove_it_from_hash(&url)
+                .expect("The parser must find a result.");
+        assert_eq!(
+            text_directives.len(),
+            3,
+            "There must be exactly two parsed text fragments."
+        );
+        let first_text_directive = &text_directives[0];
+        assert_eq!(
+            first_text_directive
+                .prefix()
+                .as_ref()
+                .expect("There must be a `prefix` token.")
+                .value(),
+            "prefix"
+        );
+        assert_eq!(
+            first_text_directive
+                .start()
+                .as_ref()
+                .expect("There must be a `start` token.")
+                .value(),
+            "start"
+        );
+        assert!(
+            first_text_directive.end().is_none(),
+            "There is no `end` token."
+        );
+        assert_eq!(
+            first_text_directive
+                .suffix()
+                .as_ref()
+                .expect("There must be a `suffix` token.")
+                .value(),
+            "suffix"
+        );
+
+        let second_text_directive = &text_directives[1];
+        assert!(
+            second_text_directive.prefix().is_none(),
+            "There is no `prefix` token."
+        );
+        assert_eq!(
+            second_text_directive
+                .start()
+                .as_ref()
+                .expect("There must be a `start` token.")
+                .value(),
+            "foo"
+        );
+        assert!(
+            second_text_directive.end().is_none(),
+            "There is no `end` token."
+        );
+        assert!(
+            second_text_directive.suffix().is_none(),
+            "There is no `suffix` token."
+        );
+        let third_text_directive = &text_directives[2];
+        assert!(
+            third_text_directive.prefix().is_none(),
+            "There is no `prefix` token."
+        );
+        assert_eq!(
+            third_text_directive
+                .start()
+                .as_ref()
+                .expect("There must be a `start` token.")
+                .value(),
+            "bar"
+        );
+        assert!(
+            third_text_directive.end().is_none(),
+            "There is no `end` token."
+        );
+        assert_eq!(
+            third_text_directive
+                .suffix()
+                .as_ref()
+                .expect("There must be a `suffix` token.")
+                .value(),
+            "suffix"
+        );
+    }
+
+    /// Multiple text directives should be parsed correctly
+    /// if they are surrounded or separated by unknown directives.
+    #[test]
+    fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
+        for url in [
+            "#:~:foo&text=start1&text=start2",
+            "#:~:text=start1&foo&text=start2",
+            "#:~:text=start1&text=start2&foo",
+        ] {
+            let (_, fragment_directive, text_directives) =
+                parse_fragment_directive_and_remove_it_from_hash(&url)
+                    .expect("The parser must find a result.");
+            assert_eq!(
+                fragment_directive,
+                &url[4..],
+                "The extracted fragment directive string is unsanitized
+                and should contain the unknown directive."
+            );
+            assert_eq!(
+                text_directives.len(),
+                2,
+                "There must be exactly two parsed text fragments."
+            );
+            let first_text_directive = &text_directives[0];
+            assert_eq!(
+                first_text_directive
+                    .start()
+                    .as_ref()
+                    .expect("There must be a `start` token.")
+                    .value(),
+                "start1"
+            );
+            let second_text_directive = &text_directives[1];
+            assert_eq!(
+                second_text_directive
+                    .start()
+                    .as_ref()
+                    .expect("There must be a `start` token.")
+                    .value(),
+                "start2"
+            );
+        }
+    }
+
+    /// Ensures that input that doesn't contain a text fragment does not produce a result.
+    /// This includes the use of partial identifying tokens necessary for a text fragment
+    /// (e.g. `:~:` without `text=`, `text=foo` without the `:~:` or multiple occurrences of `:~:`)
+    /// In these cases, the parser must return `None` to indicate that there are no valid text fragments.
+    #[test]
+    fn test_parse_invalid_or_unknown_fragment_directive() {
+        for url in [
+            "#foo",
+            "#foo:",
+            "#foo:~:",
+            "#foo:~:bar",
+            "text=prefix-,start",
+            "#:~:text=foo-,bar,-baz:~:text=foo",
+        ] {
+            let text_directives =
+                parse_fragment_directive_and_remove_it_from_hash(&url);
+            assert!(
+                text_directives.is_none(),
+                "The fragment `{}` does not contain a valid or known fragment directive.",
+                url
+            );
+        }
+    }
+
+    /// Ensures that ill-formed text directives (but valid fragment directives)
+    /// (starting correctly with `:~:text=`) are not parsed.
+    /// Instead `None` must be returned.
+    /// Test cases include invalid combinations of `prefix`/`suffix`es,
+    /// additional `,`s, too many `start`/`end` tokens, or empty text fragments.
+    #[test]
+    fn test_parse_invalid_text_fragments() {
+        for url in [
+            "#:~:text=start,start,start",
+            "#:~:text=prefix-,prefix-",
+            "#:~:text=prefix-,-suffix",
+            "#:~:text=prefix-,start,start,start",
+            "#:~:text=prefix-,start,start,start,-suffix",
+            "#:~:text=start,start,start,-suffix",
+            "#:~:text=prefix-,start,end,-suffix,foo",
+            "#:~:text=foo,prefix-,start",
+            "#:~:text=prefix-,,start,",
+            "#:~:text=,prefix,start",
+            "#:~:text=",
+        ] {
+            let text_directives =
+                parse_fragment_directive_and_remove_it_from_hash(&url);
+            assert!(
+                text_directives.is_none(),
+                "The fragment directive `{}` does not contain a valid text directive.",
+                url
+            );
+        }
+    }
+
+    /// Ensure that out of multiple text fragments only the invalid ones are ignored
+    /// while valid text fragments are still returned.
+    /// Since correct parsing of multiple text fragments as well as
+    /// several forms of invalid text fragments are already tested in
+    /// `test_parse_multiple_text_fragments` and `test_parse_invalid_text_fragments()`,
+    /// it should be enough to test this with only one fragment directive
+    /// that contains two text fragments, one of them being invalid.
+    #[test]
+    fn test_valid_and_invalid_text_directives() {
+        for url in [
+            "#:~:text=start&text=,foo,",
+            "#:~:text=foo,foo,foo&text=start",
+        ] {
+            let (_, fragment_directive, text_directives) =
+                parse_fragment_directive_and_remove_it_from_hash(&url)
+                    .expect("The parser must find a result.");
+            assert_eq!(
+                fragment_directive,
+                &url[4..],
+                "The extracted fragment directive string is unsanitized
+                and should contain invalid text directives."
+            );
+            assert_eq!(
+                text_directives.len(),
+                1,
+                "There must be exactly one parsed text fragment."
+            );
+            let text_directive = text_directives.first().unwrap();
+            assert_eq!(
+                text_directive
+                    .start()
+                    .as_ref()
+                    .expect("There must be a `start` value.")
+                    .value(),
+                "start",
+                "The `start` value of the text directive has the wrong value."
+            );
+        }
+    }
+
+    /// Ensures that a fragment directive that contains percent-encoded characters
+    /// is decoded correctly. This explicitly includes characters which are used
+    /// for identifying text fragments, i.e. `#`, `, `, `&`, `:`, `~` and `-`.
+    #[test]
+    fn test_parse_percent_encoding_tokens() {
+        let url = "#:~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
+        let (_, fragment_directive, text_directives) =
+            parse_fragment_directive_and_remove_it_from_hash(&url)
+                .expect("The parser must find a result.");
+        assert_eq!(
+            fragment_directive,
+            &url[4..],
+            "The extracted fragment directive string is unsanitized
+                and should contain the original and percent-decoded string."
+        );
+        let text_directive = text_directives.first().unwrap();
+        assert_eq!(
+            text_directive
+                .prefix()
+                .as_ref()
+                .expect("There must be a prefix.")
+                .value(),
+            "prefix&",
+            ""
+        );
+        assert_eq!(
+            text_directive
+                .start()
+                .as_ref()
+                .expect("There must be a prefix.")
+                .value(),
+            "start and,",
+            ""
+        );
+        assert_eq!(
+            text_directive
+                .end()
+                .as_ref()
+                .expect("There must be a prefix.")
+                .value(),
+            "end#",
+            ""
+        );
+        assert_eq!(
+            text_directive
+                .suffix()
+                .as_ref()
+                .expect("There must be a prefix.")
+                .value(),
+            "&suffix-",
+            ""
+        );
+    }
+
+    /// Ensures that a text fragment is created correctly,
+    /// based on a given combination of tokens.
+    /// This includes all sorts of combinations of
+    /// `prefix`, `suffix`, `start` and `end`,
+    /// als well as values for these tokens which contain
+    /// characters that need to be encoded because they are
+    /// identifiers for text fragments
+    /// (#`, `, `, `&`, `:`, `~` and `-`).
+    #[test]
+    fn test_create_fragment_directive() {
+        for (text_directive, expected_fragment_directive) in [
+            (
+                TextDirective::from_parts(
+                    String::new(),
+                    String::from("start"),
+                    String::new(),
+                    String::new(),
+                )
+                .unwrap(),
+                ":~:text=start",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::new(),
+                    String::from("start"),
+                    String::from("end"),
+                    String::new(),
+                )
+                .unwrap(),
+                ":~:text=start,end",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::from("prefix"),
+                    String::from("start"),
+                    String::from("end"),
+                    String::new(),
+                )
+                .unwrap(),
+                ":~:text=prefix-,start,end",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::from("prefix"),
+                    String::from("start"),
+                    String::from("end"),
+                    String::from("suffix"),
+                )
+                .unwrap(),
+                ":~:text=prefix-,start,end,-suffix",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::new(),
+                    String::from("start"),
+                    String::from("end"),
+                    String::from("suffix"),
+                )
+                .unwrap(),
+                ":~:text=start,end,-suffix",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::from("prefix"),
+                    String::from("start"),
+                    String::new(),
+                    String::from("suffix"),
+                )
+                .unwrap(),
+                ":~:text=prefix-,start,-suffix",
+            ),
+            (
+                TextDirective::from_parts(
+                    String::from("prefix-"),
+                    String::from("start and,"),
+                    String::from("&end"),
+                    String::from("#:~:suffix"),
+                )
+                .unwrap(),
+                ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
+            ),
+        ] {
+            let fragment_directive = create_fragment_directive_string(&vec![text_directive])
+                .expect("The given input must produce a valid fragment directive.");
+            assert_eq!(fragment_directive, expected_fragment_directive);
+        }
+    }
+
+    /// Ensures that a fragment directive is created correctly if multiple text fragments are given.
+    /// The resulting fragment must start with `:~:`
+    /// and each text fragment must be separated using `&text=`.
+    #[test]
+    fn test_create_fragment_directive_from_multiple_text_directives() {
+        let text_directives = vec![
+            TextDirective::from_parts(
+                String::new(),
+                String::from("start1"),
+                String::new(),
+                String::new(),
+            )
+            .unwrap(),
+            TextDirective::from_parts(
+                String::new(),
+                String::from("start2"),
+                String::new(),
+                String::new(),
+            )
+            .unwrap(),
+            TextDirective::from_parts(
+                String::new(),
+                String::from("start3"),
+                String::new(),
+                String::new(),
+            )
+            .unwrap(),
+        ];
+        let fragment_directive = create_fragment_directive_string(&text_directives)
+            .expect("The given input must produce a valid fragment directive.");
+        assert_eq!(
+            fragment_directive, ":~:text=start1&text=start2&text=start3",
+            "The created fragment directive is wrong for multiple fragments."
+        );
+    }
+}