author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-15 03:34:42 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-15 03:34:42 +0000
commit     da4c7e7ed675c3bf405668739c3012d140856109 (patch)
tree       cdd868dba063fecba609a1d819de271f0d51b23e /dom/base/fragmentdirectives
parent     Adding upstream version 125.0.3. (diff)
download   firefox-da4c7e7ed675c3bf405668739c3012d140856109.tar.xz
           firefox-da4c7e7ed675c3bf405668739c3012d140856109.zip

Adding upstream version 126.0. (upstream/126.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/base/fragmentdirectives')
-rw-r--r--  dom/base/fragmentdirectives/Cargo.toml                 |  13
-rw-r--r--  dom/base/fragmentdirectives/cbindgen.toml              |  15
-rw-r--r--  dom/base/fragmentdirectives/fragment_directive_impl.rs | 342
-rw-r--r--  dom/base/fragmentdirectives/lib.rs                     | 158
-rw-r--r--  dom/base/fragmentdirectives/test.rs                    | 599
5 files changed, 1127 insertions, 0 deletions
diff --git a/dom/base/fragmentdirectives/Cargo.toml b/dom/base/fragmentdirectives/Cargo.toml
new file mode 100644
index 0000000000..7b3b589668
--- /dev/null
+++ b/dom/base/fragmentdirectives/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "dom_fragmentdirectives"
+version = "0.1.0"
+authors = ["Jan Jaeschke <jjaschke@mozilla.com>"]
+edition = "2021"
+license = "MPL-2.0"
+
+[dependencies]
+nsstring = { path = "../../../xpcom/rust/nsstring/" }
+thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
+percent-encoding = { version = "2.3.1" }
+[lib]
+path = "lib.rs"
diff --git a/dom/base/fragmentdirectives/cbindgen.toml b/dom/base/fragmentdirectives/cbindgen.toml
new file mode 100644
index 0000000000..ec54ebc02d
--- /dev/null
+++ b/dom/base/fragmentdirectives/cbindgen.toml
@@ -0,0 +1,15 @@
+header = """/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
+autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
+"""
+include_version = true
+braces = "SameLine"
+line_length = 100
+tab_width = 2
+language = "C++"
+include_guard = "fragmentdirectives_ffi_generated_h"
+includes = ["nsStringFwd.h", "nsTArrayForwardDeclare.h"]
+
+[export.rename]
+"ThinVec" = "nsTArray"
diff --git a/dom/base/fragmentdirectives/fragment_directive_impl.rs b/dom/base/fragmentdirectives/fragment_directive_impl.rs
new file mode 100644
index 0000000000..dfbdb37415
--- /dev/null
+++ b/dom/base/fragmentdirectives/fragment_directive_impl.rs
@@ -0,0 +1,342 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+* License, v. 2.0. If a copy of the MPL was not distributed with this
+* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+use percent_encoding::{percent_decode, percent_encode, NON_ALPHANUMERIC};
+use std::str;
+
+/// A `TextDirectiveParameter` represents one of
+/// `[prefix-,]start[,end][,-suffix]` without any surrounding `-` or `,`.
+///
+/// The token is stored as a percent-decoded string.
+/// Therefore, interfaces exist to
+/// - create a `TextDirectiveParameter` from a percent-encoded string.
+///   This function determines from the occurrence and position of a dash
+///   whether the token represents a `prefix`, a `suffix`, or either `start` or `end`.
+/// - create a percent-encoded string from the value the token holds.
+pub enum TextDirectiveParameter {
+ Prefix(String),
+ StartOrEnd(String),
+ Suffix(String),
+}
+
+impl TextDirectiveParameter {
+    /// Creates a token from a percent-encoded string.
+    /// Based on the position of a dash, the correct token type is determined.
+    /// Returns `None` in case of an ill-formed token, i.e. if the token
+    /// - starts and ends with a dash (i.e. `-token-`),
+    /// - only consists of a dash (i.e. `-`) or is empty, or
+    /// - fails to convert from a percent-encoded string to UTF-8.
+ pub fn from_percent_encoded(token: &[u8]) -> Option<Self> {
+ if token.is_empty() {
+ return None;
+ }
+ let starts_with_dash = *token.first().unwrap() == b'-';
+ let ends_with_dash = *token.last().unwrap() == b'-';
+ if starts_with_dash && ends_with_dash {
+ // `-token-` is not valid.
+ return None;
+ }
+ if token.len() == 1 && starts_with_dash {
+ // `-` is not valid.
+ return None;
+ }
+        // Note: Trimming of the raw strings is currently not mentioned in the spec.
+        // However, it looks as if it is implicitly expected.
+ if starts_with_dash {
+ if let Ok(decoded_suffix) = percent_decode(&token[1..]).decode_utf8() {
+ return Some(TextDirectiveParameter::Suffix(String::from(
+ decoded_suffix.trim(),
+ )));
+ }
+ return None;
+ }
+ if ends_with_dash {
+ if let Ok(decoded_prefix) = percent_decode(&token[..token.len() - 1]).decode_utf8() {
+ return Some(TextDirectiveParameter::Prefix(String::from(
+ decoded_prefix.trim(),
+ )));
+ }
+ return None;
+ }
+ if let Ok(decoded_text) = percent_decode(&token).decode_utf8() {
+ return Some(TextDirectiveParameter::StartOrEnd(String::from(
+ decoded_text.trim(),
+ )));
+ }
+ None
+ }
+
+    /// Returns a reference to the token's percent-decoded value.
+ pub fn value(&self) -> &String {
+ match self {
+ TextDirectiveParameter::Prefix(value) => &value,
+ TextDirectiveParameter::StartOrEnd(value) => &value,
+ TextDirectiveParameter::Suffix(value) => &value,
+ }
+ }
+
+    /// Creates a percent-encoded string of the token's value.
+    /// This includes placing a dash appropriately
+    /// to indicate whether this token is a prefix, a suffix, or start/end.
+    ///
+    /// This method always returns a new `String`.
+ pub fn to_percent_encoded_string(&self) -> String {
+ let encode = |text: &String| percent_encode(text.as_bytes(), NON_ALPHANUMERIC).to_string();
+ match self {
+ Self::Prefix(text) => encode(text) + "-",
+ Self::StartOrEnd(text) => encode(text),
+ Self::Suffix(text) => {
+ let encoded = encode(text);
+ let mut result = String::with_capacity(encoded.len() + 1);
+ result.push_str("-");
+ result.push_str(&encoded);
+ result
+ }
+ }
+ }
+}
+
+/// This struct represents one parsed text directive using Rust types.
+///
+/// A text directive is encoded into a URL fragment like this:
+/// `text=[prefix-,]start[,end][,-suffix]`
+///
+/// The text directive is considered valid if at least `start` is not `None`
+/// (see `Self::is_valid()`).
+#[derive(Default)]
+pub struct TextDirective {
+ prefix: Option<TextDirectiveParameter>,
+ start: Option<TextDirectiveParameter>,
+ end: Option<TextDirectiveParameter>,
+ suffix: Option<TextDirectiveParameter>,
+}
+impl TextDirective {
+ /// Creates an instance from string parts.
+ /// This function is intended to be used when a fragment directive string should be created.
+ /// Returns `None` if `start` is empty.
+ pub fn from_parts(prefix: String, start: String, end: String, suffix: String) -> Option<Self> {
+ if !start.is_empty() {
+ Some(Self {
+ prefix: if !prefix.is_empty() {
+ Some(TextDirectiveParameter::Prefix(prefix.trim().into()))
+ } else {
+ None
+ },
+ start: Some(TextDirectiveParameter::StartOrEnd(start.trim().into())),
+ end: if !end.is_empty() {
+ Some(TextDirectiveParameter::StartOrEnd(end.trim().into()))
+ } else {
+ None
+ },
+ suffix: if !suffix.is_empty() {
+ Some(TextDirectiveParameter::Suffix(suffix.trim().into()))
+ } else {
+ None
+ },
+ })
+ } else {
+ None
+ }
+ }
+
+    /// Creates an instance from a percent-encoded string
+    /// that originates from a fragment directive.
+    ///
+    /// `text_directive` is supposed to have this format:
+    /// ```text
+    /// text=[prefix-,]start[,end][,-suffix]
+    /// ```
+    /// This function returns `None` if `text_directive`
+    /// does not start with `text=`, if it contains no or
+    /// more than four parameters, or if prefix, suffix,
+    /// start or end occur too many times.
+    /// It also returns `None` if any of the tokens fails to parse.
+ pub fn from_percent_encoded_string(text_directive: &str) -> Option<Self> {
+        // The string must start with `text=` and contain at least one more character.
+ if text_directive.len() < 6 {
+ return None;
+ }
+ if !text_directive.starts_with("text=") {
+ return None;
+ }
+
+ let mut parsed_text_directive = Self::default();
+ let valid = text_directive[5..]
+ .split(",")
+ // Parse the substrings into `TextDirectiveParameter`s. This will determine
+ // for each substring if it is a Prefix, Suffix or Start/End,
+ // or if it is invalid.
+ .map(|token| TextDirectiveParameter::from_percent_encoded(token.as_bytes()))
+ // populate `parsed_text_directive` and check its validity by inserting the parameters
+ // one by one. Given that the parameters are sorted by their position in the source,
+ // the validity of the text directive can be determined while adding the parameters.
+ .map(|token| match token {
+ Some(TextDirectiveParameter::Prefix(..)) => {
+ if !parsed_text_directive.is_empty() {
+ // `prefix-` must be the first result.
+ return false;
+ }
+ parsed_text_directive.prefix = token;
+ return true;
+ }
+ Some(TextDirectiveParameter::StartOrEnd(..)) => {
+ if parsed_text_directive.suffix.is_some() {
+ // start or end must come before `-suffix`.
+ return false;
+ }
+ if parsed_text_directive.start.is_none() {
+ parsed_text_directive.start = token;
+ return true;
+ }
+ if parsed_text_directive.end.is_none() {
+ parsed_text_directive.end = token;
+ return true;
+ }
+                    // if `start` and `end` are already filled,
+                    // this is invalid as well.
+ return false;
+ }
+ Some(TextDirectiveParameter::Suffix(..)) => {
+ if parsed_text_directive.start.is_some()
+ && parsed_text_directive.suffix.is_none()
+ {
+ // `start` must be present and `-suffix` must not be present.
+ // `end` may be present.
+ parsed_text_directive.suffix = token;
+ return true;
+ }
+ return false;
+ }
+ // empty or invalid token renders the whole text directive invalid.
+ None => false,
+ })
+ .all(|valid| valid);
+ if valid {
+ return Some(parsed_text_directive);
+ }
+ None
+ }
+
+    /// Creates a percent-encoded string for the current `TextDirective`.
+    /// In the unlikely case that the `TextDirective` is invalid (i.e. `start` is `None`),
+    /// which should have been caught earlier, this method returns an empty string.
+ pub fn to_percent_encoded_string(&self) -> String {
+ if !self.is_valid() {
+ return String::default();
+ }
+ String::from("text=")
+ + &[&self.prefix, &self.start, &self.end, &self.suffix]
+ .iter()
+ .filter_map(|&token| token.as_ref())
+ .map(|token| token.to_percent_encoded_string())
+ .collect::<Vec<_>>()
+ .join(",")
+ }
+
+ pub fn start(&self) -> &Option<TextDirectiveParameter> {
+ &self.start
+ }
+
+ pub fn end(&self) -> &Option<TextDirectiveParameter> {
+ &self.end
+ }
+
+ pub fn prefix(&self) -> &Option<TextDirectiveParameter> {
+ &self.prefix
+ }
+
+ pub fn suffix(&self) -> &Option<TextDirectiveParameter> {
+ &self.suffix
+ }
+
+ fn is_empty(&self) -> bool {
+ self.prefix.is_none() && self.start.is_none() && self.end.is_none() && self.suffix.is_none()
+ }
+
+ /// A `TextDirective` object is valid if it contains the `start` token.
+ /// All other tokens are optional.
+ fn is_valid(&self) -> bool {
+ self.start.is_some()
+ }
+}
+/// Parses a fragment directive into a list of `TextDirective` objects and
+/// strips the fragment directive from the input url.
+///
+/// If the hash does not contain a fragment directive, this function returns `None`.
+/// Otherwise, the fragment directive is stripped from the returned url and parsed.
+/// If parsing fails, this function returns `None`.
+/// On success, it returns the stripped url, the raw fragment directive string and the parsed text directives.
+pub fn parse_fragment_directive_and_remove_it_from_hash(
+ url: &str,
+) -> Option<(&str, &str, Vec<TextDirective>)> {
+ // The Fragment Directive is preceded by a `:~:`,
+ // which is only allowed to appear in the hash once.
+ // However (even if unlikely), it might appear outside of the hash,
+ // so this code only considers it when it is after the #.
+ let maybe_first_hash_pos = url.find("#");
+    // If there is no `#` in `url`, the whole string is treated as the hash (and not as a full url).
+ let first_hash_pos = maybe_first_hash_pos.unwrap_or_default();
+ let mut fragment_directive_iter = url[first_hash_pos..].split(":~:");
+ let url_with_stripped_fragment_directive =
+ &url[..first_hash_pos + fragment_directive_iter.next().unwrap_or_default().len()];
+
+ if let Some(fragment_directive) = fragment_directive_iter.next() {
+ if fragment_directive_iter.next().is_some() {
+ // There are multiple occurrences of `:~:`, which is not allowed.
+ return None;
+ }
+        // - directives are separated by `&`.
+        // - if a directive does not start with `text=`, it is not a text directive and is ignored.
+        // - if parsing of a text directive fails (for whatever reason), it is ignored.
+ let text_directives: Vec<_> = fragment_directive
+ .split("&")
+ .map(|maybe_text_fragment| {
+ TextDirective::from_percent_encoded_string(&maybe_text_fragment)
+ })
+ .filter_map(|maybe_text_directive| maybe_text_directive)
+ .collect();
+ if !text_directives.is_empty() {
+ return Some((
+ url_with_stripped_fragment_directive
+ .strip_suffix("#")
+ .unwrap_or(url_with_stripped_fragment_directive),
+ fragment_directive,
+ text_directives,
+ ));
+ }
+ }
+ None
+}
+
+/// Creates a percent-encoded fragment directive string.
+///
+/// The returned string starts with `:~:`, so that it can be appended
+/// to a normal fragment.
+/// Text directives which are not valid (i.e. they are missing the `start` parameter)
+/// are skipped.
+///
+/// Returns `None` if `text_directives` is empty or contains no valid directive.
+pub fn create_fragment_directive_string(text_directives: &Vec<TextDirective>) -> Option<String> {
+ if text_directives.is_empty() {
+ return None;
+ }
+ let encoded_fragment_directives: Vec<_> = text_directives
+ .iter()
+ .filter(|&fragment_directive| fragment_directive.is_valid())
+ .map(|fragment_directive| fragment_directive.to_percent_encoded_string())
+ .filter(|text_directive| !text_directive.is_empty())
+ .collect();
+ if encoded_fragment_directives.is_empty() {
+ return None;
+ }
+ Some(String::from(":~:") + &encoded_fragment_directives.join("&"))
+}
+
+/// Creates the percent-encoded text directive string for a single text directive.
+pub fn create_text_directive_string(text_directive: &TextDirective) -> Option<String> {
+ if text_directive.is_valid() {
+ Some(text_directive.to_percent_encoded_string())
+ } else {
+ None
+ }
+}
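A minimal usage sketch of the module above, assuming the crate builds as `dom_fragmentdirectives` (the package name from Cargo.toml) inside the Gecko tree where its dependencies are available; the expected values mirror the unit tests in test.rs further down:

use dom_fragmentdirectives::fragment_directive_impl::{
    create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
    TextDirective,
};

fn main() {
    // Parse a url whose hash carries a fragment directive after a regular fragment.
    let url = "https://example.com/page.html#foo:~:text=prefix-,start,-suffix";
    let (stripped_url, directive, text_directives) =
        parse_fragment_directive_and_remove_it_from_hash(url).expect("directive present");
    assert_eq!(stripped_url, "https://example.com/page.html#foo");
    assert_eq!(directive, "text=prefix-,start,-suffix");
    assert_eq!(text_directives.len(), 1);

    // Recreate a fragment directive string from parts; only `start` is mandatory.
    let td = TextDirective::from_parts(
        "prefix".into(),
        "start".into(),
        String::new(),
        "suffix".into(),
    )
    .expect("`start` is non-empty");
    let recreated = create_fragment_directive_string(&vec![td]).unwrap();
    assert_eq!(recreated, ":~:text=prefix-,start,-suffix");
}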
diff --git a/dom/base/fragmentdirectives/lib.rs b/dom/base/fragmentdirectives/lib.rs
new file mode 100644
index 0000000000..0003849eb7
--- /dev/null
+++ b/dom/base/fragmentdirectives/lib.rs
@@ -0,0 +1,158 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use nsstring::{nsCString, nsString};
+use thin_vec::ThinVec;
+pub mod fragment_directive_impl;
+mod test;
+
+/// This struct contains the percent-decoded parts of a text directive.
+/// All parts besides `start` are optional (which is indicated by an empty string).
+///
+/// This struct uses Gecko String types, whereas the parser internally uses Rust types.
+/// Therefore, conversion functions are provided.
+#[repr(C)]
+pub struct TextDirective {
+ prefix: nsString,
+ start: nsString,
+ end: nsString,
+ suffix: nsString,
+}
+
+impl TextDirective {
+    /// Creates a `TextDirective` FFI object from its Rust counterpart
+    /// `fragment_directive_impl::TextDirective` (which uses Rust string types).
+ fn from_rust_type(element: &fragment_directive_impl::TextDirective) -> Self {
+ Self {
+ prefix: element
+ .prefix()
+ .as_ref()
+ .map_or_else(nsString::new, |token| nsString::from(token.value())),
+ start: element
+ .start()
+ .as_ref()
+ .map_or_else(nsString::new, |token| nsString::from(token.value())),
+ end: element
+ .end()
+ .as_ref()
+ .map_or_else(nsString::new, |token| nsString::from(token.value())),
+ suffix: element
+ .suffix()
+ .as_ref()
+ .map_or_else(nsString::new, |token| nsString::from(token.value())),
+ }
+ }
+
+    /// Converts the contents of this object into Rust types.
+    /// Returns `None` if the text directive is not valid.
+    /// The only invalid condition is a directive that is missing the `start` token.
+ fn to_rust_type(&self) -> Option<fragment_directive_impl::TextDirective> {
+ fragment_directive_impl::TextDirective::from_parts(
+ self.prefix.to_string(),
+ self.start.to_string(),
+ self.end.to_string(),
+ self.suffix.to_string(),
+ )
+ }
+}
+
+/// Result of the `parse_fragment_directive()` function.
+///
+/// The result contains the given URL without the fragment directive,
+/// an unsanitized string version of the extracted fragment directive,
+/// and an array of the parsed text directives.
+#[repr(C)]
+pub struct ParsedFragmentDirectiveResult {
+ url_without_fragment_directive: nsCString,
+ fragment_directive: nsCString,
+ text_directives: ThinVec<TextDirective>,
+}
+
+/// Parses the fragment directive from a given URL.
+///
+/// This function writes the result data into `result`.
+/// The result consists of
+/// - the input url without the fragment directive,
+/// - the fragment directive as unparsed string,
+/// - a list of the parsed and percent-decoded text directives.
+///
+/// Unknown directives are ignored.
+/// If new directive types are added in the future, they should also be considered here.
+/// This function returns false if no fragment directive is found or if it could not be parsed.
+#[no_mangle]
+pub extern "C" fn parse_fragment_directive(
+ url: &nsCString,
+ result: &mut ParsedFragmentDirectiveResult,
+) -> bool {
+ // sanitize inputs
+ result.url_without_fragment_directive = nsCString::new();
+ result.fragment_directive = nsCString::new();
+ result.text_directives.clear();
+
+ let url_as_rust_string = url.to_utf8();
+ if let Some((stripped_url, fragment_directive, text_directives)) =
+ fragment_directive_impl::parse_fragment_directive_and_remove_it_from_hash(
+ &url_as_rust_string,
+ )
+ {
+ result
+ .url_without_fragment_directive
+ .assign(&stripped_url);
+ result.fragment_directive.assign(&fragment_directive);
+ result.text_directives.extend(
+ text_directives
+ .iter()
+ .map(|text_directive| TextDirective::from_rust_type(text_directive)),
+ );
+ return true;
+ }
+ false
+}
+
+/// Creates a percent-encoded fragment directive string from a given list of `TextDirective`s.
+///
+/// The returned string has this form:
+/// `:~:text=[prefix1-,]start1[,end1][,-suffix1]&text=[prefix2-,]start2[,end2][,-suffix2]`
+///
+/// Invalid `TextDirective`s are ignored, where "invalid" means that no `start` token is provided.
+/// If there are no valid `TextDirective`s, this function returns false.
+#[no_mangle]
+pub extern "C" fn create_fragment_directive(
+ text_directives: &ThinVec<TextDirective>,
+ fragment_directive: &mut nsCString,
+) -> bool {
+ let directives_rust = Vec::from_iter(
+ text_directives
+ .iter()
+ .filter_map(|fragment| fragment.to_rust_type()),
+ );
+ if let Some(fragment_directive_rust) =
+ fragment_directive_impl::create_fragment_directive_string(&directives_rust)
+ {
+ fragment_directive.assign(&fragment_directive_rust);
+ return true;
+ }
+
+ false
+}
+
+/// Creates a percent-encoded text directive string for a single text directive.
+/// The returned string has the form `text=[prefix-,]start[,end][,-suffix]`.
+/// If the provided `TextDirective` is invalid (i.e. it has no `start` attribute),
+/// the outparam `directive_string` is empty and the function returns false.
+#[no_mangle]
+pub extern "C" fn create_text_directive(
+ text_directive: &TextDirective,
+ directive_string: &mut nsCString,
+) -> bool {
+ if let Some(text_directive_rust) = text_directive.to_rust_type() {
+ if let Some(text_directive_string_rust) =
+ fragment_directive_impl::create_text_directive_string(&text_directive_rust)
+ {
+ directive_string.assign(&text_directive_string_rust);
+ return true;
+ }
+ }
+ false
+}
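A sketch of how these FFI entry points fit together, written as if it were an additional unit test inside this crate, since the fields of `ParsedFragmentDirectiveResult` are not public and the intended consumer is C++ via the cbindgen-generated header; the module and test names below are illustrative:

#[cfg(test)]
mod ffi_usage_sketch {
    use crate::{parse_fragment_directive, ParsedFragmentDirectiveResult};
    use nsstring::nsCString;
    use thin_vec::ThinVec;

    #[test]
    fn parse_populates_the_outparam() {
        let url = nsCString::from("https://example.com/#:~:text=start");
        let mut result = ParsedFragmentDirectiveResult {
            url_without_fragment_directive: nsCString::new(),
            fragment_directive: nsCString::new(),
            text_directives: ThinVec::new(),
        };
        // On success the outparam carries the stripped url, the raw directive
        // string and one parsed text directive.
        assert!(parse_fragment_directive(&url, &mut result));
        assert_eq!(result.url_without_fragment_directive.to_utf8(), "https://example.com/");
        assert_eq!(result.fragment_directive.to_utf8(), "text=start");
        assert_eq!(result.text_directives.len(), 1);
    }
}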
diff --git a/dom/base/fragmentdirectives/test.rs b/dom/base/fragmentdirectives/test.rs
new file mode 100644
index 0000000000..d4509cb033
--- /dev/null
+++ b/dom/base/fragmentdirectives/test.rs
@@ -0,0 +1,599 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#[cfg(test)]
+mod test {
+ use crate::fragment_directive_impl::{
+ create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
+ TextDirective,
+ };
+
+ /// This test verifies that valid combinations of [prefix-,]start[,end][,-suffix] are parsed correctly.
+ #[test]
+ fn test_parse_fragment_directive_with_one_text_directive() {
+ let test_cases = vec![
+ ("#:~:text=start", (None, Some("start"), None, None)),
+ (
+ "#:~:text=start,end",
+ (None, Some("start"), Some("end"), None),
+ ),
+ (
+ "#:~:text=prefix-,start",
+ (Some("prefix"), Some("start"), None, None),
+ ),
+ (
+ "#:~:text=prefix-,start,end",
+ (Some("prefix"), Some("start"), Some("end"), None),
+ ),
+ (
+ "#:~:text=prefix-,start,end,-suffix",
+ (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
+ ),
+ (
+ "#:~:text=start,-suffix",
+ (None, Some("start"), None, Some("suffix")),
+ ),
+ (
+ "#:~:text=start,end,-suffix",
+ (None, Some("start"), Some("end"), Some("suffix")),
+ ),
+ ("#:~:text=text=", (None, Some("text="), None, None)),
+ ];
+ for (url, (prefix, start, end, suffix)) in test_cases {
+ let (stripped_url, fragment_directive, result) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+ assert_eq!(
+ fragment_directive,
+ &url[4..],
+ "The extracted fragment directive string
+ should be unsanitized and therefore match the input string."
+ );
+ assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
+ assert_eq!(
+ stripped_url, "",
+ "The fragment directive must be removed from the url hash."
+ );
+ let text_directive = result.first().unwrap();
+ if prefix.is_none() {
+ assert!(
+ text_directive.prefix().is_none(),
+ "There must be no `prefix` token (test case `{}`).",
+ url
+ );
+ } else {
+ assert!(
+ text_directive
+ .prefix()
+ .as_ref()
+ .expect("There must be a `prefix` token.")
+ .value()
+ == prefix.unwrap(),
+ "Wrong value for `prefix` (test case `{}`).",
+ url
+ );
+ }
+ if start.is_none() {
+ assert!(
+ text_directive.start().is_none(),
+ "There must be no `start` token (test case `{}`).",
+ url
+ );
+ } else {
+ assert!(
+ text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value()
+ == start.unwrap(),
+ "Wrong value for `start` (test case `{}`).",
+ url
+ );
+ }
+ if end.is_none() {
+ assert!(
+ text_directive.end().is_none(),
+ "There must be no `end` token (test case `{}`).",
+ url
+ );
+ } else {
+ assert!(
+ text_directive
+ .end()
+ .as_ref()
+ .expect("There must be a `end` token.")
+ .value()
+ == end.unwrap(),
+ "Wrong value for `end` (test case `{}`).",
+ url
+ );
+ }
+ if suffix.is_none() {
+ assert!(
+ text_directive.suffix().is_none(),
+ "There must be no `suffix` token (test case `{}`).",
+ url
+ );
+ } else {
+ assert!(
+ text_directive
+ .suffix()
+ .as_ref()
+ .expect("There must be a `suffix` token.")
+ .value()
+ == suffix.unwrap(),
+ "Wrong value for `suffix` (test case `{}`).",
+ url
+ );
+ }
+ }
+ }
+
+ #[test]
+ fn test_parse_full_url() {
+ for (url, stripped_url_ref) in [
+ ("https://example.com#:~:text=foo", "https://example.com"),
+ (
+ "https://example.com/some/page.html?query=answer#:~:text=foo",
+ "https://example.com/some/page.html?query=answer",
+ ),
+ (
+ "https://example.com/some/page.html?query=answer#fragment:~:text=foo",
+ "https://example.com/some/page.html?query=answer#fragment",
+ ),
+ (
+ "http://example.com/page.html?query=irrelevant:~:#bar:~:text=foo",
+ "http://example.com/page.html?query=irrelevant:~:#bar"
+ )
+ ] {
+ let (stripped_url, fragment_directive, _) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result");
+ assert_eq!(stripped_url, stripped_url_ref, "The stripped url is not correct.");
+ assert_eq!(fragment_directive, "text=foo");
+ }
+ }
+
+    /// This test verifies that a text fragment is parsed correctly if it is preceded
+    /// by a regular fragment (i.e. `#foo:~:text=bar`).
+ #[test]
+ fn test_parse_text_fragment_after_fragments() {
+ let url = "#foo:~:text=start";
+ let (stripped_url, fragment_directive, result) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+ assert_eq!(
+ result.len(),
+ 1,
+ "There must be exactly one parsed text fragment."
+ );
+ assert_eq!(
+ stripped_url, "#foo",
+ "The fragment directive was not removed correctly."
+ );
+ assert_eq!(
+ fragment_directive, "text=start",
+ "The fragment directive was not extracted correctly."
+ );
+ let fragment = result.first().unwrap();
+ assert!(fragment.prefix().is_none(), "There is no `prefix` token.");
+ assert_eq!(
+ fragment
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "start"
+ );
+ assert!(fragment.end().is_none(), "There is no `end` token.");
+ assert!(fragment.suffix().is_none(), "There is no `suffix` token.");
+ }
+
+ /// Ensure that multiple text fragments are parsed correctly.
+ #[test]
+ fn test_parse_multiple_text_fragments() {
+ let url = "#:~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
+ let (_, _, text_directives) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+        assert_eq!(
+            text_directives.len(),
+            3,
+            "There must be exactly three parsed text fragments."
+        );
+ let first_text_directive = &text_directives[0];
+ assert_eq!(
+ first_text_directive
+ .prefix()
+ .as_ref()
+ .expect("There must be a `prefix` token.")
+ .value(),
+ "prefix"
+ );
+ assert_eq!(
+ first_text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "start"
+ );
+ assert!(
+ first_text_directive.end().is_none(),
+ "There is no `end` token."
+ );
+ assert_eq!(
+ first_text_directive
+ .suffix()
+ .as_ref()
+ .expect("There must be a `suffix` token.")
+ .value(),
+ "suffix"
+ );
+
+ let second_text_directive = &text_directives[1];
+ assert!(
+ second_text_directive.prefix().is_none(),
+ "There is no `prefix` token."
+ );
+ assert_eq!(
+ second_text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "foo"
+ );
+ assert!(
+ second_text_directive.end().is_none(),
+ "There is no `end` token."
+ );
+ assert!(
+ second_text_directive.suffix().is_none(),
+ "There is no `suffix` token."
+ );
+ let third_text_directive = &text_directives[2];
+ assert!(
+ third_text_directive.prefix().is_none(),
+ "There is no `prefix` token."
+ );
+ assert_eq!(
+ third_text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "bar"
+ );
+ assert!(
+ third_text_directive.end().is_none(),
+ "There is no `end` token."
+ );
+ assert_eq!(
+ third_text_directive
+ .suffix()
+ .as_ref()
+ .expect("There must be a `suffix` token.")
+ .value(),
+ "suffix"
+ );
+ }
+
+ /// Multiple text directives should be parsed correctly
+ /// if they are surrounded or separated by unknown directives.
+ #[test]
+ fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
+ for url in [
+ "#:~:foo&text=start1&text=start2",
+ "#:~:text=start1&foo&text=start2",
+ "#:~:text=start1&text=start2&foo",
+ ] {
+ let (_, fragment_directive, text_directives) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+ assert_eq!(
+ fragment_directive,
+ &url[4..],
+ "The extracted fragment directive string is unsanitized
+ and should contain the unknown directive."
+ );
+ assert_eq!(
+ text_directives.len(),
+ 2,
+ "There must be exactly two parsed text fragments."
+ );
+ let first_text_directive = &text_directives[0];
+ assert_eq!(
+ first_text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "start1"
+ );
+ let second_text_directive = &text_directives[1];
+ assert_eq!(
+ second_text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` token.")
+ .value(),
+ "start2"
+ );
+ }
+ }
+
+    /// Ensures that input that doesn't contain a text fragment does not produce a result.
+    /// This includes the use of only some of the identifying tokens necessary for a text fragment
+    /// (e.g. `:~:` without `text=`, `text=foo` without the `:~:`, or multiple occurrences of `:~:`).
+    /// In these cases, the parser must return `None` to indicate that there are no valid text fragments.
+ #[test]
+ fn test_parse_invalid_or_unknown_fragment_directive() {
+ for url in [
+ "#foo",
+ "#foo:",
+ "#foo:~:",
+ "#foo:~:bar",
+ "text=prefix-,start",
+ "#:~:text=foo-,bar,-baz:~:text=foo",
+ ] {
+ let text_directives =
+ parse_fragment_directive_and_remove_it_from_hash(&url);
+ assert!(
+ text_directives.is_none(),
+ "The fragment `{}` does not contain a valid or known fragment directive.",
+ url
+ );
+ }
+ }
+
+    /// Ensures that ill-formed text directives inside an otherwise valid fragment directive
+    /// (i.e. one starting correctly with `:~:text=`) are not parsed.
+    /// Instead, `None` must be returned.
+ /// Test cases include invalid combinations of `prefix`/`suffix`es,
+ /// additional `,`s, too many `start`/`end` tokens, or empty text fragments.
+ #[test]
+ fn test_parse_invalid_text_fragments() {
+ for url in [
+ "#:~:text=start,start,start",
+ "#:~:text=prefix-,prefix-",
+ "#:~:text=prefix-,-suffix",
+ "#:~:text=prefix-,start,start,start",
+ "#:~:text=prefix-,start,start,start,-suffix",
+ "#:~:text=start,start,start,-suffix",
+ "#:~:text=prefix-,start,end,-suffix,foo",
+ "#:~:text=foo,prefix-,start",
+ "#:~:text=prefix-,,start,",
+ "#:~:text=,prefix,start",
+ "#:~:text=",
+ ] {
+ let text_directives =
+ parse_fragment_directive_and_remove_it_from_hash(&url);
+ assert!(
+ text_directives.is_none(),
+ "The fragment directive `{}` does not contain a valid text directive.",
+ url
+ );
+ }
+ }
+
+ /// Ensure that out of multiple text fragments only the invalid ones are ignored
+ /// while valid text fragments are still returned.
+ /// Since correct parsing of multiple text fragments as well as
+ /// several forms of invalid text fragments are already tested in
+ /// `test_parse_multiple_text_fragments` and `test_parse_invalid_text_fragments()`,
+ /// it should be enough to test this with only one fragment directive
+ /// that contains two text fragments, one of them being invalid.
+ #[test]
+ fn test_valid_and_invalid_text_directives() {
+ for url in [
+ "#:~:text=start&text=,foo,",
+ "#:~:text=foo,foo,foo&text=start",
+ ] {
+ let (_, fragment_directive, text_directives) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+ assert_eq!(
+ fragment_directive,
+ &url[4..],
+ "The extracted fragment directive string is unsanitized
+ and should contain invalid text directives."
+ );
+ assert_eq!(
+ text_directives.len(),
+ 1,
+ "There must be exactly one parsed text fragment."
+ );
+ let text_directive = text_directives.first().unwrap();
+ assert_eq!(
+ text_directive
+ .start()
+ .as_ref()
+ .expect("There must be a `start` value.")
+ .value(),
+ "start",
+ "The `start` value of the text directive has the wrong value."
+ );
+ }
+ }
+
+ /// Ensures that a fragment directive that contains percent-encoded characters
+ /// is decoded correctly. This explicitly includes characters which are used
+    /// for identifying text fragments, i.e. `#`, `,`, `&`, `:`, `~` and `-`.
+ #[test]
+ fn test_parse_percent_encoding_tokens() {
+ let url = "#:~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
+ let (_, fragment_directive, text_directives) =
+ parse_fragment_directive_and_remove_it_from_hash(&url)
+ .expect("The parser must find a result.");
+ assert_eq!(
+ fragment_directive,
+ &url[4..],
+ "The extracted fragment directive string is unsanitized
+ and should contain the original and percent-decoded string."
+ );
+ let text_directive = text_directives.first().unwrap();
+        assert_eq!(
+            text_directive
+                .prefix()
+                .as_ref()
+                .expect("There must be a `prefix` token.")
+                .value(),
+            "prefix&"
+        );
+        assert_eq!(
+            text_directive
+                .start()
+                .as_ref()
+                .expect("There must be a `start` token.")
+                .value(),
+            "start and,"
+        );
+        assert_eq!(
+            text_directive
+                .end()
+                .as_ref()
+                .expect("There must be an `end` token.")
+                .value(),
+            "end#"
+        );
+        assert_eq!(
+            text_directive
+                .suffix()
+                .as_ref()
+                .expect("There must be a `suffix` token.")
+                .value(),
+            "&suffix-"
+        );
+ }
+
+ /// Ensures that a text fragment is created correctly,
+ /// based on a given combination of tokens.
+    /// This includes all sorts of combinations of
+    /// `prefix`, `suffix`, `start` and `end`,
+    /// as well as values for these tokens which contain
+    /// characters that need to be encoded because they are
+    /// identifiers for text fragments
+    /// (`#`, `,`, `&`, `:`, `~` and `-`).
+ #[test]
+ fn test_create_fragment_directive() {
+ for (text_directive, expected_fragment_directive) in [
+ (
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start"),
+ String::new(),
+ String::new(),
+ )
+ .unwrap(),
+ ":~:text=start",
+ ),
+ (
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start"),
+ String::from("end"),
+ String::new(),
+ )
+ .unwrap(),
+ ":~:text=start,end",
+ ),
+ (
+ TextDirective::from_parts(
+ String::from("prefix"),
+ String::from("start"),
+ String::from("end"),
+ String::new(),
+ )
+ .unwrap(),
+ ":~:text=prefix-,start,end",
+ ),
+ (
+ TextDirective::from_parts(
+ String::from("prefix"),
+ String::from("start"),
+ String::from("end"),
+ String::from("suffix"),
+ )
+ .unwrap(),
+ ":~:text=prefix-,start,end,-suffix",
+ ),
+ (
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start"),
+ String::from("end"),
+ String::from("suffix"),
+ )
+ .unwrap(),
+ ":~:text=start,end,-suffix",
+ ),
+ (
+ TextDirective::from_parts(
+ String::from("prefix"),
+ String::from("start"),
+ String::new(),
+ String::from("suffix"),
+ )
+ .unwrap(),
+ ":~:text=prefix-,start,-suffix",
+ ),
+ (
+ TextDirective::from_parts(
+ String::from("prefix-"),
+ String::from("start and,"),
+ String::from("&end"),
+ String::from("#:~:suffix"),
+ )
+ .unwrap(),
+ ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
+ ),
+ ] {
+ let fragment_directive = create_fragment_directive_string(&vec![text_directive])
+ .expect("The given input must produce a valid fragment directive.");
+ assert_eq!(fragment_directive, expected_fragment_directive);
+ }
+ }
+
+ /// Ensures that a fragment directive is created correctly if multiple text fragments are given.
+ /// The resulting fragment must start with `:~:`
+ /// and each text fragment must be separated using `&text=`.
+ #[test]
+ fn test_create_fragment_directive_from_multiple_text_directives() {
+ let text_directives = vec![
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start1"),
+ String::new(),
+ String::new(),
+ )
+ .unwrap(),
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start2"),
+ String::new(),
+ String::new(),
+ )
+ .unwrap(),
+ TextDirective::from_parts(
+ String::new(),
+ String::from("start3"),
+ String::new(),
+ String::new(),
+ )
+ .unwrap(),
+ ];
+ let fragment_directive = create_fragment_directive_string(&text_directives)
+ .expect("The given input must produce a valid fragment directive.");
+ assert_eq!(
+ fragment_directive, ":~:text=start1&text=start2&text=start3",
+ "The created fragment directive is wrong for multiple fragments."
+ );
+ }
+}