diff options
Diffstat (limited to 'vendor/pulldown-cmark/tests/lib.rs')
-rw-r--r-- | vendor/pulldown-cmark/tests/lib.rs | 421 |
1 files changed, 421 insertions, 0 deletions
diff --git a/vendor/pulldown-cmark/tests/lib.rs b/vendor/pulldown-cmark/tests/lib.rs new file mode 100644 index 000000000..45bf7f719 --- /dev/null +++ b/vendor/pulldown-cmark/tests/lib.rs @@ -0,0 +1,421 @@ +use html5ever::serialize::{serialize, SerializeOpts}; +use html5ever::{driver as html, local_name, namespace_url, ns, QualName}; +use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; +use pulldown_cmark::{Options, Parser}; + +use regex::Regex; +use std::collections::HashSet; +use std::mem; +use std::rc::{Rc, Weak}; +use tendril::stream::TendrilSink; + +mod suite; + +#[inline(never)] +pub fn test_markdown_html(input: &str, output: &str, smart_punct: bool) { + let mut s = String::new(); + + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_STRIKETHROUGH); + opts.insert(Options::ENABLE_TASKLISTS); + if smart_punct { + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + } + opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); + + let p = Parser::new_ext(input, opts); + pulldown_cmark::html::push_html(&mut s, p); + + assert_eq!(normalize_html(output), normalize_html(&s)); +} + +lazy_static::lazy_static! { + static ref WHITESPACE_RE: Regex = Regex::new(r"\s+").unwrap(); + static ref LEADING_WHITESPACE_RE: Regex = Regex::new(r"\A\s+").unwrap(); + static ref TRAILING_WHITESPACE_RE: Regex = Regex::new(r"\s+\z").unwrap(); + static ref BLOCK_TAGS: HashSet<&'static str> = [ + "article", + "header", + "aside", + "hgroup", + "blockquote", + "hr", + "iframe", + "body", + "li", + "map", + "button", + "object", + "canvas", + "ol", + "caption", + "output", + "col", + "p", + "colgroup", + "pre", + "dd", + "progress", + "div", + "section", + "dl", + "table", + "td", + "dt", + "tbody", + "embed", + "textarea", + "fieldset", + "tfoot", + "figcaption", + "th", + "figure", + "thead", + "footer", + "tr", + "form", + "ul", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "video", + "script", + "style" + ] + .iter() + .cloned() + .collect(); + static ref WHITESPACE_SENSITIVE_TAGS: HashSet<&'static str> = + ["pre", "code", "h1", "h2", "h3", "h4", "h5", "h6"] + .iter() + .cloned() + .collect(); + static ref TABLE_TAGS: HashSet<&'static str> = ["table", "thead", "tbody", "tr", "td"] + .iter() + .cloned() + .collect(); +} + +fn make_html_parser() -> html::Parser<RcDom> { + html::parse_fragment( + RcDom::default(), + html::ParseOpts::default(), + QualName::new(None, ns!(html), local_name!("div")), + vec![], + ) +} + +fn normalize_html(s: &str) -> String { + let parser = make_html_parser(); + let dom = parser.one(s); + let body: SerializableHandle = normalize_dom(&dom).into(); + let opts = SerializeOpts::default(); + let mut ret_val = Vec::new(); + serialize(&mut ret_val, &body, opts) + .expect("Writing to a string shouldn't fail (expect on OOM)"); + String::from_utf8(ret_val).expect("html5ever should always produce UTF8") +} + +fn normalize_dom(dom: &RcDom) -> Handle { + let body = { + let children = dom.document.children.borrow(); + children[0].clone() + }; + let mut current_level = Vec::new(); + let mut next_level = Vec::new(); + current_level.extend(body.children.borrow().iter().cloned().rev()); + loop { + while let Some(mut node) = current_level.pop() { + let parent = node.parent.replace(None); + node.parent.replace(parent.clone()); + let parent = parent + .expect("a node in the DOM will have a parent, except the root, which is not processed") + .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped"); + let retain = normalize_node(&parent, &mut node); + if !retain { + let mut siblings = parent.children.borrow_mut(); + siblings.retain(|s| !Rc::ptr_eq(&node, s)); + } else { + next_level.extend(node.children.borrow().iter().cloned().rev()); + } + } + if next_level.is_empty() { + break; + }; + mem::swap(&mut next_level, &mut current_level); + } + body +} + +// Returns false if node is an empty text node or an empty tbody. +// Returns true otherwise. +fn normalize_node(parent: &Handle, node: &mut Handle) -> bool { + match node.data { + NodeData::Comment { .. } + | NodeData::Doctype { .. } + | NodeData::Document + | NodeData::ProcessingInstruction { .. } => true, + NodeData::Text { ref contents, .. } => { + let mut contents = contents.borrow_mut(); + let is_pre = { + let mut parent = parent.clone(); + loop { + let is_pre = if let NodeData::Element { ref name, .. } = parent.data { + WHITESPACE_SENSITIVE_TAGS.contains(&&*name.local.to_ascii_lowercase()) + } else { + false + }; + if is_pre { + break true; + }; + let parent_ = parent.parent.replace(None); + parent.parent.replace(parent_.clone()); + let parent_ = parent_.as_ref().and_then(Weak::upgrade); + if let Some(parent_) = parent_ { + parent = parent_ + } else { + break false; + }; + } + }; + if !is_pre { + let (is_first_in_block, is_last_in_block) = { + let mut is_first_in_block = true; + let mut is_last_in_block = true; + let mut parent = parent.clone(); + let mut node = node.clone(); + loop { + let reached_block = if let NodeData::Element { ref name, .. } = parent.data + { + BLOCK_TAGS.contains(&&*name.local.to_ascii_lowercase()) + } else { + false + }; + let (is_first, is_last) = { + let siblings = parent.children.borrow(); + let n = &node; + ( + siblings.get(0).map(|s| Rc::ptr_eq(s, n)).unwrap_or(false), + siblings.len() > 0 + && siblings + .get(siblings.len() - 1) + .map(|s| Rc::ptr_eq(s, n)) + .unwrap_or(false), + ) + }; + is_first_in_block = is_first_in_block && is_first; + is_last_in_block = is_last_in_block && is_last; + if (is_first_in_block || is_last_in_block) && !reached_block { + node = parent.clone(); + let parent_ = parent.parent.replace(None); + parent.parent.replace(parent_.clone()); + let parent_ = parent_.as_ref().and_then(Weak::upgrade); + if let Some(parent_) = parent_ { + parent = parent_; + } else { + break (is_first_in_block, is_last_in_block); + } + } else { + break (is_first_in_block, is_last_in_block); + } + } + }; + let is_preceeded_by_ws = { + let mut parent = parent.clone(); + let mut node = node.clone(); + 'ascent: loop { + let is_first = { + let siblings = parent.children.borrow(); + let n = &node; + siblings.get(0).map(|s| Rc::ptr_eq(s, n)).unwrap_or(false) + }; + if is_first { + node = parent.clone(); + let parent_ = parent.parent.replace(None); + parent.parent.replace(parent_.clone()); + let parent_ = parent_.as_ref().and_then(Weak::upgrade); + if let Some(parent_) = parent_ { + parent = parent_; + } else { + break 'ascent false; + } + } else { + let siblings = parent.children.borrow(); + let n = &node; + let mut pos = !0; + 'search: for (i, s) in siblings.iter().enumerate() { + if Rc::ptr_eq(s, n) { + pos = i; + break 'search; + } + } + assert!( + pos != !0, + "The list of node's parent's children shall contain node" + ); + assert!( + pos != 0, + "If node is not first, then node's position shall not be zero" + ); + let mut preceding = siblings[pos - 1].clone(); + 'descent: loop { + if let NodeData::Text { .. } = preceding.data { + break 'descent; + } + preceding = { + let ch = preceding.children.borrow(); + if ch.len() == 0 { + break 'descent; + } + if let Some(preceeding_) = ch.get(ch.len() - 1) { + preceeding_.clone() + } else { + break 'descent; + } + }; + } + if let NodeData::Text { ref contents, .. } = preceding.data { + break 'ascent TRAILING_WHITESPACE_RE.is_match(&*contents.borrow()); + } else { + break 'ascent false; + } + } + } + }; + + let is_in_table = if let NodeData::Element { ref name, .. } = parent.data { + TABLE_TAGS.contains(&&*name.local.to_ascii_lowercase()) + } else { + false + }; + let whitespace_replacement = if is_in_table { "" } else { " " }; + *contents = WHITESPACE_RE + .replace_all(&*contents, whitespace_replacement) + .as_ref() + .into(); + + if is_first_in_block || is_preceeded_by_ws { + *contents = LEADING_WHITESPACE_RE + .replace_all(&*contents, "") + .as_ref() + .into(); + } + if is_last_in_block { + *contents = TRAILING_WHITESPACE_RE + .replace_all(&*contents, "") + .as_ref() + .into(); + } + // TODO: collapse whitespace when adjacent to whitespace. + // For example, the whitespace in the span should be collapsed in all of these cases: + // + // " <span> q </span> " + // "<b>q </b><span> q</span>" + // "<b>q <i></i></b><span> q</span>" + // "<b>q <i></i></b><span> q</span>" + // "q <b></b><span> q</span>" + } + &**contents != "" + } + NodeData::Element { + ref attrs, + ref name, + .. + } => { + let mut attrs = attrs.borrow_mut(); + for a in attrs.iter_mut() { + a.name.local = a.name.local.to_ascii_lowercase().into(); + } + attrs.sort_by(|a: &html5ever::Attribute, b: &html5ever::Attribute| { + (&*a.name.local).cmp(&*b.name.local) + }); + let ascii_name = &*name.local.to_ascii_lowercase(); + // drop empty tbody's + ascii_name != "tbody" + || node.children.borrow().len() > 1 + || node + .children + .borrow() + .iter() + .next() + .map(|only_child| match only_child.data { + NodeData::Text { ref contents, .. } => { + !contents.borrow().chars().all(|c| c.is_whitespace()) + } + _ => true, + }) + .unwrap_or(false) + } + } +} + +#[test] +fn strip_div_newline() { + assert_eq!("<div></div>", normalize_html("<div>\n</div>")); +} + +#[test] +fn strip_end_newline() { + assert_eq!("test", normalize_html("test\n")); +} + +#[test] +fn strip_double_space() { + assert_eq!("test mess", normalize_html("test mess")); +} + +#[test] +fn strip_inline_internal_text() { + assert_eq!( + "<u>a </u>b <u>c</u>", + normalize_html("<u> a </u> b <u> c </u>") + ) +} + +#[test] +fn strip_inline_block_internal_text() { + assert_eq!( + "<u>a </u>b <u>c</u>", + normalize_html(" <u> a </u> b <u> c </u> ") + ) +} + +#[test] +fn leaves_necessary_whitespace_alone() { + assert_eq!("<u>a</u> b <u>c</u>", normalize_html("<u>a</u> b <u>c</u>")) +} + +#[test] +fn leaves_necessary_whitespace_alone_weird() { + assert_eq!( + "<u>a </u>b <u>c</u>", + normalize_html(" <u>a </u>b <u>c</u>") + ) +} + +#[test] +fn leaves_necessary_whitespace_all_nested() { + assert_eq!( + "<u></u><u></u><u></u><u></u>", + normalize_html("<u> </u><u> </u><u> </u><u> </u>") + ) +} + +#[test] +fn drops_empty_tbody() { + assert_eq!( + "<table><thead><tr><td>hi</td></tr></thead></table>", + normalize_html("<table><thead><tr><td>hi</td></tr></thead><tbody> </tbody></table>") + ) +} + +#[test] +fn leaves_nonempty_tbody() { + let input = "<table><thead><tr><td>hi</td></tr></thead><tbody><tr></tr></tbody></table>"; + assert_eq!(input, normalize_html(input)) +} |