// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. extern crate html5ever; extern crate typed_arena; use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::tendril::{StrTendril, TendrilSink}; use html5ever::{parse_document, Attribute, ExpandedName, QualName}; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashSet; use std::io::{self, Read}; use std::ptr; fn main() { let mut bytes = Vec::new(); io::stdin().read_to_end(&mut bytes).unwrap(); let arena = typed_arena::Arena::new(); html5ever_parse_slice_into_arena(&bytes, &arena); } fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { let sink = Sink { arena: arena, document: arena.alloc(Node::new(NodeData::Document)), quirks_mode: QuirksMode::NoQuirks, }; parse_document(sink, Default::default()) .from_utf8() .one(bytes) } type Arena<'arena> = &'arena typed_arena::Arena>; type Ref<'arena> = &'arena Node<'arena>; type Link<'arena> = Cell>>; struct Sink<'arena> { arena: Arena<'arena>, document: Ref<'arena>, quirks_mode: QuirksMode, } pub struct Node<'arena> { parent: Link<'arena>, next_sibling: Link<'arena>, previous_sibling: Link<'arena>, first_child: Link<'arena>, last_child: Link<'arena>, data: NodeData<'arena>, } pub enum NodeData<'arena> { Document, Doctype { name: StrTendril, public_id: StrTendril, system_id: StrTendril, }, Text { contents: RefCell, }, Comment { contents: StrTendril, }, Element { name: QualName, attrs: RefCell>, template_contents: Option>, mathml_annotation_xml_integration_point: bool, }, ProcessingInstruction { target: StrTendril, contents: StrTendril, }, } impl<'arena> Node<'arena> { fn new(data: NodeData<'arena>) -> Self { Node { parent: Cell::new(None), previous_sibling: Cell::new(None), next_sibling: Cell::new(None), first_child: Cell::new(None), last_child: Cell::new(None), data: data, } } fn detach(&self) { let parent = self.parent.take(); let previous_sibling = self.previous_sibling.take(); let next_sibling = self.next_sibling.take(); if let Some(next_sibling) = next_sibling { next_sibling.previous_sibling.set(previous_sibling); } else if let Some(parent) = parent { parent.last_child.set(previous_sibling); } if let Some(previous_sibling) = previous_sibling { previous_sibling.next_sibling.set(next_sibling); } else if let Some(parent) = parent { parent.first_child.set(next_sibling); } } fn append(&'arena self, new_child: &'arena Self) { new_child.detach(); new_child.parent.set(Some(self)); if let Some(last_child) = self.last_child.take() { new_child.previous_sibling.set(Some(last_child)); debug_assert!(last_child.next_sibling.get().is_none()); last_child.next_sibling.set(Some(new_child)); } else { debug_assert!(self.first_child.get().is_none()); self.first_child.set(Some(new_child)); } self.last_child.set(Some(new_child)); } fn insert_before(&'arena self, new_sibling: &'arena Self) { new_sibling.detach(); new_sibling.parent.set(self.parent.get()); new_sibling.next_sibling.set(Some(self)); if let Some(previous_sibling) = self.previous_sibling.take() { new_sibling.previous_sibling.set(Some(previous_sibling)); debug_assert!(ptr::eq::( previous_sibling.next_sibling.get().unwrap(), self )); previous_sibling.next_sibling.set(Some(new_sibling)); } else if let Some(parent) = self.parent.get() { debug_assert!(ptr::eq::(parent.first_child.get().unwrap(), self)); parent.first_child.set(Some(new_sibling)); } self.previous_sibling.set(Some(new_sibling)); } } impl<'arena> Sink<'arena> { fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { self.arena.alloc(Node::new(data)) } fn append_common(&self, child: NodeOrText>, previous: P, append: A) where P: FnOnce() -> Option>, A: FnOnce(Ref<'arena>), { let new_node = match child { NodeOrText::AppendText(text) => { // Append to an existing Text node if we have one. if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() { contents.borrow_mut().push_tendril(&text); return; } self.new_node(NodeData::Text { contents: RefCell::new(text), }) }, NodeOrText::AppendNode(node) => node, }; append(new_node) } } impl<'arena> TreeSink for Sink<'arena> { type Handle = Ref<'arena>; type Output = Ref<'arena>; fn finish(self) -> Ref<'arena> { self.document } fn parse_error(&mut self, _: Cow<'static, str>) {} fn get_document(&mut self) -> Ref<'arena> { self.document } fn set_quirks_mode(&mut self, mode: QuirksMode) { self.quirks_mode = mode; } fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { ptr::eq::(*x, *y) } fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { match target.data { NodeData::Element { ref name, .. } => name.expanded(), _ => panic!("not an element!"), } } fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data { contents } else { panic!("not a template element!") } } fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { mathml_annotation_xml_integration_point } else { panic!("not an element!") } } fn create_element( &mut self, name: QualName, attrs: Vec, flags: ElementFlags, ) -> Ref<'arena> { self.new_node(NodeData::Element { name, attrs: RefCell::new(attrs), template_contents: if flags.template { Some(self.new_node(NodeData::Document)) } else { None }, mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, }) } fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { self.new_node(NodeData::Comment { contents: text }) } fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { self.new_node(NodeData::ProcessingInstruction { target: target, contents: data, }) } fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText>) { self.append_common( child, || parent.last_child.get(), |new_node| parent.append(new_node), ) } fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText>) { self.append_common( child, || sibling.previous_sibling.get(), |new_node| sibling.insert_before(new_node), ) } fn append_based_on_parent_node( &mut self, element: &Ref<'arena>, prev_element: &Ref<'arena>, child: NodeOrText>, ) { if element.parent.get().is_some() { self.append_before_sibling(element, child) } else { self.append(prev_element, child) } } fn append_doctype_to_document( &mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, ) { self.document.append(self.new_node(NodeData::Doctype { name, public_id, system_id, })) } fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec) { let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { attrs.borrow_mut() } else { panic!("not an element") }; let existing_names = existing .iter() .map(|e| e.name.clone()) .collect::>(); existing.extend( attrs .into_iter() .filter(|attr| !existing_names.contains(&attr.name)), ); } fn remove_from_parent(&mut self, target: &Ref<'arena>) { target.detach() } fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { let mut next_child = node.first_child.get(); while let Some(child) = next_child { debug_assert!(ptr::eq::(child.parent.get().unwrap(), *node)); next_child = child.next_sibling.get(); new_parent.append(child) } } }