path: root/testing/web-platform/tests/css/tools/w3ctestlib/
diff options
Diffstat (limited to 'testing/web-platform/tests/css/tools/w3ctestlib/')
1 files changed, 277 insertions, 0 deletions
diff --git a/testing/web-platform/tests/css/tools/w3ctestlib/ b/testing/web-platform/tests/css/tools/w3ctestlib/
new file mode 100644
index 0000000000..7f73bc17ec
--- /dev/null
+++ b/testing/web-platform/tests/css/tools/w3ctestlib/
@@ -0,0 +1,277 @@
+# CSS Test Source Manipulation Library
+# Initial code by fantasai, joint copyright 2010 W3C and Microsoft
+# additions by copyright 2013 Hewlett-Packard
+# Licensed under BSD 3-Clause: <>
+import lxml
+from lxml import etree
+import htmlentitydefs
+import copy
+class HTMLSerializer(object):
+ gXMLns = ''
+ gHTMLns = ''
+ gDefaultNamespaces = {'': 'xmlns',
+ '': 'xmlns',
+ '': 'xlink'}
+ gVoidElements = frozenset((
+ 'base',
+ 'command',
+ 'event-source',
+ 'link',
+ 'meta',
+ 'hr',
+ 'br',
+ 'img',
+ 'embed',
+ 'param',
+ 'area',
+ 'col',
+ 'input',
+ 'source'
+ ))
+ gCDataElements = frozenset((
+ 'style',
+ 'script'
+ ))
+ gInvisibleChars = frozenset(
+ # ASCII control chars
+ range(0x0, 0x9) + range(0xB, 0xD) + range(0xE, 0x20) +
+ # Other control chars
+ # fixed-width spaces, zero-width marks, bidi marks
+ range(0x2000, 0x2010) +
+ # LS, PS, bidi control codes
+ range(0x2028, 0x2030) +
+ # nbsp, mathsp, ideosp, WJ, interlinear
+ [0x00A0, 0x205F, 0x3000, 0x2060, 0xFFF9, 0xFFFA, 0xFFFB]
+ )
+ gXMLEscapes = frozenset(gInvisibleChars |
+ frozenset((ord('&'), ord('<'), ord('>'))))
+ gXMLEntityNames = {'"': 'quot', '&': 'amp', "'": 'apos', '<': 'lt', '>': 'gt'}
+ gDocTypes = {
+ 'html': '<!DOCTYPE html>',
+ 'html4':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "">',
+ 'html4-transitional':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "">',
+ 'html4-frameset':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "">',
+ 'svg11':
+ '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN" "">',
+ 'svg11-tiny':
+ '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Tiny//EN" "">',
+ 'xhtml10':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">',
+ 'xhtml10-transitional':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "">',
+ 'xhtml10-frameset':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "">',
+ 'xhtml11':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "">',
+ 'xhtml-basic11':
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.1//EN" "">'
+ }
+ def __init__(self):
+ self._reset()
+ def _reset(self, xhtml = False):
+ self.mOutput = u''
+ self.mXHTML = xhtml
+ def _output(self, *args):
+ for arg in args:
+ self.mOutput += unicode(arg)
+ def _escape(self, text, escapeChars):
+ # This algorithm is O(MN) for M len(text) and N num escapable
+ # But it doesn't modify the text when N is zero (common case) and
+ # N is expected to be small (usually 1 or 2) in most other cases.
+ escapable = set()
+ for char in text:
+ if ord(char) in escapeChars:
+ escapable.add(char)
+ for char in escapable:
+ if (self.mXHTML):
+ name = self.gXMLEntityNames.get(char)
+ else:
+ name = htmlentitydefs.codepoint2name.get(ord(char))
+ escape = u'&%s;' % name if name else u'&#x%X;' % ord(char)
+ text = text.replace(char, escape)
+ return text
+ def _escapeXML(self, text):
+ return self._escape(text, self.gXMLEscapes)
+ def _escapeInvisible(self, text):
+ return self._escape(text, self.gInvisibleChars)
+ def _serializeElement(self, element, namespacePrefixes):
+ qName = etree.QName(element)
+ attrs = element.attrib.items() # in tree order
+ if (not namespacePrefixes):
+ namespacePrefixes = self.gDefaultNamespaces
+ if (self.mXHTML):
+ namespacePrefixes = copy.copy(namespacePrefixes)
+ for attr, value in attrs:
+ attrQName = etree.QName(attr)
+ if (self.gXMLns == attrQName.namespace):
+ namespacePrefixes[value] = attrQName.localname
+ elif ('xmlns' == attrQName.localname):
+ namespacePrefixes[value] = ''
+ if (self.mXHTML and qName.namespace and namespacePrefixes[qName.namespace]):
+ self._output('<', namespacePrefixes[qName.namespace], ':', qName.localname)
+ else:
+ self._output('<', qName.localname)
+ for attr, value in attrs:
+ attrQName = etree.QName(attr)
+ if ((attrQName.namespace == self.gXMLns) and ('lang' == attrQName.localname)):
+ if (self.mXHTML):
+ attr = 'xml:lang'
+ else:
+ attr = 'lang'
+ elif (attrQName.namespace and namespacePrefixes[attrQName.namespace]):
+ attr = namespacePrefixes[attrQName.namespace] + ':' + attrQName.localname
+ else:
+ attr = attrQName.localname
+ self._output(' ', attr, '=')
+ value = value.replace('&', '&amp;')
+ if (self.mXHTML):
+ value = value.replace('<', '&lt;')
+ if (('"' in value) and ("'" not in value)):
+ self._output("'", self._escapeInvisible(value), "'")
+ else:
+ self._output('"', self._escapeInvisible(value.replace('"', '&quot;')), '"')
+ if ((qName.namespace == self.gHTMLns) and (qName.localname in self.gVoidElements)):
+ if (self.mXHTML):
+ self._output(' />')
+ else:
+ self._output('>')
+ else:
+ self._output('>')
+ if (None != element.text):
+ if ((qName.namespace == self.gHTMLns) and (qName.localname in self.gCDataElements)):
+ if (self.mXHTML):
+ self._output(self._escapeXML(element.text)) # or self._output('<![CDATA[', element.text, ']]>')
+ else:
+ self._output(element.text)
+ else:
+ self._output(self._escapeXML(element.text))
+ for child in list(element):
+ self._serializeNode(child, namespacePrefixes)
+ self._output('</', qName.localname, '>')
+ if (None != element.tail):
+ self._output(self._escapeXML(element.tail))
+ def _serializeEntity(self, entity):
+ self._output(entity.text)
+ if (None != entity.tail):
+ self._output(self._escapeXML(entity.tail))
+ def _serializePI(self, pi):
+ if (self.mXHTML):
+ self._output('<?',, ' ', pi.text, '?>')
+ else:
+ raise Exception("Processing Instructions can't be converted to HTML")
+ if (None != pi.tail):
+ self._output(self._escapeXML(pi.tail))
+ def _serializeComment(self, comment):
+ self._output('<!--', comment.text, '-->') # XXX escape comment?
+ if (None != comment.tail):
+ self._output(self._escapeXML(comment.tail))
+ def _serializeNode(self, node, namespacePrefixes = None):
+ if (isinstance(node, etree._Entity)):
+ self._serializeEntity(node)
+ elif (isinstance(node, etree._ProcessingInstruction)):
+ self._serializePI(node)
+ elif (isinstance(node, etree._Comment)):
+ self._serializeComment(node)
+ else:
+ self._serializeElement(node, namespacePrefixes)
+ def _serializeTree(self, tree):
+ root = tree.getroot()
+ preceding = [node for node in root.itersiblings(preceding = True)]
+ preceding.reverse()
+ for node in preceding:
+ self._serializeNode(node)
+ self._serializeNode(root)
+ for node in root.itersiblings():
+ self._serializeNode(node)
+ def _serializeDoctype(self, tree, doctype, default):
+ if (doctype):
+ self._output(self.gDocTypes[doctype], '\n')
+ else:
+ if (hasattr(tree, 'docinfo') and tree.docinfo and tree.docinfo.doctype):
+ doctypeSearch = tree.docinfo.doctype.lower()
+ for doctype in self.gDocTypes:
+ if (self.gDocTypes[doctype].lower() == doctypeSearch):
+ break
+ else:
+ doctype = None
+ if (self.mXHTML):
+ if ('html' == doctype):
+ doctype = 'xhtml10'
+ elif ('html4' == doctype):
+ doctype = 'xhtml10'
+ elif ('html4-transitional' == doctype):
+ doctype = 'xhtml10-transitional'
+ elif ('html4-frameset' == doctype):
+ doctype = 'xhtml10-frameset'
+ else:
+ if ('xhtml10' == doctype):
+ doctype = 'html4'
+ elif ('xhtml10-transitional' == doctype):
+ doctype = 'html4-transitional'
+ elif ('xhtml10-frameset' == doctype):
+ doctype = 'html4-frameset'
+ elif ('xhtml11' == doctype):
+ doctype = 'html4'
+ if (doctype):
+ self._output(self.gDocTypes[doctype], '\n')
+ else:
+ self._output(tree.docinfo.doctype, '\n')
+ else:
+ self._output(self.gDocTypes[default], '\n')
+ def serializeHTML(self, tree, doctype = None):
+ self._reset()
+ self._serializeDoctype(tree, doctype, 'html')
+ self._serializeTree(tree)
+ return self.mOutput
+ def serializeXHTML(self, tree, doctype = None):
+ self._reset(True)
+ # XXX '<!xml ...' ??
+ self._serializeDoctype(tree, doctype, 'xhtml11')
+ self._serializeTree(tree)
+ return self.mOutput