summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
parentInitial commit. (diff)
downloadfirefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/css/tools/w3ctestlib/Sources.py')
-rw-r--r--testing/web-platform/tests/css/tools/w3ctestlib/Sources.py1473
1 files changed, 1473 insertions, 0 deletions
diff --git a/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
new file mode 100644
index 0000000000..f3848030ba
--- /dev/null
+++ b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
@@ -0,0 +1,1473 @@
+#!/usr/bin/python
+# CSS Test Source Manipulation Library
+# Initial code by fantasai, joint copyright 2010 W3C and Microsoft
+# Licensed under BSD 3-Clause: <http://www.w3.org/Consortium/Legal/2008/03-bsd-license>
+
+from __future__ import print_function
+from os.path import basename, exists, join
+import os
+import filecmp
+import shutil
+import re
+import codecs
+import collections
+from xml import dom
+import html5lib
+from html5lib import treebuilders
+from lxml import etree
+from lxml.etree import ParseError
+from Utils import getMimeFromExt, escapeToNamedASCII, basepath, isPathInsideBase, relativeURL, assetName
+import HTMLSerializer
+import warnings
+import hashlib
+
+class SourceTree(object):
+ """Class that manages structure of test repository source.
+ Temporarily hard-coded path and filename rules, this should be configurable.
+ """
+
+ def __init__(self, repository = None):
+ self.mTestExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.svg']
+ self.mReferenceExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.png', '.svg']
+ self.mRepository = repository
+
+ def _splitDirs(self, dir):
+ if ('' == dir):
+ pathList = []
+ elif ('/' in dir):
+ pathList = dir.split('/')
+ else:
+ pathList = dir.split(os.path.sep)
+ return pathList
+
+ def _splitPath(self, filePath):
+ """split a path into a list of directory names and the file name
+ paths may come form the os or mercurial, which always uses '/' as the
+ directory separator
+ """
+ dir, fileName = os.path.split(filePath.lower())
+ return (self._splitDirs(dir), fileName)
+
+ def isTracked(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName))
+
+ def _isApprovedPath(self, pathList):
+ return ((1 < len(pathList)) and ('approved' == pathList[0]) and (('support' == pathList[1]) or ('src' in pathList)))
+
+ def isApprovedPath(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName)) and self._isApprovedPath(pathList)
+
+ def _isIgnoredPath(self, pathList):
+ return (('.hg' in pathList) or ('.git' in pathList) or
+ ('.svn' in pathList) or ('cvs' in pathList) or
+ ('incoming' in pathList) or ('work-in-progress' in pathList) or
+ ('data' in pathList) or ('archive' in pathList) or
+ ('reports' in pathList) or ('tools' == pathList[0]) or
+ ('test-plan' in pathList) or ('test-plans' in pathList))
+
+ def _isIgnored(self, pathList, fileName):
+ if (pathList): # ignore files in root
+ return (self._isIgnoredPath(pathList) or
+ fileName.startswith('.directory') or ('lock' == fileName) or
+ ('.ds_store' == fileName) or
+ fileName.startswith('.hg') or fileName.startswith('.git') or
+ ('sections.dat' == fileName) or ('get-spec-sections.pl' == fileName))
+ return True
+
+ def isIgnored(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return self._isIgnored(pathList, fileName)
+
+ def isIgnoredDir(self, dir):
+ pathList = self._splitDirs(dir)
+ return self._isIgnoredPath(pathList)
+
+ def _isToolPath(self, pathList):
+ return ('tools' in pathList)
+
+ def _isTool(self, pathList, fileName):
+ return self._isToolPath(pathList)
+
+ def isTool(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName)) and self._isTool(pathList, fileName)
+
+ def _isSupportPath(self, pathList):
+ return ('support' in pathList)
+
+ def _isSupport(self, pathList, fileName):
+ return (self._isSupportPath(pathList) or
+ ((not self._isTool(pathList, fileName)) and
+ (not self._isReference(pathList, fileName)) and
+ (not self._isTestCase(pathList, fileName))))
+
+ def isSupport(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName)) and self._isSupport(pathList, fileName)
+
+ def _isReferencePath(self, pathList):
+ return (('reftest' in pathList) or ('reference' in pathList))
+
+ def _isReference(self, pathList, fileName):
+ if ((not self._isSupportPath(pathList)) and (not self._isToolPath(pathList))):
+ baseName, fileExt = os.path.splitext(fileName)[:2]
+ if (bool(re.search('(^ref-|^notref-).+', baseName)) or
+ bool(re.search('.+(-ref[0-9]*$|-notref[0-9]*$)', baseName)) or
+ ('-ref-' in baseName) or ('-notref-' in baseName)):
+ return (fileExt in self.mReferenceExtensions)
+ if (self._isReferencePath(pathList)):
+ return (fileExt in self.mReferenceExtensions)
+ return False
+
+ def isReference(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName)) and self._isReference(pathList, fileName)
+
+ def isReferenceAnywhere(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return self._isReference(pathList, fileName)
+
+ def _isTestCase(self, pathList, fileName):
+ if ((not self._isToolPath(pathList)) and (not self._isSupportPath(pathList)) and (not self._isReference(pathList, fileName))):
+ fileExt = os.path.splitext(fileName)[1]
+ return (fileExt in self.mTestExtensions)
+ return False
+
+ def isTestCase(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ return (not self._isIgnored(pathList, fileName)) and self._isTestCase(pathList, fileName)
+
+ def getAssetName(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ if (self._isReference(pathList, fileName) or self._isTestCase(pathList, fileName)):
+ return assetName(fileName)
+ return fileName.lower() # support files keep full name
+
+ def getAssetType(self, filePath):
+ pathList, fileName = self._splitPath(filePath)
+ if (self._isReference(pathList, fileName)):
+ return intern('reference')
+ if (self._isTestCase(pathList, fileName)):
+ return intern('testcase')
+ if (self._isTool(pathList, fileName)):
+ return intern('tool')
+ return intern('support')
+
+
+class SourceCache:
+ """Cache for FileSource objects. Supports one FileSource object
+ per sourcepath.
+ """
+ def __init__(self, sourceTree):
+ self.__cache = {}
+ self.sourceTree = sourceTree
+
+ def generateSource(self, sourcepath, relpath, data = None):
+ """Return a FileSource or derivative based on the extensionMap.
+
+ Uses a cache to avoid creating more than one of the same object:
+ does not support creating two FileSources with the same sourcepath;
+ asserts if this is tried. (.htaccess files are not cached.)
+
+ Cache is bypassed if loading form a change context
+ """
+ if ((None == data) and self.__cache.has_key(sourcepath)):
+ source = self.__cache[sourcepath]
+ assert relpath == source.relpath
+ return source
+
+ if basename(sourcepath) == '.htaccess':
+ return ConfigSource(self.sourceTree, sourcepath, relpath, data)
+ mime = getMimeFromExt(sourcepath)
+ if (mime == 'application/xhtml+xml'):
+ source = XHTMLSource(self.sourceTree, sourcepath, relpath, data)
+ elif (mime == 'text/html'):
+ source = HTMLSource(self.sourceTree, sourcepath, relpath, data)
+ elif (mime == 'image/svg+xml'):
+ source = SVGSource(self.sourceTree, sourcepath, relpath, data)
+ elif (mime == 'application/xml'):
+ source = XMLSource(self.sourceTree, sourcepath, relpath, data)
+ else:
+ source = FileSource(self.sourceTree, sourcepath, relpath, mime, data)
+ if (None == data):
+ self.__cache[sourcepath] = source
+ return source
+
+class SourceSet:
+ """Set of FileSource objects. No two FileSources of the same type in the set may
+ have the same name (except .htaccess files, which are merged).
+ """
+ def __init__(self, sourceCache):
+ self.sourceCache = sourceCache
+ self.pathMap = {} # type/name -> source
+
+ def __len__(self):
+ return len(self.pathMap)
+
+ def _keyOf(self, source):
+ return source.type() + '/' + source.keyName()
+
+ def __contains__(self, source):
+ return self._keyOf(source) in self.pathMap
+
+
+ def iter(self):
+ """Iterate over FileSource objects in SourceSet.
+ """
+ return self.pathMap.itervalues()
+
+ def addSource(self, source, ui):
+ """Add FileSource `source`. Throws exception if we already have
+ a FileSource with the same path relpath but different contents.
+ (ConfigSources are exempt from this requirement.)
+ """
+ cachedSource = self.pathMap.get(self._keyOf(source))
+ if not cachedSource:
+ self.pathMap[self._keyOf(source)] = source
+ else:
+ if source != cachedSource:
+ if isinstance(source, ConfigSource):
+ cachedSource.append(source)
+ else:
+ ui.warn("File merge mismatch %s vs %s for %s\n" % \
+ (cachedSource.sourcepath, source.sourcepath, source.name()))
+
+ def add(self, sourcepath, relpath, ui):
+ """Generate and add FileSource from sourceCache. Return the resulting
+ FileSource.
+
+ Throws exception if we already have a FileSource with the same path
+ relpath but different contents.
+ """
+ source = self.sourceCache.generateSource(sourcepath, relpath)
+ self.addSource(source, ui)
+ return source
+
+ @staticmethod
+ def combine(a, b, ui):
+ """Merges a and b, and returns whichever one contains the merger (which
+ one is chosen based on merge efficiency). Can accept None as an argument.
+ """
+ if not (a and b):
+ return a or b
+ if len(a) < len(b):
+ return b.merge(a, ui)
+ return a.merge(b, ui)
+
+ def merge(self, other, ui):
+ """Merge sourceSet's contents into this SourceSet.
+
+ Throws a RuntimeError if there's a sourceCache mismatch.
+ Throws an Exception if two files with the same relpath mismatch.
+ Returns merge result (i.e. self)
+ """
+ if self.sourceCache is not other.sourceCache:
+ raise RuntimeError
+
+ for source in other.pathMap.itervalues():
+ self.addSource(source, ui)
+ return self
+
+ def adjustContentPaths(self, format):
+ for source in self.pathMap.itervalues():
+ source.adjustContentPaths(format)
+
+ def write(self, format):
+ """Write files out through OutputFormat `format`.
+ """
+ for source in self.pathMap.itervalues():
+ format.write(source)
+
+
+class StringReader(object):
+ """Wrapper around a string to give it a file-like api
+ """
+ def __init__(self, string):
+ self.mString = string
+ self.mIndex = 0
+
+ def read(self, maxSize = None):
+ if (self.mIndex < len(self.mString)):
+ if (maxSize and (0 < maxSize)):
+ slice = self.mString[self.mIndex:self.mIndex + maxSize]
+ self.mIndex += len(slice)
+ return slice
+ else:
+ self.mIndex = len(self.mString)
+ return self.mString
+ return ''
+
+
+class NamedDict(object):
+ def get(self, key):
+ if (key in self):
+ return self[key]
+ return None
+
+ def __eq__(self, other):
+ for key in self.__slots__:
+ if (self[key] != other[key]):
+ return False
+ return True
+
+ def __ne__(self, other):
+ for key in self.__slots__:
+ if (self[key] != other[key]):
+ return True
+ return False
+
+ def __len__(self):
+ return len(self.__slots__)
+
+ def __iter__(self):
+ return iter(self.__slots__)
+
+ def __contains__(self, key):
+ return (key in self.__slots__)
+
+ def copy(self):
+ clone = self.__class__()
+ for key in self.__slots__:
+ clone[key] = self[key]
+ return clone
+
+ def keys(self):
+ return self.__slots__
+
+ def has_key(self, key):
+ return (key in self)
+
+ def items(self):
+ return [(key, self[key]) for key in self.__slots__]
+
+ def iteritems(self):
+ return iter(self.items())
+
+ def iterkeys(self):
+ return self.__iter__()
+
+ def itervalues(self):
+ return iter(self.items())
+
+ def __str__(self):
+ return '{ ' + ', '.join([key + ': ' + str(self[key]) for key in self.__slots__]) + ' }'
+
+
+class Metadata(NamedDict):
+ __slots__ = ('name', 'title', 'asserts', 'credits', 'reviewers', 'flags', 'links', 'references', 'revision', 'selftest', 'scripttest')
+
+ def __init__(self, name = None, title = None, asserts = [], credits = [], reviewers = [], flags = [], links = [],
+ references = [], revision = None, selftest = True, scripttest = False):
+ self.name = name
+ self.title = title
+ self.asserts = asserts
+ self.credits = credits
+ self.reviewers = reviewers
+ self.flags = flags
+ self.links = links
+ self.references = references
+ self.revision = revision
+ self.selftest = selftest
+ self.scripttest = scripttest
+
+ def __getitem__(self, key):
+ if ('name' == key):
+ return self.name
+ if ('title' == key):
+ return self.title
+ if ('asserts' == key):
+ return self.asserts
+ if ('credits' == key):
+ return self.credits
+ if ('reviewers' == key):
+ return self.reviewers
+ if ('flags' == key):
+ return self.flags
+ if ('links' == key):
+ return self.links
+ if ('references' == key):
+ return self.references
+ if ('revision' == key):
+ return self.revision
+ if ('selftest' == key):
+ return self.selftest
+ if ('scripttest' == key):
+ return self.scripttest
+ return None
+
+ def __setitem__(self, key, value):
+ if ('name' == key):
+ self.name = value
+ elif ('title' == key):
+ self.title = value
+ elif ('asserts' == key):
+ self.asserts = value
+ elif ('credits' == key):
+ self.credits = value
+ elif ('reviewers' == key):
+ self.reviewers = value
+ elif ('flags' == key):
+ self.flags = value
+ elif ('links' == key):
+ self.links = value
+ elif ('references' == key):
+ self.references = value
+ elif ('revision' == key):
+ self.revision = value
+ elif ('selftest' == key):
+ self.selftest = value
+ elif ('scripttest' == key):
+ self.scripttest = value
+ else:
+ raise KeyError()
+
+
+class ReferenceData(NamedDict):
+ __slots__ = ('name', 'type', 'relpath', 'repopath')
+
+ def __init__(self, name = None, type = None, relpath = None, repopath = None):
+ self.name = name
+ self.type = type
+ self.relpath = relpath
+ self.repopath = repopath
+
+ def __getitem__(self, key):
+ if ('name' == key):
+ return self.name
+ if ('type' == key):
+ return self.type
+ if ('relpath' == key):
+ return self.relpath
+ if ('repopath' == key):
+ return self.repopath
+ return None
+
+ def __setitem__(self, key, value):
+ if ('name' == key):
+ self.name = value
+ elif ('type' == key):
+ self.type = value
+ elif ('relpath' == key):
+ self.relpath = value
+ elif ('repopath' == key):
+ self.repopath = value
+ else:
+ raise KeyError()
+
+UserData = collections.namedtuple('UserData', ('name', 'link'))
+
+class LineString(str):
+ def __new__(cls, value, line):
+ self = str.__new__(cls, value)
+ self.line = line
+ return self
+
+ def lineValue(self):
+ return 'Line ' + str(self.line) + ': ' + str.__str__(self) if (self.line) else str.__str__(self)
+
+
+class FileSource:
+ """Object representing a file. Two FileSources are equal if they represent
+ the same file contents. It is recommended to use a SourceCache to generate
+ FileSources.
+ """
+
+ def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
+ """Init FileSource from source path. Give it relative path relpath.
+
+ `mimetype` should be the canonical MIME type for the file, if known.
+ If `mimetype` is None, guess type from file extension, defaulting to
+ the None key's value in extensionMap.
+
+ `data` if provided, is a the contents of the file. Otherwise the file is read
+ from disk.
+ """
+ self.sourceTree = sourceTree
+ self.sourcepath = sourcepath
+ self.relpath = relpath
+ self.mimetype = mimetype or getMimeFromExt(sourcepath)
+ self._data = data
+ self.errors = None
+ self.encoding = 'utf-8'
+ self.refs = {}
+ self.scripts = {}
+ self.metadata = None
+ self.metaSource = None
+
+ def __eq__(self, other):
+ if not isinstance(other, FileSource):
+ return False
+ return self.sourcepath == other.sourcepath or \
+ filecmp.cmp(self.sourcepath, other.sourcepath)
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __cmp__(self, other):
+ return cmp(self.name(), other.name())
+
+ def name(self):
+ return self.sourceTree.getAssetName(self.sourcepath)
+
+ def keyName(self):
+ if ('support' == self.type()):
+ return os.path.relpath(self.relpath, 'support')
+ return self.name()
+
+ def type(self):
+ return self.sourceTree.getAssetType(self.sourcepath)
+
+ def relativeURL(self, other):
+ return relativeURL(self.relpath, other.relpath)
+
+ def data(self):
+ """Return file contents as a byte string."""
+ if (self._data is None):
+ with open(self.sourcepath, 'r') as f:
+ self._data = f.read()
+ if (self._data.startswith(codecs.BOM_UTF8)):
+ self.encoding = 'utf-8-sig' # XXX look for other unicode BOMs
+ return self._data
+
+ def unicode(self):
+ try:
+ return self.data().decode(self.encoding)
+ except UnicodeDecodeError:
+ return None
+
+ def parse(self):
+ """Parses and validates FileSource data from sourcepath."""
+ self.loadMetadata()
+
+ def validate(self):
+ """Ensure data is loaded from sourcepath."""
+ self.parse()
+
+ def adjustContentPaths(self, format):
+ """Adjust any paths in file content for output format
+ XXX need to account for group paths"""
+ if (self.refs):
+ seenRefs = {}
+ seenRefs[self.sourcepath] = '=='
+ def adjustReferences(source):
+ newRefs = {}
+ for refName in source.refs:
+ refType, refPath, refNode, refSource = source.refs[refName]
+ if refSource:
+ refPath = relativeURL(format.dest(self.relpath), format.dest(refSource.relpath))
+ if (refSource.sourcepath not in seenRefs):
+ seenRefs[refSource.sourcepath] = refType
+ adjustReferences(refSource)
+ else:
+ refPath = relativeURL(format.dest(self.relpath), format.dest(refPath))
+ if (refPath != refNode.get('href')):
+ refNode.set('href', refPath)
+ newRefs[refName] = (refType, refPath, refNode, refSource) # update path in metadata
+ source.refs = newRefs
+ adjustReferences(self)
+
+ if (self.scripts): # force testharness.js scripts to absolute path
+ for src in self.scripts:
+ if (src.endswith('/resources/testharness.js')): # accept relative paths to testharness.js
+ scriptNode = self.scripts[src]
+ scriptNode.set('src', '/resources/testharness.js')
+ elif (src.endswith('/resources/testharnessreport.js')):
+ scriptNode = self.scripts[src]
+ scriptNode.set('src', '/resources/testharnessreport.js')
+
+
+ def write(self, format):
+ """Writes FileSource.data() out to `self.relpath` through Format `format`."""
+ data = self.data()
+ with open(format.dest(self.relpath), 'w') as f:
+ f.write(data)
+ if (self.metaSource):
+ self.metaSource.write(format) # XXX need to get output path from format, but not let it choose actual format
+
+ def compact(self):
+ """Clears all cached data, preserves computed data."""
+ pass
+
+ def revision(self):
+ """Returns hash of the contents of this file and any related file, references, support files, etc.
+ XXX also needs to account for .meta file
+ """
+ sha = hashlib.sha1()
+ sha.update(self.data())
+ seenRefs = set(self.sourcepath)
+ def hashReference(source):
+ for refName in source.refs:
+ refSource = source.refs[refName][3]
+ if (refSource and (refSource.sourcepath not in seenRefs)):
+ sha.update(refSource.data())
+ seenRefs.add(refSource.sourcepath)
+ hashReference(refSource)
+ hashReference(self)
+ return sha.hexdigest()
+
+ def loadMetadata(self):
+ """Look for .meta file and load any metadata from it if present
+ """
+ pass
+
+ def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
+ if (self.metaSource):
+ return self.metaSource.augmentMetadata(next, prev, reference, notReference)
+ return None
+
+ # See http://wiki.csswg.org/test/css2.1/format for more info on metadata
+ def getMetadata(self, asUnicode = False):
+ """Return dictionary of test metadata. Stores list of errors
+ in self.errors if there are parse or metadata errors.
+ Data fields include:
+ - asserts [list of strings]
+ - credits [list of (name string, url string) tuples]
+ - reviewers [ list of (name string, url string) tuples]
+ - flags [list of token strings]
+ - links [list of url strings]
+ - name [string]
+ - title [string]
+ - references [list of ReferenceData per reference; None if not reftest]
+ - revision [revision id of last commit]
+ - selftest [bool]
+ - scripttest [bool]
+ Strings are given in ascii unless asUnicode==True.
+ """
+
+ self.validate()
+
+ def encode(str):
+ return str if (hasattr(str, 'line')) else intern(str.encode('utf-8'))
+
+ def escape(str, andIntern = True):
+ return str.encode('utf-8') if asUnicode else intern(escapeToNamedASCII(str)) if andIntern else escapeToNamedASCII(str)
+
+ def listReferences(source, seen):
+ refGroups = []
+ for refType, refRelPath, refNode, refSource in source.refs.values():
+ if ('==' == refType):
+ if (refSource):
+ refSourcePath = refSource.sourcepath
+ else:
+ refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath))
+ if (refSourcePath in seen):
+ continue
+ seen.add(refSourcePath)
+ if (refSource):
+ sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+ relpath = refRelPath, repopath = refSourcePath)
+ if (refSource.refs):
+ subRefLists = listReferences(refSource, seen.copy())
+ if (subRefLists):
+ for subRefList in subRefLists:
+ refGroups.append([sourceData] + subRefList)
+ else:
+ refGroups.append([sourceData])
+ else:
+ refGroups.append([sourceData])
+ else:
+ sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+ relpath = relativeURL(self.sourcepath, refSourcePath),
+ repopath = refSourcePath)
+ refGroups.append([sourceData])
+ notRefs = {}
+ for refType, refRelPath, refNode, refSource in source.refs.values():
+ if ('!=' == refType):
+ if (refSource):
+ refSourcePath = refSource.sourcepath
+ else:
+ refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath))
+ if (refSourcePath in seen):
+ continue
+ seen.add(refSourcePath)
+ if (refSource):
+ sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+ relpath = refRelPath, repopath = refSourcePath)
+ notRefs[sourceData.name] = sourceData
+ if (refSource.refs):
+ for subRefList in listReferences(refSource, seen):
+ for subRefData in subRefList:
+ notRefs[subRefData.name] = subRefData
+ else:
+ sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+ relpath = relativeURL(self.sourcepath, refSourcePath),
+ repopath = refSourcePath)
+ notRefs[sourceData.name] = sourceData
+ if (notRefs):
+ for refData in notRefs.values():
+ refData.type = '!='
+ if (refGroups):
+ for refGroup in refGroups:
+ for notRef in notRefs.values():
+ for ref in refGroup:
+ if (ref.name == notRef.name):
+ break
+ else:
+ refGroup.append(notRef)
+ else:
+ refGroups.append(notRefs.values())
+ return refGroups
+
+ references = listReferences(self, set([self.sourcepath])) if (self.refs) else None
+
+ if (self.metadata):
+ data = Metadata(
+ name = encode(self.name()),
+ title = escape(self.metadata['title'], False),
+ asserts = [escape(assertion, False) for assertion in self.metadata['asserts']],
+ credits = [UserData(escape(name), encode(link)) for name, link in self.metadata['credits']],
+ reviewers = [UserData(escape(name), encode(link)) for name, link in self.metadata['reviewers']],
+ flags = [encode(flag) for flag in self.metadata['flags']],
+ links = [encode(link) for link in self.metadata['links']],
+ references = references,
+ revision = self.revision(),
+ selftest = self.isSelftest(),
+ scripttest = self.isScripttest()
+ )
+ return data
+ return None
+
+ def addReference(self, referenceSource, match = None):
+ """Add reference source."""
+ self.validate()
+ refName = referenceSource.name()
+ refPath = self.relativeURL(referenceSource)
+ if refName not in self.refs:
+ node = None
+ if match == '==':
+ node = self.augmentMetadata(reference=referenceSource).reference
+ elif match == '!=':
+ node = self.augmentMetadata(notReference=referenceSource).notReference
+ self.refs[refName] = (match, refPath, node, referenceSource)
+ else:
+ node = self.refs[refName][2]
+ node.set('href', refPath)
+ if (match):
+ node.set('rel', 'mismatch' if ('!=' == match) else 'match')
+ else:
+ match = self.refs[refName][0]
+ self.refs[refName] = (match, refPath, node, referenceSource)
+
+ def getReferencePaths(self):
+ """Get list of paths to references as tuple(path, relPath, refType)."""
+ self.validate()
+ return [(os.path.join(os.path.dirname(self.sourcepath), ref[1]),
+ os.path.join(os.path.dirname(self.relpath), ref[1]),
+ ref[0])
+ for ref in self.refs.values()]
+
+ def isTest(self):
+ self.validate()
+ return bool(self.metadata) and bool(self.metadata.get('links'))
+
+ def isReftest(self):
+ return self.isTest() and bool(self.refs)
+
+ def isSelftest(self):
+ return self.isTest() and (not bool(self.refs))
+
+ def isScripttest(self):
+ if (self.isTest() and self.scripts):
+ for src in self.scripts:
+ if (src.endswith('/resources/testharness.js')): # accept relative paths to testharness.js
+ return True
+ return False
+
+ def hasFlag(self, flag):
+ data = self.getMetadata()
+ if data:
+ return flag in data['flags']
+ return False
+
+
+
+class ConfigSource(FileSource):
+ """Object representing a text-based configuration file.
+ Capable of merging multiple config-file contents.
+ """
+
+ def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
+ """Init ConfigSource from source path. Give it relative path relpath.
+ """
+ FileSource.__init__(self, sourceTree, sourcepath, relpath, mimetype, data)
+ self.sourcepath = [sourcepath]
+
+ def __eq__(self, other):
+ if not isinstance(other, ConfigSource):
+ return False
+ if self is other or self.sourcepath == other.sourcepath:
+ return True
+ if len(self.sourcepath) != len(other.sourcepath):
+ return False
+ for this, that in zip(self.sourcepath, other.sourcepath):
+ if not filecmp.cmp(this, that):
+ return False
+ return True
+
+ def __ne__(self, other):
+ return not self == other
+
+ def name(self):
+ return '.htaccess'
+
+ def type(self):
+ return intern('support')
+
+ def data(self):
+ """Merge contents of all config files represented by this source."""
+ data = ''
+ for src in self.sourcepath:
+ with open(src) as f:
+ data += f.read()
+ data += '\n'
+ return data
+
+ def getMetadata(self, asUnicode = False):
+ return None
+
+ def append(self, other):
+ """Appends contents of ConfigSource `other` to this source.
+ Asserts if self.relpath != other.relpath.
+ """
+ assert isinstance(other, ConfigSource)
+ assert self != other and self.relpath == other.relpath
+ self.sourcepath.extend(other.sourcepath)
+
+class ReftestFilepathError(Exception):
+ pass
+
+class ReftestManifest(ConfigSource):
+ """Object representing a reftest manifest file.
+ Iterating the ReftestManifest returns (testpath, refpath) tuples
+ with paths relative to the manifest.
+ """
+ def __init__(self, sourceTree, sourcepath, relpath, data = None):
+ """Init ReftestManifest from source path. Give it relative path `relpath`
+ and load its .htaccess file.
+ """
+ ConfigSource.__init__(self, sourceTree, sourcepath, relpath, mimetype = 'config/reftest', data = data)
+
+ def basepath(self):
+ """Returns the base relpath of this reftest manifest path, i.e.
+ the parent of the manifest file.
+ """
+ return basepath(self.relpath)
+
+ baseRE = re.compile(r'^#\s*relstrip\s+(\S+)\s*')
+ stripRE = re.compile(r'#.*')
+ parseRE = re.compile(r'^\s*([=!]=)\s*(\S+)\s+(\S+)')
+
+ def __iter__(self):
+ """Parse the reftest manifest files represented by this ReftestManifest
+ and return path information about each reftest pair as
+ ((test-sourcepath, ref-sourcepath), (test-relpath, ref-relpath), reftype)
+ Raises a ReftestFilepathError if any sources file do not exist or
+ if any relpaths point higher than the relpath root.
+ """
+ striplist = []
+ for src in self.sourcepath:
+ relbase = basepath(self.relpath)
+ srcbase = basepath(src)
+ with open(src) as f:
+ for line in f:
+ strip = self.baseRE.search(line)
+ if strip:
+ striplist.append(strip.group(1))
+ line = self.stripRE.sub('', line)
+ m = self.parseRE.search(line)
+ if m:
+ record = ((join(srcbase, m.group(2)), join(srcbase, m.group(3))), \
+ (join(relbase, m.group(2)), join(relbase, m.group(3))), \
+ m.group(1))
+ # for strip in striplist:
+ # strip relrecord
+ if not exists(record[0][0]):
+ raise ReftestFilepathError("Manifest Error in %s: "
+ "Reftest test file %s does not exist." \
+ % (src, record[0][0]))
+ elif not exists(record[0][1]):
+ raise ReftestFilepathError("Manifest Error in %s: "
+ "Reftest reference file %s does not exist." \
+ % (src, record[0][1]))
+ elif not isPathInsideBase(record[1][0]):
+ raise ReftestFilepathError("Manifest Error in %s: "
+ "Reftest test replath %s not within relpath root." \
+ % (src, record[1][0]))
+ elif not isPathInsideBase(record[1][1]):
+ raise ReftestFilepathError("Manifest Error in %s: "
+ "Reftest test replath %s not within relpath root." \
+ % (src, record[1][1]))
+ yield record
+
+import Utils # set up XML catalog
+xhtmlns = '{http://www.w3.org/1999/xhtml}'
+svgns = '{http://www.w3.org/2000/svg}'
+xmlns = '{http://www.w3.org/XML/1998/namespace}'
+xlinkns = '{http://www.w3.org/1999/xlink}'
+
+class XMLSource(FileSource):
+ """FileSource object with support reading XML trees."""
+
+ NodeTuple = collections.namedtuple('NodeTuple', ['next', 'prev', 'reference', 'notReference'])
+
+ # Public Data
+ syntaxErrorDoc = \
+ u"""
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head><title>Syntax Error</title></head>
+ <body>
+ <p>The XML file <![CDATA[%s]]> contains a syntax error and could not be parsed.
+ Please correct it and try again.</p>
+ <p>The parser's error report was:</p>
+ <pre><![CDATA[%s]]></pre>
+ </body>
+ </html>
+ """
+
+ # Private Data and Methods
+ __parser = etree.XMLParser(no_network=True,
+ # perf nightmare dtd_validation=True,
+ remove_comments=False,
+ strip_cdata=False,
+ resolve_entities=False)
+
+ # Public Methods
+
+ def __init__(self, sourceTree, sourcepath, relpath, data = None):
+ """Initialize XMLSource by loading from XML file `sourcepath`.
+ Parse errors are reported in `self.errors`,
+ and the source is replaced with an XHTML error message.
+ """
+ FileSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+ self.tree = None
+ self.injectedTags = {}
+
+ def cacheAsParseError(self, filename, e):
+ """Replace document with an error message."""
+ errorDoc = self.syntaxErrorDoc % (filename, e)
+ from StringIO import StringIO
+ self.tree = etree.parse(StringIO(errorDoc), parser=self.__parser)
+
+ def parse(self):
+ """Parse file and store any parse errors in self.errors"""
+ self.errors = None
+ try:
+ data = self.data()
+ if (data):
+ self.tree = etree.parse(StringReader(data), parser=self.__parser)
+ self.encoding = self.tree.docinfo.encoding or 'utf-8'
+ self.injectedTags = {}
+ else:
+ self.tree = None
+ self.errors = ['Empty source file']
+ self.encoding = 'utf-8'
+
+ FileSource.loadMetadata(self)
+ if ((not self.metadata) and self.tree and (not self.errors)):
+ self.extractMetadata(self.tree)
+ except etree.ParseError as e:
+ print("PARSE ERROR: " + self.sourcepath)
+ self.cacheAsParseError(self.sourcepath, e)
+ e.W3CTestLibErrorLocation = self.sourcepath
+ self.errors = [str(e)]
+ self.encoding = 'utf-8'
+
+ def validate(self):
+ """Parse file if not parsed, and store any parse errors in self.errors"""
+ if self.tree is None:
+ self.parse()
+
+ def getMeatdataContainer(self):
+ return self.tree.getroot().find(xhtmlns+'head')
+
+ def injectMetadataLink(self, rel, href, tagCode = None):
+ """Inject (prepend) <link> with data given inside metadata container.
+ Injected element is tagged with `tagCode`, which can be
+ used to clear it with clearInjectedTags later.
+ """
+ self.validate()
+ container = self.getMeatdataContainer()
+ if (container):
+ node = etree.Element(xhtmlns+'link', {'rel': rel, 'href': href})
+ node.tail = container.text
+ container.insert(0, node)
+ self.injectedTags[node] = tagCode or True
+ return node
+ return None
+
+ def clearInjectedTags(self, tagCode = None):
+ """Clears all injected elements from the tree, or clears injected
+ elements tagged with `tagCode` if `tagCode` is given.
+ """
+ if not self.injectedTags or not self.tree: return
+ for node in self.injectedTags:
+ node.getparent().remove(node)
+ del self.injectedTags[node]
+
+ def serializeXML(self):
+ self.validate()
+ return etree.tounicode(self.tree)
+
+ def data(self):
+ if ((not self.tree) or (self.metaSource)):
+ return FileSource.data(self)
+ return self.serializeXML().encode(self.encoding, 'xmlcharrefreplace')
+
+ def unicode(self):
+ if ((not self.tree) or (self.metaSource)):
+ return FileSource.unicode(self)
+ return self.serializeXML()
+
+ def write(self, format, output=None):
+ """Write Source through OutputFormat `format`.
+ Write contents as string `output` instead if specified.
+ """
+ if not output:
+ output = self.unicode()
+
+ # write
+ with open(format.dest(self.relpath), 'w') as f:
+ f.write(output.encode(self.encoding, 'xmlcharrefreplace'))
+
+ def compact(self):
+ self.tree = None
+
+ def getMetadataElements(self, tree):
+ container = self.getMeatdataContainer()
+ if (None != container):
+ return [node for node in container]
+ return None
+
+ def extractMetadata(self, tree):
+ """Extract metadata from tree."""
+ links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
+
+ def tokenMatch(token, string):
+ return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False
+
+ errors = []
+ readFlags = False
+ metaElements = self.getMetadataElements(tree)
+ if (not metaElements):
+ errors.append("Missing <head> element")
+ else:
+ # Scan and cache metadata
+ for node in metaElements:
+ if (node.tag == xhtmlns+'link'):
+ # help links
+ if tokenMatch('help', node.get('rel')):
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Help link missing href value.", node.sourceline))
+ elif (not (link.startswith('http://') or link.startswith('https://'))):
+ errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
+ elif (link in links):
+ errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
+ else:
+ links.append(LineString(link, node.sourceline))
+ # == references
+ elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')):
+ refPath = node.get('href').strip() if node.get('href') else None
+ if (not refPath):
+ errors.append(LineString("Reference link missing href value.", node.sourceline))
+ else:
+ refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+ if (refName in self.refs):
+ errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+ else:
+ self.refs[refName] = ('==', refPath, node, None)
+ # != references
+ elif tokenMatch('mismatch', node.get('rel')) or tokenMatch('not-reference', node.get('rel')):
+ refPath = node.get('href').strip() if node.get('href') else None
+ if (not refPath):
+ errors.append(LineString("Reference link missing href value.", node.sourceline))
+ else:
+ refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+ if (refName in self.refs):
+ errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+ else:
+ self.refs[refName] = ('!=', refPath, node, None)
+ else: # may have both author and reviewer in the same link
+ # credits
+ if tokenMatch('author', node.get('rel')):
+ name = node.get('title')
+ name = name.strip() if name else name
+ if (not name):
+ errors.append(LineString("Author link missing name (title attribute).", node.sourceline))
+ else:
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+ else:
+ credits.append((name, link))
+ # reviewers
+ if tokenMatch('reviewer', node.get('rel')):
+ name = node.get('title')
+ name = name.strip() if name else name
+ if (not name):
+ errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline))
+ else:
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+ else:
+ reviewers.append((name, link))
+ elif (node.tag == xhtmlns+'meta'):
+ metatype = node.get('name')
+ metatype = metatype.strip() if metatype else metatype
+ # requirement flags
+ if ('flags' == metatype):
+ if (readFlags):
+ errors.append(LineString("Flags must only be specified once.", node.sourceline))
+ else:
+ readFlags = True
+ if (None == node.get('content')):
+ errors.append(LineString("Flags meta missing content attribute.", node.sourceline))
+ else:
+ for flag in sorted(node.get('content').split()):
+ flags.append(flag)
+ # test assertions
+ elif ('assert' == metatype):
+ if (None == node.get('content')):
+ errors.append(LineString("Assert meta missing content attribute.", node.sourceline))
+ else:
+ asserts.append(node.get('content').strip().replace('\t', ' '))
+ # title
+ elif (node.tag == xhtmlns+'title'):
+ title = node.text.strip() if node.text else ''
+ match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
+ if (match):
+ title = match.group(1)
+ title = title.strip()
+ # script
+ elif (node.tag == xhtmlns+'script'):
+ src = node.get('src').strip() if node.get('src') else None
+ if (src):
+ self.scripts[src] = node
+
+ if (asserts or credits or reviewers or flags or links or title):
+ self.metadata = {'asserts' : asserts,
+ 'credits' : credits,
+ 'reviewers' : reviewers,
+ 'flags' : flags,
+ 'links' : links,
+ 'title' : title
+ }
+
+ if (errors):
+ if (self.errors):
+ self.errors += errors
+ else:
+ self.errors = errors
+
+
+ def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
+ """Add extra useful metadata to the head. All arguments are optional.
+ * Adds next/prev links to next/prev Sources given
+ * Adds reference link to reference Source given
+ """
+ self.validate()
+ if next:
+ next = self.injectMetadataLink('next', self.relativeURL(next), 'next')
+ if prev:
+ prev = self.injectMetadataLink('prev', self.relativeURL(prev), 'prev')
+ if reference:
+ reference = self.injectMetadataLink('match', self.relativeURL(reference), 'ref')
+ if notReference:
+ notReference = self.injectMetadataLink('mismatch', self.relativeURL(notReference), 'not-ref')
+ return self.NodeTuple(next, prev, reference, notReference)
+
+
+class XHTMLSource(XMLSource):
+ """FileSource object with support for XHTML->HTML conversions."""
+
+ # Public Methods
+
+ def __init__(self, sourceTree, sourcepath, relpath, data = None):
+ """Initialize XHTMLSource by loading from XHTML file `sourcepath`.
+ Parse errors are stored in `self.errors`,
+ and the source is replaced with an XHTML error message.
+ """
+ XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+ def serializeXHTML(self, doctype = None):
+ return self.serializeXML()
+
+ def serializeHTML(self, doctype = None):
+ self.validate()
+ # Serialize
+# print self.relpath
+ serializer = HTMLSerializer.HTMLSerializer()
+ output = serializer.serializeHTML(self.tree, doctype)
+ return output
+
+
+class SVGSource(XMLSource):
+ """FileSource object with support for extracting metadata from SVG."""
+
+ def __init__(self, sourceTree, sourcepath, relpath, data = None):
+ """Initialize SVGSource by loading from SVG file `sourcepath`.
+ Parse errors are stored in `self.errors`,
+ and the source is replaced with an XHTML error message.
+ """
+ XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+ def getMeatdataContainer(self):
+ groups = self.tree.getroot().findall(svgns+'g')
+ for group in groups:
+ if ('testmeta' == group.get('id')):
+ return group
+ return None
+
+ def extractMetadata(self, tree):
+ """Extract metadata from tree."""
+ links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
+
+ def tokenMatch(token, string):
+ return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False
+
+ errors = []
+ readFlags = False
+ metaElements = self.getMetadataElements(tree)
+ if (not metaElements):
+ errors.append("Missing <g id='testmeta'> element")
+ else:
+ # Scan and cache metadata
+ for node in metaElements:
+ if (node.tag == xhtmlns+'link'):
+ # help links
+ if tokenMatch('help', node.get('rel')):
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Help link missing href value.", node.sourceline))
+ elif (not (link.startswith('http://') or link.startswith('https://'))):
+ errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
+ elif (link in links):
+ errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
+ else:
+ links.append(LineString(link, node.sourceline))
+ # == references
+ elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')):
+ refPath = node.get('href').strip() if node.get('href') else None
+ if (not refPath):
+ errors.append(LineString("Reference link missing href value.", node.sourceline))
+ else:
+ refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+ if (refName in self.refs):
+ errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+ else:
+ self.refs[refName] = ('==', refPath, node, None)
+ # != references
+ elif tokenMatch('mismatch', node.get('rel')) or tokenMatch('not-reference', node.get('rel')):
+ refPath = node.get('href').strip() if node.get('href') else None
+ if (not refPath):
+ errors.append(LineString("Reference link missing href value.", node.sourceline))
+ else:
+ refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+ if (refName in self.refs):
+ errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+ else:
+ self.refs[refName] = ('!=', refPath, node, None)
+ else: # may have both author and reviewer in the same link
+ # credits
+ if tokenMatch('author', node.get('rel')):
+ name = node.get('title')
+ name = name.strip() if name else name
+ if (not name):
+ errors.append(LineString("Author link missing name (title attribute).", node.sourceline))
+ else:
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+ else:
+ credits.append((name, link))
+ # reviewers
+ if tokenMatch('reviewer', node.get('rel')):
+ name = node.get('title')
+ name = name.strip() if name else name
+ if (not name):
+ errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline))
+ else:
+ link = node.get('href').strip() if node.get('href') else None
+ if (not link):
+ errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+ else:
+ reviewers.append((name, link))
+ elif (node.tag == svgns+'metadata'):
+ metatype = node.get('class')
+ metatype = metatype.strip() if metatype else metatype
+ # requirement flags
+ if ('flags' == metatype):
+ if (readFlags):
+ errors.append(LineString("Flags must only be specified once.", node.sourceline))
+ else:
+ readFlags = True
+ text = node.find(svgns+'text')
+ flagString = text.text if (text) else node.text
+ if (flagString):
+ for flag in sorted(flagString.split()):
+ flags.append(flag)
+ elif (node.tag == svgns+'desc'):
+ metatype = node.get('class')
+ metatype = metatype.strip() if metatype else metatype
+ # test assertions
+ if ('assert' == metatype):
+ asserts.append(node.text.strip().replace('\t', ' '))
+ # test title
+ elif node.tag == svgns+'title':
+ title = node.text.strip() if node.text else ''
+ match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
+ if (match):
+ title = match.group(1)
+ title = title.strip()
+ # script tag (XXX restricted to metadata container?)
+ elif (node.tag == svgns+'script'):
+ src = node.get('src').strip() if node.get('src') else None
+ if (src):
+ self.scripts[src] = node
+
+ if (asserts or credits or reviewers or flags or links or title):
+ self.metadata = {'asserts' : asserts,
+ 'credits' : credits,
+ 'reviewers' : reviewers,
+ 'flags' : flags,
+ 'links' : links,
+ 'title' : title
+ }
+ if (errors):
+ if (self.errors):
+ self.errors += errors
+ else:
+ self.errors = errors
+
+
+
+class HTMLSource(XMLSource):
+ """FileSource object with support for HTML metadata and HTML->XHTML conversions (untested)."""
+
+ # Private Data and Methods
+ __parser = html5lib.HTMLParser(tree = treebuilders.getTreeBuilder('lxml'))
+
+ # Public Methods
+
+ def __init__(self, sourceTree, sourcepath, relpath, data = None):
+ """Initialize HTMLSource by loading from HTML file `sourcepath`.
+ """
+ XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+ def parse(self):
+ """Parse file and store any parse errors in self.errors"""
+ self.errors = None
+ try:
+ data = self.data()
+ if data:
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ self.tree = self.__parser.parse(data)
+ self.encoding = self.__parser.documentEncoding
+ self.injectedTags = {}
+ else:
+ self.tree = None
+ self.errors = ['Empty source file']
+ self.encoding = 'utf-8'
+
+ FileSource.loadMetadata(self)
+ if ((not self.metadata) and self.tree and (not self.errors)):
+ self.extractMetadata(self.tree)
+ except Exception as e:
+ print("PARSE ERROR: " + self.sourcepath)
+ e.W3CTestLibErrorLocation = self.sourcepath
+ self.errors = [str(e)]
+ self.encoding = 'utf-8'
+
+ def _injectXLinks(self, element, nodeList):
+ injected = False
+
+ xlinkAttrs = ['href', 'type', 'role', 'arcrole', 'title', 'show', 'actuate']
+ if (element.get('href') or element.get(xlinkns + 'href')):
+ for attr in xlinkAttrs:
+ if (element.get(xlinkns + attr)):
+ injected = True
+ if (element.get(attr)):
+ injected = True
+ value = element.get(attr)
+ del element.attrib[attr]
+ element.set(xlinkns + attr, value)
+ nodeList.append((element, xlinkns + attr, attr))
+
+ for child in element:
+ if (type(child.tag) == type('')): # element node
+ qName = etree.QName(child.tag)
+ if ('foreignobject' != qName.localname.lower()):
+ injected |= self._injectXLinks(child, nodeList)
+ return injected
+
+
+ def _findElements(self, namespace, elementName):
+ elements = self.tree.findall('.//{' + namespace + '}' + elementName)
+ if (self.tree.getroot().tag == '{' + namespace + '}' + elementName):
+ elements.insert(0, self.tree.getroot())
+ return elements
+
+ def _injectNamespace(self, elementName, prefix, namespace, doXLinks, nodeList):
+ attr = xmlns + prefix if (prefix) else 'xmlns'
+ elements = self._findElements(namespace, elementName)
+ for element in elements:
+ if not element.get(attr):
+ element.set(attr, namespace)
+ nodeList.append((element, attr, None))
+ if (doXLinks):
+ if (self._injectXLinks(element, nodeList)):
+ element.set(xmlns + 'xlink', 'http://www.w3.org/1999/xlink')
+ nodeList.append((element, xmlns + 'xlink', None))
+
+ def injectNamespaces(self):
+ nodeList = []
+ self._injectNamespace('html', None, 'http://www.w3.org/1999/xhtml', False, nodeList)
+ self._injectNamespace('svg', None, 'http://www.w3.org/2000/svg', True, nodeList)
+ self._injectNamespace('math', None, 'http://www.w3.org/1998/Math/MathML', True, nodeList)
+ return nodeList
+
+ def removeNamespaces(self, nodeList):
+ if nodeList:
+ for element, attr, oldAttr in nodeList:
+ if (oldAttr):
+ value = element.get(attr)
+ del element.attrib[attr]
+ element.set(oldAttr, value)
+ else:
+ del element.attrib[attr]
+
+ def serializeXHTML(self, doctype = None):
+ self.validate()
+ # Serialize
+ nodeList = self.injectNamespaces()
+# print self.relpath
+ serializer = HTMLSerializer.HTMLSerializer()
+ o = serializer.serializeXHTML(self.tree, doctype)
+
+ self.removeNamespaces(nodeList)
+ return o
+
+ def serializeHTML(self, doctype = None):
+ self.validate()
+ # Serialize
+# print self.relpath
+ serializer = HTMLSerializer.HTMLSerializer()
+ o = serializer.serializeHTML(self.tree, doctype)
+
+ return o
+
+ def data(self):
+ if ((not self.tree) or (self.metaSource)):
+ return FileSource.data(self)
+ return self.serializeHTML().encode(self.encoding, 'xmlcharrefreplace')
+
+ def unicode(self):
+ if ((not self.tree) or (self.metaSource)):
+ return FileSource.unicode(self)
+ return self.serializeHTML()
+