diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/css/tools/w3ctestlib/Sources.py | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/css/tools/w3ctestlib/Sources.py')
-rw-r--r-- | testing/web-platform/tests/css/tools/w3ctestlib/Sources.py | 1473 |
1 files changed, 1473 insertions, 0 deletions
diff --git a/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py new file mode 100644 index 0000000000..f3848030ba --- /dev/null +++ b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py @@ -0,0 +1,1473 @@ +#!/usr/bin/python +# CSS Test Source Manipulation Library +# Initial code by fantasai, joint copyright 2010 W3C and Microsoft +# Licensed under BSD 3-Clause: <http://www.w3.org/Consortium/Legal/2008/03-bsd-license> + +from __future__ import print_function +from os.path import basename, exists, join +import os +import filecmp +import shutil +import re +import codecs +import collections +from xml import dom +import html5lib +from html5lib import treebuilders +from lxml import etree +from lxml.etree import ParseError +from Utils import getMimeFromExt, escapeToNamedASCII, basepath, isPathInsideBase, relativeURL, assetName +import HTMLSerializer +import warnings +import hashlib + +class SourceTree(object): + """Class that manages structure of test repository source. + Temporarily hard-coded path and filename rules, this should be configurable. 
+ """ + + def __init__(self, repository = None): + self.mTestExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.svg'] + self.mReferenceExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.png', '.svg'] + self.mRepository = repository + + def _splitDirs(self, dir): + if ('' == dir): + pathList = [] + elif ('/' in dir): + pathList = dir.split('/') + else: + pathList = dir.split(os.path.sep) + return pathList + + def _splitPath(self, filePath): + """split a path into a list of directory names and the file name + paths may come form the os or mercurial, which always uses '/' as the + directory separator + """ + dir, fileName = os.path.split(filePath.lower()) + return (self._splitDirs(dir), fileName) + + def isTracked(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) + + def _isApprovedPath(self, pathList): + return ((1 < len(pathList)) and ('approved' == pathList[0]) and (('support' == pathList[1]) or ('src' in pathList))) + + def isApprovedPath(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) and self._isApprovedPath(pathList) + + def _isIgnoredPath(self, pathList): + return (('.hg' in pathList) or ('.git' in pathList) or + ('.svn' in pathList) or ('cvs' in pathList) or + ('incoming' in pathList) or ('work-in-progress' in pathList) or + ('data' in pathList) or ('archive' in pathList) or + ('reports' in pathList) or ('tools' == pathList[0]) or + ('test-plan' in pathList) or ('test-plans' in pathList)) + + def _isIgnored(self, pathList, fileName): + if (pathList): # ignore files in root + return (self._isIgnoredPath(pathList) or + fileName.startswith('.directory') or ('lock' == fileName) or + ('.ds_store' == fileName) or + fileName.startswith('.hg') or fileName.startswith('.git') or + ('sections.dat' == fileName) or ('get-spec-sections.pl' == fileName)) + return True + + def isIgnored(self, filePath): + pathList, fileName = 
self._splitPath(filePath) + return self._isIgnored(pathList, fileName) + + def isIgnoredDir(self, dir): + pathList = self._splitDirs(dir) + return self._isIgnoredPath(pathList) + + def _isToolPath(self, pathList): + return ('tools' in pathList) + + def _isTool(self, pathList, fileName): + return self._isToolPath(pathList) + + def isTool(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) and self._isTool(pathList, fileName) + + def _isSupportPath(self, pathList): + return ('support' in pathList) + + def _isSupport(self, pathList, fileName): + return (self._isSupportPath(pathList) or + ((not self._isTool(pathList, fileName)) and + (not self._isReference(pathList, fileName)) and + (not self._isTestCase(pathList, fileName)))) + + def isSupport(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) and self._isSupport(pathList, fileName) + + def _isReferencePath(self, pathList): + return (('reftest' in pathList) or ('reference' in pathList)) + + def _isReference(self, pathList, fileName): + if ((not self._isSupportPath(pathList)) and (not self._isToolPath(pathList))): + baseName, fileExt = os.path.splitext(fileName)[:2] + if (bool(re.search('(^ref-|^notref-).+', baseName)) or + bool(re.search('.+(-ref[0-9]*$|-notref[0-9]*$)', baseName)) or + ('-ref-' in baseName) or ('-notref-' in baseName)): + return (fileExt in self.mReferenceExtensions) + if (self._isReferencePath(pathList)): + return (fileExt in self.mReferenceExtensions) + return False + + def isReference(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) and self._isReference(pathList, fileName) + + def isReferenceAnywhere(self, filePath): + pathList, fileName = self._splitPath(filePath) + return self._isReference(pathList, fileName) + + def _isTestCase(self, pathList, fileName): + if ((not self._isToolPath(pathList)) and 
(not self._isSupportPath(pathList)) and (not self._isReference(pathList, fileName))): + fileExt = os.path.splitext(fileName)[1] + return (fileExt in self.mTestExtensions) + return False + + def isTestCase(self, filePath): + pathList, fileName = self._splitPath(filePath) + return (not self._isIgnored(pathList, fileName)) and self._isTestCase(pathList, fileName) + + def getAssetName(self, filePath): + pathList, fileName = self._splitPath(filePath) + if (self._isReference(pathList, fileName) or self._isTestCase(pathList, fileName)): + return assetName(fileName) + return fileName.lower() # support files keep full name + + def getAssetType(self, filePath): + pathList, fileName = self._splitPath(filePath) + if (self._isReference(pathList, fileName)): + return intern('reference') + if (self._isTestCase(pathList, fileName)): + return intern('testcase') + if (self._isTool(pathList, fileName)): + return intern('tool') + return intern('support') + + +class SourceCache: + """Cache for FileSource objects. Supports one FileSource object + per sourcepath. + """ + def __init__(self, sourceTree): + self.__cache = {} + self.sourceTree = sourceTree + + def generateSource(self, sourcepath, relpath, data = None): + """Return a FileSource or derivative based on the extensionMap. + + Uses a cache to avoid creating more than one of the same object: + does not support creating two FileSources with the same sourcepath; + asserts if this is tried. (.htaccess files are not cached.) 
+ + Cache is bypassed if loading form a change context + """ + if ((None == data) and self.__cache.has_key(sourcepath)): + source = self.__cache[sourcepath] + assert relpath == source.relpath + return source + + if basename(sourcepath) == '.htaccess': + return ConfigSource(self.sourceTree, sourcepath, relpath, data) + mime = getMimeFromExt(sourcepath) + if (mime == 'application/xhtml+xml'): + source = XHTMLSource(self.sourceTree, sourcepath, relpath, data) + elif (mime == 'text/html'): + source = HTMLSource(self.sourceTree, sourcepath, relpath, data) + elif (mime == 'image/svg+xml'): + source = SVGSource(self.sourceTree, sourcepath, relpath, data) + elif (mime == 'application/xml'): + source = XMLSource(self.sourceTree, sourcepath, relpath, data) + else: + source = FileSource(self.sourceTree, sourcepath, relpath, mime, data) + if (None == data): + self.__cache[sourcepath] = source + return source + +class SourceSet: + """Set of FileSource objects. No two FileSources of the same type in the set may + have the same name (except .htaccess files, which are merged). + """ + def __init__(self, sourceCache): + self.sourceCache = sourceCache + self.pathMap = {} # type/name -> source + + def __len__(self): + return len(self.pathMap) + + def _keyOf(self, source): + return source.type() + '/' + source.keyName() + + def __contains__(self, source): + return self._keyOf(source) in self.pathMap + + + def iter(self): + """Iterate over FileSource objects in SourceSet. + """ + return self.pathMap.itervalues() + + def addSource(self, source, ui): + """Add FileSource `source`. Throws exception if we already have + a FileSource with the same path relpath but different contents. + (ConfigSources are exempt from this requirement.) 
+ """ + cachedSource = self.pathMap.get(self._keyOf(source)) + if not cachedSource: + self.pathMap[self._keyOf(source)] = source + else: + if source != cachedSource: + if isinstance(source, ConfigSource): + cachedSource.append(source) + else: + ui.warn("File merge mismatch %s vs %s for %s\n" % \ + (cachedSource.sourcepath, source.sourcepath, source.name())) + + def add(self, sourcepath, relpath, ui): + """Generate and add FileSource from sourceCache. Return the resulting + FileSource. + + Throws exception if we already have a FileSource with the same path + relpath but different contents. + """ + source = self.sourceCache.generateSource(sourcepath, relpath) + self.addSource(source, ui) + return source + + @staticmethod + def combine(a, b, ui): + """Merges a and b, and returns whichever one contains the merger (which + one is chosen based on merge efficiency). Can accept None as an argument. + """ + if not (a and b): + return a or b + if len(a) < len(b): + return b.merge(a, ui) + return a.merge(b, ui) + + def merge(self, other, ui): + """Merge sourceSet's contents into this SourceSet. + + Throws a RuntimeError if there's a sourceCache mismatch. + Throws an Exception if two files with the same relpath mismatch. + Returns merge result (i.e. self) + """ + if self.sourceCache is not other.sourceCache: + raise RuntimeError + + for source in other.pathMap.itervalues(): + self.addSource(source, ui) + return self + + def adjustContentPaths(self, format): + for source in self.pathMap.itervalues(): + source.adjustContentPaths(format) + + def write(self, format): + """Write files out through OutputFormat `format`. 
+ """ + for source in self.pathMap.itervalues(): + format.write(source) + + +class StringReader(object): + """Wrapper around a string to give it a file-like api + """ + def __init__(self, string): + self.mString = string + self.mIndex = 0 + + def read(self, maxSize = None): + if (self.mIndex < len(self.mString)): + if (maxSize and (0 < maxSize)): + slice = self.mString[self.mIndex:self.mIndex + maxSize] + self.mIndex += len(slice) + return slice + else: + self.mIndex = len(self.mString) + return self.mString + return '' + + +class NamedDict(object): + def get(self, key): + if (key in self): + return self[key] + return None + + def __eq__(self, other): + for key in self.__slots__: + if (self[key] != other[key]): + return False + return True + + def __ne__(self, other): + for key in self.__slots__: + if (self[key] != other[key]): + return True + return False + + def __len__(self): + return len(self.__slots__) + + def __iter__(self): + return iter(self.__slots__) + + def __contains__(self, key): + return (key in self.__slots__) + + def copy(self): + clone = self.__class__() + for key in self.__slots__: + clone[key] = self[key] + return clone + + def keys(self): + return self.__slots__ + + def has_key(self, key): + return (key in self) + + def items(self): + return [(key, self[key]) for key in self.__slots__] + + def iteritems(self): + return iter(self.items()) + + def iterkeys(self): + return self.__iter__() + + def itervalues(self): + return iter(self.items()) + + def __str__(self): + return '{ ' + ', '.join([key + ': ' + str(self[key]) for key in self.__slots__]) + ' }' + + +class Metadata(NamedDict): + __slots__ = ('name', 'title', 'asserts', 'credits', 'reviewers', 'flags', 'links', 'references', 'revision', 'selftest', 'scripttest') + + def __init__(self, name = None, title = None, asserts = [], credits = [], reviewers = [], flags = [], links = [], + references = [], revision = None, selftest = True, scripttest = False): + self.name = name + self.title = title + 
self.asserts = asserts + self.credits = credits + self.reviewers = reviewers + self.flags = flags + self.links = links + self.references = references + self.revision = revision + self.selftest = selftest + self.scripttest = scripttest + + def __getitem__(self, key): + if ('name' == key): + return self.name + if ('title' == key): + return self.title + if ('asserts' == key): + return self.asserts + if ('credits' == key): + return self.credits + if ('reviewers' == key): + return self.reviewers + if ('flags' == key): + return self.flags + if ('links' == key): + return self.links + if ('references' == key): + return self.references + if ('revision' == key): + return self.revision + if ('selftest' == key): + return self.selftest + if ('scripttest' == key): + return self.scripttest + return None + + def __setitem__(self, key, value): + if ('name' == key): + self.name = value + elif ('title' == key): + self.title = value + elif ('asserts' == key): + self.asserts = value + elif ('credits' == key): + self.credits = value + elif ('reviewers' == key): + self.reviewers = value + elif ('flags' == key): + self.flags = value + elif ('links' == key): + self.links = value + elif ('references' == key): + self.references = value + elif ('revision' == key): + self.revision = value + elif ('selftest' == key): + self.selftest = value + elif ('scripttest' == key): + self.scripttest = value + else: + raise KeyError() + + +class ReferenceData(NamedDict): + __slots__ = ('name', 'type', 'relpath', 'repopath') + + def __init__(self, name = None, type = None, relpath = None, repopath = None): + self.name = name + self.type = type + self.relpath = relpath + self.repopath = repopath + + def __getitem__(self, key): + if ('name' == key): + return self.name + if ('type' == key): + return self.type + if ('relpath' == key): + return self.relpath + if ('repopath' == key): + return self.repopath + return None + + def __setitem__(self, key, value): + if ('name' == key): + self.name = value + elif ('type' 
== key): + self.type = value + elif ('relpath' == key): + self.relpath = value + elif ('repopath' == key): + self.repopath = value + else: + raise KeyError() + +UserData = collections.namedtuple('UserData', ('name', 'link')) + +class LineString(str): + def __new__(cls, value, line): + self = str.__new__(cls, value) + self.line = line + return self + + def lineValue(self): + return 'Line ' + str(self.line) + ': ' + str.__str__(self) if (self.line) else str.__str__(self) + + +class FileSource: + """Object representing a file. Two FileSources are equal if they represent + the same file contents. It is recommended to use a SourceCache to generate + FileSources. + """ + + def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None): + """Init FileSource from source path. Give it relative path relpath. + + `mimetype` should be the canonical MIME type for the file, if known. + If `mimetype` is None, guess type from file extension, defaulting to + the None key's value in extensionMap. + + `data` if provided, is a the contents of the file. Otherwise the file is read + from disk. 
+ """ + self.sourceTree = sourceTree + self.sourcepath = sourcepath + self.relpath = relpath + self.mimetype = mimetype or getMimeFromExt(sourcepath) + self._data = data + self.errors = None + self.encoding = 'utf-8' + self.refs = {} + self.scripts = {} + self.metadata = None + self.metaSource = None + + def __eq__(self, other): + if not isinstance(other, FileSource): + return False + return self.sourcepath == other.sourcepath or \ + filecmp.cmp(self.sourcepath, other.sourcepath) + + def __ne__(self, other): + return not self == other + + def __cmp__(self, other): + return cmp(self.name(), other.name()) + + def name(self): + return self.sourceTree.getAssetName(self.sourcepath) + + def keyName(self): + if ('support' == self.type()): + return os.path.relpath(self.relpath, 'support') + return self.name() + + def type(self): + return self.sourceTree.getAssetType(self.sourcepath) + + def relativeURL(self, other): + return relativeURL(self.relpath, other.relpath) + + def data(self): + """Return file contents as a byte string.""" + if (self._data is None): + with open(self.sourcepath, 'r') as f: + self._data = f.read() + if (self._data.startswith(codecs.BOM_UTF8)): + self.encoding = 'utf-8-sig' # XXX look for other unicode BOMs + return self._data + + def unicode(self): + try: + return self.data().decode(self.encoding) + except UnicodeDecodeError: + return None + + def parse(self): + """Parses and validates FileSource data from sourcepath.""" + self.loadMetadata() + + def validate(self): + """Ensure data is loaded from sourcepath.""" + self.parse() + + def adjustContentPaths(self, format): + """Adjust any paths in file content for output format + XXX need to account for group paths""" + if (self.refs): + seenRefs = {} + seenRefs[self.sourcepath] = '==' + def adjustReferences(source): + newRefs = {} + for refName in source.refs: + refType, refPath, refNode, refSource = source.refs[refName] + if refSource: + refPath = relativeURL(format.dest(self.relpath), 
format.dest(refSource.relpath)) + if (refSource.sourcepath not in seenRefs): + seenRefs[refSource.sourcepath] = refType + adjustReferences(refSource) + else: + refPath = relativeURL(format.dest(self.relpath), format.dest(refPath)) + if (refPath != refNode.get('href')): + refNode.set('href', refPath) + newRefs[refName] = (refType, refPath, refNode, refSource) # update path in metadata + source.refs = newRefs + adjustReferences(self) + + if (self.scripts): # force testharness.js scripts to absolute path + for src in self.scripts: + if (src.endswith('/resources/testharness.js')): # accept relative paths to testharness.js + scriptNode = self.scripts[src] + scriptNode.set('src', '/resources/testharness.js') + elif (src.endswith('/resources/testharnessreport.js')): + scriptNode = self.scripts[src] + scriptNode.set('src', '/resources/testharnessreport.js') + + + def write(self, format): + """Writes FileSource.data() out to `self.relpath` through Format `format`.""" + data = self.data() + with open(format.dest(self.relpath), 'w') as f: + f.write(data) + if (self.metaSource): + self.metaSource.write(format) # XXX need to get output path from format, but not let it choose actual format + + def compact(self): + """Clears all cached data, preserves computed data.""" + pass + + def revision(self): + """Returns hash of the contents of this file and any related file, references, support files, etc. 
+ XXX also needs to account for .meta file + """ + sha = hashlib.sha1() + sha.update(self.data()) + seenRefs = set(self.sourcepath) + def hashReference(source): + for refName in source.refs: + refSource = source.refs[refName][3] + if (refSource and (refSource.sourcepath not in seenRefs)): + sha.update(refSource.data()) + seenRefs.add(refSource.sourcepath) + hashReference(refSource) + hashReference(self) + return sha.hexdigest() + + def loadMetadata(self): + """Look for .meta file and load any metadata from it if present + """ + pass + + def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None): + if (self.metaSource): + return self.metaSource.augmentMetadata(next, prev, reference, notReference) + return None + + # See http://wiki.csswg.org/test/css2.1/format for more info on metadata + def getMetadata(self, asUnicode = False): + """Return dictionary of test metadata. Stores list of errors + in self.errors if there are parse or metadata errors. + Data fields include: + - asserts [list of strings] + - credits [list of (name string, url string) tuples] + - reviewers [ list of (name string, url string) tuples] + - flags [list of token strings] + - links [list of url strings] + - name [string] + - title [string] + - references [list of ReferenceData per reference; None if not reftest] + - revision [revision id of last commit] + - selftest [bool] + - scripttest [bool] + Strings are given in ascii unless asUnicode==True. 
+ """ + + self.validate() + + def encode(str): + return str if (hasattr(str, 'line')) else intern(str.encode('utf-8')) + + def escape(str, andIntern = True): + return str.encode('utf-8') if asUnicode else intern(escapeToNamedASCII(str)) if andIntern else escapeToNamedASCII(str) + + def listReferences(source, seen): + refGroups = [] + for refType, refRelPath, refNode, refSource in source.refs.values(): + if ('==' == refType): + if (refSource): + refSourcePath = refSource.sourcepath + else: + refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath)) + if (refSourcePath in seen): + continue + seen.add(refSourcePath) + if (refSource): + sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType, + relpath = refRelPath, repopath = refSourcePath) + if (refSource.refs): + subRefLists = listReferences(refSource, seen.copy()) + if (subRefLists): + for subRefList in subRefLists: + refGroups.append([sourceData] + subRefList) + else: + refGroups.append([sourceData]) + else: + refGroups.append([sourceData]) + else: + sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType, + relpath = relativeURL(self.sourcepath, refSourcePath), + repopath = refSourcePath) + refGroups.append([sourceData]) + notRefs = {} + for refType, refRelPath, refNode, refSource in source.refs.values(): + if ('!=' == refType): + if (refSource): + refSourcePath = refSource.sourcepath + else: + refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath)) + if (refSourcePath in seen): + continue + seen.add(refSourcePath) + if (refSource): + sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType, + relpath = refRelPath, repopath = refSourcePath) + notRefs[sourceData.name] = sourceData + if (refSource.refs): + for subRefList in listReferences(refSource, seen): + for subRefData in subRefList: + notRefs[subRefData.name] = subRefData + else: + sourceData = 
ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType, + relpath = relativeURL(self.sourcepath, refSourcePath), + repopath = refSourcePath) + notRefs[sourceData.name] = sourceData + if (notRefs): + for refData in notRefs.values(): + refData.type = '!=' + if (refGroups): + for refGroup in refGroups: + for notRef in notRefs.values(): + for ref in refGroup: + if (ref.name == notRef.name): + break + else: + refGroup.append(notRef) + else: + refGroups.append(notRefs.values()) + return refGroups + + references = listReferences(self, set([self.sourcepath])) if (self.refs) else None + + if (self.metadata): + data = Metadata( + name = encode(self.name()), + title = escape(self.metadata['title'], False), + asserts = [escape(assertion, False) for assertion in self.metadata['asserts']], + credits = [UserData(escape(name), encode(link)) for name, link in self.metadata['credits']], + reviewers = [UserData(escape(name), encode(link)) for name, link in self.metadata['reviewers']], + flags = [encode(flag) for flag in self.metadata['flags']], + links = [encode(link) for link in self.metadata['links']], + references = references, + revision = self.revision(), + selftest = self.isSelftest(), + scripttest = self.isScripttest() + ) + return data + return None + + def addReference(self, referenceSource, match = None): + """Add reference source.""" + self.validate() + refName = referenceSource.name() + refPath = self.relativeURL(referenceSource) + if refName not in self.refs: + node = None + if match == '==': + node = self.augmentMetadata(reference=referenceSource).reference + elif match == '!=': + node = self.augmentMetadata(notReference=referenceSource).notReference + self.refs[refName] = (match, refPath, node, referenceSource) + else: + node = self.refs[refName][2] + node.set('href', refPath) + if (match): + node.set('rel', 'mismatch' if ('!=' == match) else 'match') + else: + match = self.refs[refName][0] + self.refs[refName] = (match, refPath, node, 
referenceSource) + + def getReferencePaths(self): + """Get list of paths to references as tuple(path, relPath, refType).""" + self.validate() + return [(os.path.join(os.path.dirname(self.sourcepath), ref[1]), + os.path.join(os.path.dirname(self.relpath), ref[1]), + ref[0]) + for ref in self.refs.values()] + + def isTest(self): + self.validate() + return bool(self.metadata) and bool(self.metadata.get('links')) + + def isReftest(self): + return self.isTest() and bool(self.refs) + + def isSelftest(self): + return self.isTest() and (not bool(self.refs)) + + def isScripttest(self): + if (self.isTest() and self.scripts): + for src in self.scripts: + if (src.endswith('/resources/testharness.js')): # accept relative paths to testharness.js + return True + return False + + def hasFlag(self, flag): + data = self.getMetadata() + if data: + return flag in data['flags'] + return False + + + +class ConfigSource(FileSource): + """Object representing a text-based configuration file. + Capable of merging multiple config-file contents. + """ + + def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None): + """Init ConfigSource from source path. Give it relative path relpath. 
+ """ + FileSource.__init__(self, sourceTree, sourcepath, relpath, mimetype, data) + self.sourcepath = [sourcepath] + + def __eq__(self, other): + if not isinstance(other, ConfigSource): + return False + if self is other or self.sourcepath == other.sourcepath: + return True + if len(self.sourcepath) != len(other.sourcepath): + return False + for this, that in zip(self.sourcepath, other.sourcepath): + if not filecmp.cmp(this, that): + return False + return True + + def __ne__(self, other): + return not self == other + + def name(self): + return '.htaccess' + + def type(self): + return intern('support') + + def data(self): + """Merge contents of all config files represented by this source.""" + data = '' + for src in self.sourcepath: + with open(src) as f: + data += f.read() + data += '\n' + return data + + def getMetadata(self, asUnicode = False): + return None + + def append(self, other): + """Appends contents of ConfigSource `other` to this source. + Asserts if self.relpath != other.relpath. + """ + assert isinstance(other, ConfigSource) + assert self != other and self.relpath == other.relpath + self.sourcepath.extend(other.sourcepath) + +class ReftestFilepathError(Exception): + pass + +class ReftestManifest(ConfigSource): + """Object representing a reftest manifest file. + Iterating the ReftestManifest returns (testpath, refpath) tuples + with paths relative to the manifest. + """ + def __init__(self, sourceTree, sourcepath, relpath, data = None): + """Init ReftestManifest from source path. Give it relative path `relpath` + and load its .htaccess file. + """ + ConfigSource.__init__(self, sourceTree, sourcepath, relpath, mimetype = 'config/reftest', data = data) + + def basepath(self): + """Returns the base relpath of this reftest manifest path, i.e. + the parent of the manifest file. 
+ """ + return basepath(self.relpath) + + baseRE = re.compile(r'^#\s*relstrip\s+(\S+)\s*') + stripRE = re.compile(r'#.*') + parseRE = re.compile(r'^\s*([=!]=)\s*(\S+)\s+(\S+)') + + def __iter__(self): + """Parse the reftest manifest files represented by this ReftestManifest + and return path information about each reftest pair as + ((test-sourcepath, ref-sourcepath), (test-relpath, ref-relpath), reftype) + Raises a ReftestFilepathError if any sources file do not exist or + if any relpaths point higher than the relpath root. + """ + striplist = [] + for src in self.sourcepath: + relbase = basepath(self.relpath) + srcbase = basepath(src) + with open(src) as f: + for line in f: + strip = self.baseRE.search(line) + if strip: + striplist.append(strip.group(1)) + line = self.stripRE.sub('', line) + m = self.parseRE.search(line) + if m: + record = ((join(srcbase, m.group(2)), join(srcbase, m.group(3))), \ + (join(relbase, m.group(2)), join(relbase, m.group(3))), \ + m.group(1)) + # for strip in striplist: + # strip relrecord + if not exists(record[0][0]): + raise ReftestFilepathError("Manifest Error in %s: " + "Reftest test file %s does not exist." \ + % (src, record[0][0])) + elif not exists(record[0][1]): + raise ReftestFilepathError("Manifest Error in %s: " + "Reftest reference file %s does not exist." \ + % (src, record[0][1])) + elif not isPathInsideBase(record[1][0]): + raise ReftestFilepathError("Manifest Error in %s: " + "Reftest test replath %s not within relpath root." \ + % (src, record[1][0])) + elif not isPathInsideBase(record[1][1]): + raise ReftestFilepathError("Manifest Error in %s: " + "Reftest test replath %s not within relpath root." 
\ + % (src, record[1][1])) + yield record + +import Utils # set up XML catalog +xhtmlns = '{http://www.w3.org/1999/xhtml}' +svgns = '{http://www.w3.org/2000/svg}' +xmlns = '{http://www.w3.org/XML/1998/namespace}' +xlinkns = '{http://www.w3.org/1999/xlink}' + +class XMLSource(FileSource): + """FileSource object with support reading XML trees.""" + + NodeTuple = collections.namedtuple('NodeTuple', ['next', 'prev', 'reference', 'notReference']) + + # Public Data + syntaxErrorDoc = \ + u""" + <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> + <html xmlns="http://www.w3.org/1999/xhtml"> + <head><title>Syntax Error</title></head> + <body> + <p>The XML file <![CDATA[%s]]> contains a syntax error and could not be parsed. + Please correct it and try again.</p> + <p>The parser's error report was:</p> + <pre><![CDATA[%s]]></pre> + </body> + </html> + """ + + # Private Data and Methods + __parser = etree.XMLParser(no_network=True, + # perf nightmare dtd_validation=True, + remove_comments=False, + strip_cdata=False, + resolve_entities=False) + + # Public Methods + + def __init__(self, sourceTree, sourcepath, relpath, data = None): + """Initialize XMLSource by loading from XML file `sourcepath`. + Parse errors are reported in `self.errors`, + and the source is replaced with an XHTML error message. 
+ """ + FileSource.__init__(self, sourceTree, sourcepath, relpath, data = data) + self.tree = None + self.injectedTags = {} + + def cacheAsParseError(self, filename, e): + """Replace document with an error message.""" + errorDoc = self.syntaxErrorDoc % (filename, e) + from StringIO import StringIO + self.tree = etree.parse(StringIO(errorDoc), parser=self.__parser) + + def parse(self): + """Parse file and store any parse errors in self.errors""" + self.errors = None + try: + data = self.data() + if (data): + self.tree = etree.parse(StringReader(data), parser=self.__parser) + self.encoding = self.tree.docinfo.encoding or 'utf-8' + self.injectedTags = {} + else: + self.tree = None + self.errors = ['Empty source file'] + self.encoding = 'utf-8' + + FileSource.loadMetadata(self) + if ((not self.metadata) and self.tree and (not self.errors)): + self.extractMetadata(self.tree) + except etree.ParseError as e: + print("PARSE ERROR: " + self.sourcepath) + self.cacheAsParseError(self.sourcepath, e) + e.W3CTestLibErrorLocation = self.sourcepath + self.errors = [str(e)] + self.encoding = 'utf-8' + + def validate(self): + """Parse file if not parsed, and store any parse errors in self.errors""" + if self.tree is None: + self.parse() + + def getMeatdataContainer(self): + return self.tree.getroot().find(xhtmlns+'head') + + def injectMetadataLink(self, rel, href, tagCode = None): + """Inject (prepend) <link> with data given inside metadata container. + Injected element is tagged with `tagCode`, which can be + used to clear it with clearInjectedTags later. 
+ """ + self.validate() + container = self.getMeatdataContainer() + if (container): + node = etree.Element(xhtmlns+'link', {'rel': rel, 'href': href}) + node.tail = container.text + container.insert(0, node) + self.injectedTags[node] = tagCode or True + return node + return None + + def clearInjectedTags(self, tagCode = None): + """Clears all injected elements from the tree, or clears injected + elements tagged with `tagCode` if `tagCode` is given. + """ + if not self.injectedTags or not self.tree: return + for node in self.injectedTags: + node.getparent().remove(node) + del self.injectedTags[node] + + def serializeXML(self): + self.validate() + return etree.tounicode(self.tree) + + def data(self): + if ((not self.tree) or (self.metaSource)): + return FileSource.data(self) + return self.serializeXML().encode(self.encoding, 'xmlcharrefreplace') + + def unicode(self): + if ((not self.tree) or (self.metaSource)): + return FileSource.unicode(self) + return self.serializeXML() + + def write(self, format, output=None): + """Write Source through OutputFormat `format`. + Write contents as string `output` instead if specified. 
+ """ + if not output: + output = self.unicode() + + # write + with open(format.dest(self.relpath), 'w') as f: + f.write(output.encode(self.encoding, 'xmlcharrefreplace')) + + def compact(self): + self.tree = None + + def getMetadataElements(self, tree): + container = self.getMeatdataContainer() + if (None != container): + return [node for node in container] + return None + + def extractMetadata(self, tree): + """Extract metadata from tree.""" + links = []; credits = []; reviewers = []; flags = []; asserts = []; title = '' + + def tokenMatch(token, string): + return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False + + errors = [] + readFlags = False + metaElements = self.getMetadataElements(tree) + if (not metaElements): + errors.append("Missing <head> element") + else: + # Scan and cache metadata + for node in metaElements: + if (node.tag == xhtmlns+'link'): + # help links + if tokenMatch('help', node.get('rel')): + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Help link missing href value.", node.sourceline)) + elif (not (link.startswith('http://') or link.startswith('https://'))): + errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline)) + elif (link in links): + errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline)) + else: + links.append(LineString(link, node.sourceline)) + # == references + elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')): + refPath = node.get('href').strip() if node.get('href') else None + if (not refPath): + errors.append(LineString("Reference link missing href value.", node.sourceline)) + else: + refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath)) + if (refName in self.refs): + errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline)) + else: + 
self.refs[refName] = ('==', refPath, node, None) + # != references + elif tokenMatch('mismatch', node.get('rel')) or tokenMatch('not-reference', node.get('rel')): + refPath = node.get('href').strip() if node.get('href') else None + if (not refPath): + errors.append(LineString("Reference link missing href value.", node.sourceline)) + else: + refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath)) + if (refName in self.refs): + errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline)) + else: + self.refs[refName] = ('!=', refPath, node, None) + else: # may have both author and reviewer in the same link + # credits + if tokenMatch('author', node.get('rel')): + name = node.get('title') + name = name.strip() if name else name + if (not name): + errors.append(LineString("Author link missing name (title attribute).", node.sourceline)) + else: + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline)) + else: + credits.append((name, link)) + # reviewers + if tokenMatch('reviewer', node.get('rel')): + name = node.get('title') + name = name.strip() if name else name + if (not name): + errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline)) + else: + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline)) + else: + reviewers.append((name, link)) + elif (node.tag == xhtmlns+'meta'): + metatype = node.get('name') + metatype = metatype.strip() if metatype else metatype + # requirement flags + if ('flags' == metatype): + if (readFlags): + errors.append(LineString("Flags must only be specified once.", node.sourceline)) + else: + readFlags = True + if (None == 
node.get('content')): + errors.append(LineString("Flags meta missing content attribute.", node.sourceline)) + else: + for flag in sorted(node.get('content').split()): + flags.append(flag) + # test assertions + elif ('assert' == metatype): + if (None == node.get('content')): + errors.append(LineString("Assert meta missing content attribute.", node.sourceline)) + else: + asserts.append(node.get('content').strip().replace('\t', ' ')) + # title + elif (node.tag == xhtmlns+'title'): + title = node.text.strip() if node.text else '' + match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL) + if (match): + title = match.group(1) + title = title.strip() + # script + elif (node.tag == xhtmlns+'script'): + src = node.get('src').strip() if node.get('src') else None + if (src): + self.scripts[src] = node + + if (asserts or credits or reviewers or flags or links or title): + self.metadata = {'asserts' : asserts, + 'credits' : credits, + 'reviewers' : reviewers, + 'flags' : flags, + 'links' : links, + 'title' : title + } + + if (errors): + if (self.errors): + self.errors += errors + else: + self.errors = errors + + + def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None): + """Add extra useful metadata to the head. All arguments are optional. 
+ * Adds next/prev links to next/prev Sources given + * Adds reference link to reference Source given + """ + self.validate() + if next: + next = self.injectMetadataLink('next', self.relativeURL(next), 'next') + if prev: + prev = self.injectMetadataLink('prev', self.relativeURL(prev), 'prev') + if reference: + reference = self.injectMetadataLink('match', self.relativeURL(reference), 'ref') + if notReference: + notReference = self.injectMetadataLink('mismatch', self.relativeURL(notReference), 'not-ref') + return self.NodeTuple(next, prev, reference, notReference) + + +class XHTMLSource(XMLSource): + """FileSource object with support for XHTML->HTML conversions.""" + + # Public Methods + + def __init__(self, sourceTree, sourcepath, relpath, data = None): + """Initialize XHTMLSource by loading from XHTML file `sourcepath`. + Parse errors are stored in `self.errors`, + and the source is replaced with an XHTML error message. + """ + XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data) + + def serializeXHTML(self, doctype = None): + return self.serializeXML() + + def serializeHTML(self, doctype = None): + self.validate() + # Serialize +# print self.relpath + serializer = HTMLSerializer.HTMLSerializer() + output = serializer.serializeHTML(self.tree, doctype) + return output + + +class SVGSource(XMLSource): + """FileSource object with support for extracting metadata from SVG.""" + + def __init__(self, sourceTree, sourcepath, relpath, data = None): + """Initialize SVGSource by loading from SVG file `sourcepath`. + Parse errors are stored in `self.errors`, + and the source is replaced with an XHTML error message. 
+ """ + XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data) + + def getMeatdataContainer(self): + groups = self.tree.getroot().findall(svgns+'g') + for group in groups: + if ('testmeta' == group.get('id')): + return group + return None + + def extractMetadata(self, tree): + """Extract metadata from tree.""" + links = []; credits = []; reviewers = []; flags = []; asserts = []; title = '' + + def tokenMatch(token, string): + return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False + + errors = [] + readFlags = False + metaElements = self.getMetadataElements(tree) + if (not metaElements): + errors.append("Missing <g id='testmeta'> element") + else: + # Scan and cache metadata + for node in metaElements: + if (node.tag == xhtmlns+'link'): + # help links + if tokenMatch('help', node.get('rel')): + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Help link missing href value.", node.sourceline)) + elif (not (link.startswith('http://') or link.startswith('https://'))): + errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline)) + elif (link in links): + errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline)) + else: + links.append(LineString(link, node.sourceline)) + # == references + elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')): + refPath = node.get('href').strip() if node.get('href') else None + if (not refPath): + errors.append(LineString("Reference link missing href value.", node.sourceline)) + else: + refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath)) + if (refName in self.refs): + errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline)) + else: + self.refs[refName] = ('==', refPath, node, None) + # != references + elif tokenMatch('mismatch', node.get('rel')) or 
tokenMatch('not-reference', node.get('rel')): + refPath = node.get('href').strip() if node.get('href') else None + if (not refPath): + errors.append(LineString("Reference link missing href value.", node.sourceline)) + else: + refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath)) + if (refName in self.refs): + errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline)) + else: + self.refs[refName] = ('!=', refPath, node, None) + else: # may have both author and reviewer in the same link + # credits + if tokenMatch('author', node.get('rel')): + name = node.get('title') + name = name.strip() if name else name + if (not name): + errors.append(LineString("Author link missing name (title attribute).", node.sourceline)) + else: + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline)) + else: + credits.append((name, link)) + # reviewers + if tokenMatch('reviewer', node.get('rel')): + name = node.get('title') + name = name.strip() if name else name + if (not name): + errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline)) + else: + link = node.get('href').strip() if node.get('href') else None + if (not link): + errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline)) + else: + reviewers.append((name, link)) + elif (node.tag == svgns+'metadata'): + metatype = node.get('class') + metatype = metatype.strip() if metatype else metatype + # requirement flags + if ('flags' == metatype): + if (readFlags): + errors.append(LineString("Flags must only be specified once.", node.sourceline)) + else: + readFlags = True + text = node.find(svgns+'text') + flagString = text.text if (text) else node.text + if (flagString): + for flag in 
sorted(flagString.split()): + flags.append(flag) + elif (node.tag == svgns+'desc'): + metatype = node.get('class') + metatype = metatype.strip() if metatype else metatype + # test assertions + if ('assert' == metatype): + asserts.append(node.text.strip().replace('\t', ' ')) + # test title + elif node.tag == svgns+'title': + title = node.text.strip() if node.text else '' + match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL) + if (match): + title = match.group(1) + title = title.strip() + # script tag (XXX restricted to metadata container?) + elif (node.tag == svgns+'script'): + src = node.get('src').strip() if node.get('src') else None + if (src): + self.scripts[src] = node + + if (asserts or credits or reviewers or flags or links or title): + self.metadata = {'asserts' : asserts, + 'credits' : credits, + 'reviewers' : reviewers, + 'flags' : flags, + 'links' : links, + 'title' : title + } + if (errors): + if (self.errors): + self.errors += errors + else: + self.errors = errors + + + +class HTMLSource(XMLSource): + """FileSource object with support for HTML metadata and HTML->XHTML conversions (untested).""" + + # Private Data and Methods + __parser = html5lib.HTMLParser(tree = treebuilders.getTreeBuilder('lxml')) + + # Public Methods + + def __init__(self, sourceTree, sourcepath, relpath, data = None): + """Initialize HTMLSource by loading from HTML file `sourcepath`. 
+ """ + XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data) + + def parse(self): + """Parse file and store any parse errors in self.errors""" + self.errors = None + try: + data = self.data() + if data: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + self.tree = self.__parser.parse(data) + self.encoding = self.__parser.documentEncoding + self.injectedTags = {} + else: + self.tree = None + self.errors = ['Empty source file'] + self.encoding = 'utf-8' + + FileSource.loadMetadata(self) + if ((not self.metadata) and self.tree and (not self.errors)): + self.extractMetadata(self.tree) + except Exception as e: + print("PARSE ERROR: " + self.sourcepath) + e.W3CTestLibErrorLocation = self.sourcepath + self.errors = [str(e)] + self.encoding = 'utf-8' + + def _injectXLinks(self, element, nodeList): + injected = False + + xlinkAttrs = ['href', 'type', 'role', 'arcrole', 'title', 'show', 'actuate'] + if (element.get('href') or element.get(xlinkns + 'href')): + for attr in xlinkAttrs: + if (element.get(xlinkns + attr)): + injected = True + if (element.get(attr)): + injected = True + value = element.get(attr) + del element.attrib[attr] + element.set(xlinkns + attr, value) + nodeList.append((element, xlinkns + attr, attr)) + + for child in element: + if (type(child.tag) == type('')): # element node + qName = etree.QName(child.tag) + if ('foreignobject' != qName.localname.lower()): + injected |= self._injectXLinks(child, nodeList) + return injected + + + def _findElements(self, namespace, elementName): + elements = self.tree.findall('.//{' + namespace + '}' + elementName) + if (self.tree.getroot().tag == '{' + namespace + '}' + elementName): + elements.insert(0, self.tree.getroot()) + return elements + + def _injectNamespace(self, elementName, prefix, namespace, doXLinks, nodeList): + attr = xmlns + prefix if (prefix) else 'xmlns' + elements = self._findElements(namespace, elementName) + for element in elements: + if not element.get(attr): + 
element.set(attr, namespace) + nodeList.append((element, attr, None)) + if (doXLinks): + if (self._injectXLinks(element, nodeList)): + element.set(xmlns + 'xlink', 'http://www.w3.org/1999/xlink') + nodeList.append((element, xmlns + 'xlink', None)) + + def injectNamespaces(self): + nodeList = [] + self._injectNamespace('html', None, 'http://www.w3.org/1999/xhtml', False, nodeList) + self._injectNamespace('svg', None, 'http://www.w3.org/2000/svg', True, nodeList) + self._injectNamespace('math', None, 'http://www.w3.org/1998/Math/MathML', True, nodeList) + return nodeList + + def removeNamespaces(self, nodeList): + if nodeList: + for element, attr, oldAttr in nodeList: + if (oldAttr): + value = element.get(attr) + del element.attrib[attr] + element.set(oldAttr, value) + else: + del element.attrib[attr] + + def serializeXHTML(self, doctype = None): + self.validate() + # Serialize + nodeList = self.injectNamespaces() +# print self.relpath + serializer = HTMLSerializer.HTMLSerializer() + o = serializer.serializeXHTML(self.tree, doctype) + + self.removeNamespaces(nodeList) + return o + + def serializeHTML(self, doctype = None): + self.validate() + # Serialize +# print self.relpath + serializer = HTMLSerializer.HTMLSerializer() + o = serializer.serializeHTML(self.tree, doctype) + + return o + + def data(self): + if ((not self.tree) or (self.metaSource)): + return FileSource.data(self) + return self.serializeHTML().encode(self.encoding, 'xmlcharrefreplace') + + def unicode(self): + if ((not self.tree) or (self.metaSource)): + return FileSource.unicode(self) + return self.serializeHTML() + |