Adding upstream version 110.0.1.upstream/110.0.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
commit: 43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree: 620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
parent: Initial commit. (diff)
download: firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
1 files changed, 1473 insertions, 0 deletions
diff --git a/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
new file mode 100644
index 0000000000..f3848030ba
--- /dev/null
+++ b/testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
@@ -0,0 +1,1473 @@
+#!/usr/bin/python
+# CSS Test Source Manipulation Library
+# Initial code by fantasai, joint copyright 2010 W3C and Microsoft
+# Licensed under BSD 3-Clause: <http://www.w3.org/Consortium/Legal/2008/03-bsd-license>
+
+from __future__ import print_function
+from os.path import basename, exists, join
+import os
+import filecmp
+import shutil
+import re
+import codecs
+import collections
+from xml import dom
+import html5lib
+from html5lib import treebuilders
+from lxml import etree
+from lxml.etree import ParseError
+from Utils import getMimeFromExt, escapeToNamedASCII, basepath, isPathInsideBase, relativeURL, assetName
+import HTMLSerializer
+import warnings
+import hashlib
+
+class SourceTree(object):
+  """Class that manages structure of test repository source.
+     Temporarily hard-coded path and filename rules, this should be configurable.
+  """
+
+  def __init__(self, repository = None):
+    self.mTestExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.svg']
+    self.mReferenceExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.png', '.svg']
+    self.mRepository = repository
+
+  def _splitDirs(self, dir):
+    if ('' == dir):
+      pathList = []
+    elif ('/' in dir):
+      pathList = dir.split('/')
+    else:
+      pathList = dir.split(os.path.sep)
+    return pathList
+
+  def _splitPath(self, filePath):
+    """Split a lower-cased path into (list of directory names, file name).
+       Paths may come from the OS or from Mercurial, which always uses '/' as
+       the directory separator.
+    """
+    dir, fileName = os.path.split(filePath.lower())
+    return (self._splitDirs(dir), fileName)
+
+  def isTracked(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName))
+
+  def _isApprovedPath(self, pathList):
+    return ((1 < len(pathList)) and ('approved' == pathList[0]) and (('support' == pathList[1]) or ('src' in pathList)))
+
+  def isApprovedPath(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName)) and self._isApprovedPath(pathList)
+
+  def _isIgnoredPath(self, pathList):  # True if the directory list is ignored; [] (root) is not
+      return (('.hg' in pathList) or ('.git' in pathList) or
+              ('.svn' in pathList) or ('cvs' in pathList) or
+              ('incoming' in pathList) or ('work-in-progress' in pathList) or
+              ('data' in pathList) or ('archive' in pathList) or
+              ('reports' in pathList) or ('tools' in pathList[:1]) or  # top-level only; slice is safe on []
+              ('test-plan' in pathList) or ('test-plans' in pathList))
+
+  def _isIgnored(self, pathList, fileName):
+    if (pathList):  # ignore files in root
+      return (self._isIgnoredPath(pathList) or
+              fileName.startswith('.directory') or ('lock' == fileName) or
+              ('.ds_store' == fileName) or
+              fileName.startswith('.hg') or fileName.startswith('.git') or
+              ('sections.dat' == fileName) or ('get-spec-sections.pl' == fileName))
+    return True
+
+  def isIgnored(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return self._isIgnored(pathList, fileName)
+
+  def isIgnoredDir(self, dir):
+    pathList = self._splitDirs(dir)
+    return self._isIgnoredPath(pathList)
+
+  def _isToolPath(self, pathList):
+    return ('tools' in pathList)
+
+  def _isTool(self, pathList, fileName):
+    return self._isToolPath(pathList)
+
+  def isTool(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName)) and self._isTool(pathList, fileName)
+
+  def _isSupportPath(self, pathList):
+    return ('support' in pathList)
+
+  def _isSupport(self, pathList, fileName):
+    return (self._isSupportPath(pathList) or
+            ((not self._isTool(pathList, fileName)) and
+             (not self._isReference(pathList, fileName)) and
+             (not self._isTestCase(pathList, fileName))))
+
+  def isSupport(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName)) and self._isSupport(pathList, fileName)
+
+  def _isReferencePath(self, pathList):
+    return (('reftest' in pathList) or ('reference' in pathList))
+
+  def _isReference(self, pathList, fileName):
+    if ((not self._isSupportPath(pathList)) and (not self._isToolPath(pathList))):
+      baseName, fileExt = os.path.splitext(fileName)[:2]
+      if (bool(re.search('(^ref-|^notref-).+', baseName)) or
+          bool(re.search('.+(-ref[0-9]*$|-notref[0-9]*$)', baseName)) or
+          ('-ref-' in baseName) or ('-notref-' in baseName)):
+        return (fileExt in self.mReferenceExtensions)
+      if (self._isReferencePath(pathList)):
+        return (fileExt in self.mReferenceExtensions)
+    return False
+
+  def isReference(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName)) and self._isReference(pathList, fileName)
+
+  def isReferenceAnywhere(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return self._isReference(pathList, fileName)
+
+  def _isTestCase(self, pathList, fileName):
+    if ((not self._isToolPath(pathList)) and (not self._isSupportPath(pathList)) and (not self._isReference(pathList, fileName))):
+      fileExt = os.path.splitext(fileName)[1]
+      return (fileExt in self.mTestExtensions)
+    return False
+
+  def isTestCase(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    return (not self._isIgnored(pathList, fileName)) and self._isTestCase(pathList, fileName)
+
+  def getAssetName(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    if (self._isReference(pathList, fileName) or self._isTestCase(pathList, fileName)):
+      return assetName(fileName)
+    return fileName.lower() # support files keep full name
+
+  def getAssetType(self, filePath):
+    pathList, fileName = self._splitPath(filePath)
+    if (self._isReference(pathList, fileName)):
+      return intern('reference')
+    if (self._isTestCase(pathList, fileName)):
+      return intern('testcase')
+    if (self._isTool(pathList, fileName)):
+      return intern('tool')
+    return intern('support')
+
+
+class SourceCache:
+  """Cache for FileSource objects. Supports one FileSource object
+     per sourcepath.
+  """
+  def __init__(self, sourceTree):
+    self.__cache = {}
+    self.sourceTree = sourceTree
+
+  def generateSource(self, sourcepath, relpath, data = None):
+    """Return a FileSource or derivative chosen by the MIME type of sourcepath.
+
+       Uses a cache to avoid creating more than one of the same object:
+       does not support creating two FileSources with the same sourcepath;
+       asserts if the relpath differs. (.htaccess files are not cached.)
+
+       Cache is bypassed when `data` is given (loading from a change context)
+    """
+    if ((data is None) and (sourcepath in self.__cache)):
+      source = self.__cache[sourcepath]
+      assert relpath == source.relpath
+      return source
+
+    if basename(sourcepath) == '.htaccess':
+      return ConfigSource(self.sourceTree, sourcepath, relpath, data = data)  # 4th positional arg is mimetype
+    mime = getMimeFromExt(sourcepath)
+    if (mime == 'application/xhtml+xml'):
+      source = XHTMLSource(self.sourceTree, sourcepath, relpath, data)
+    elif (mime == 'text/html'):
+      source = HTMLSource(self.sourceTree, sourcepath, relpath, data)
+    elif (mime == 'image/svg+xml'):
+      source = SVGSource(self.sourceTree, sourcepath, relpath, data)
+    elif (mime == 'application/xml'):
+      source = XMLSource(self.sourceTree, sourcepath, relpath, data)
+    else:
+      source = FileSource(self.sourceTree, sourcepath, relpath, mime, data)
+    if (data is None):
+      self.__cache[sourcepath] = source
+    return source
+
+class SourceSet:
+  """Set of FileSource objects. No two FileSources of the same type in the set may
+     have the same name (except .htaccess files, which are merged).
+  """
+  def __init__(self, sourceCache):
+    self.sourceCache = sourceCache
+    self.pathMap = {} # type/name -> source
+
+  def __len__(self):
+    return len(self.pathMap)
+
+  def _keyOf(self, source):
+    return source.type() + '/' + source.keyName()
+
+  def __contains__(self, source):
+    return self._keyOf(source) in self.pathMap
+
+
+  def iter(self):
+    """Iterate over FileSource objects in SourceSet.
+    """
+    return self.pathMap.itervalues()
+
+  def addSource(self, source, ui):
+    """Add FileSource `source`. If a different FileSource is already stored
+       under the same type/name key, the mismatch is reported via ui.warn() and
+       the stored one is kept (ConfigSources are appended to it instead).
+    """
+    cachedSource = self.pathMap.get(self._keyOf(source))
+    if not cachedSource:
+      self.pathMap[self._keyOf(source)] = source
+    else:
+      if source != cachedSource:
+        if isinstance(source, ConfigSource):
+          cachedSource.append(source)
+        else:
+          ui.warn("File merge mismatch %s vs %s for %s\n" % \
+                (cachedSource.sourcepath, source.sourcepath, source.name()))
+
+  def add(self, sourcepath, relpath, ui):
+    """Generate and add FileSource from sourceCache. Return the resulting
+       FileSource.
+
+       A different FileSource already stored under the same key is reported
+       through `ui` by addSource() rather than raised as an exception.
+    """
+    source = self.sourceCache.generateSource(sourcepath, relpath)
+    self.addSource(source, ui)
+    return source
+
+  @staticmethod
+  def combine(a, b, ui):
+    """Merges a and b, and returns whichever one contains the merger (which
+       one is chosen based on merge efficiency). Can accept None as an argument.
+    """
+    if not (a and b):
+      return a or b
+    if len(a) < len(b):
+      return b.merge(a, ui)
+    return a.merge(b, ui)
+
+  def merge(self, other, ui):
+    """Merge the contents of SourceSet `other` into this SourceSet.
+
+       Throws a RuntimeError if there's a sourceCache mismatch.
+       Mismatching files with the same key are reported via `ui`, not raised.
+       Returns merge result (i.e. self)
+    """
+    if self.sourceCache is not other.sourceCache:
+      raise RuntimeError
+
+    for source in other.pathMap.itervalues():
+      self.addSource(source, ui)
+    return self
+
+  def adjustContentPaths(self, format):
+    for source in self.pathMap.itervalues():
+      source.adjustContentPaths(format)
+
+  def write(self, format):
+    """Write files out through OutputFormat `format`.
+    """
+    for source in self.pathMap.itervalues():
+      format.write(source)
+
+
+class StringReader(object):
+  """Wrapper around a string to give it a file-like api; read() consumes the
+     string from an internal cursor and returns '' once it is exhausted."""
+  def __init__(self, string):
+    self.mString = string
+    self.mIndex = 0
+
+  def read(self, maxSize = None):  # maxSize of None/0/negative means "read everything left"
+    if (self.mIndex < len(self.mString)):
+      if (maxSize and (0 < maxSize)):
+        slice = self.mString[self.mIndex:self.mIndex + maxSize]
+        self.mIndex += len(slice)
+        return slice
+      else:
+        start, self.mIndex = self.mIndex, len(self.mString)
+        return self.mString[start:]  # unread remainder only, not the whole string again
+    return ''
+
+
+class NamedDict(object):
+    def get(self, key):
+        if (key in self):
+            return self[key]
+        return None
+
+    def __eq__(self, other):
+        for key in self.__slots__:
+            if (self[key] != other[key]):
+                return False
+        return True
+
+    def __ne__(self, other):
+        for key in self.__slots__:
+            if (self[key] != other[key]):
+                return True
+        return False
+
+    def __len__(self):
+        return len(self.__slots__)
+
+    def __iter__(self):
+        return iter(self.__slots__)
+
+    def __contains__(self, key):
+        return (key in self.__slots__)
+
+    def copy(self):
+        clone = self.__class__()
+        for key in self.__slots__:
+            clone[key] = self[key]
+        return clone
+
+    def keys(self):
+        return self.__slots__
+
+    def has_key(self, key):
+        return (key in self)
+
+    def items(self):
+        return [(key, self[key]) for key in self.__slots__]
+
+    def iteritems(self):
+        return iter(self.items())
+
+    def iterkeys(self):
+        return self.__iter__()
+
+    def itervalues(self):  # iterator over slot values, in __slots__ order
+        return iter([self[key] for key in self.__slots__])  # items() would yield (key, value) pairs
+
+    def __str__(self):
+        return '{ ' + ', '.join([key + ': ' + str(self[key]) for key in self.__slots__]) + ' }'
+
+
+class Metadata(NamedDict):
+    __slots__ = ('name', 'title', 'asserts', 'credits', 'reviewers', 'flags', 'links', 'references', 'revision', 'selftest', 'scripttest')
+
+    def __init__(self, name = None, title = None, asserts = [], credits = [], reviewers = [], flags = [], links = [],
+                 references = [], revision = None, selftest = True, scripttest = False):
+        self.name = name
+        self.title = title
+        self.asserts = asserts
+        self.credits = credits
+        self.reviewers = reviewers
+        self.flags = flags
+        self.links = links
+        self.references = references
+        self.revision = revision
+        self.selftest = selftest
+        self.scripttest = scripttest
+
+    def __getitem__(self, key):
+        if ('name' == key):
+            return self.name
+        if ('title' == key):
+            return self.title
+        if ('asserts' == key):
+            return self.asserts
+        if ('credits' == key):
+            return self.credits
+        if ('reviewers' == key):
+            return self.reviewers
+        if ('flags' == key):
+            return self.flags
+        if ('links' == key):
+            return self.links
+        if ('references' == key):
+            return self.references
+        if ('revision' == key):
+            return self.revision
+        if ('selftest' == key):
+            return self.selftest
+        if ('scripttest' == key):
+            return self.scripttest
+        return None
+
+    def __setitem__(self, key, value):
+        if ('name' == key):
+            self.name = value
+        elif ('title' == key):
+            self.title = value
+        elif ('asserts' == key):
+            self.asserts = value
+        elif ('credits' == key):
+            self.credits = value
+        elif ('reviewers' == key):
+            self.reviewers = value
+        elif ('flags' == key):
+            self.flags = value
+        elif ('links' == key):
+            self.links = value
+        elif ('references' == key):
+            self.references = value
+        elif ('revision' == key):
+            self.revision = value
+        elif ('selftest' == key):
+            self.selftest = value
+        elif ('scripttest' == key):
+            self.scripttest = value
+        else:
+            raise KeyError()
+
+
+class ReferenceData(NamedDict):
+    __slots__ = ('name', 'type', 'relpath', 'repopath')
+
+    def __init__(self, name = None, type = None, relpath = None, repopath = None):
+        self.name = name
+        self.type = type
+        self.relpath = relpath
+        self.repopath = repopath
+
+    def __getitem__(self, key):
+        if ('name' == key):
+            return self.name
+        if ('type' == key):
+            return self.type
+        if ('relpath' == key):
+            return self.relpath
+        if ('repopath' == key):
+            return self.repopath
+        return None
+
+    def __setitem__(self, key, value):
+        if ('name' == key):
+            self.name = value
+        elif ('type' == key):
+            self.type = value
+        elif ('relpath' == key):
+            self.relpath = value
+        elif ('repopath' == key):
+            self.repopath = value
+        else:
+            raise KeyError()
+
+UserData = collections.namedtuple('UserData', ('name', 'link'))
+
+class LineString(str):
+    def __new__(cls, value, line):
+        self = str.__new__(cls, value)
+        self.line = line
+        return self
+
+    def lineValue(self):
+        return 'Line ' + str(self.line) + ': ' + str.__str__(self) if (self.line) else str.__str__(self)
+
+
+class FileSource:
+  """Object representing a file. Two FileSources are equal if they represent
+     the same file contents. It is recommended to use a SourceCache to generate
+     FileSources.
+  """
+
+  def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
+    """Init FileSource from source path. Give it relative path relpath.
+
+       `mimetype` should be the canonical MIME type for the file, if known.
+        If `mimetype` is None, guess type from file extension, defaulting to
+        the None key's value in extensionMap.
+
+       `data`, if provided, is the contents of the file. Otherwise the file is
+        read from disk on the first call to data().
+    """
+    self.sourceTree = sourceTree
+    self.sourcepath = sourcepath
+    self.relpath    = relpath
+    self.mimetype   = mimetype or getMimeFromExt(sourcepath)
+    self._data      = data
+    self.errors     = None
+    self.encoding   = 'utf-8'  # data() switches this to 'utf-8-sig' when it sees a UTF-8 BOM
+    self.refs       = {}       # refName -> (refType, refPath, refNode, refSource)
+    self.scripts    = {}       # script src -> script node
+    self.metadata   = None
+    self.metaSource = None
+
+  def __eq__(self, other):
+    if not isinstance(other, FileSource):
+      return False
+    return self.sourcepath == other.sourcepath or \
+           filecmp.cmp(self.sourcepath, other.sourcepath)
+
+  def __ne__(self, other):
+    return not self == other
+
+  def __cmp__(self, other):
+    return cmp(self.name(), other.name())
+
+  def name(self):
+    return self.sourceTree.getAssetName(self.sourcepath)
+
+  def keyName(self):
+    if ('support' == self.type()):
+      return os.path.relpath(self.relpath, 'support')
+    return self.name()
+
+  def type(self):
+    return self.sourceTree.getAssetType(self.sourcepath)
+
+  def relativeURL(self, other):
+    return relativeURL(self.relpath, other.relpath)
+
+  def data(self):
+    """Return file contents as a byte string."""
+    if (self._data is None):
+      with open(self.sourcepath, 'r') as f:
+        self._data = f.read()
+    if (self._data.startswith(codecs.BOM_UTF8)):
+      self.encoding = 'utf-8-sig' # XXX look for other unicode BOMs
+    return self._data
+
+  def unicode(self):
+    try:
+      return self.data().decode(self.encoding)
+    except UnicodeDecodeError:
+      return None
+
+  def parse(self):
+    """Parses and validates FileSource data from sourcepath."""
+    self.loadMetadata()
+
+  def validate(self):
+    """Ensure data is loaded from sourcepath."""
+    self.parse()
+
+  def adjustContentPaths(self, format):
+    """Adjust any paths in file content for output format
+       XXX need to account for group paths"""
+    if (self.refs):
+      seenRefs = {}
+      seenRefs[self.sourcepath] = '=='
+      def adjustReferences(source):
+        newRefs = {}
+        for refName in source.refs:
+          refType, refPath, refNode, refSource = source.refs[refName]
+          if refSource:
+            refPath = relativeURL(format.dest(self.relpath), format.dest(refSource.relpath))
+            if (refSource.sourcepath not in seenRefs):
+              seenRefs[refSource.sourcepath] = refType
+              adjustReferences(refSource)
+          else:
+            refPath = relativeURL(format.dest(self.relpath), format.dest(refPath))
+          if (refPath != refNode.get('href')):
+            refNode.set('href', refPath)
+          newRefs[refName] = (refType, refPath, refNode, refSource) # update path in metadata
+        source.refs = newRefs
+      adjustReferences(self)
+
+    if (self.scripts):   # force testharness.js scripts to absolute path
+      for src in self.scripts:
+        if (src.endswith('/resources/testharness.js')):   # accept relative paths to testharness.js
+            scriptNode = self.scripts[src]
+            scriptNode.set('src', '/resources/testharness.js')
+        elif (src.endswith('/resources/testharnessreport.js')):
+            scriptNode = self.scripts[src]
+            scriptNode.set('src', '/resources/testharnessreport.js')
+
+
+  def write(self, format):
+    """Writes FileSource.data() out to `self.relpath` through Format `format`."""
+    data = self.data()
+    with open(format.dest(self.relpath), 'w') as f:
+      f.write(data)
+    if (self.metaSource):
+      self.metaSource.write(format) # XXX need to get output path from format, but not let it choose actual format
+
+  def compact(self):
+    """Clears all cached data, preserves computed data."""
+    pass
+
+  def revision(self):
+    """Returns SHA-1 hex digest of this file's data plus the data of every source reachable through refs, each hashed once.
+       XXX also needs to account for .meta file
+    """
+    sha = hashlib.sha1()
+    sha.update(self.data())
+    seenRefs = set(self.sourcepath if isinstance(self.sourcepath, list) else [self.sourcepath])  # set(str) would split the path into characters
+    def hashReference(source):
+        for refName in source.refs:
+            refSource = source.refs[refName][3]  # 4th tuple item is the referenced source, if any
+            if (refSource and (refSource.sourcepath not in seenRefs)):
+                sha.update(refSource.data())
+                seenRefs.add(refSource.sourcepath)
+                hashReference(refSource)
+    hashReference(self)
+    return sha.hexdigest()
+
+  def loadMetadata(self):
+    """Look for .meta file and load any metadata from it if present
+    """
+    pass
+
+  def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
+    if (self.metaSource):
+      return self.metaSource.augmentMetadata(next, prev, reference, notReference)
+    return None
+
+  # See http://wiki.csswg.org/test/css2.1/format for more info on metadata
+  def getMetadata(self, asUnicode = False):
+    """Return dictionary of test metadata. Stores list of errors
+       in self.errors if there are parse or metadata errors.
+       Data fields include:
+         - asserts [list of strings]
+         - credits [list of (name string, url string) tuples]
+         - reviewers [ list of (name string, url string) tuples]
+         - flags   [list of token strings]
+         - links   [list of url strings]
+         - name    [string]
+         - title   [string]
+         - references [list of ReferenceData per reference; None if not reftest]
+         - revision   [revision id of last commit]
+         - selftest [bool]
+         - scripttest [bool]
+       Strings are given in ascii unless asUnicode==True.
+    """
+
+    self.validate()
+
+    def encode(str):
+        return str if (hasattr(str, 'line')) else intern(str.encode('utf-8'))
+
+    def escape(str, andIntern = True):
+      return str.encode('utf-8') if asUnicode else intern(escapeToNamedASCII(str)) if andIntern else escapeToNamedASCII(str)
+
+    def listReferences(source, seen):
+        refGroups = []
+        for refType, refRelPath, refNode, refSource in source.refs.values():
+            if ('==' == refType):
+                if (refSource):
+                    refSourcePath = refSource.sourcepath
+                else:
+                    refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath))
+                if (refSourcePath in seen):
+                    continue
+                seen.add(refSourcePath)
+                if (refSource):
+                    sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+                                               relpath = refRelPath, repopath = refSourcePath)
+                    if (refSource.refs):
+                        subRefLists = listReferences(refSource, seen.copy())
+                        if (subRefLists):
+                            for subRefList in subRefLists:
+                                refGroups.append([sourceData] + subRefList)
+                        else:
+                            refGroups.append([sourceData])
+                    else:
+                        refGroups.append([sourceData])
+                else:
+                    sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+                                               relpath = relativeURL(self.sourcepath, refSourcePath),
+                                               repopath = refSourcePath)
+                    refGroups.append([sourceData])
+        notRefs = {}
+        for refType, refRelPath, refNode, refSource in source.refs.values():
+            if ('!=' == refType):
+                if (refSource):
+                    refSourcePath = refSource.sourcepath
+                else:
+                    refSourcePath = os.path.normpath(join(basepath(source.sourcepath), refRelPath))
+                if (refSourcePath in seen):
+                    continue
+                seen.add(refSourcePath)
+                if (refSource):
+                    sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+                                               relpath = refRelPath, repopath = refSourcePath)
+                    notRefs[sourceData.name] = sourceData
+                    if (refSource.refs):
+                        for subRefList in listReferences(refSource, seen):
+                            for subRefData in subRefList:
+                                notRefs[subRefData.name] = subRefData
+                else:
+                    sourceData = ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
+                                               relpath = relativeURL(self.sourcepath, refSourcePath),
+                                               repopath = refSourcePath)
+                    notRefs[sourceData.name] = sourceData
+        if (notRefs):
+            for refData in notRefs.values():
+                refData.type = '!='
+            if (refGroups):
+                for refGroup in refGroups:
+                    for notRef in notRefs.values():
+                        for ref in refGroup:
+                            if (ref.name == notRef.name):
+                                break
+                        else:
+                            refGroup.append(notRef)
+            else:
+                refGroups.append(notRefs.values())
+        return refGroups
+
+    references = listReferences(self, set([self.sourcepath])) if (self.refs) else None
+
+    if (self.metadata):
+      data = Metadata(
+              name       = encode(self.name()),
+              title      = escape(self.metadata['title'], False),
+              asserts    = [escape(assertion, False) for assertion in self.metadata['asserts']],
+              credits    = [UserData(escape(name), encode(link)) for name, link in self.metadata['credits']],
+              reviewers  = [UserData(escape(name), encode(link)) for name, link in self.metadata['reviewers']],
+              flags      = [encode(flag) for flag in self.metadata['flags']],
+              links      = [encode(link) for link in self.metadata['links']],
+              references = references,
+              revision   = self.revision(),
+              selftest   = self.isSelftest(),
+              scripttest = self.isScripttest()
+             )
+      return data
+    return None
+
+  def addReference(self, referenceSource, match = None):
+    """Add reference source."""
+    self.validate()
+    refName = referenceSource.name()
+    refPath = self.relativeURL(referenceSource)
+    if refName not in self.refs:
+      node = None
+      if match == '==':
+        node = self.augmentMetadata(reference=referenceSource).reference
+      elif match == '!=':
+        node = self.augmentMetadata(notReference=referenceSource).notReference
+      self.refs[refName] = (match, refPath, node, referenceSource)
+    else:
+      node = self.refs[refName][2]
+      node.set('href', refPath)
+      if (match):
+        node.set('rel', 'mismatch' if ('!=' == match) else 'match')
+      else:
+        match = self.refs[refName][0]
+      self.refs[refName] = (match, refPath, node, referenceSource)
+
+  def getReferencePaths(self):
+    """Get list of paths to references as tuple(path, relPath, refType)."""
+    self.validate()
+    return [(os.path.join(os.path.dirname(self.sourcepath), ref[1]),
+             os.path.join(os.path.dirname(self.relpath), ref[1]),
+             ref[0])
+            for ref in self.refs.values()]
+
+  def isTest(self):
+    self.validate()
+    return bool(self.metadata) and bool(self.metadata.get('links'))
+
+  def isReftest(self):
+    return self.isTest() and bool(self.refs)
+
+  def isSelftest(self):
+    return self.isTest() and (not bool(self.refs))
+
+  def isScripttest(self):
+    if (self.isTest() and self.scripts):
+        for src in self.scripts:
+            if (src.endswith('/resources/testharness.js')):   # accept relative paths to testharness.js
+                return True
+    return False
+
+  def hasFlag(self, flag):
+    data = self.getMetadata()
+    if data:
+      return flag in data['flags']
+    return False
+
+
+
+class ConfigSource(FileSource):
+  """Object representing a text-based configuration file.
+     Capable of merging multiple config-file contents.
+  """
+
+  def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
+    """Init ConfigSource from source path. Give it relative path relpath.
+    """
+    FileSource.__init__(self, sourceTree, sourcepath, relpath, mimetype, data)
+    self.sourcepath = [sourcepath]
+
+  def __eq__(self, other):
+    if not isinstance(other, ConfigSource):
+      return False
+    if self is other or self.sourcepath == other.sourcepath:
+      return True
+    if len(self.sourcepath) != len(other.sourcepath):
+      return False
+    for this, that in zip(self.sourcepath, other.sourcepath):
+      if not filecmp.cmp(this, that):
+        return False
+    return True
+
+  def __ne__(self, other):
+    return not self == other
+
+  def name(self):
+    return '.htaccess'
+
+  def type(self):
+    return intern('support')
+
+  def data(self):
+    """Merge contents of all config files represented by this source."""
+    data = ''
+    for src in self.sourcepath:
+      with open(src) as f:
+        data += f.read()
+      data += '\n'
+    return data
+
+  def getMetadata(self, asUnicode = False):
+    return None
+
+  def append(self, other):
+    """Appends contents of ConfigSource `other` to this source.
+       Asserts if self.relpath != other.relpath.
+    """
+    assert isinstance(other, ConfigSource)
+    assert self != other and self.relpath == other.relpath
+    self.sourcepath.extend(other.sourcepath)
+
+class ReftestFilepathError(Exception):
+  pass
+
+class ReftestManifest(ConfigSource):
+  """Object representing a reftest manifest file.
+     Iterating the ReftestManifest returns (testpath, refpath) tuples
+     with paths relative to the manifest.
+  """
+  def __init__(self, sourceTree, sourcepath, relpath, data = None):
+    """Init ReftestManifest from source path. Give it relative path `relpath`
+       and load its .htaccess file.
+    """
+    ConfigSource.__init__(self, sourceTree, sourcepath, relpath, mimetype = 'config/reftest', data = data)
+
+  def basepath(self):
+    """Returns the base relpath of this reftest manifest path, i.e.
+       the parent of the manifest file.
+    """
+    return basepath(self.relpath)
+
+  baseRE = re.compile(r'^#\s*relstrip\s+(\S+)\s*')
+  stripRE = re.compile(r'#.*')
+  parseRE = re.compile(r'^\s*([=!]=)\s*(\S+)\s+(\S+)')
+
+  def __iter__(self):
+    """Parse the reftest manifest files represented by this ReftestManifest
+       and return path information about each reftest pair as
+         ((test-sourcepath, ref-sourcepath), (test-relpath, ref-relpath), reftype)
+       Raises a ReftestFilepathError if any sources file do not exist or
+       if any relpaths point higher than the relpath root.
+    """
+    striplist = []
+    for src in self.sourcepath:
+      relbase = basepath(self.relpath)
+      srcbase = basepath(src)
+      with open(src) as f:
+        for line in f:
+          strip = self.baseRE.search(line)
+          if strip:
+            striplist.append(strip.group(1))
+          line = self.stripRE.sub('', line)
+          m = self.parseRE.search(line)
+          if m:
+            record = ((join(srcbase, m.group(2)), join(srcbase, m.group(3))), \
+                      (join(relbase, m.group(2)), join(relbase, m.group(3))), \
+                      m.group(1))
+  #          for strip in striplist:
+              # strip relrecord
+            if not exists(record[0][0]):
+              raise ReftestFilepathError("Manifest Error in %s: "
+                                         "Reftest test file %s does not exist." \
+                                          % (src, record[0][0]))
+            elif not exists(record[0][1]):
+              raise ReftestFilepathError("Manifest Error in %s: "
+                                         "Reftest reference file %s does not exist." \
+                                         % (src, record[0][1]))
+            elif not isPathInsideBase(record[1][0]):
+              raise ReftestFilepathError("Manifest Error in %s: "
+                                         "Reftest test replath %s not within relpath root." \
+                                         % (src, record[1][0]))
+            elif not isPathInsideBase(record[1][1]):
+              raise ReftestFilepathError("Manifest Error in %s: "
+                                         "Reftest test replath %s not within relpath root." \
+                                         % (src, record[1][1]))
+            yield record
+
+import Utils # set up XML catalog
+# Namespace URIs wrapped in braces, i.e. the '{uri}' prefix that gets
+# concatenated with a local name (e.g. xhtmlns+'head') to form the
+# qualified tag and attribute names used with the lxml trees below.
+xhtmlns = '{http://www.w3.org/1999/xhtml}'
+svgns = '{http://www.w3.org/2000/svg}'
+xmlns = '{http://www.w3.org/XML/1998/namespace}'
+xlinkns = '{http://www.w3.org/1999/xlink}'
+
+class XMLSource(FileSource):
+  """FileSource object with support reading XML trees."""
+
+  NodeTuple = collections.namedtuple('NodeTuple', ['next', 'prev', 'reference', 'notReference'])
+
+  # Public Data
+  syntaxErrorDoc = \
+  u"""
+  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+  <html xmlns="http://www.w3.org/1999/xhtml">
+    <head><title>Syntax Error</title></head>
+    <body>
+      <p>The XML file <![CDATA[%s]]> contains a syntax error and could not be parsed.
+      Please correct it and try again.</p>
+      <p>The parser's error report was:</p>
+      <pre><![CDATA[%s]]></pre>
+    </body>
+  </html>
+  """
+
+  # Private Data and Methods
+  __parser = etree.XMLParser(no_network=True,
+  # perf nightmare           dtd_validation=True,
+                             remove_comments=False,
+                             strip_cdata=False,
+                             resolve_entities=False)
+
+  # Public Methods
+
+  def __init__(self, sourceTree, sourcepath, relpath, data = None):
+    """Initialize XMLSource by loading from XML file `sourcepath`.
+      Parse errors are reported in `self.errors`,
+      and the source is replaced with an XHTML error message.
+    """
+    FileSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+    self.tree = None
+    self.injectedTags = {}
+
+  def cacheAsParseError(self, filename, e):
+      """Replace document with an error message."""
+      errorDoc = self.syntaxErrorDoc % (filename, e)
+      from StringIO import StringIO
+      self.tree = etree.parse(StringIO(errorDoc), parser=self.__parser)
+
+  def parse(self):
+    """Parse file and store any parse errors in self.errors"""
+    self.errors = None
+    try:
+      data = self.data()
+      if (data):
+        self.tree = etree.parse(StringReader(data), parser=self.__parser)
+        self.encoding = self.tree.docinfo.encoding or 'utf-8'
+        self.injectedTags = {}
+      else:
+        self.tree = None
+        self.errors = ['Empty source file']
+        self.encoding = 'utf-8'
+
+      FileSource.loadMetadata(self)
+      if ((not self.metadata) and self.tree and (not self.errors)):
+        self.extractMetadata(self.tree)
+    except etree.ParseError as e:
+      print("PARSE ERROR: " + self.sourcepath)
+      self.cacheAsParseError(self.sourcepath, e)
+      e.W3CTestLibErrorLocation = self.sourcepath
+      self.errors = [str(e)]
+      self.encoding = 'utf-8'
+
+  def validate(self):
+    """Parse file if not parsed, and store any parse errors in self.errors"""
+    if self.tree is None:
+      self.parse()
+
+  def getMeatdataContainer(self):
+    return self.tree.getroot().find(xhtmlns+'head')
+
+  def injectMetadataLink(self, rel, href, tagCode = None):
+    """Inject (prepend) <link> with data given inside metadata container.
+       Injected element is tagged with `tagCode`, which can be
+       used to clear it with clearInjectedTags later.
+    """
+    self.validate()
+    container = self.getMeatdataContainer()
+    if (container):
+      node = etree.Element(xhtmlns+'link', {'rel': rel, 'href': href})
+      node.tail = container.text
+      container.insert(0, node)
+      self.injectedTags[node] = tagCode or True
+      return node
+    return None
+
+  def clearInjectedTags(self, tagCode = None):
+    """Clears all injected elements from the tree, or clears injected
+       elements tagged with `tagCode` if `tagCode` is given.
+    """
+    if not self.injectedTags or not self.tree: return
+    for node in self.injectedTags:
+      node.getparent().remove(node)
+      del self.injectedTags[node]
+
+  def serializeXML(self):
+    self.validate()
+    return etree.tounicode(self.tree)
+
+  def data(self):
+    if ((not self.tree) or (self.metaSource)):
+      return FileSource.data(self)
+    return self.serializeXML().encode(self.encoding, 'xmlcharrefreplace')
+
+  def unicode(self):
+    if ((not self.tree) or (self.metaSource)):
+      return FileSource.unicode(self)
+    return self.serializeXML()
+
+  def write(self, format, output=None):
+    """Write Source through OutputFormat `format`.
+       Write contents as string `output` instead if specified.
+    """
+    if not output:
+      output = self.unicode()
+
+    # write
+    with open(format.dest(self.relpath), 'w') as f:
+      f.write(output.encode(self.encoding, 'xmlcharrefreplace'))
+
+  def compact(self):
+    self.tree = None
+
+  def getMetadataElements(self, tree):
+    container = self.getMeatdataContainer()
+    if (None != container):
+      return [node for node in container]
+    return None
+
+  def extractMetadata(self, tree):
+    """Extract metadata from tree."""
+    links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
+
+    def tokenMatch(token, string):
+        return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False
+
+    errors = []
+    readFlags = False
+    metaElements = self.getMetadataElements(tree)
+    if (not metaElements):
+        errors.append("Missing <head> element")
+    else:
+        # Scan and cache metadata
+        for node in metaElements:
+            if (node.tag == xhtmlns+'link'):
+                # help links
+                if tokenMatch('help', node.get('rel')):
+                    link = node.get('href').strip() if node.get('href') else None
+                    if (not link):
+                        errors.append(LineString("Help link missing href value.", node.sourceline))
+                    elif (not (link.startswith('http://') or link.startswith('https://'))):
+                        errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
+                    elif (link in links):
+                        errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
+                    else:
+                        links.append(LineString(link, node.sourceline))
+                # == references
+                elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')):
+                    refPath = node.get('href').strip() if node.get('href') else None
+                    if (not refPath):
+                        errors.append(LineString("Reference link missing href value.", node.sourceline))
+                    else:
+                        refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+                        if (refName in self.refs):
+                            errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+                        else:
+                            self.refs[refName] = ('==', refPath, node, None)
+                # != references
+                elif tokenMatch('mismatch', node.get('rel')) or tokenMatch('not-reference', node.get('rel')):
+                    refPath = node.get('href').strip() if node.get('href') else None
+                    if (not refPath):
+                        errors.append(LineString("Reference link missing href value.", node.sourceline))
+                    else:
+                        refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+                        if (refName in self.refs):
+                            errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+                        else:
+                            self.refs[refName] = ('!=', refPath, node, None)
+                else: # may have both author and reviewer in the same link
+                    # credits
+                    if tokenMatch('author', node.get('rel')):
+                        name = node.get('title')
+                        name = name.strip() if name else name
+                        if (not name):
+                            errors.append(LineString("Author link missing name (title attribute).", node.sourceline))
+                        else:
+                            link = node.get('href').strip() if node.get('href') else None
+                            if (not link):
+                                errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+                            else:
+                                credits.append((name, link))
+                    # reviewers
+                    if tokenMatch('reviewer', node.get('rel')):
+                        name = node.get('title')
+                        name = name.strip() if name else name
+                        if (not name):
+                            errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline))
+                        else:
+                            link = node.get('href').strip() if node.get('href') else None
+                            if (not link):
+                                errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+                            else:
+                                reviewers.append((name, link))
+            elif (node.tag == xhtmlns+'meta'):
+                metatype = node.get('name')
+                metatype = metatype.strip() if metatype else metatype
+                # requirement flags
+                if ('flags' == metatype):
+                    if (readFlags):
+                        errors.append(LineString("Flags must only be specified once.", node.sourceline))
+                    else:
+                        readFlags = True
+                        if (None == node.get('content')):
+                            errors.append(LineString("Flags meta missing content attribute.", node.sourceline))
+                        else:
+                            for flag in sorted(node.get('content').split()):
+                                flags.append(flag)
+                # test assertions
+                elif ('assert' == metatype):
+                    if (None == node.get('content')):
+                        errors.append(LineString("Assert meta missing content attribute.", node.sourceline))
+                    else:
+                        asserts.append(node.get('content').strip().replace('\t', ' '))
+            # title
+            elif (node.tag == xhtmlns+'title'):
+                title = node.text.strip() if node.text else ''
+                match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
+                if (match):
+                    title = match.group(1)
+                title = title.strip()
+            # script
+            elif (node.tag == xhtmlns+'script'):
+                src = node.get('src').strip() if node.get('src') else None
+                if (src):
+                    self.scripts[src] = node
+
+    if (asserts or credits or reviewers or flags or links or title):
+        self.metadata = {'asserts'   : asserts,
+                         'credits'   : credits,
+                         'reviewers' : reviewers,
+                         'flags'     : flags,
+                         'links'     : links,
+                         'title'     : title
+                        }
+
+    if (errors):
+        if (self.errors):
+            self.errors += errors
+        else:
+            self.errors = errors
+
+
+  def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
+     """Add extra useful metadata to the head. All arguments are optional.
+          * Adds next/prev links to  next/prev Sources given
+          * Adds reference link to reference Source given
+     """
+     self.validate()
+     if next:
+       next = self.injectMetadataLink('next', self.relativeURL(next), 'next')
+     if prev:
+       prev = self.injectMetadataLink('prev', self.relativeURL(prev), 'prev')
+     if reference:
+       reference = self.injectMetadataLink('match', self.relativeURL(reference), 'ref')
+     if notReference:
+       notReference = self.injectMetadataLink('mismatch', self.relativeURL(notReference), 'not-ref')
+     return self.NodeTuple(next, prev, reference, notReference)
+
+
+class XHTMLSource(XMLSource):
+  """FileSource object with support for XHTML->HTML conversions."""
+
+  # Public Methods
+
+  def __init__(self, sourceTree, sourcepath, relpath, data = None):
+    """Initialize XHTMLSource by loading from XHTML file `sourcepath`.
+      Parse errors are stored in `self.errors`,
+      and the source is replaced with an XHTML error message.
+    """
+    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+  def serializeXHTML(self, doctype = None):
+    return self.serializeXML()
+
+  def serializeHTML(self, doctype = None):
+    self.validate()
+    # Serialize
+#    print self.relpath
+    serializer = HTMLSerializer.HTMLSerializer()
+    output = serializer.serializeHTML(self.tree, doctype)
+    return output
+
+
+class SVGSource(XMLSource):
+  """FileSource object with support for extracting metadata from SVG."""
+
+  def __init__(self, sourceTree, sourcepath, relpath, data = None):
+    """Initialize SVGSource by loading from SVG file `sourcepath`.
+      Parse errors are stored in `self.errors`,
+      and the source is replaced with an XHTML error message.
+    """
+    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+  def getMeatdataContainer(self):
+    groups = self.tree.getroot().findall(svgns+'g')
+    for group in groups:
+      if ('testmeta' == group.get('id')):
+        return group
+    return None
+
+  def extractMetadata(self, tree):
+    """Extract metadata from tree."""
+    links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
+
+    def tokenMatch(token, string):
+        return bool(re.search('(^|\s+)%s($|\s+)' % token, string)) if (string) else False
+
+    errors = []
+    readFlags = False
+    metaElements = self.getMetadataElements(tree)
+    if (not metaElements):
+        errors.append("Missing <g id='testmeta'> element")
+    else:
+        # Scan and cache metadata
+        for node in metaElements:
+            if (node.tag == xhtmlns+'link'):
+                # help links
+                if tokenMatch('help', node.get('rel')):
+                    link = node.get('href').strip() if node.get('href') else None
+                    if (not link):
+                        errors.append(LineString("Help link missing href value.", node.sourceline))
+                    elif (not (link.startswith('http://') or link.startswith('https://'))):
+                        errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
+                    elif (link in links):
+                        errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
+                    else:
+                        links.append(LineString(link, node.sourceline))
+                # == references
+                elif tokenMatch('match', node.get('rel')) or tokenMatch('reference', node.get('rel')):
+                    refPath = node.get('href').strip() if node.get('href') else None
+                    if (not refPath):
+                        errors.append(LineString("Reference link missing href value.", node.sourceline))
+                    else:
+                        refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+                        if (refName in self.refs):
+                            errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+                        else:
+                            self.refs[refName] = ('==', refPath, node, None)
+                # != references
+                elif tokenMatch('mismatch', node.get('rel')) or tokenMatch('not-reference', node.get('rel')):
+                    refPath = node.get('href').strip() if node.get('href') else None
+                    if (not refPath):
+                        errors.append(LineString("Reference link missing href value.", node.sourceline))
+                    else:
+                        refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
+                        if (refName in self.refs):
+                            errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
+                        else:
+                            self.refs[refName] = ('!=', refPath, node, None)
+                else: # may have both author and reviewer in the same link
+                    # credits
+                    if tokenMatch('author', node.get('rel')):
+                        name = node.get('title')
+                        name = name.strip() if name else name
+                        if (not name):
+                            errors.append(LineString("Author link missing name (title attribute).", node.sourceline))
+                        else:
+                            link = node.get('href').strip() if node.get('href') else None
+                            if (not link):
+                                errors.append(LineString("Author link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+                            else:
+                                credits.append((name, link))
+                    # reviewers
+                    if tokenMatch('reviewer', node.get('rel')):
+                        name = node.get('title')
+                        name = name.strip() if name else name
+                        if (not name):
+                            errors.append(LineString("Reviewer link missing name (title attribute).", node.sourceline))
+                        else:
+                            link = node.get('href').strip() if node.get('href') else None
+                            if (not link):
+                                errors.append(LineString("Reviewer link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
+                            else:
+                                reviewers.append((name, link))
+            elif (node.tag == svgns+'metadata'):
+                metatype = node.get('class')
+                metatype = metatype.strip() if metatype else metatype
+                # requirement flags
+                if ('flags' == metatype):
+                    if (readFlags):
+                        errors.append(LineString("Flags must only be specified once.", node.sourceline))
+                    else:
+                        readFlags = True
+                        text = node.find(svgns+'text')
+                        flagString = text.text if (text) else node.text
+                        if (flagString):
+                            for flag in sorted(flagString.split()):
+                                flags.append(flag)
+            elif (node.tag == svgns+'desc'):
+                metatype = node.get('class')
+                metatype = metatype.strip() if metatype else metatype
+                # test assertions
+                if ('assert' == metatype):
+                    asserts.append(node.text.strip().replace('\t', ' '))
+            # test title
+            elif node.tag == svgns+'title':
+                title = node.text.strip() if node.text else ''
+                match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
+                if (match):
+                    title = match.group(1)
+                title = title.strip()
+            # script tag (XXX restricted to metadata container?)
+            elif (node.tag == svgns+'script'):
+                src = node.get('src').strip() if node.get('src') else None
+                if (src):
+                    self.scripts[src] = node
+
+    if (asserts or credits or reviewers or flags or links or title):
+        self.metadata = {'asserts'   : asserts,
+                         'credits'   : credits,
+                         'reviewers' : reviewers,
+                         'flags'     : flags,
+                         'links'     : links,
+                         'title'     : title
+                        }
+    if (errors):
+        if (self.errors):
+            self.errors += errors
+        else:
+            self.errors = errors
+
+
+
+class HTMLSource(XMLSource):
+  """FileSource object with support for HTML metadata and HTML->XHTML conversions (untested)."""
+
+  # Private Data and Methods
+  __parser = html5lib.HTMLParser(tree = treebuilders.getTreeBuilder('lxml'))
+
+  # Public Methods
+
+  def __init__(self, sourceTree, sourcepath, relpath, data = None):
+    """Initialize HTMLSource by loading from HTML file `sourcepath`.
+    """
+    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
+
+  def parse(self):
+    """Parse file and store any parse errors in self.errors"""
+    self.errors = None
+    try:
+      data = self.data()
+      if data:
+        with warnings.catch_warnings():
+          warnings.simplefilter("ignore")
+          self.tree = self.__parser.parse(data)
+          self.encoding = self.__parser.documentEncoding
+          self.injectedTags = {}
+      else:
+        self.tree = None
+        self.errors = ['Empty source file']
+        self.encoding = 'utf-8'
+
+      FileSource.loadMetadata(self)
+      if ((not self.metadata) and self.tree and (not self.errors)):
+        self.extractMetadata(self.tree)
+    except Exception as e:
+      print("PARSE ERROR: " + self.sourcepath)
+      e.W3CTestLibErrorLocation = self.sourcepath
+      self.errors = [str(e)]
+      self.encoding = 'utf-8'
+
+  def _injectXLinks(self, element, nodeList):
+    injected = False
+
+    xlinkAttrs = ['href', 'type', 'role', 'arcrole', 'title', 'show', 'actuate']
+    if (element.get('href') or element.get(xlinkns + 'href')):
+      for attr in xlinkAttrs:
+        if (element.get(xlinkns + attr)):
+          injected = True
+        if (element.get(attr)):
+          injected = True
+          value = element.get(attr)
+          del element.attrib[attr]
+          element.set(xlinkns + attr, value)
+          nodeList.append((element, xlinkns + attr, attr))
+
+    for child in element:
+        if (type(child.tag) == type('')): # element node
+            qName = etree.QName(child.tag)
+            if ('foreignobject' != qName.localname.lower()):
+                injected |= self._injectXLinks(child, nodeList)
+    return injected
+
+
+  def _findElements(self, namespace, elementName):
+      elements = self.tree.findall('.//{' + namespace + '}' + elementName)
+      if (self.tree.getroot().tag == '{' + namespace + '}' + elementName):
+          elements.insert(0, self.tree.getroot())
+      return elements
+
+  def _injectNamespace(self, elementName, prefix, namespace, doXLinks, nodeList):
+    attr = xmlns + prefix if (prefix) else 'xmlns'
+    elements = self._findElements(namespace, elementName)
+    for element in elements:
+      if not element.get(attr):
+        element.set(attr, namespace)
+        nodeList.append((element, attr, None))
+        if (doXLinks):
+          if (self._injectXLinks(element, nodeList)):
+            element.set(xmlns + 'xlink', 'http://www.w3.org/1999/xlink')
+            nodeList.append((element, xmlns + 'xlink', None))
+
+  def injectNamespaces(self):
+    nodeList = []
+    self._injectNamespace('html', None, 'http://www.w3.org/1999/xhtml', False, nodeList)
+    self._injectNamespace('svg', None, 'http://www.w3.org/2000/svg', True, nodeList)
+    self._injectNamespace('math', None, 'http://www.w3.org/1998/Math/MathML', True, nodeList)
+    return nodeList
+
+  def removeNamespaces(self, nodeList):
+      if nodeList:
+          for element, attr, oldAttr in nodeList:
+              if (oldAttr):
+                  value = element.get(attr)
+                  del element.attrib[attr]
+                  element.set(oldAttr, value)
+              else:
+                  del element.attrib[attr]
+
+  def serializeXHTML(self, doctype = None):
+    self.validate()
+    # Serialize
+    nodeList = self.injectNamespaces()
+#    print self.relpath
+    serializer = HTMLSerializer.HTMLSerializer()
+    o = serializer.serializeXHTML(self.tree, doctype)
+
+    self.removeNamespaces(nodeList)
+    return o
+
+  def serializeHTML(self, doctype = None):
+    self.validate()
+    # Serialize
+#    print self.relpath
+    serializer = HTMLSerializer.HTMLSerializer()
+    o = serializer.serializeHTML(self.tree, doctype)
+
+    return o
+
+  def data(self):
+    if ((not self.tree) or (self.metaSource)):
+      return FileSource.data(self)
+    return self.serializeHTML().encode(self.encoding, 'xmlcharrefreplace')
+
+  def unicode(self):
+    if ((not self.tree) or (self.metaSource)):
+      return FileSource.unicode(self)
+    return self.serializeHTML()
+
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
commit	43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree	620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/css/tools/w3ctestlib/Sources.py
parent	Initial commit. (diff)
download	firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip