summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py')
-rw-r--r--testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py242
1 files changed, 242 insertions, 0 deletions
diff --git a/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py b/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py
new file mode 100644
index 0000000000..7d9cc5b8a2
--- /dev/null
+++ b/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py
@@ -0,0 +1,242 @@
+
+# Author: Rob Sanderson (azaroth42@gmail.com)
+# License: Apache2
+# Last Modified: 2016-09-02
+
+from __future__ import print_function
+
+import json
+from rdflib import ConjunctiveGraph, URIRef
+from pyld import jsonld
+from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor
+import urllib
+
# In-memory cache of already-fetched documents, keyed by URL, so that the
# JSON-LD contexts are not looked up online again for every operation.
docCache = dict()
+
def fetch(url):
    """Download ``url`` and return the raw response body.

    The body is returned exactly as read from the handle (``bytes`` on
    Python 3, ``str`` on Python 2); no decoding or parsing is done here.
    Any error raised while opening or reading the URL propagates to the
    caller.
    """
    # Imported locally so this function works on both Python 2 and Python 3
    # without touching the module's import block.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() guarantees the handle is released even if read() raises.
    with closing(urlopen(url)) as fh:
        return fh.read()
+
def load_document_and_cache(url, options=None):
    """Document loader for pyld that fetches each URL at most once.

    Returns a remote-document dict with the keys ``contextUrl``,
    ``documentUrl`` (both ``None``) and ``document`` (the raw body returned
    by ``fetch``).  Results are stored in the module-level ``docCache`` and
    the same dict object is returned on later calls for the same URL.

    ``options`` is accepted and ignored.
    NOTE(review): newer pyld releases appear to invoke the loader as
    ``loader(url, options)``; the default keeps one-argument callers working.
    Confirm against the pyld version in use.
    """
    if url in docCache:
        return docCache[url]

    doc = {
        'contextUrl': None,
        'documentUrl': None,
        'document': fetch(url),
    }
    docCache[url] = doc
    return doc


# Install the caching loader as pyld's document loader.
jsonld.set_document_loader(load_document_and_cache)
+
class Validator(object):
    """Helpers for converting annotations between JSON-LD and RDF.

    The conversions go through rdflib (``ConjunctiveGraph``) and pyld
    (``frame`` / ``compact``).  ``rdf_to_jsonld`` and ``compact_and_clean``
    read the module-level names ``context_js``, ``frame_js`` and ``context``,
    which must be defined before those methods are called.
    """

    def __init__(self):
        # Short type name as used in the JSON -> prefixed class name that is
        # substituted by _mk_rdflib_jsonld before the data is given to rdflib.
        self.rdflib_class_map = {
            "Annotation": "oa:Annotation",
            "Dataset": "dctypes:Dataset",
            "Image": "dctypes:StillImage",
            "Video": "dctypes:MovingImage",
            "Audio": "dctypes:Sound",
            "Text": "dctypes:Text",
            "TextualBody": "oa:TextualBody",
            "ResourceSelection": "oa:ResourceSelection",
            "SpecificResource": "oa:SpecificResource",
            "FragmentSelector": "oa:FragmentSelector",
            "CssSelector": "oa:CssSelector",
            "XPathSelector": "oa:XPathSelector",
            "TextQuoteSelector": "oa:TextQuoteSelector",
            "TextPositionSelector": "oa:TextPositionSelector",
            "DataPositionSelector": "oa:DataPositionSelector",
            "SvgSelector": "oa:SvgSelector",
            "RangeSelector": "oa:RangeSelector",
            "TimeState": "oa:TimeState",
            "HttpState": "oa:HttpRequestState",
            "CssStylesheet": "oa:CssStyle",
            "Choice": "oa:Choice",
            "Composite": "oa:Composite",
            "List": "oa:List",
            "Independents": "oa:Independents",
            "Person": "foaf:Person",
            "Software": "as:Application",
            "Organization": "foaf:Organization",
            "AnnotationCollection": "as:OrderedCollection",
            "AnnotationPage": "as:OrderedCollectionPage",
            "Audience": "schema:Audience"
        }

    def _clean_bnode_ids(self, js):
        """Return a copy of ``js`` without blank-node ``id`` entries.

        An ``id`` key is dropped when its value is a string starting with
        ``"_:"``.  Nested dicts, and dicts that are direct members of a list
        value, are cleaned recursively; every other value is copied by
        reference.  ``js`` itself is not modified.
        """
        # (str, unicode) on Python 2, (str, str) on Python 3.
        string_types = (str, type(u""))
        new = {}
        for (k, v) in js.items():
            if k == 'id' and isinstance(v, string_types) and v.startswith("_:"):
                continue
            if isinstance(v, dict):
                new[k] = self._clean_bnode_ids(v)
            elif isinstance(v, list):
                new[k] = [
                    self._clean_bnode_ids(item) if isinstance(item, dict) else item
                    for item in v
                ]
            else:
                new[k] = v
        return new

    def _mk_rdflib_jsonld(self, js):
        """Return a copy of ``js`` with ``type`` values rewritten for rdflib.

        Pre-processing step that works around limitations of rdflib's JSON-LD
        support: every ``type`` value found in ``js`` or in a nested dict is
        replaced using ``self.rdflib_class_map``.  Type names missing from the
        map are dropped (a single unknown type removes the ``type`` key; in a
        list only the known entries are kept).

        NOTE: dicts nested inside list values are not descended into; they
        are copied through unchanged.
        """
        class_map = self.rdflib_class_map
        new = {}
        for (k, v) in js.items():
            if k == 'type':
                if isinstance(v, list):
                    new['type'] = [class_map[t] for t in v if t in class_map]
                elif v in class_map:
                    new['type'] = class_map[v]
            elif isinstance(v, dict):
                new[k] = self._mk_rdflib_jsonld(v)
            else:
                new[k] = v
        return new

    def json_to_rdf(self, js, fmt=None):
        """Parse the JSON-LD structure ``js`` into an rdflib graph.

        ``js`` is first passed through ``_mk_rdflib_jsonld``.  When ``fmt`` is
        given, the graph serialized in that rdflib format is returned;
        otherwise the ``ConjunctiveGraph`` itself is returned.
        """
        serialized = json.dumps(self._mk_rdflib_jsonld(js))
        g = ConjunctiveGraph()
        g.parse(data=serialized, format='json-ld')
        if fmt:
            return g.serialize(format=fmt)
        return g

    def rdf_to_jsonld(self, rdf, fmt):
        """Convert serialized RDF (``rdf``, in rdflib format ``fmt``) to JSON-LD.

        The data is parsed with rdflib, serialized as JSON-LD, framed with the
        module-level ``frame_js`` and compacted against the module-level
        ``context_js``.  Returns the compacted structure.
        """
        g = ConjunctiveGraph()
        g.parse(data=rdf, format=fmt)
        out = g.serialize(format='json-ld')

        j2 = json.loads(out)
        j2 = {"@context": context_js, "@graph": j2}
        framed = frame(j2, frame_js)
        out = compact(framed, context_js)
        # Recursive cleaning of blank node ids is currently disabled:
        # out = self._clean_bnode_ids(out)
        return out

    def compact_and_clean(self, js):
        """Compact ``js`` and flatten a top-level ``@graph`` into the result.

        ``js`` is compacted against the module-level ``context_js``; the
        ``@context`` of the result is then replaced by the module-level
        ``context`` value.  If the result has an ``@graph`` entry, its items
        are copied to the top level and ``@graph`` is removed.

        NOTE(review): this assumes ``@graph`` is a dict (``.items()`` is
        called on it); a list-valued ``@graph`` would raise AttributeError.
        """
        newjs = compact(js, context_js)
        newjs['@context'] = context
        if "@graph" in newjs:
            for k, v in newjs['@graph'].items():
                newjs[k] = v
            del newjs['@graph']
        return newjs
+
# Locations of the documents exercised by the tests below.
context = "http://www.w3.org/ns/anno.jsonld"
frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld"
example = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/model/wd2/examples/correct/anno4.json"
example_ttl = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/examples/correct/anno1.ttl"
# ontology = "https://www.w3.org/ns/oa.ttl"
ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl"

validator = Validator()

# Download and parse the context, the example annotation and the frame.
context_js = json.loads(fetch(context))
example_js = json.loads(fetch(example))
data = fetch(frameURI)
frame_js = json.loads(data)
+
# Test1: JSON-LD context document can be parsed without errors by JSON-LD validators
# Context document is parsable if it can be loaded and used to expand the example
try:
    # The second argument of expand() is an options dict; the context has to
    # be supplied through its "expandContext" entry to actually be used.
    expanded = expand(example_js, {"expandContext": context_js})
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    print("Context is invalid, failed Test 1")
+
+
# Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples.
# Pre-set so that the name exists for the later tests even if conversion fails.
jsonld_nq = None
try:
    jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"})
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    print("Cannot use context to convert JSON-LD to NQuads")
+
+
# Test3: Graphs produced are isomorphic
try:
    # Graph built by rdflib from the JSON example ...
    rl_g = validator.json_to_rdf(example_js)
    # ... versus the graph parsed from the output pyld produced in Test2.
    js_g = ConjunctiveGraph()
    js_g.parse(data=jsonld_nq, format="nt")
    # Explicit checks rather than assert, which is stripped under "python -O".
    if len(rl_g.store) != len(js_g.store):
        raise AssertionError("graphs contain a different number of triples")
    if not rl_g.isomorphic(js_g):
        raise AssertionError("graphs are not isomorphic")
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    print("Different triples from two parsers, or non-isomorphic graphs")
+
+
# Test4: The graphs produced can be converted back into JSON-LD without loss of information
try:
    js = validator.rdf_to_jsonld(jsonld_nq, "nt")
    js2 = validator.compact_and_clean(js)
    # Explicit check rather than assert, which is stripped under "python -O".
    if js2 != example_js:
        raise AssertionError("round-tripped JSON-LD differs from the example")
except Exception:
    print("Failed to recompact parsed data")
    # Re-raise: a failure here aborts the script with the original traceback.
    raise
+
+
# Test5: ontology documents can be parsed without errors by validators
# A parse error is left to propagate and abort the script; the consistency
# checks below need the parsed ontology in g.
g = ConjunctiveGraph()
g.parse(ontology, format="turtle")
+
+
# Test6: ontology is internally consistent with respect to domains, ranges, etc

# step 1: find all the classes.
rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range")
# rdfs:Resource lives in the rdf-schema namespace, not in rdf-syntax-ns.
rdfsresource = URIRef("http://www.w3.org/2000/01/rdf-schema#Resource")
rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection")
skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept")

# External classes that are acceptable as superclasses.
otherClasses = [asColl, skosConcept]
classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))

# Set copy of the declared classes for constant-time membership tests.
known_classes = set(classes)
xsd_namespace = "http://www.w3.org/2001/XMLSchema#"

# step 2: every property domain must be a class declared in the ontology.
# Reported (not asserted) so all problems are listed, like the checks below.
for p in props:
    for d in g.objects(p, rdfsdomain):
        if d not in known_classes:
            print("Found inconsistent property: %s has unknown domain" % p)

# step 3: every property range must be a declared class, an XML Schema
# datatype, or rdfs:Resource.
for p in props:
    for r in g.objects(p, rdfsrange):
        if r in known_classes or r == rdfsresource or str(r).startswith(xsd_namespace):
            continue
        print("Found inconsistent property: %s has unknown range" % p)

# step 4: every superclass must be a declared class or one of otherClasses.
for c in classes:
    for parent in g.objects(c, rdfssco):
        if parent not in known_classes and parent not in otherClasses:
            print("Found inconsistent class: %s has unknown superClass" % c)


print("Done.")