diff options
Diffstat (limited to 'testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py')
-rw-r--r-- | testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py b/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py new file mode 100644 index 0000000000..7d9cc5b8a2 --- /dev/null +++ b/testing/web-platform/tests/annotation-vocab/tools/vocab_tester.py @@ -0,0 +1,242 @@ + +# Author: Rob Sanderson (azaroth42@gmail.com) +# License: Apache2 +# Last Modified: 2016-09-02 + +from __future__ import print_function + +import json +from rdflib import ConjunctiveGraph, URIRef +from pyld import jsonld +from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor +import urllib + +# Stop code from looking up the contexts online for every operation +docCache = {} + +def fetch(url): + fh = urllib.urlopen(url) + data = fh.read() + fh.close() + return data + +def load_document_and_cache(url): + if docCache.has_key(url): + return docCache[url] + + doc = { + 'contextUrl': None, + 'documentUrl': None, + 'document': '' + } + data = fetch(url) + doc['document'] = data; + docCache[url] = doc + return doc + +jsonld.set_document_loader(load_document_and_cache) + +class Validator(object): + + def __init__(self): + + self.rdflib_class_map = { + "Annotation": "oa:Annotation", + "Dataset": "dctypes:Dataset", + "Image": "dctypes:StillImage", + "Video": "dctypes:MovingImage", + "Audio": "dctypes:Sound", + "Text": "dctypes:Text", + "TextualBody": "oa:TextualBody", + "ResourceSelection": "oa:ResourceSelection", + "SpecificResource": "oa:SpecificResource", + "FragmentSelector": "oa:FragmentSelector", + "CssSelector": "oa:CssSelector", + "XPathSelector": "oa:XPathSelector", + "TextQuoteSelector": "oa:TextQuoteSelector", + "TextPositionSelector": "oa:TextPositionSelector", + "DataPositionSelector": "oa:DataPositionSelector", + "SvgSelector": "oa:SvgSelector", + "RangeSelector": "oa:RangeSelector", + "TimeState": "oa:TimeState", + "HttpState": "oa:HttpRequestState", + "CssStylesheet": "oa:CssStyle", + "Choice": "oa:Choice", + "Composite": 
"oa:Composite", + "List": "oa:List", + "Independents": "oa:Independents", + "Person": "foaf:Person", + "Software": "as:Application", + "Organization": "foaf:Organization", + "AnnotationCollection": "as:OrderedCollection", + "AnnotationPage": "as:OrderedCollectionPage", + "Audience": "schema:Audience" + } + + + def _clean_bnode_ids(self, js): + new = {} + for (k,v) in js.items(): + if k == 'id' and v.startswith("_:"): + continue + elif type(v) == dict: + # recurse + res = self._clean_bnode_ids(v) + new[k] = res + else: + new[k] = v + return new + + def _mk_rdflib_jsonld(self, js): + # rdflib's json-ld implementation sucks + # Pre-process to make it work + # recurse the structure looking for types, and replacing them. + new = {} + for (k,v) in js.items(): + if k == 'type': + if type(v) == list: + nl = [] + for i in v: + if self.rdflib_class_map.has_key(i): + nl.append(self.rdflib_class_map[i]) + new['type'] = nl + else: + if self.rdflib_class_map.has_key(v): + new['type'] = self.rdflib_class_map[v] + elif type(v) == dict: + # recurse + res = self._mk_rdflib_jsonld(v) + new[k] = res + else: + new[k] = v + return new + + def json_to_rdf(self, js, fmt=None): + d2 = self._mk_rdflib_jsonld(js) + js = json.dumps(d2) + g = ConjunctiveGraph() + g.parse(data=js, format='json-ld') + if fmt: + out = g.serialize(format=fmt) + return out + else: + return g + + def rdf_to_jsonld(self, rdf, fmt): + + g = ConjunctiveGraph() + g.parse(data=rdf, format=fmt) + out = g.serialize(format='json-ld') + + j2 = json.loads(out) + j2 = {"@context": context_js, "@graph": j2} + framed = frame(j2, frame_js) + out = compact(framed, context_js) + # recursively clean blank node ids + #out = self._clean_bnode_ids(out) + return out + + def compact_and_clean(self, js): + newjs = compact(js, context_js) + newjs['@context'] = context + if newjs.has_key("@graph"): + for k,v in newjs['@graph'].items(): + newjs[k] = v + del newjs['@graph'] + return newjs + +validator = Validator() + +example = 
"https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/model/wd2/examples/correct/anno4.json" +example_ttl = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/examples/correct/anno1.ttl" +context = "http://www.w3.org/ns/anno.jsonld" +frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld" +# ontology = "https://www.w3.org/ns/oa.ttl" +ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl" + +data = fetch(context) +context_js = json.loads(data) +data = fetch(example) +example_js = json.loads(data) +data = fetch(frameURI) +frame_js = json.loads(data) + +# Test1: JSON-LD context document can be parsed without errors by JSON-LD validators +# Context document is parsable if it can be loaded and used to expand the example +try: + expanded = expand(example_js, context_js) +except: + print("Context is invalid, failed Test 1") + + +# Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples. 
try:
    jsonld_nq = to_rdf(
        example_js,
        {"base": "http://example.org/", "format": "application/nquads"},
    )
except Exception:
    print("Cannot use context to convert JSON-LD to NQuads")


# Test3: Graphs produced are isomorphic
# The example is parsed once by rdflib (via the validator) and once by pyld
# (the N-Quads from Test2); both graphs must hold the same number of triples
# and be isomorphic. Explicit raises are used instead of assert so the checks
# still run under "python -O".
try:
    rl_g = validator.json_to_rdf(example_js)
    g = ConjunctiveGraph()
    js_g = g.parse(data=jsonld_nq, format="nt")
    if len(rl_g.store) != len(js_g.store):
        raise AssertionError("parsers produced different numbers of triples")
    if not rl_g.isomorphic(js_g):
        raise AssertionError("graphs are not isomorphic")
except Exception:
    print("Different triples from two parsers, or non-isomorphic graphs")


# Test4: The graphs produced can be converted back into JSON-LD without loss of information
# A failure here is reported and then re-raised, which stops the script.
try:
    js = validator.rdf_to_jsonld(jsonld_nq, "nt")
    js2 = validator.compact_and_clean(js)
    if not (js2 == example_js):
        raise AssertionError("round-tripped JSON-LD differs from the example")
except Exception:
    print("Failed to recompact parsed data")
    raise


# Test5: ontology documents can be parsed without errors by validators
# Any parse error propagates and stops the script.
g = ConjunctiveGraph().parse(ontology, format="turtle")


# Test6: ontology is internally consistent with respect to domains, ranges, etc

# step 1: find all the classes.
rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range")
# rdfs:Resource is defined in the RDF Schema namespace.
rdfsresource = URIRef("http://www.w3.org/2000/01/rdf-schema#Resource")
# IRI this check previously compared against (rdf-syntax namespace); still
# accepted so that ranges tolerated before are not newly reported.
_legacy_rdfsresource = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Resource")
rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection")
skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept")

otherClasses = [asColl, skosConcept]
classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))

# Sets give constant-time membership tests inside the loops below.
_known_classes = set(classes)
_resource_iris = {rdfsresource, _legacy_rdfsresource}
_external_classes = set(otherClasses)
_xsd_prefix = "http://www.w3.org/2001/XMLSchema#"

# Every declared domain must be a class of the ontology. An unknown domain
# aborts the script; an explicit raise keeps the check active under
# "python -O", where assert statements are removed.
for prop in props:
    for domain in g.objects(prop, rdfsdomain):
        if domain not in _known_classes:
            raise AssertionError(
                "Property %s has unknown domain %s" % (prop, domain))

# A range may be an ontology class, an XML Schema datatype or rdfs:Resource.
for prop in props:
    for rng in g.objects(prop, rdfsrange):
        if rng in _known_classes or rng in _resource_iris:
            continue
        if str(rng).startswith(_xsd_prefix):
            continue
        print("Found inconsistent property: %s has unknown range" % prop)

# A superclass must be an ontology class or one of the known external classes.
for cls in classes:
    for parent in g.objects(cls, rdfssco):
        if parent not in _known_classes and parent not in _external_classes:
            print("Found inconsistent class: %s has unknown superClass" % cls)


print("Done.")