# Author: Rob Sanderson (azaroth42@gmail.com) # License: Apache2 # Last Modified: 2016-09-02 from __future__ import print_function import json from rdflib import ConjunctiveGraph, URIRef from pyld import jsonld from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor import urllib # Stop code from looking up the contexts online for every operation docCache = {} def fetch(url): fh = urllib.urlopen(url) data = fh.read() fh.close() return data def load_document_and_cache(url): if docCache.has_key(url): return docCache[url] doc = { 'contextUrl': None, 'documentUrl': None, 'document': '' } data = fetch(url) doc['document'] = data; docCache[url] = doc return doc jsonld.set_document_loader(load_document_and_cache) class Validator(object): def __init__(self): self.rdflib_class_map = { "Annotation": "oa:Annotation", "Dataset": "dctypes:Dataset", "Image": "dctypes:StillImage", "Video": "dctypes:MovingImage", "Audio": "dctypes:Sound", "Text": "dctypes:Text", "TextualBody": "oa:TextualBody", "ResourceSelection": "oa:ResourceSelection", "SpecificResource": "oa:SpecificResource", "FragmentSelector": "oa:FragmentSelector", "CssSelector": "oa:CssSelector", "XPathSelector": "oa:XPathSelector", "TextQuoteSelector": "oa:TextQuoteSelector", "TextPositionSelector": "oa:TextPositionSelector", "DataPositionSelector": "oa:DataPositionSelector", "SvgSelector": "oa:SvgSelector", "RangeSelector": "oa:RangeSelector", "TimeState": "oa:TimeState", "HttpState": "oa:HttpRequestState", "CssStylesheet": "oa:CssStyle", "Choice": "oa:Choice", "Composite": "oa:Composite", "List": "oa:List", "Independents": "oa:Independents", "Person": "foaf:Person", "Software": "as:Application", "Organization": "foaf:Organization", "AnnotationCollection": "as:OrderedCollection", "AnnotationPage": "as:OrderedCollectionPage", "Audience": "schema:Audience" } def _clean_bnode_ids(self, js): new = {} for (k,v) in js.items(): if k == 'id' and v.startswith("_:"): continue elif type(v) == dict: # recurse res = self._clean_bnode_ids(v) new[k] = res else: new[k] = v return new def _mk_rdflib_jsonld(self, js): # rdflib's json-ld implementation sucks # Pre-process to make it work # recurse the structure looking for types, and replacing them. new = {} for (k,v) in js.items(): if k == 'type': if type(v) == list: nl = [] for i in v: if self.rdflib_class_map.has_key(i): nl.append(self.rdflib_class_map[i]) new['type'] = nl else: if self.rdflib_class_map.has_key(v): new['type'] = self.rdflib_class_map[v] elif type(v) == dict: # recurse res = self._mk_rdflib_jsonld(v) new[k] = res else: new[k] = v return new def json_to_rdf(self, js, fmt=None): d2 = self._mk_rdflib_jsonld(js) js = json.dumps(d2) g = ConjunctiveGraph() g.parse(data=js, format='json-ld') if fmt: out = g.serialize(format=fmt) return out else: return g def rdf_to_jsonld(self, rdf, fmt): g = ConjunctiveGraph() g.parse(data=rdf, format=fmt) out = g.serialize(format='json-ld') j2 = json.loads(out) j2 = {"@context": context_js, "@graph": j2} framed = frame(j2, frame_js) out = compact(framed, context_js) # recursively clean blank node ids #out = self._clean_bnode_ids(out) return out def compact_and_clean(self, js): newjs = compact(js, context_js) newjs['@context'] = context if newjs.has_key("@graph"): for k,v in newjs['@graph'].items(): newjs[k] = v del newjs['@graph'] return newjs validator = Validator() example = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/model/wd2/examples/correct/anno4.json" example_ttl = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/examples/correct/anno1.ttl" context = "http://www.w3.org/ns/anno.jsonld" frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld" # ontology = "https://www.w3.org/ns/oa.ttl" ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl" data = fetch(context) context_js = json.loads(data) data = fetch(example) example_js = json.loads(data) data = fetch(frameURI) frame_js = json.loads(data) # Test1: JSON-LD context document can be parsed without errors by JSON-LD validators # Context document is parsable if it can be loaded and used to expand the example try: expanded = expand(example_js, context_js) except: print("Context is invalid, failed Test 1") # Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples. try: jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"}) except: print("Cannot use context to convert JSON-LD to NQuads") # Test3: Graphs produced are isomorphic try: rl_g = validator.json_to_rdf(example_js) g = ConjunctiveGraph() js_g = g.parse(data=jsonld_nq, format="nt") rl_g_nq = rl_g.serialize(format="nquads") assert(len(rl_g.store) == len(js_g.store)) assert(rl_g.isomorphic(js_g)) except: print("Different triples from two parsers, or non-isomorphic graphs") # Test4: The graphs produced can be converted back into JSON-LD without loss of information try: js = validator.rdf_to_jsonld(jsonld_nq, "nt") js2 = validator.compact_and_clean(js) assert(js2 == example_js) except: print("Failed to recompact parsed data") raise # Test5: ontology documents can be parsed without errors by validators try: g = ConjunctiveGraph().parse(ontology, format="turtle") except: raise # Test6: ontology is internally consistent with respect to domains, ranges, etc # step 1: find all the classes. rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain") rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range") rdfsresource = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Resource") rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf") asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection") skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept") otherClasses = [asColl, skosConcept] classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class"))) props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"))) for p in props: domains = list(g.objects(p, rdfsdomain)) for d in domains: assert(d in classes) for p in props: ranges = list(g.objects(p, rdfsrange)) for r in ranges: if not r in classes and not str(r).startswith("http://www.w3.org/2001/XMLSchema#") and \ not r == rdfsresource: print("Found inconsistent property: %s has unknown range" % p) for c in classes: parents = list(g.objects(c, rdfssco)) for p in parents: if not p in classes and not p in otherClasses: print("Found inconsistent class: %s has unknown superClass" % c) print("Done.")