def test_default_with_datatype():
    """Check default-valued columns in the generated RDF.

    Converts ``tests/virtual1.csv`` with its datatype metadata to Turtle,
    parses the result, and for the subjects ``sub-1`` and ``sub-2`` checks
    that ``active`` is a truthy ``xsd:boolean`` literal, that ``stringprop1``
    equals ``"some string"`` and that ``stringprop2`` contains no ``%20``.
    """
    converter = CSVW(
        csv_path='tests/virtual1.csv',
        metadata_path='tests/virtual1.default.datatype.csv-metadata.json')
    graph = ConjunctiveGraph()
    graph.parse(data=converter.to_rdf(), format="turtle")
    ns = Namespace("http://example.org/")

    def single_literal(subject, predicate):
        # Exactly one triple must match, and its object must be a Literal.
        matches = list(graph.triples((subject, predicate, None)))
        assert len(matches) == 1
        obj = matches[0][2]
        assert isinstance(obj, Literal)
        return obj

    for index in (1, 2):
        subject = ns['sub-{}'.format(index)]

        active = single_literal(subject, ns['active'])
        assert active.datatype == XSD.boolean
        assert active.value

        first_string = single_literal(subject, ns['stringprop1'])
        assert first_string.value == "some string"

        second_string = single_literal(subject, ns['stringprop2'])
        assert "%20" not in second_string.value
# Example source code using the Python class ConjunctiveGraph()
def test_null_values_with_single_string():
    """Check that configured null markers produce no subjects or objects.

    Converts ``tests/null1.csv`` with ``tests/null1.single.csv-metadata.json``
    and inspects the parsed Turtle output.  ``subj_ns``, ``id_uri`` and
    ``sect_uri`` are expected to be provided by the enclosing module.
    """
    converter = CSVW(csv_path="tests/null1.csv",
                     metadata_path="tests/null1.single.csv-metadata.json")
    graph = ConjunctiveGraph()
    graph.parse(data=converter.to_rdf(), format="turtle")

    def count(subject, predicate, obj):
        # Number of triples matching the fully specified pattern.
        return len(list(graph.triples((subject, predicate, obj))))

    # The null marker must never become a subject.
    subjects = set(graph.subjects())
    assert subj_ns['null_key'] not in subjects
    assert subj_ns['1'] in subjects
    assert len(subjects) == 4

    # Null-valued cells must not become objects either.
    objects = set(graph.objects())
    assert Literal('null_key', datatype=XSD.token) not in objects
    assert Literal('null_sector') not in objects
    assert Literal('null_id', datatype=XSD.token) not in objects
    assert Literal('PUBLIC') in objects
    assert Literal('12', datatype=XSD.token) in objects

    # Spot checks: within one row the null cell is dropped while the
    # non-null cell is still emitted.
    assert count(subj_ns['2'], id_uri,
                 Literal('null_id', datatype=XSD.token)) == 0
    assert count(subj_ns['2'], sect_uri, Literal('PRIVATE')) == 1
    assert count(subj_ns['3'], sect_uri, Literal('null_sector')) == 0
    assert count(subj_ns['3'], id_uri,
                 Literal('12', datatype=XSD.token)) == 1
def test_literals_with_new_lines():
    """Check that quoted multi-line CSV cells survive conversion to RDF.

    Converts ``tests/parsing.quoted_newlines.csv`` and verifies the line
    counts of the ``taxi`` and ``multi-hop flight`` descriptions, plus the
    presence of a right single quotation mark (U+2019) and of ``('')`` in
    the ``dinner`` description.
    """
    converter = CSVW(
        csv_path="tests/parsing.quoted_newlines.csv",
        metadata_path="tests/parsing.quoted_newlines.csv-metadata.json")
    graph = ConjunctiveGraph()
    graph.parse(data=converter.to_rdf(), format="turtle")

    expense = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    def description_of(subject):
        # Each expense must carry exactly one literal description.
        matches = list(graph.triples((subject, desc, None)))
        assert len(matches) == 1
        literal = matches[0][2]
        assert isinstance(literal, Literal)
        return literal

    taxi_desc = description_of(expense['taxi'])
    assert len(taxi_desc.value.splitlines()) == 2

    flight_desc = description_of(
        URIRef("http://example.org/expense/multi-hop%20flight"))
    assert len(flight_desc.value.splitlines()) == 4

    dinner_desc = description_of(expense['dinner'])
    assert u'\u2019' in dinner_desc, "Expected to read unicode characters"
    assert u"('')" in dinner_desc, "Expected to read apostrophes"
def test_literals_with_escaped_quotes():
    """Check that escaped quotes and backslashes in CSV cells are preserved.

    Converts ``tests/parsing.escaped_quotes.csv`` with its metadata file,
    parses the Turtle output and compares the description literal of four
    expenses against the expected text.
    """
    csv_path = "tests/parsing.escaped_quotes.csv"
    metadata_path = "tests/parsing.escaped_quotes.csv-metadata.json"
    csvw = CSVW(csv_path=csv_path,
                metadata_path=metadata_path)
    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert taxi_desc.value == "go from x to y"

    quoted_expense_triples = list(g.triples(
        (URIRef("http://example.org/expense/quoted%20expense"), desc, None)))
    assert len(quoted_expense_triples) == 1
    quoted_expense_desc = quoted_expense_triples[0][2]
    assert isinstance(quoted_expense_desc, Literal)
    assert quoted_expense_desc.value == "for some reason it came with quotes in it"

    flight_triples = list(g.triples((ns['flight'], desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert flight_desc.value == "had to fly \"escaped quotes business\" for this trip"

    car_triples = list(g.triples((ns['car'], desc, None)))
    assert len(car_triples) == 1
    car_desc = car_triples[0][2]
    assert isinstance(car_desc, Literal)
    # The backslash is written as "\\" so the literal no longer relies on the
    # invalid escape sequence "\ "; the runtime string value is unchanged.
    assert car_desc.value == " some \\ in it to be escaped"
def verify_rdf(rdf_output):
    """Assert the expected size of a Turtle document.

    Args:
        rdf_output: Turtle text to parse.

    The parsed graph must hold 6 triples with 2 distinct subjects,
    3 distinct predicates and 6 distinct objects.
    """
    graph = ConjunctiveGraph()
    graph.parse(data=rdf_output, format="turtle")

    assert len(graph) == 6
    distinct_subjects = set(graph.subjects())
    assert len(distinct_subjects) == 2
    distinct_predicates = set(graph.predicates())
    assert len(distinct_predicates) == 3
    distinct_objects = set(graph.objects())
    assert len(distinct_objects) == 6
def test_empty():
    """Check that converting ``tests/empty.csv`` yields a graph with no triples."""
    converter = CSVW(csv_path="tests/empty.csv",
                     metadata_path="tests/empty.csv-metadata.json")
    turtle_text = converter.to_rdf()

    graph = ConjunctiveGraph()
    graph.parse(data=turtle_text, format="turtle")
    assert len(graph) == 0
def verify_rdf_contents(contents, fmt):
    """Assert the structure and datatypes of the books RDF output.

    Args:
        contents: Serialized RDF text.
        fmt: rdflib parser format name used to read ``contents``.

    Checks the total triple count against
    ``NUM_SUBJECTS * NUM_TRIPLES_PER_SUBJ``, the set of ISBN subjects, and
    that each subject has exactly one literal of the expected XSD datatype
    for ``isbnnumber``, ``pagecount``, ``hardcover`` and ``price``.
    """
    graph = ConjunctiveGraph()
    graph.parse(data=contents, format=fmt)
    books = Namespace('http://www.books.org/')
    isbn = Namespace("http://www.books.org/isbn/")

    # Total number of triples.
    triple_count = sum(1 for _ in graph.triples((None, None, None)))
    assert triple_count == NUM_SUBJECTS * NUM_TRIPLES_PER_SUBJ

    # Set of subjects.
    subjs = set(graph.subjects())
    expected_subjs = ["0062316095", "0374532508", "1610391845", "0374275637"]
    assert len(subjs) == len(expected_subjs)

    # Property name -> datatype its single literal value must carry.
    expected_datatypes = (
        ('isbnnumber', XSD.positiveInteger),
        ('pagecount', XSD.unsignedShort),
        ('hardcover', XSD.boolean),
        ('price', XSD.decimal),
    )

    for code in expected_subjs:
        subject = isbn[code]
        assert subject in subjs
        for prop, datatype in expected_datatypes:
            matches = list(graph.triples((subject, books[prop], None)))
            assert len(matches) == 1
            value = matches[0][2]
            assert isinstance(value, Literal)
            assert value.datatype == datatype
def test_bibcat_rules_blank_graphs_rml_rules_error(self):
    """Constructing a Processor from an empty graph must raise.

    Exercised with both an empty ``rdflib.Graph`` and an empty
    ``rdflib.ConjunctiveGraph`` as ``rml_rules``.
    """
    with self.assertRaises(Exception):
        processor.Processor(rml_rules=rdflib.Graph())
    with self.assertRaises(Exception):
        processor.Processor(rml_rules=rdflib.ConjunctiveGraph())
def __init__(self, rml_rules):
    """Load RML rules and index the triple maps they define.

    Args:
        rml_rules: One of
            * a list of rules, each either a filesystem path or a name
              resolved through ``get_map``;
            * an ``rdflib.Graph`` / ``rdflib.ConjunctiveGraph`` used as-is;
            * a single filesystem path; or
            * a single name resolved through ``get_map``.

    After loading, every namespace bound in the rules graph is set as an
    attribute on ``NS_MGR`` and one entry per ``GET_TRIPLE_MAPS`` result
    row is stored in ``self.triple_maps``.
    """
    self.rml = rdflib.Graph()
    if isinstance(rml_rules, list):
        for rule in rml_rules:
            # A rule that exists on disk is read from there; anything else
            # is looked up through get_map.
            if os.path.exists(rule):
                with open(rule) as rule_file:
                    turtle_text = rule_file.read()
            else:
                turtle_text = get_map(rule).decode()
            self.rml.parse(data=turtle_text, format='turtle')
    elif isinstance(rml_rules, (rdflib.Graph, rdflib.ConjunctiveGraph)):
        self.rml = rml_rules
    elif os.path.exists(rml_rules):
        self.rml.parse(rml_rules, format='turtle')
    else:
        self.rml.parse(data=get_map(rml_rules).decode(), format='turtle')

    # Populate the namespace manager from the prefixes bound in the rules.
    for prefix, namespace in self.rml.namespaces():
        setattr(NS_MGR, prefix, rdflib.Namespace(namespace))

    self.output = None
    self.source = None
    self.triplestore_url = None
    self.parents = set()
    self.constants = dict(version=__version__)
    self.triple_maps = dict()

    for row in self.rml.query(GET_TRIPLE_MAPS):
        triple_map_iri = row[0]
        # The entry is registered before its parts are computed, keeping
        # the same order of operations as the assignments below.
        entry = SimpleNamespace()
        self.triple_maps[str(triple_map_iri)] = entry
        entry.logicalSource = self.__logical_source__(triple_map_iri)
        entry.subjectMap = self.__subject_map__(triple_map_iri)
        entry.predicateObjectMap = \
            self.__predicate_object_map__(triple_map_iri)
def run(self, input_graph, rdf_classes=None):
    """Take a graph and deduplicate entities of various RDF classes.

    Args:
        input_graph: rdflib.Graph or rdflib.ConjunctiveGraph to process;
            it is stored on ``self.output``.
        rdf_classes: optional list of RDF classes to process in addition
            to ``self.default_classes``.  ``None`` (the default) means no
            extra classes.

    For every entity typed with one of the classes,
    ``self.__get_or_mint__`` is called with its ``rdfs:label`` and again
    with its ``rdf:value``, for each of the two that is present.
    """
    self.output = input_graph
    # None replaces the former mutable ``[]`` default argument.
    if rdf_classes is None:
        rdf_classes = []
    all_classes = self.default_classes + rdf_classes
    for class_ in all_classes:
        for entity in self.output.subjects(
                predicate=rdflib.RDF.type,
                object=class_):
            label = self.output.value(subject=entity,
                                      predicate=rdflib.RDFS.label)
            if label is not None:
                self.__get_or_mint__(entity, class_, label)
            value = self.output.value(subject=entity,
                                      predicate=rdflib.RDF.value)
            if value is not None:
                self.__get_or_mint__(entity, class_, value)
def __init__(self):
    """Load the namespace graph, prepare the SPARQL queries and subscribe.

    The namespaces file named by ``config.NAMESPACES_PATH`` is parsed into
    a fresh ``ConjunctiveGraph``; its prefix bindings are then used as
    ``initNs`` to turn each query attribute into a prepared query.
    Finally the instance subscribes to the ``/aide/rdf`` topic.
    """
    self.graph = rdflib.ConjunctiveGraph()
    self.graph.parse(config.NAMESPACES_PATH, format=config.NAMESPACES_FORMAT)
    prefixes = dict(self.graph.namespaces())

    # Each attribute is replaced, in this order, by its prepared form.
    query_attributes = (
        "property_path",
        "subject_and_class_query",
        "domains_query",
        "from_entity",
        "single_query",
    )
    for attribute in query_attributes:
        prepared = prepareQuery(getattr(self, attribute), initNs=prefixes)
        setattr(self, attribute, prepared)

    rospy.Subscriber("/aide/rdf", RdfGraphStamped, self.extend_onthology)
def query_sparql_endpoint(sparql_endpoint, query=all_data_q):
    """Run a SPARQL query against a remote endpoint.

    Args:
        sparql_endpoint: Endpoint passed to ``graph.open``.
        query: Query to execute; defaults to ``all_data_q``.

    Returns:
        The result of ``graph.query(query)``.
    """
    graph = rdflib.ConjunctiveGraph('SPARQLStore')
    # The value returned by open() was never used, so it is not bound.
    graph.open(sparql_endpoint)
    return graph.query(query)
def __init__(self, db_path):
    """Create an empty graph holder.

    Args:
        db_path: Stored unchanged on ``self.path``.
    """
    self.path = db_path
    self.g = ConjunctiveGraph()
    # Both start empty here.
    self.choices = set()
    self.labels = dict()
def create_graph(self):
    """Populate ``self.g`` from parsed miRBase data and GOA annotations.

    The store is opened at ``self.path + mirbasegraph_name``.  Every entry
    returned by ``self.parse_mirbase`` becomes an instance of SO_0000276
    with a label, a comment and its non-blank previous names; each of its
    mature entries is added the same way (without a comment) and linked
    through ``stemloopOf``.  Afterwards ``self.get_label_to_acc()`` is
    called, ``self.choices`` is set to the label keys, and GO ids from
    ``data/goa_human_rna.gaf`` are attached to the labels found there.
    """
    self.g.open(self.path + mirbasegraph_name, create=True)
    data = self.parse_mirbase(self.path)
    mirna_class = URIRef("http://purl.obolibrary.org/obo/SO_0000276")
    add = self.g.add

    for mid in data:
        entry = data[mid]
        mirna_instance = URIRef(MIRBASE + entry["acc"])
        add((mirna_instance, RDF.type, mirna_class))
        add((mirna_instance, RDFS.label, Literal(entry["name"])))
        add((mirna_instance, RDFS.comment, Literal(entry["description"])))

        for old_name in entry["previous_names"]:
            # Blank or whitespace-only names are skipped.
            if not old_name.strip():
                continue
            add((mirna_instance, MIRBASE["previous_acc"], Literal(old_name)))

        matures = entry["mature"]
        for mature_key in matures:
            mature = matures[mature_key]
            mature_instance = URIRef(MIRBASE + mature["acc"])
            add((mature_instance, RDF.type, mirna_class))
            add((mature_instance, RDFS.label, Literal(mature["name"])))
            for old_name in mature["previous_names"]:
                if not old_name.strip():
                    continue
                add((mature_instance, MIRBASE["previous_acc"],
                     Literal(old_name)))
            add((mirna_instance, MIRBASE["stemloopOf"], mature_instance))

    self.get_label_to_acc()
    self.choices = self.labels.keys()

    goa_data = self.parse_goa_gaf("data/goa_human_rna.gaf")
    for label in self.labels:
        if label not in goa_data:
            continue
        for go_id in goa_data[label]:
            add((self.labels[label], MIRBASE["goa"], Literal(go_id)))
def parse(self, source, graph, encoding="utf-8"):
    """Parse TriG from ``source`` into the store behind ``graph``.

    Args:
        source: Input source; must provide ``getPublicId``,
            ``getSystemId`` and ``getByteStream``.
        graph: Graph whose store receives the parsed data; it becomes the
            default context of the internal ConjunctiveGraph.
        encoding: Must be ``None`` or ``"utf-8"``.

    Raises:
        Exception: If ``encoding`` is anything other than ``None`` or
            ``"utf-8"``.
        AssertionError: If ``graph.store`` is not context-aware.
    """
    if encoding not in [None, "utf-8"]:
        # The two message parts used to form a tuple, so applying ``%``
        # raised TypeError before the intended message could be built.
        raise Exception(
            "TriG files are always utf-8 encoded, "
            "I was passed: %s" % (encoding,))

    # we're currently being handed a Graph, not a ConjunctiveGraph
    assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

    conj_graph = ConjunctiveGraph(store=graph.store)
    conj_graph.default_context = graph  # TODO: CG __init__ should have a
                                        # default_context arg
    # TODO: update N3Processor so that it can use conj_graph as the sink
    conj_graph.namespace_manager = graph.namespace_manager

    sink = RDFSink(conj_graph)

    baseURI = conj_graph.absolutize(
        source.getPublicId() or source.getSystemId() or "")
    p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)

    p.loadStream(source.getByteStream())

    # Carry the prefixes seen by the parser over to the graph.
    for prefix, namespace in p._bindings.items():
        conj_graph.bind(prefix, namespace)

    # return ???
def _get_dataset(self):
    """Return the current dataset.

    Raises:
        Exception: If ``self._dataset`` is ``None``.
    """
    dataset = self._dataset
    if dataset is not None:
        return dataset
    raise Exception(
        'You performed a query operation requiring '
        'a dataset (i.e. ConjunctiveGraph), but '
        'operating currently on a single graph.')
def __init__(self):
    """Create the graph, load the store file if present and bind prefixes.

    When ``storefn`` exists, ``storeuri`` is loaded as N3.  The prefixes
    ``dc``, ``foaf``, ``imdb`` and ``rev`` are bound afterwards.
    """
    self.graph = ConjunctiveGraph()
    if os.path.exists(storefn):
        self.graph.load(storeuri, format='n3')

    prefix_bindings = (
        ('dc', DC),
        ('foaf', FOAF),
        ('imdb', IMDB),
        ('rev', 'http://purl.org/stuff/rev#'),
    )
    for prefix, namespace in prefix_bindings:
        self.graph.bind(prefix, namespace)
def testData():
    """Compare an ``initBindings`` query with the equivalent ``VALUES`` query.

    Both queries bind ``?val`` to the literal ``'a'`` and must return the
    same set of rows.
    """
    data = ConjunctiveGraph()
    triples = [
        (URIRef('urn:a'), URIRef('urn:p'), Literal('a')),
        (URIRef('urn:b'), URIRef('urn:p'), Literal('b')),
    ]
    data += triples

    # NOTE(review): the queries run against ``g``, which this block does not
    # define (presumably a module-level graph), while ``data`` populated
    # above is never queried - confirm which graph is intended.
    with_init_bindings = g.query(
        "SELECT ?target WHERE { ?target <urn:p> ?val }",
        initBindings={'val': Literal('a')})
    a = set(with_init_bindings)
    with_values_clause = g.query(
        "SELECT ?target WHERE { ?target <urn:p> ?val } VALUES (?val) {('a')}")
    b = set(with_values_clause)

    assert a==b, "data: %r != %r"%(a,b)
def testSerialize(self):
    """Round-trip a multi-graph ConjunctiveGraph through TriX.

    Two named graphs plus one triple added directly to the conjunctive
    graph are serialized to TriX and parsed back; every quad read back
    must be present in the matching graph of the original store.
    """
    s1 = URIRef('store:1')
    r1 = URIRef('resource:1')
    r2 = URIRef('resource:2')

    label = URIRef('predicate:label')

    g1 = Graph(identifier=s1)
    g1.add((r1, label, Literal("label 1", lang="en")))
    g1.add((r1, label, Literal("label 2")))

    s2 = URIRef('store:2')
    g2 = Graph(identifier=s2)
    g2.add((r2, label, Literal("label 3")))

    g = ConjunctiveGraph()
    for s, p, o in g1.triples((None, None, None)):
        g.addN([(s, p, o, g1)])
    for s, p, o in g2.triples((None, None, None)):
        g.addN([(s, p, o, g2)])
    r3 = URIRef('resource:3')
    g.add((r3, label, Literal(4)))

    # NOTE(review): wrapping the result in BytesIO assumes serialize()
    # returns bytes - confirm for the rdflib version in use.
    r = g.serialize(format='trix')
    g3 = ConjunctiveGraph()

    g3.parse(BytesIO(r), format='trix')

    for q in g3.quads((None, None, None)):
        # TODO: Fix once getGraph/getContext is in conjunctive graph
        if isinstance(q[3].identifier, URIRef):
            tg = Graph(store=g.store, identifier=q[3].identifier)
        else:
            # BNode, this is a bit ugly
            # we cannot match the bnode to the right graph automagically
            # here I know there is only one anonymous graph,
            # and that is the default one, but this is not always the case
            tg = g.default_context
        # assertTrue replaces the deprecated alias assert_, which was
        # removed from unittest in Python 3.12.
        self.assertTrue(q[0:3] in tg)
def test_issue_250(self):
    """
    https://github.com/RDFLib/rdflib/issues/250

    When I have a ConjunctiveGraph with the default namespace set,
    for example

    import rdflib
    g = rdflib.ConjunctiveGraph()
    g.bind(None, "http://defaultnamespace")

    then the Trix serializer binds the default namespace twice in its XML
    output, once for the Trix namespace and once for the namespace I used:

    print(g.serialize(format='trix').decode('UTF-8'))

    <?xml version="1.0" encoding="utf-8"?>
    <TriX
      xmlns:xml="http://www.w3.org/XML/1998/namespace"
      xmlns="http://defaultnamespace"
      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
      xmlns="http://www.w3.org/2004/03/trix/trix-1/"
    />

    This test checks that only the TriX namespace is emitted as the
    default ``xmlns``.
    """
    graph = ConjunctiveGraph()
    graph.bind(None, "http://defaultnamespace")
    # NOTE(review): .decode() assumes serialize() returns bytes - confirm
    # for the rdflib version in use.
    sg = graph.serialize(format='trix').decode('UTF-8')
    # assertTrue replaces the deprecated alias assert_, which was removed
    # from unittest in Python 3.12.
    self.assertTrue(
        'xmlns="http://defaultnamespace"' not in sg, sg)
    self.assertTrue(
        'xmlns="http://www.w3.org/2004/03/trix/trix-1/' in sg, sg)