def __add_creators__(self, work_graph, work_uri, instance_uri):
    """Copy relators:<code> creator triples for an instance onto a new work graph.

    For every creator relator code recorded for the instance in
    self.processed, adds (work_uri, relators:<code>, agent_uri) triples.

    Args:
        work_graph(rdflib.Graph): RDF Graph of new BF Work
        work_uri(rdflib.URIRef): URI of the new BF Work
        instance_uri(rdflib.URIRef): URI of BF Instance
    """
    instance_key = str(instance_uri)
    # nothing recorded for this instance -> nothing to add
    if instance_key not in self.processed:
        return
    for code in self.creator_codes:
        if code not in self.processed[instance_key]:
            continue
        relator = getattr(NS_MGR.relators, code)
        for agent_uri in self.processed[instance_key][code]:
            work_graph.add((work_uri, relator, agent_uri))
# Example source code using the Python Graph() class (translated site caption)
def __add_work_title__(self, work_graph, work_uri, instance_uri):
    """Copy harvested title rows for an instance onto a new work graph.

    Creates one bf:WorkTitle blank node attached to the work and adds a
    mainTitle literal (plus subtitle, when present) for each title row.

    Args:
        work_graph(rdflib.Graph): RDF Graph of new BF Work
        work_uri(rdflib.URIRef): URI of the new BF Work
        instance_uri(rdflib.URIRef): URI of BF Instance
    """
    instance_key = str(instance_uri)
    if instance_key not in self.processed:
        return
    if "title" not in self.processed[instance_key]:
        return
    title_node = rdflib.BNode()
    work_graph.add((work_uri, NS_MGR.bf.title, title_node))
    work_graph.add((title_node, NS_MGR.rdf.type, NS_MGR.bf.WorkTitle))
    for row in self.processed[instance_key]["title"]:
        work_graph.add((title_node,
                        NS_MGR.bf.mainTitle,
                        rdflib.Literal(row["mainTitle"])))
        if row["subtitle"]:
            work_graph.add((title_node,
                            NS_MGR.bf.subtitle,
                            rdflib.Literal(row["subtitle"])))
def graph_member(self, ldp_root, c_id, obj=None):
    """Build an RDF graph describing one collection member for LDP storage.

    :param ldp_root: base URL of the LDP container
    :param c_id: collection identifier (encoded into the member URI)
    :param obj: member object; a fresh one is created when falsy
    :return: rdflib.Graph identified by the member node URI
    """
    obj = obj or self.member()
    node = URIRef(ldp_root + encoder.encode(c_id) + "/member/" + encoder.encode(obj.id))
    mappings = URIRef(node + "#mappings")
    g = Graph(identifier=node)
    g.add((node, RDF.type, RDA.Member))
    g.add((node, DCTERMS.identifier, Literal(obj.id)))
    g.add((node, RDA.location, Literal(obj.location)))
    # optional scalar attributes
    for attr, pred in (("datatype", RDA.datatype), ("ontology", RDA.ontology)):
        if hasattr(obj, attr):
            g.add((node, pred, Literal(getattr(obj, attr))))
    if hasattr(obj, 'mappings'):
        g.add((node, RDA.mappings, mappings))
        mp = obj.mappings
        if hasattr(mp, 'role'):
            g.add((mappings, RDA.role, URIRef(mp.role)))
        if hasattr(mp, 'index'):
            g.add((mappings, RDA.itemIndex, Literal(mp.index)))
        if hasattr(mp, 'dateAdded'):
            g.add((mappings, RDA.dateAdded, Literal(mp.dateAdded)))
    return g
def html_table_parser(self, res_name):
    """
    Instantiate HtmlTableParser over a resource's HTML and return its tables.

    :param res_name: resource that has to be analyzed
    :return: list of tables found, or "" when there is no HTML doc / no tables
    """
    html_doc_tree = self.html_object_getter(res_name)
    if not html_doc_tree:
        # no HTML document for this resource -> empty result
        return ""
    graph = rdflib.Graph()
    parser = HtmlTableParser.HtmlTableParser(html_doc_tree, self.chapter, graph,
                                             self.topic, res_name, self.utils, False)
    # NOTE(review): this relies on the parser instance's truthiness
    # (presumably a custom __bool__/__len__ reflecting found tables) — confirm.
    if parser:
        parser.analyze_tables()
        return parser.all_tables
    return ""
def __init__(self, edam_url):
    """
    Load the EDAM ontology into an rdflib graph and record its version.

    :param edam_url: path or URL to an EDAM.owl file; when None, the
        ontology is fetched from http://edamontology.org/EDAM.owl
    :type edam_url: STRING
    All the EDAM ontology will be contained in self.edam_ontology.
    """
    if edam_url is None:
        LOGGER.info("Loading EDAM info from http://edamontology.org/EDAM.owl")
        edam_url = "http://edamontology.org/EDAM.owl"
    else:
        # previously a provided edam_url was silently ignored and no
        # ontology was loaded at all
        LOGGER.info("Loading EDAM info from %s", edam_url)
    self.edam_ontology = rdflib.Graph()
    self.edam_ontology.parse(edam_url)
    # Get version of EDAM ontology; the doap prefix must be declared for
    # the SPARQL query to parse.
    version_query = """PREFIX doap: <http://usefulinc.com/ns/doap#>
        SELECT ?version WHERE {
        <http://edamontology.org> doap:Version ?version}"""
    for row in self.edam_ontology.query(version_query):
        self.version = row[0]
        break
def rdf(request):
    """Serialize the annotations stored for the requested uri as RDF/XML."""
    base = "http://localhost/metawiki/index.php/Special:URIResolver/Property-3A"
    uri = request.GET['uri']
    g = Graph()
    for annotation in Annotation.objects.filter(uri=uri):
        subj = URIRef(annotation.uri)
        if annotation.title:
            g.add((subj, URIRef(base + "Title"), Literal(annotation.title)))
        if annotation.notes:
            g.add((subj, URIRef(base + "Notes"), Literal(annotation.notes)))
        for tag in annotation.tags.all():
            g.add((subj, URIRef(base + "Tag"), Literal(tag.prefLabel)))
    status = HttpResponse(g.serialize(format='xml'))
    status["Content-Type"] = "application/rdf+xml"
    return status
def get_triples(org, out_file, weeks=1, span=None, format="turtle"):
    """Fetch publications for an organization and serialize them as RDF.

    Args:
        org: organization identifier passed through to the fetch helper
        out_file: destination path, or None to print the serialization
        weeks: look-back window in weeks (used only when span is None)
        span: explicit date span overriding the weeks window
        format: RDF serialization format (default "turtle")
    """
    g = Graph()
    if span is not None:
        records = get_publications_for_org(org, span=span)
    else:
        records = get_publications_for_org(org, weeks=int(weeks))
    # count from 1 so the report is the number of records, not the last
    # zero-based index (the original under-reported by one)
    count = 0
    for count, rec in enumerate(records, start=1):
        g += rec.to_rdf()
    trips = len(g)
    console("{} records found. {} triples created.".format(count, trips))
    if trips > 0:
        if out_file is not None:
            output_graph(g, destination=out_file, format=format)
        else:
            print(output_graph(g, format=format))
def add_vcard(self, position, name):
    """
    Build the vcard individual and name triples for one author.

    :param position: number in author order
    :param name: name as string - last, first, middle
    :return: (vcard individual URI, rdflib.Graph)
    """
    g = Graph()
    suffix = position + '-' + self.localid
    # vcard individual node
    individual_uri = D['vcard-individual-' + suffix]
    g.add((individual_uri, RDF.type, VCARD.Individual))
    # vcard name node with raw label
    name_uri = D['vcard-name-' + suffix]
    g.add((name_uri, RDF.type, VCARD.Name))
    g.add((name_uri, RDFS.label, Literal(name)))
    # split the raw string into first / last / middle components
    parsed = HumanName(name)
    g.add((name_uri, VCARD.givenName, Literal(parsed.first)))
    g.add((name_uri, VCARD.familyName, Literal(parsed.last)))
    if parsed.middle != "":
        g.add((name_uri, VIVO.middleName, Literal(parsed.middle)))
    # attach the name node to the individual
    g.add((individual_uri, VCARD.hasName, name_uri))
    return individual_uri, g
def authorship(self):
    """
    Build authorship statements plus vcards for each author.

    :return: rdflib.Graph
    """
    g = Graph()
    for rank, author in enumerate(self.authors(), start=1):
        position = str(rank)
        vcard_uri, vcard_graph = self.add_vcard(position, author)
        g += vcard_graph
        # authorship node linking the publication to the author vcard
        aship_uri = D['authorship-' + position + '-' + self.localid]
        g.add((aship_uri, RDF.type, VIVO.Authorship))
        g.add((aship_uri, VIVO.rank, Literal(rank)))
        g.add((aship_uri, VIVO.relates, self.pub_uri))
        g.add((aship_uri, VIVO.relates, vcard_uri))
    return g
def _vcard_email(self):
    """Build vcard email triples from the profile.

    Reads a pipe-delimited "emails" value, falling back to a single
    "email" key, then an empty list.

    :return: rdflib.Graph
    """
    g = Graph()
    try:
        emails = self.profile["emails"].split("|")
    except KeyError:
        try:
            emails = [self.profile['email']]
        except KeyError:
            emails = []
    # NOTE(review): every email reuses self.vcard_email_uri and
    # Resource.set replaces prior values, so only the last email survives
    # — confirm whether each email should mint its own URI.
    for email in emails:
        vt = Resource(g, self.vcard_email_uri)
        vt.set(RDF.type, VCARD.Work)
        # Label probably not necessary
        vt.set(RDFS.label, Literal(email))
        vt.set(VCARD.email, Literal(email))
    return g
def org_total_counts(orgs):
    """Load per-year publication counts for each org and sync them to the
    triple store's named graph."""
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        for item in load_incites_json_file(org_name, 'total'):
            year, count = item['year'], item['count']
            curi = D['pubcount-' + ln + '-' + str(year)]
            g.add((curi, RDF.type, WOS.InCitesPubPerYear))
            g.add((curi, RDFS.label, Literal("{} - {}".format(year, count))))
            g.add((curi, WOS.number, Literal(count)))
            g.add((curi, WOS.year, Literal(year)))
            g.add((org_uri, VIVO.relates, curi))
    ng = "http://localhost/data/incites-pub-year-counts"
    backend.sync_updates(ng, g)
    return True
def org_total_cites(orgs):
    """Load per-year citation counts for each org and sync them to the
    triple store's named graph."""
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        for item in load_incites_json_file(org_name, 'cites'):
            year, count = item['year'], item['count']
            curi = D['citecount-' + ln + '-' + str(year)]
            g.add((curi, RDF.type, WOS.InCitesCitesPerYear))
            g.add((curi, RDFS.label, Literal("{} - {}".format(year, count))))
            g.add((curi, WOS.number, Literal(count)))
            g.add((curi, WOS.year, Literal(year)))
            g.add((org_uri, VIVO.relates, curi))
    ng = "http://localhost/data/incites-total-cites-year-counts"
    backend.sync_updates(ng, g)
    return True
def org_top_categories(orgs):
    """Load top InCites categories for each org and sync them to the
    triple store's named graph.

    Args:
        orgs: iterable of organization names
    Returns:
        True after the named graph has been synced.
    """
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        top_cat = load_incites_json_file(org_name, 'categories')
        for item in top_cat:
            cat = item['category']
            category_uri = get_category_uri(cat)
            # '-' separator between local name and slug, matching the URI
            # pattern of the sibling count functions (the missing separator
            # made distinct (ln, cat) pairs able to collide)
            curi = D['topcategory-'] + ln + '-' + slugify(cat)
            g.add((curi, RDF.type, WOS.InCitesTopCategory))
            g.add((curi, RDFS.label, Literal("{} - {}".format(org_name, cat))))
            g.add((curi, WOS.number, Literal(item['count'])))
            g.add((curi, VIVO.relates, category_uri))
            g.add((curi, VIVO.relates, org_uri))
    ng = "http://localhost/data/incites-top-categories"
    backend.sync_updates(ng, g)
    return True
def run(self):
    """Read the WoS research-area CSV and serialize the topic hierarchy.

    Builds a graph rooted at a wos-topics top node, with broad disciplines,
    one or two research-area levels, and categories chained via skos:broader.
    """
    g = Graph()
    wos_top = D['wos-topics']
    g.add((wos_top, RDF.type, WOS.TopTopic))
    g.add((wos_top, RDFS.label, Literal("Web of Science Subject Schemas")))
    with open(self.input_file) as inf:
        for row in csv.DictReader(inf):
            category = row['WoS Category (tASCA)']
            broad, ra1, ra2 = self.chunk_ras(row['Research Area (eASCA)'])
            # broad discipline hangs off the top node
            broad_uri, cg = self.do_term(broad, clz=WOS.BroadDiscipline)
            g.add((broad_uri, SKOS.broader, wos_top))
            g += cg
            # first (required) research-area level
            ra1_uri, cg = self.do_term(ra1, broader=broad_uri, clz=WOS.ResearchArea, uri_prefix="wosra")
            g += cg
            # second research-area level is optional
            ra2_uri = None
            if ra2 is not None:
                ra2_uri, cg = self.do_term(ra2, broader=ra1_uri, clz=WOS.ResearchArea, uri_prefix="wosra")
                g += cg
            # category attaches to the deepest research area present
            cat_uri, cg = self.do_term(category, broader=ra2_uri or ra1_uri, clz=WOS.Category)
            g += cg
    self.serialize(g)
def add_grant(grant, pub_uri):
    """
    Create a funder and grant(s) graph linked to a publication.

    :param grant: dict with "agency" and "ids" keys
    :param pub_uri: URI of the related publication
    :return: rdflib.Graph (empty when no agency is present)
    """
    g = Graph()
    if grant.get("agency") is None:
        # the original format string had one placeholder for two arguments,
        # silently dropping the ids from the log message
        logger.info("No agency found for {} with ids {}.".format(
            pub_uri, ";".join(grant.get("ids", []))))
        return g
    slug = slugify(grant["agency"])
    funder_uri = D['funder-' + slug]
    g.add((funder_uri, RDF.type, WOS.Funder))
    g.add((funder_uri, RDFS.label, Literal(grant["agency"])))
    for gid in grant["ids"]:
        label = "{} - {}".format(grant["agency"], gid)
        guri = D['grant-'] + slugify(label)
        g.add((guri, RDF.type, WOS.Grant))
        g.add((guri, RDFS.label, Literal(label)))
        g.add((guri, WOS.grantId, Literal(gid)))
        g.add((guri, VIVO.relates, funder_uri))
        g.add((guri, VIVO.relates, pub_uri))
    return g
def addressships(self):
    """Build wos:Address resources for every address on this record,
    relating each to the publication, its sub-orgs, and unified orgs."""
    g = Graph()
    for addr in self.addresses():
        addr_uri = self.addr_uri(addr["full_address"], addr["number"])
        org = addr["organization"]
        res = Resource(g, addr_uri)
        res.set(RDF.type, WOS.Address)
        res.set(RDFS.label, Literal(addr['full_address']))
        res.set(WOS.organizationName, Literal(org))
        res.set(WOS.sequenceNumber, Literal(addr['number']))
        # relation to author set by authorship; relate to pub here
        res.set(VIVO.relates, self.uri)
        # sub-organizations, labeled "<suborg>, <org>"
        for suborg in addr["sub_organizations"]:
            res.add(VIVO.relates, self.sub_org_uri("{}, {}".format(suborg, org)))
        # unified organizations
        for uorg in addr["unified_orgs"]:
            res.add(VIVO.relates, waan_uri(uorg))
    return g
# Source file: preferred_suborg_names.py
# Project: rap-etl — Author: RAP-research-output-impact
# (site metadata, translated: source code listing; 17 reads, 0 bookmarks,
#  0 likes, 0 comments)
def get_existing_address(uri):
    """Find addresses already related to uri in the store.

    Returns a tuple of (address URIs, graph of the relation triples to
    remove for those addresses).
    """
    vstore = backend.get_store()
    rq = rq_prefixes + """
    SELECT ?address
    WHERE {
        ?uri vivo:relatedBy ?address.
        ?address a wos:Address .
    }
    """
    remove_graph = Graph()
    found = []
    for row in vstore.query(rq, initBindings={'uri': uri}):
        found.append(row.address)
        remove_graph.add((row.address, VIVO.relates, uri))
        remove_graph.add((uri, VIVO.relatedBy, row.address))
    return found, remove_graph
def ingest_graph(graph):
    """Mint a Fedora resource for each distinct subject in graph and copy
    that subject's triples onto it.

    For each subject: POST to FEDORA_URL to create a container (the response
    body is its URI), parse the new resource's graph, assert owl:sameAs back
    to the original subject, copy all (pred, obj) pairs, then PUT the merged
    graph back as Turtle.
    """
    sparql = """SELECT DISTINCT ?subject WHERE { ?subject ?pred ?obj . }"""
    for row in graph.query(sparql):
        subject = row[0]
        fedora_result = requests.post(FEDORA_URL)
        fedora_subject = rdflib.URIRef(fedora_result.text)
        subject_graph = rdflib.Graph()
        subject_graph.parse(str(fedora_subject))
        subject_graph.namespace_manager.bind(
            'schema',
            'http://schema.org/')
        subject_graph.namespace_manager.bind(
            'owl',
            str(rdflib.OWL))
        subject_graph.add((fedora_subject, rdflib.OWL.sameAs, subject))
        for pred, obj in graph.predicate_objects(
                subject=subject):
            subject_graph.add((fedora_subject, pred, obj))
        print(subject_graph.serialize(format='turtle').decode())
        # NOTE(review): neither the POST nor the PUT status is checked —
        # consider raise_for_status(). The PUT result was previously bound
        # to an unused local, now dropped.
        requests.put(str(fedora_subject),
                     data=subject_graph.serialize(format='turtle'),
                     headers={"Content-Type": "text/turtle"})
def xquery_socket(raw_xml):
    """Function takes raw_xml and converts to BIBFRAME RDF.

    Sends raw_xml plus a trailing newline to an XQuery service on
    localhost:8089 and parses the RDF/XML reply into an rdflib Graph.

    Args:
        raw_xml -- Raw XML as bytes
    Returns:
        rdflib.Graph with bf and schema prefixes bound
    """
    xquery_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        xquery_server.connect(('localhost', 8089))
        xquery_server.sendall(raw_xml + b'\n')
        rdf_xml = b''
        while True:
            data = xquery_server.recv(1024)
            if not data:
                break
            rdf_xml += data
    finally:
        # close even when connect/send/recv raises (the original leaked
        # the socket on any error)
        xquery_server.close()
    bf_graph = rdflib.Graph()
    for prefix, namespace_uri in [("bf", "http://bibframe.org/vocab/"),
                                  ("schema", "http://schema.org/")]:
        bf_graph.namespace_manager.bind(prefix, namespace_uri)
    bf_graph.parse(data=rdf_xml.decode(), format='xml')
    return bf_graph
def extract_genome_acc(prot_rdf):
    """
    Extracts and returns the assembly accession (GCA id) from the proteome
    rdf which provided as input. Returns -1 if not available.

    prot_rdf: A Uniprot's proteome rdf url or file path
    """
    g = Graph()
    # only parse when the resource responds OK
    response = requests.get(prot_rdf).status_code
    if response == httplib.OK:
        g.load(prot_rdf)
        for s, p, o in g:
            # string.find(o, ...) is Python-2 only (the module-level helper
            # was removed in Python 3); the str method is equivalent
            if str(o).find("GCA") != -1:
                return os.path.split(o)[1]
    return -1
# -----------------------------------------------------------------------------