Skip to content

Commit

Permalink
Merge pull request #19 from obophenotype/rel_validation
Browse files Browse the repository at this point in the history
Relation validation
  • Loading branch information
hkir-dev authored Apr 14, 2022
2 parents 7bb4614 + dcd9826 commit 8186422
Show file tree
Hide file tree
Showing 7 changed files with 26,301 additions and 0 deletions.
9,441 changes: 9,441 additions & 0 deletions src/ontology/report/all_labels.csv

Large diffs are not rendered by default.

6,251 changes: 6,251 additions & 0 deletions src/ontology/report/mba_relations.csv

Large diffs are not rendered by default.

5,247 changes: 5,247 additions & 0 deletions src/ontology/report/not_valid_relations.tsv

Large diffs are not rendered by default.

5,247 changes: 5,247 additions & 0 deletions src/ontology/report/not_valid_relations_lbl.tsv

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions src/scripts/relation_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import csv
import os
import pandas as pd


# Directory that contains this script; every report path below is resolved relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# Input: relation report (TSV) that is to be enriched with labels.
REL_REPORT_PATH = os.path.join(_SCRIPT_DIR, "../ontology/report/not_valid_relations.tsv")
# Output: the same report with extra label columns.
REL_REPORT_LBL_PATH = os.path.join(_SCRIPT_DIR, "../ontology/report/not_valid_relations_lbl.tsv")

# Input: term -> label lookup table (CSV).
ALL_LABELS_PATH = os.path.join(_SCRIPT_DIR, "../ontology/report/all_labels.csv")


def add_labels_to_report(report_path, labels_path, output_path):
    """
    Adds human readable labels to a relation report and writes the result as a TSV file.

    Each report row carries two IRIs in its 'user_olabel' and 'user_slabel' columns. These IRIs are moved to the
    'user_oiri' and 'user_siri' output columns, and the 'user_olabel' / 'user_slabel' output columns are filled with
    the matching labels looked up in the labels table.
    Args:
        report_path: Path of the tab separated report. Must provide the columns 'o', 's', 'olabel', 'slabel',
            'user_olabel' and 'user_slabel'.
        labels_path: Path of the comma separated labels table. Its first column is used as the lookup key (IRI) and
            its 'label' column provides the label.
        output_path: Path of the tab separated file to write.
    Raises:
        KeyError: If a report row lacks one of the required columns.
    """
    _, records = read_csv_to_dict(report_path, delimiter="\t", generated_ids=True)
    _, labels = read_csv_to_dict(labels_path)

    def label_of(iri):
        # An IRI that is absent from the labels table gets an empty label instead of aborting the whole report.
        return labels.get(iri, {}).get("label", "")

    # Explicit column list: fixes the output order and guarantees a header row even when the report is empty.
    columns = ["o", "s", "olabel", "slabel", "user_oiri", "user_olabel", "user_siri", "user_slabel"]

    rows = []
    for record in records.values():
        object_iri = record["user_olabel"]
        subject_iri = record["user_slabel"]
        rows.append({"o": record["o"],
                     "s": record["s"],
                     "olabel": record["olabel"],
                     "slabel": record["slabel"],
                     "user_oiri": object_iri,
                     "user_olabel": label_of(object_iri),
                     "user_siri": subject_iri,
                     "user_slabel": label_of(subject_iri),
                     })

    labelled_report = pd.DataFrame(rows, columns=columns)
    labelled_report.to_csv(output_path, sep="\t", index=False)


def read_csv_to_dict(csv_path, id_column=0, id_column_name="", delimiter=",", id_to_lower=False, generated_ids=False):
    """
    Reads a delimited text file (CSV/TSV) into a dict. The first non-blank row is the header. Every following
    non-blank row becomes a dict (header -> cell value) that is stored under the row's id.

    Blank lines are skipped. Cells beyond the last header are ignored and a row shorter than the header yields a
    dict with only the leading headers. If two rows share an id, the later row replaces the earlier one.
    Args:
        csv_path: Path of the CSV file
        id_column: Id column becomes the keys of the dict. This column should be unique. Default is the first column.
        id_column_name: Alternative to the numeric id_column, id_column_name specifies id_column by its header string.
            Ignored if the header row does not contain it.
        delimiter: Value delimiter. Default is comma.
        id_to_lower: applies string lowercase operation to the key
        generated_ids: If 'True', uses the row number as the key of the dict instead of a cell value. The header is
            row 0, so the key of the first data row is 1.
    Returns:
        Function provides two return values: first; headers of the table and second; the CSV content dict. Key of the
        content is the row id and the values are dict of row values.
    Raises:
        IndexError: If generated_ids is 'False' and a data row has no cell at the id column.
    """
    records = dict()

    headers = []
    # newline="" is the csv module's recommended mode: the reader handles line endings itself.
    with open(csv_path, newline="") as fd:
        rd = csv.reader(fd, delimiter=delimiter, quotechar='"')
        row_count = 0
        for row in rd:
            if not row:
                # csv.reader yields an empty list for a blank line; there is nothing to index or store.
                continue

            if row_count == 0:
                headers = row
                if id_column_name and id_column_name in headers:
                    id_column = headers.index(id_column_name)
            else:
                if generated_ids:
                    _id = row_count
                else:
                    # The id cell is only read when it is actually used as the key.
                    _id = row[id_column]
                    if id_to_lower:
                        _id = str(_id).lower()
                records[_id] = dict(zip(headers, row))

            row_count += 1

    return headers, records


# Script entry: enrich the relation report with labels and write the labelled copy next to it.
add_labels_to_report(
    report_path=REL_REPORT_PATH,
    labels_path=ALL_LABELS_PATH,
    output_path=REL_REPORT_LBL_PATH,
)
15 changes: 15 additions & 0 deletions src/sparql/all_labels.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

# Lists every OWL class that has an rdfs:label and whose IRI starts with one of the
# OBO prefixes MBA_, DMBA_, ABA_, DHBA_, HBA_ or PBA_. One row per distinct (term, label) pair.
SELECT DISTINCT ?term ?label
WHERE {
  ?term a owl:Class ;
        rdfs:label ?label .

  # Stringify the IRI once so that each prefix test below stays short.
  BIND (STR(?term) AS ?term_iri)

  FILTER (
       STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/MBA_")
    || STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/DMBA_")
    || STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/ABA_")
    || STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/DHBA_")
    || STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/HBA_")
    || STRSTARTS(?term_iri, "http://purl.obolibrary.org/obo/PBA_")
  )
}
22 changes: 22 additions & 0 deletions src/sparql/relation_validation.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>

# For every MBA_/DMBA_ class ?s_mba that carries an existential restriction on BFO_0000050
# pointing at a class ?o_mba, reports the UBERON superclasses of both ends (?s and ?o) with
# their labels. Note that ?user_olabel and ?user_slabel hold the IRI strings of ?o_mba and
# ?s_mba, not labels.
SELECT DISTINCT ?o ?s ?olabel ?slabel (STR(?o_mba) AS ?user_olabel) (STR(?s_mba) AS ?user_slabel)
WHERE {
  # Subject side: its named superclass and its restriction superclass.
  ?s_mba rdfs:subClassOf ?s , ?restriction .

  ?restriction owl:onProperty <http://purl.obolibrary.org/obo/BFO_0000050> ;
               owl:someValuesFrom ?o_mba .

  # Object side: superclass of the restriction filler.
  ?o_mba rdfs:subClassOf ?o .

  ?s rdfs:label ?slabel .
  ?o rdfs:label ?olabel .

  # Only MBA_/DMBA_ subjects are inspected.
  FILTER (STRSTARTS(STR(?s_mba), "http://purl.obolibrary.org/obo/MBA_") || STRSTARTS(STR(?s_mba), "http://purl.obolibrary.org/obo/DMBA_"))
  # Both mapped superclasses must be UBERON terms, and they must differ from each other.
  FILTER (isIRI(?s) && STRSTARTS(STR(?s), "http://purl.obolibrary.org/obo/UBERON_"))
  FILTER (isIRI(?o) && STRSTARTS(STR(?o), "http://purl.obolibrary.org/obo/UBERON_"))
  FILTER (?o != ?s)
}

0 comments on commit 8186422

Please sign in to comment.