From a23c4dd5a2c5496125a2f8a70e8723c86fce115f Mon Sep 17 00:00:00 2001 From: Harshad Date: Fri, 11 Aug 2023 10:52:48 -0500 Subject: [PATCH] Use `curies`' `Converter` in `strict=False` mode. (#409) - [x] Fixes #408 - [x] Fixes #269 This way, when a `prefix_map` contains a duplicate `uri_prefix` or `prefix` supplied by the user, `curies` will not raise an error. `sssom-py` already gives a user-defined prefix map priority over the default one (which is now the EPM from Bioregistry). --------- Co-authored-by: Nico Matentzoglu --- src/sssom/parsers.py | 2 +- tests/data/hp-subset-metadata.yml | 2 ++ tests/data/hp-subset.json | 59 +++++++++++++++++++++++++++++++ tests/test_parsers.py | 23 ++++++++++++ 4 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 tests/data/hp-subset-metadata.yml create mode 100644 tests/data/hp-subset.json diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py index dfc1cf1e..5c6028bf 100644 --- a/src/sssom/parsers.py +++ b/src/sssom/parsers.py @@ -667,7 +667,7 @@ def from_obographs( :return: An SSSOM data frame (MappingSetDataFrame) """ _ensure_prefix_map(prefix_map) - converter = Converter.from_prefix_map(prefix_map) + converter = Converter.from_prefix_map(prefix_map, strict=False) ms = _init_mapping_set(meta) mlist: List[Mapping] = [] # bad_attrs = {} diff --git a/tests/data/hp-subset-metadata.yml b/tests/data/hp-subset-metadata.yml new file mode 100644 index 00000000..fb6b2cf6 --- /dev/null +++ b/tests/data/hp-subset-metadata.yml @@ -0,0 +1,2 @@ +curie_map: + HP: http://example/obo/HP_ \ No newline at end of file diff --git a/tests/data/hp-subset.json b/tests/data/hp-subset.json new file mode 100644 index 00000000..ac2fe18d --- /dev/null +++ b/tests/data/hp-subset.json @@ -0,0 +1,59 @@ +{ + "graphs" : [ { + "nodes" : [ { + "id" : "http://example/obo/HP_0011770", + "meta" : { + "definition" : { + "val" : "A type of hyperparathyroidism that occurs following kidney transplantation, which is a treatment for secondary hyperparathyroidism. 
Although kidney transplantation leads to a normalization of serum calcium and parathyroid hormone in most patients. The state of persistent hypercalcemia and hyperparathyroidism is referred to as tertiary hyperparathyroidism.", + "xrefs" : [ "DDD:spark" ] + }, + "xrefs" : [ { + "val" : "SNOMEDCT_US:78200003" + }, { + "val" : "UMLS:C0271858" + } ], + "basicPropertyValues" : [ { + "pred" : "http://www.geneontology.org/formats/oboInOwl#created_by", + "val" : "peter" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#creation_date", + "val" : "2012-04-22T04:38:20Z" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace", + "val" : "human_phenotype" + } ] + }, + "type" : "CLASS", + "lbl" : "Tertiary hyperparathyroidism" + }, { + "id" : "http://example/obo/HP_0009450", + "meta" : { + "definition" : { + "val" : "Increased width of the proximal phalanx of the 3rd finger.", + "xrefs" : [ "HPO:curators" ] + }, + "xrefs" : [ { + "val" : "UMLS:C4024355" + } ], + "synonyms" : [ { + "pred" : "hasExactSynonym", + "val" : "Broad innermost bone of middle finger", + "xrefs" : [ "ORCID:0000-0001-5208-3432" ], + "synonymType" : "http://purl.obolibrary.org/obo/hp#layperson" + } ], + "basicPropertyValues" : [ { + "pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace", + "val" : "human_phenotype" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#created_by", + "val" : "doelkens" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#creation_date", + "val" : "2009-01-14T04:33:41Z" + } ] + }, + "type" : "CLASS", + "lbl" : "Broad proximal phalanx of the 3rd finger" + } ] + } ] +} \ No newline at end of file diff --git a/tests/test_parsers.py b/tests/test_parsers.py index e513af23..cd350a80 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -13,6 +13,7 @@ from rdflib import Graph from sssom.context import _raise_on_invalid_prefix_map, get_default_metadata +from sssom.io import parse_file from 
sssom.parsers import ( from_alignment_minidom, from_obographs, @@ -223,3 +224,25 @@ def test_read_sssom_table(self): self.assertEqual(imported_df.iloc[idx][k], v) else: self.assertEqual(imported_df.iloc[idx][k], v) + + def test_parse_obographs_merged(self): + """Test that merged-mode OBO Graph JSON parsing keeps the custom prefix map.""" + hp_json = f"{test_data_dir}/hp-subset.json" + hp_meta = f"{test_data_dir}/hp-subset-metadata.yml" + outfile = f"{test_out_dir}/hp-subset-parse.tsv" + + with open(hp_meta, "r") as f: + data = yaml.safe_load(f) + custom_curie_map = data["curie_map"] + + with open(outfile, "w") as f: + parse_file( + input_path=hp_json, + prefix_map_mode="merged", + clean_prefixes=True, + input_format="obographs-json", + metadata_path=hp_meta, + output=f, + ) + msdf = parse_sssom_table(outfile) + self.assertTrue(custom_curie_map.items() <= msdf.prefix_map.items())