"""Ingest manually curated BioPortal PURLs from :mod:`prefixmaps`, curated by Harry Caufield."""

import requests
import yaml

import bioregistry
from bioregistry.external.bioportal import get_bioportal

URL = "https://raw.githubusercontent.com/linkml/prefixmaps/main/src/prefixmaps/data/bioportal.curated.yaml"
#: A mapping from BioPortal prefixes to lists of URI prefixes to skip
BLACKLIST = {"BFO": ["http://www.ifomis.org/bfo/1.1/snap#"]}


def main(max_count: int = 10) -> None:
    """Add curated BioPortal URI prefixes as placeholder providers in the Bioregistry.

    The curated YAML file at :data:`URL` is downloaded and its ``prefixes``
    mapping is walked. For every BioPortal prefix that is both present in
    BioPortal and mapped to a Bioregistry prefix, each curated URI prefix that
    the Bioregistry resource does not already have is printed. The first
    ``max_count`` of them are also appended to the resource as a new provider
    whose ``uri_format`` is the URI prefix followed by ``$1``. Finally, the
    registry is written back to disk.

    :param max_count: The maximum number of providers to add in a single run.
        Candidates found after the cap is reached are still printed, but are
        not added to the registry.
    :raises requests.HTTPError: If the curated file can not be downloaded.
    """
    bioportal = get_bioportal(force_download=False)
    bioportal_to_bioregistry = bioregistry.get_registry_invmap("bioportal")

    # Fail loudly on a hung connection or an HTTP error instead of
    # trying to parse an error page as YAML
    res = requests.get(URL, timeout=30)
    res.raise_for_status()
    data = yaml.safe_load(res.text)["prefixes"]

    count = 0
    for bioportal_prefix, uri_prefixes in data.items():
        if bioportal_prefix not in bioportal:
            # these are nonsense
            continue

        bioregistry_prefix = bioportal_to_bioregistry.get(bioportal_prefix)
        if bioregistry_prefix is None:
            # these might be relevant, but are not currently in the Bioregistry.
            # note that there's no quality filter on BioPortal content, and it's not
            # clear if there's a quality filter on the curation here, so we skip them
            continue

        resource = bioregistry.get_resource(bioregistry_prefix)
        # Copy into a local set so URI prefixes handled during this run can be
        # recorded, which keeps a repeated entry from being added twice
        known_uri_prefixes = set(resource.get_uri_prefixes())
        skipped_uri_prefixes = BLACKLIST.get(bioportal_prefix, [])
        if isinstance(uri_prefixes, str):
            # a single URI prefix is given as a bare string rather than a list
            uri_prefixes = [uri_prefixes]
        for uri_prefix in uri_prefixes:
            if uri_prefix in skipped_uri_prefixes:
                continue
            if uri_prefix.startswith("OBO:"):
                # expand the OBO CURIE shorthand into a full OBO PURL prefix
                uri_prefix = "http://purl.obolibrary.org/obo/" + uri_prefix[len("OBO:") :]
            if uri_prefix in known_uri_prefixes:
                continue
            known_uri_prefixes.add(uri_prefix)
            print(bioregistry_prefix, uri_prefix)

            # ``>=`` so that exactly ``max_count`` providers are added
            if count >= max_count:
                continue
            # NOTE: code, name, homepage, and description are intentionally left
            # as empty placeholders for a curator to fill in by hand. The data
            # tests reject empty strings in these fields, so they must be
            # completed before the updated registry is committed.
            provider = bioregistry.Provider(
                code="",
                name="",
                homepage="",
                description="",
                uri_format=uri_prefix + "$1",
            )
            if resource.providers is None:
                resource.providers = []
            resource.providers.append(provider)
            count += 1

    bioregistry.manager.write_registry()


if __name__ == "__main__":
    main()
@@ ], "uri_format": "http://www.whocc.no/atc_ddd_index/?code=$1" }, + "providers": [ + { + "code": "bioportal.purl", + "description": "BioPortal assigned additional PURLs for ATC codes", + "homepage": "http://purl.bioontology.org/ontology/ATC", + "name": "BioPortal", + "uri_format": "http://purl.bioontology.org/ontology/ATC/$1" + } + ], "publications": [ { "pubmed": "7368387", @@ -9539,6 +9557,15 @@ }, "name": "Biomedical Informatics Research Network Lexicon", "pattern": "^\\d+$", + "providers": [ + { + "code": "bioportal.purl", + "description": "Pseudo-BioPortal PURL assigned to BirnLex", + "homepage": "http://bioontology.org/projects/ontologies/birnlex", + "name": "BioPortal", + "uri_format": "http://bioontology.org/projects/ontologies/birnlex#$1" + } + ], "uri_format": "http://uri.neuinfo.org/nif/nifstd/birnlex_$1" }, "biro": { @@ -26859,10 +26886,17 @@ "providers": [ { "code": "purl", - "description": "Legacy PURLs found in OAE", + "description": "Legacy PURLs for DOID, including the redundant DOID_", "homepage": "http://purl.org/obo/owl/", "name": "Legacy PURL", "uri_format": "http://purl.org/obo/owl/DOID#DOID_$1" + }, + { + "code": "purl2", + "description": "Legacy PURLs for DOID, not including the redundant DOID_", + "homepage": "http://purl.org/obo/owl/", + "name": "Legacy PURL", + "uri_format": "http://purl.org/obo/owl/DOID#$1" } ], "publications": [ @@ -34809,6 +34843,15 @@ "prefix": "EPIO" }, "pattern": "^\\d{7}$", + "providers": [ + { + "code": "legacy", + "description": "Legacy internal URL before switching to OBO PURL", + "homepage": "https://bio.scai.fraunhofer.de/ontolog", + "name": "Legacy", + "uri_format": "https://bio.scai.fraunhofer.de/ontology/epilepsy#$1" + } + ], "uri_format": "http://purl.obolibrary.org/obo/EPIO_$1" }, "epo": { @@ -39673,7 +39716,7 @@ "description": "Access funder data through a DOI for crossref funders.", "homepage": "https://doi.org", "name": "DOI", - "uri_format": "https://dx.doi.org/10.13039/501100000995" + "uri_format": 
"https://dx.doi.org/10.13039/$1" } ], "synonyms": [ @@ -42715,6 +42758,15 @@ "fairsharing": "FAIRsharing.175hsz" }, "name": "General Formal Ontology", + "providers": [ + { + "code": "alt1", + "description": "Alternate identifier using gfo-basic.owl instead of gfo.owl", + "homepage": "http://www.onto-med.de/ontologies", + "name": "Alternate 1", + "uri_format": "http://www.onto-med.de/ontologies/gfo-basic.owl#$1" + } + ], "uri_format": "http://www.onto-med.de/ontologies/gfo.owl#$1" }, "ghr": { @@ -57210,10 +57262,10 @@ "pattern": "^jRCT\\w?\\d+$", "providers": [ { - "code": "", - "description": "", - "homepage": "", - "name": "", + "code": "detail", + "description": "Japan Registry of Clinical Trials (Details)", + "homepage": "https://jrct.niph.go.jp", + "name": "Japan Registry of Clinical Trials (Details)", "uri_format": "https://jrct.niph.go.jp/latest-detail/$1" } ], diff --git a/tests/test_data.py b/tests/test_data.py index e187d4e3c..7dc295316 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -672,6 +672,10 @@ def test_providers(self): for provider in resource.providers: with self.subTest(prefix=prefix, code=provider.code): self.assertNotEqual(provider.code, prefix) + self.assertNotEqual(provider.code, "", msg="code should not be an empty string") + self.assertNotEqual(provider.homepage, "", msg="homepage should not be an empty string") + self.assertNotEqual(provider.description, "", msg="desc. should not be an empty string") + self.assertNotEqual(provider.name, "", msg="name should not be an empty string") self.assertNotIn( provider.code, set(self.metaregistry), @@ -683,7 +687,7 @@ def test_providers(self): provider.code, msg="Provider codes must be lowercase. Ideally, they should be simple and memorable", ) - # self.assertIn("$1", provider.uri_format) + self.assertIn("$1", provider.uri_format) self.assertNotIn( "$2", provider.uri_format,