From 2edf77c68c6ec001f5c23c48410818c6f68aad83 Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 09:58:21 -0400 Subject: [PATCH 1/6] fix pip install --- setup.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 71441e5..819e079 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,6 @@ 'negspacy', 'networkx', 'gensim', - 'en_core_sci_sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz', - - ], - dependency_links=['https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz#egg=en_core_sci_sm'] + ] ) From 50180d8dc42a7e61caa3ae961e6c786a54492e50 Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 10:20:39 -0400 Subject: [PATCH 2/6] Update pythonpackage.yml --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index c7a87fa..6880f52 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -21,7 +21,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pipenv - python setup.py develop + pip install . - name: Test with unittest From 58fdafe1672a8f5411578e681f9ec942efe34b46 Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 10:24:16 -0400 Subject: [PATCH 3/6] Update pythonpackage.yml --- .github/workflows/pythonpackage.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 6880f52..7c27f4e 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -22,6 +22,7 @@ jobs: python -m pip install --upgrade pip pip install pipenv pip install . + pip install -r requirements - name: Test with unittest From 8ec22ed0e29eeca8167bbb8a6655afb0bf88c9f8 Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 10:26:37 -0400 Subject: [PATCH 4/6] Update pythonpackage.yml --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 7c27f4e..696f2ba 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -22,7 +22,7 @@ jobs: python -m pip install --upgrade pip pip install pipenv pip install . - pip install -r requirements + pip install -r requirements.txt - name: Test with unittest From 9acf5598db72be4f4761fa138725dff7882b2d8c Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 13:21:49 -0400 Subject: [PATCH 5/6] fix pip install --- txt2hpo/nlp.py | 37 +++++++++++++++++++++++++------------ txt2hpo/util.py | 13 +++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/txt2hpo/nlp.py b/txt2hpo/nlp.py index b348919..d24f802 100644 --- a/txt2hpo/nlp.py +++ b/txt2hpo/nlp.py @@ -1,24 +1,29 @@ import spacy -import en_core_sci_sm from negspacy.negation import Negex from gensim.parsing.preprocessing import remove_stopwords from txt2hpo.config import logger -from txt2hpo.util import hpo_network +from txt2hpo.util import hpo_network, download_model from nltk.stem import RegexpStemmer from spacy.tokens import Token def nlp_model(negation_language="en"): try: + import en_core_sci_sm nlp = en_core_sci_sm.load(disable=["tagger", "parser"]) nlp.add_pipe(nlp.create_pipe('sentencizer')) negex = Negex(nlp, language=negation_language, chunk_prefix=["no"]) nlp.add_pipe(negex, last=True) Token.set_extension('negex', default=False, force=True) - except OSError: - nlp = None - logger.info('Negation model could not be loaded\n') + except ModuleNotFoundError: + rl = download_model("https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz") + if rl == 0: + import en_core_sci_sm + nlp = en_core_sci_sm.load(disable=["tagger", "parser"]) + else: + logger.info('Negation model could not be loaded\n') + nlp = None if nlp: for not_a_stop in remove_from_stops.split(" "): @@ -28,15 +33,23 @@ def nlp_model(negation_language="en"): return nlp try: + import en_core_sci_sm nlp_sans_ner = en_core_sci_sm.load(disable=["tagger", "parser", "ner"]) - logger.info('Using sci spacy language model\n') + logger.info('Using scispaCy language model\n') -except OSError as e: - logger.info('Sci spacy language model could not be loaded\n') - logger.info('Performing a one-time download of an English language model\n') - from spacy.cli import download - download('en_core_web_sm') - nlp_sans_ner = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner"]) +except ModuleNotFoundError: + rl = download_model( + "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz") + if rl == 0: + import en_core_sci_sm + nlp_sans_ner = en_core_sci_sm.load(disable=["tagger", "parser", "ner"]) + logger.info('Using scispaCy language model\n') + else: + logger.info('scispaCy language model could not be loaded\n') + logger.info('Performing a one-time download of an English language model\n') + from spacy.cli import download + download('en_core_web_sm') + nlp_sans_ner = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner"]) # these are used in hpo as part of phenotype definition, should block from filtering remove_from_stops = "first second third fourth fifth under over front back behind ca above below without no not " diff --git a/txt2hpo/util.py b/txt2hpo/util.py index c7c552a..4d47310 100644 --- a/txt2hpo/util.py +++ b/txt2hpo/util.py @@ -4,6 +4,10 @@ from phenopy.config import config as phenopy_config from phenopy import generate_annotated_hpo_network +import sys +import subprocess +import os + obo_file = phenopy_config.get('hpo', 'obo_file') @@ -57,3 +61,12 @@ def remove_key(dict_list, key): if key in d: del d[key] return dict_list + + +def download_model(filename, user_pip_args=None): + download_url = filename + pip_args = ["--no-cache-dir"] + if user_pip_args: + pip_args.extend(user_pip_args) + cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url] + return subprocess.call(cmd, env=os.environ.copy()) \ No newline at end of file From 855a1b080f5ef77cee31bae3dafe118400c1e421 Mon Sep 17 00:00:00 2001 From: Vlad Gainullin Date: Thu, 7 May 2020 13:31:35 -0400 Subject: [PATCH 6/6] Update pythonpackage.yml --- .github/workflows/pythonpackage.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 696f2ba..4bf2b7f 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -22,9 +22,7 @@ jobs: python -m pip install --upgrade pip pip install pipenv pip install . - pip install -r requirements.txt - - name: Test with unittest run: | pip install pytest