Skip to content

Commit

Permalink
Merge pull request #38 from GeneDx/bugfix/requirements
Browse files Browse the repository at this point in the history
fix pip install
  • Loading branch information
vgainullin authored May 7, 2020
2 parents 54dd4d3 + 855a1b0 commit ff59554
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 18 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pipenv
python setup.py develop
pip install .
- name: Test with unittest
run: |
pip install pytest
Expand Down
5 changes: 1 addition & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
'negspacy',
'networkx',
'gensim',
'en_core_sci_sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz',


],
dependency_links=['https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz#egg=en_core_sci_sm']
]
)
37 changes: 25 additions & 12 deletions txt2hpo/nlp.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
import spacy
import en_core_sci_sm
from negspacy.negation import Negex
from gensim.parsing.preprocessing import remove_stopwords
from txt2hpo.config import logger
from txt2hpo.util import hpo_network
from txt2hpo.util import hpo_network, download_model
from nltk.stem import RegexpStemmer
from spacy.tokens import Token


def nlp_model(negation_language="en"):
try:
import en_core_sci_sm
nlp = en_core_sci_sm.load(disable=["tagger", "parser"])
nlp.add_pipe(nlp.create_pipe('sentencizer'))
negex = Negex(nlp, language=negation_language, chunk_prefix=["no"])
nlp.add_pipe(negex, last=True)
Token.set_extension('negex', default=False, force=True)

except OSError:
nlp = None
logger.info('Negation model could not be loaded\n')
except ModuleNotFoundError:
rl = download_model("https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz")
if rl == 0:
import en_core_sci_sm
nlp = en_core_sci_sm.load(disable=["tagger", "parser"])
else:
logger.info('Negation model could not be loaded\n')
nlp = None

if nlp:
for not_a_stop in remove_from_stops.split(" "):
Expand All @@ -28,15 +33,23 @@ def nlp_model(negation_language="en"):
return nlp

try:
import en_core_sci_sm
nlp_sans_ner = en_core_sci_sm.load(disable=["tagger", "parser", "ner"])
logger.info('Using sci spacy language model\n')
logger.info('Using scispaCy language model\n')

except OSError as e:
logger.info('Sci spacy language model could not be loaded\n')
logger.info('Performing a one-time download of an English language model\n')
from spacy.cli import download
download('en_core_web_sm')
nlp_sans_ner = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner"])
except ModuleNotFoundError:
rl = download_model(
"https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz")
if rl == 0:
import en_core_sci_sm
nlp_sans_ner = en_core_sci_sm.load(disable=["tagger", "parser", "ner"])
logger.info('Using scispaCy language model\n')
else:
logger.info('scispaCy language model could not be loaded\n')
logger.info('Performing a one-time download of an English language model\n')
from spacy.cli import download
download('en_core_web_sm')
nlp_sans_ner = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner"])

# these are used in hpo as part of phenotype definition, should block from filtering
remove_from_stops = "first second third fourth fifth under over front back behind ca above below without no not "
Expand Down
13 changes: 13 additions & 0 deletions txt2hpo/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
from phenopy.config import config as phenopy_config
from phenopy import generate_annotated_hpo_network

import sys
import subprocess
import os


obo_file = phenopy_config.get('hpo', 'obo_file')

Expand Down Expand Up @@ -57,3 +61,12 @@ def remove_key(dict_list, key):
if key in d:
del d[key]
return dict_list


def download_model(filename, user_pip_args=None):
    """Install a package with ``pip`` under the currently running interpreter.

    :param filename: the target handed to ``pip install`` unchanged, e.g. the
        URL of a model archive.
    :param user_pip_args: optional extra command-line arguments placed after
        the built-in ``--no-cache-dir`` flag and before the install target.
    :return: the return code of the ``pip`` subprocess, as reported by
        ``subprocess.call``.
    """
    # Run pip as a module of this interpreter so the package is installed
    # into the same environment that is executing this code.
    pip_command = [
        sys.executable, "-m", "pip", "install",
        "--no-cache-dir",
        *(user_pip_args or ()),
        filename,
    ]
    # The child process receives a snapshot of the current environment.
    child_env = os.environ.copy()
    return subprocess.call(pip_command, env=child_env)

0 comments on commit ff59554

Please sign in to comment.