feat: ruff as formatter (#322)
* ruff formatter instead of black

* refactor: ruff reformatted files

* feat: more complete ruff adjustments

* refactor: all codebase to comply with ruff rules

* chore: update lock

* feat: more stringent docstring rules

* revert: remove isort and black from Makefile
d0choa authored Dec 12, 2023
1 parent 0bd02aa commit 6798153
Showing 51 changed files with 192 additions and 213 deletions.
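In practice, this consolidates three tools into one: ruff now handles linting, import sorting (the `I` rules), and formatting via `ruff format`, replacing black and isort. A minimal sketch of the style the new setup enforces, on a hypothetical module that is not part of this commit (the leading future import is assumed to be carried over from the old isort `--add-import` behaviour, e.g. via ruff's isort `required-imports` option; the Google-style docstring reflects the stricter docstring rules mentioned above):

from __future__ import annotations

import re
from pathlib import Path


def gcst_accessions(paths: list[Path]) -> list[str]:
    """Extract GCST accessions from harmonised summary-statistics paths.

    Args:
        paths (list[Path]): Candidate file paths.

    Returns:
        list[str]: Unique accessions, in first-seen order.
    """
    pattern = re.compile(r"GCST\d+")
    seen: dict[str, None] = {}
    for path in paths:
        if match := pattern.search(str(path)):
            seen.setdefault(match.group(), None)
    return list(seen)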
15 changes: 3 additions & 12 deletions .pre-commit-config.yaml
@@ -31,19 +31,10 @@ repos:
rev: v0.1.6
hooks:
- id: ruff

- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args:
[--add-import, from __future__ import annotations, --profile, black]

- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black

- --fix
- id: ruff-format
files: ^((otg|utils|tests)/.+)?[^/]+\.py$
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.10.0
hooks:
2 changes: 0 additions & 2 deletions .vscode/extensions.json
@@ -1,10 +1,8 @@
{
"recommendations": [
"charliermarsh.ruff",
"ms-python.isort",
"ms-python.mypy-type-checker",
"ms-python.python",
"ms-python.black-formatter",
"esbenp.prettier-vscode"
],
"unwantedRecommendations": ["ms-python.flake8"]
7 changes: 2 additions & 5 deletions .vscode/settings.json
@@ -1,6 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnPaste": false,
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
@@ -19,14 +19,11 @@
}
},
"json.format.keepLines": true,
"isort.args": ["--profile", "black"],
"autoDocstring.docstringFormat": "google",
"python.testing.pytestArgs": [".", "--doctest-modules", "--cov=src/"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"mypy-type-checker.severity": {
"error": "Information"
},
"ruff.fixAll": false,
"ruff.organizeImports": false
}
}
3 changes: 0 additions & 3 deletions Makefile
@@ -26,9 +26,6 @@ check: ## Lint and format code
@echo "Linting docstrings..."
@poetry run pydoclint --config=pyproject.toml src
@poetry run pydoclint --config=pyproject.toml --skip-checking-short-docstrings=true tests
@echo "Formatting..."
@poetry run black src/otg .
@poetry run isort src/otg .

test: ## Run tests
@echo "Running Tests..."
1 change: 0 additions & 1 deletion README.md
@@ -2,7 +2,6 @@
[![docs](https://github.com/opentargets/genetics_etl_python/actions/workflows/docs.yaml/badge.svg)](https://opentargets.github.io/genetics_etl_python/)
[![codecov](https://codecov.io/gh/opentargets/genetics_etl_python/branch/main/graph/badge.svg?token=5ixzgu8KFP)](https://codecov.io/gh/opentargets/genetics_etl_python)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python/main.svg)](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python)

# Genetics Portal Data Pipeline (experimental)
1 change: 0 additions & 1 deletion docs/index.md
@@ -21,7 +21,6 @@ hide:
![docs](https://github.com/opentargets/genetics_etl_python/actions/workflows/docs.yaml/badge.svg)
[![codecov](https://codecov.io/gh/opentargets/genetics_etl_python/branch/main/graph/badge.svg?token=5ixzgu8KFP)](https://codecov.io/gh/opentargets/genetics_etl_python)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python/main.svg)](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python)

---
50 changes: 5 additions & 45 deletions poetry.lock

Some generated files are not rendered by default.

98 changes: 88 additions & 10 deletions pyproject.toml
@@ -28,7 +28,6 @@ scikit-learn = "^1.3.2"

[tool.poetry.dev-dependencies]
pre-commit = "^3.6.0"
black = {version = "^22.12.0", allow-prereleases = true}
mypy = "^1.7"
pep8-naming = "^0.13.2"
interrogate = "^1.5.0"
@@ -84,9 +83,6 @@ upload_to_release = false
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.isort]
profile = "black"

[tool.deptry]
extend_exclude = ["src/conftest.py", "src/airflow", "src/utils"]

@@ -135,12 +131,94 @@ ignore_missing_imports = true


[tool.ruff]
select = ["D", "E", "F", "B"]
ignore = ["D417", "E501", "E203"]
fixable = ["F401"]
unfixable = ["B"]
line-length = 88
target-version = "py310"
select = [
"B002", # Python does not support the unary prefix increment
"B007", # Loop control variable {name} not used within loop body
"B014", # Exception handler with duplicate exception
"B023", # Function definition does not bind loop variable {name}
"B026", # Star-arg unpacking after a keyword argument is strongly discouraged
"C", # complexity
"COM818", # Trailing comma on bare tuple prohibited
"D", # docstrings
"DTZ003", # Use datetime.now(tz=) instead of datetime.utcnow()
"DTZ004", # Use datetime.fromtimestamp(ts, tz=) instead of datetime.utcfromtimestamp(ts)
"E", # pycodestyle
"F", # pyflakes/autoflake
"G", # flake8-logging-format
"I", # isort
"ICN001", # import concentions; {name} should be imported as {asname}
"ISC001", # Implicitly concatenated string literals on one line
"N804", # First argument of a class method should be named cls
"N805", # First argument of a method should be named self
"N815", # Variable {name} in class scope should not be mixedCase
"PGH001", # No builtin eval() allowed
"PGH004", # Use specific rule codes when using noqa
"PLC0414", # Useless import alias. Import alias does not rename original package.
"PLC", # pylint
"PLE", # pylint
"PLR", # pylint
"PLW", # pylint
"Q000", # Double quotes found but single quotes preferred
"RUF006", # Store a reference to the return value of asyncio.create_task
"S102", # Use of exec detected
"S103", # bad-file-permissions
"S108", # hardcoded-temp-file
"S306", # suspicious-mktemp-usage
"S307", # suspicious-eval-usage
"S313", # suspicious-xmlc-element-tree-usage
"S314", # suspicious-xml-element-tree-usage
"S315", # suspicious-xml-expat-reader-usage
"S316", # suspicious-xml-expat-builder-usage
"S317", # suspicious-xml-sax-usage
"S318", # suspicious-xml-mini-dom-usage
"S319", # suspicious-xml-pull-dom-usage
"S320", # suspicious-xmle-tree-usage
"S601", # paramiko-call
"S602", # subprocess-popen-with-shell-equals-true
"S604", # call-with-shell-equals-true
"S608", # hardcoded-sql-expression
"S609", # unix-command-wildcard-injection
"SIM105", # Use contextlib.suppress({exception}) instead of try-except-pass
"SIM117", # Merge with-statements that use the same scope
"SIM118", # Use {key} in {dict} instead of {key} in {dict}.keys()
"SIM201", # Use {left} != {right} instead of not {left} == {right}
"SIM208", # Use {expr} instead of not (not {expr})
"SIM212", # Use {a} if {a} else {b} instead of {b} if not {a} else {a}
"SIM300", # Yoda conditions. Use 'age == 42' instead of '42 == age'.
"SIM401", # Use get from dict with default instead of an if block
"T100", # Trace found: {name} used
"T20", # flake8-print
"TID251", # Banned imports
"TRY004", # Prefer TypeError exception for invalid type
"TRY200", # Use raise from to specify exception cause
"TRY302", # Remove exception handler; error is immediately re-raised
"UP", # pyupgrade
"W", # pycodestyle
]

ignore = [
"E501", # line too long
"E731", # do not assign a lambda expression, use a def

# Ignore ignored, as the rule is now back in preview/nursery, which cannot
# be ignored anymore without warnings.
# https://github.com/astral-sh/ruff/issues/7491
# "PLC1901", # Lots of false positives

# False positives https://github.com/astral-sh/ruff/issues/5386
"PLC0208", # Use a sequence type instead of a `set` when iterating over values
"PLR0911", # Too many return statements ({returns} > {max_returns})
"PLR0912", # Too many branches ({branches} > {max_branches})
"PLR0913", # Too many arguments to function call ({c_args} > {max_args})
"PLR0915", # Too many statements ({statements} > {max_statements})
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"PLW2901", # Outer {outer_kind} variable {name} overwritten by inner {inner_kind} target
"UP006", # keep type annotation style as is
"UP007", # keep type annotation style as is
# Ignored due to performance: https://github.com/charliermarsh/ruff/issues/2923
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)`

]

[tool.ruff.per-file-ignores]
"__init__.py" = ["E402"]
28 changes: 13 additions & 15 deletions src/airflow/dags/common_airflow.py
@@ -48,16 +48,16 @@


# Shared DAG construction parameters.
shared_dag_args = dict(
owner="Open Targets Data Team",
retries=0,
)
shared_dag_kwargs = dict(
tags=["genetics_etl", "experimental"],
start_date=pendulum.now(tz="Europe/London").subtract(days=1),
schedule="@once",
catchup=False,
)
shared_dag_args = {
"owner": "Open Targets Data Team",
"retries": 0,
}
shared_dag_kwargs = {
"tags": ["genetics_etl", "experimental"],
"start_date": pendulum.now(tz="Europe/London").subtract(days=1),
"schedule": "@once",
"catchup": False,
}
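The dict(...)-to-literal rewrites above match the flake8-comprehensions family enabled by the `C` selector (presumably C408, unnecessary `dict` call), which prefers literals over constructor calls:

# Flagged (C408): a dict() call with keyword arguments.
args = dict(owner="Open Targets Data Team", retries=0)
# Preferred: the equivalent literal.
args = {"owner": "Open Targets Data Team", "retries": 0}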


def create_cluster(
@@ -110,7 +110,7 @@ def create_cluster(
if num_local_ssds:
for worker_section in ("worker_config", "secondary_worker_config"):
# Create a disk config section if it does not exist.
cluster_config[worker_section].setdefault("disk_config", dict())
cluster_config[worker_section].setdefault("disk_config", {})
# Specify the number of local SSDs.
cluster_config[worker_section]["disk_config"][
"num_local_ssds"
@@ -288,7 +288,7 @@ def read_yaml_config(config_path: Path) -> Any:
Any: Parsed YAML config file.
"""
assert config_path.exists(), f"YAML config path {config_path} does not exist."
with open(config_path, "r") as config_file:
with open(config_path) as config_file:
return yaml.safe_load(config_file)


@@ -349,8 +349,6 @@ def submit_pyspark_job_no_operator(
},
},
}
res = job_client.submit_job(
job_client.submit_job(
project_id=GCP_PROJECT, region=GCP_REGION, job=job_description
)
job_id = res.reference.job_id
print(f"Submitted job ID {job_id}.")
14 changes: 7 additions & 7 deletions src/airflow/dags/gwas_catalog_harmonisation.py
@@ -52,20 +52,20 @@ def create_to_do_list(**kwargs: Any) -> Any:
raw_harmonised = ti.xcom_pull(
task_ids="list_raw_harmonised", key="return_value"
)
print("Number of raw harmonised files: ", len(raw_harmonised))
print("Number of raw harmonised files: ", len(raw_harmonised)) # noqa: T201
to_do_list = []
# Remove the ones that have been processed
parquets = ti.xcom_pull(task_ids="list_harmonised_parquet", key="return_value")
print("Number of parquet files: ", len(parquets))
print("Number of parquet files: ", len(parquets)) # noqa: T201
for path in raw_harmonised:
match_result = re.search(
"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", path
r"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", path
)
if match_result:
study_id = match_result.group(2)
if f"harmonised/{study_id}.parquet/_SUCCESS" not in parquets:
to_do_list.append(path)
print("Number of jobs to submit: ", len(to_do_list))
print("Number of jobs to submit: ", len(to_do_list)) # noqa: T201
ti.xcom_push(key="to_do_list", value=to_do_list)

# Submit jobs to dataproc
@@ -78,18 +78,18 @@ def submit_jobs(**kwargs: Any) -> None:
"""
ti = kwargs["ti"]
todo = ti.xcom_pull(task_ids="create_to_do_list", key="to_do_list")
print("Number of jobs to submit: ", len(todo))
print("Number of jobs to submit: ", len(todo)) # noqa: T201
for i in range(len(todo)):
# Not to exceed default quota 400 jobs per minute
if i > 0 and i % 399 == 0:
time.sleep(60)
input_path = todo[i]
match_result = re.search(
"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", input_path
r"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", input_path
)
if match_result:
study_id = match_result.group(2)
print("Submitting job for study: ", study_id)
print("Submitting job for study: ", study_id) # noqa: T201
common.submit_pyspark_job_no_operator(
cluster_name=CLUSTER_NAME,
step_id="gwas_catalog_sumstat_preprocess",
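The added `r` prefixes fix invalid escape sequences (pycodestyle W605, enabled via the `W` family): in a regular string literal, `\d` is not a valid Python escape and has long been deprecated in CPython. A minimal check with a hypothetical path:

import re

path = "raw-harmonised/efo/GCST90123456/harmonised/x.h.tsv.gz"
match = re.search(
    r"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", path
)
assert match is not None and match.group(2) == "GCST90123456"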
2 changes: 1 addition & 1 deletion src/otg/cli.py
@@ -13,7 +13,7 @@ def main(cfg: DictConfig) -> None:
Args:
cfg (DictConfig): hydra configuration object
"""
print(OmegaConf.to_yaml(cfg))
print(OmegaConf.to_yaml(cfg)) # noqa: T201
# Initialise and run step
instantiate(cfg.step)
