diff --git a/Makefile_deprecated b/Makefile_deprecated
deleted file mode 100644
index df9617ce9..000000000
--- a/Makefile_deprecated
+++ /dev/null
@@ -1,186 +0,0 @@
-PROJECT_ID ?= open-targets-genetics-dev
-REGION ?= europe-west1
-CLUSTER_NAME ?= ${USER}-genetics-etl
-PROJECT_NUMBER ?= $$(gcloud projects list --filter=${PROJECT_ID} --format="value(PROJECT_NUMBER)")
-APP_NAME ?= $$(cat pyproject.toml| grep name | cut -d" " -f3 | sed 's/"//g')
-VERSION_NO ?= $$(poetry version --short)
-SRC_WITH_DEPS ?= code_bundle
-
-.PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))
-
-.DEFAULT_GOAL := help
-
-help: ## This is help
-	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
-
-clean: ## CleanUp Prior to Build
-	@rm -Rf ./dist
-	@rm -Rf ./${SRC_WITH_DEPS}
-	@rm -f requirements.txt
-
-setup-dev: SHELL:=/bin/bash
-setup-dev: ## Setup dev environment
-	@. utils/install_dependencies.sh
-
-build: clean ## Build Python Package with Dependencies
-	@echo "Packaging Code and Dependencies for ${APP_NAME}-${VERSION_NO}"
-	@rm -rf ./dist
-	@poetry build
-	@cp ./src/*.py ./dist
-	@poetry run python ./utils/configure.py --cfg job > ./dist/config.yaml
-	@echo "Uploading to Dataproc"
-	@gsutil cp ./dist/${APP_NAME}-${VERSION_NO}-py3-none-any.whl gs://genetics_etl_python_playground/initialisation/
-	@gsutil cp ./utils/initialise_cluster.sh gs://genetics_etl_python_playground/initialisation/
-
-prepare_pics: ## Create cluster for variant annotation:
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-96 \
-		--enable-component-gateway \
-		--num-master-local-ssds=1 \
-		--master-local-ssd-interface=NVME \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--single-node \
-		--max-idle=10m
-
-prepare_variant_annotation: ## Create cluster for variant annotation
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-96 \
-		--enable-component-gateway \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--single-node \
-		--max-idle=10m
-
-prepare_variant_index: ## Create cluster for variant index generation
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-32 \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--enable-component-gateway \
-		--single-node \
-		--max-idle=10m
-
-prepare_v2g: ## Create cluster for variant to gene data generation
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-64 \
-		--enable-component-gateway \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--single-node \
-		--max-idle=10m
-
-prepare_v2g: ## Create cluster for variant to gene data generation
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-64 \
-		--enable-component-gateway \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--single-node \
-		--max-idle=10m
-
-prepare_coloc: ## Create cluster for coloc
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-64 \
-		--num-master-local-ssds=1 \
-		--master-local-ssd-interface=NVME \
-		--enable-component-gateway \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--single-node \
-		--max-idle=10m
-
-prepare_gwas: ## Create cluster for gwas data generation
-	gcloud dataproc clusters create ${CLUSTER_NAME} \
-		--image-version=2.0 \
-		--project=${PROJECT_ID} \
-		--region=${REGION} \
-		--master-machine-type=n1-highmem-32 \
-		--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl" \
-		--initialization-actions=gs://genetics_etl_python_playground/initialisation/initialise_cluster.sh \
-		--enable-component-gateway \
-		--single-node \
-		--max-idle=10m
-
-run_coloc: ## Generate coloc results
-	gcloud dataproc jobs submit pyspark ./dist/run_coloc.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--py-files=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_v2g: ## Generate V2G dataset
-	gcloud dataproc jobs submit pyspark ./dist/run_v2g.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--py-files=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_v2g: ## Generate V2G dataset
-	gcloud dataproc jobs submit pyspark ./dist/run_v2g.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--py-files=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_variant_annotation: ## Generate variant annotation dataset
-	gcloud dataproc jobs submit pyspark ./dist/run_variant_annotation.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--properties='spark.jars=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.driver.extraClassPath=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.executor.extraClassPath=./hail-all-spark.jar,spark.serializer=org.apache.spark.serializer.KryoSerializer,spark.kryo.registrator=is.hail.kryo.HailKryoRegistrator' \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_variant_index: ## Generate variant index dataset
-	gcloud dataproc jobs submit pyspark ./dist/run_variant_index.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--py-files=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_gwas: ## Ingest gwas dataset on a dataproc cluster
-	gcloud dataproc jobs submit pyspark ./dist/run_gwas_ingest.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--properties='spark.jars=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.driver.extraClassPath=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.executor.extraClassPath=./hail-all-spark.jar,spark.serializer=org.apache.spark.serializer.KryoSerializer,spark.kryo.registrator=is.hail.kryo.HailKryoRegistrator' \
-		--py-files=gs://genetics_etl_python_playground/initialisation/${APP_NAME}-${VERSION_NO}-py3-none-any.whl \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_pics: ## Run pics method
-	gcloud dataproc jobs submit pyspark ./dist/pics_experiment.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--properties='spark.jars=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.driver.extraClassPath=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.executor.extraClassPath=./hail-all-spark.jar,spark.serializer=org.apache.spark.serializer.KryoSerializer,spark.kryo.registrator=is.hail.kryo.HailKryoRegistrator' \
-		--project=${PROJECT_ID} \
-		--region=${REGION}
-
-run_precompute_ld_index: ## Precompute ld-index information
-	gcloud dataproc jobs submit pyspark ./dist/run_precompute_ld_indexes.py \
-		--cluster=${CLUSTER_NAME} \
-		--files=./dist/config.yaml \
-		--properties='spark.jars=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.driver.extraClassPath=/opt/conda/miniconda3/lib/python3.8/site-packages/hail/backend/hail-all-spark.jar,spark.executor.extraClassPath=./hail-all-spark.jar,spark.serializer=org.apache.spark.serializer.KryoSerializer,spark.kryo.registrator=is.hail.kryo.HailKryoRegistrator' \
-		--project=${PROJECT_ID} \
-		--region=${REGION}