diff --git a/.dockerignore b/.dockerignore index 48ae269..ee6ebbc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,5 +3,7 @@ !examples/multi_backends/*.py !examples/wiki_parser/*.py !examples/caching/*.py +!examples/etl_pipeline/*.py +!examples/utilizing_gpu/*.py !bin !pirlib diff --git a/examples/etl_pipeline/argo-train.yml b/examples/etl_pipeline/argo-train.yml new file mode 100644 index 0000000..ad57210 --- /dev/null +++ b/examples/etl_pipeline/argo-train.yml @@ -0,0 +1,105 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: argo-train- +spec: + entrypoint: DAG-etl-pipeline + templates: + - name: create-spark-session-template + container: + image: maaz112233/pirlib + command: + - python + - -m + - pirlib.backends.argo_batch + - node + - gASVdgIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMFGNyZWF0ZV9zcGFya19zZXNzaW9ulIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjC5leGFtcGxlcy5ldGxfcGlwZWxpbmUuZXRsOmNyZWF0ZV9zcGFya19zZXNzaW9ulIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwRbWFhejExMjIzMy9waXJsaWKUdWJzjAlmcmFtZXdvcmuUTowGY29uZmlnlH2UjAV0aW1lcpSJc4wGaW5wdXRzlF2UaACMBUlucHV0lJOUKYGUfZQoaAWMB2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjAZzb3VyY2WUaACMCkRhdGFTb3VyY2WUk5QpgZR9lCiMB25vZGVfaWSUTowLc3ViZ3JhcGhfaWSUTowJb3V0cHV0X2lklE6MDmdyYXBoX2lucHV0X2lklIwHZGF0YXNldJR1YowEbWV0YZRoAIwITWV0YWRhdGGUk5QpgZR9lCiMBG5hbWWUjAdkYXRhc2V0lIwLYW5ub3RhdGlvbnOUTnVidWJhjAdvdXRwdXRzlF2UaACMBk91dHB1dJSTlCmBlH2UKGgFjAZyZXR1cm6UaCKMCURJUkVDVE9SWZRoLmgwKYGUfZQoaDOMBnJldHVybpRoNU51YnViYWguaDApgZR9lChoM4wUY3JlYXRlX3NwYXJrX3Nlc3Npb26UaDVOdWJ1Yi4= + - gASVigAAAAAAAABdlIwKcGlybGliLnBpcpSMCkdyYXBoSW5wdXSUk5QpgZR9lCiMAmlklIwHZGF0YXNldJSMBmlvdHlwZZSMCURJUkVDVE9SWZSMBG1ldGGUaAGMCE1ldGFkYXRhlJOUKYGUfZQojARuYW1llIwHZGF0YXNldJSMC2Fubm90YXRpb25zlE51YnViYS4= + volumeMounts: + - name: node-outputs + mountPath: /mnt/node_outputs + - name: dataset + mountPath: /mnt/graph_inputs/dataset + volumes: + - name: 
node-outputs + nfs: + server: k8s-master.cm.cluster + path: /home/maaz/pirlib/examples/etl_pipeline/outputs + readOnly: no + - name: dataset + nfs: + server: k8s-master.cm.cluster + path: /home/maaz/pirlib/examples/etl_pipeline/inputs + readOnly: yes + - name: extract-transform-load-template + container: + image: maaz112233/pirlib + command: + - python + - -m + - pirlib.backends.argo_batch + - node + - gASVfAIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMFmV4dHJhY3RfdHJhbnNmb3JtX2xvYWSUjAtlbnRyeXBvaW50c5R9lIwEbWFpbpRoAIwKRW50cnlwb2ludJSTlCmBlH2UKIwHdmVyc2lvbpSMAnYxlIwHaGFuZGxlcpSMMGV4YW1wbGVzLmV0bF9waXBlbGluZS5ldGw6ZXh0cmFjdF90cmFuc2Zvcm1fbG9hZJSMB3J1bnRpbWWUjApweXRob246My44lIwHY29kZXVybJROjAVpbWFnZZSMEW1hYXoxMTIyMzMvcGlybGlilHVic4wJZnJhbWV3b3JrlE6MBmNvbmZpZ5R9lIwFdGltZXKUiXOMBmlucHV0c5RdlGgAjAVJbnB1dJSTlCmBlH2UKGgFjAdkYXRhc2V0lIwGaW90eXBllIwJRElSRUNUT1JZlIwGc291cmNllGgAjApEYXRhU291cmNllJOUKYGUfZQojAdub2RlX2lklE6MC3N1YmdyYXBoX2lklE6MCW91dHB1dF9pZJROjA5ncmFwaF9pbnB1dF9pZJSMB2RhdGFzZXSUdWKMBG1ldGGUaACMCE1ldGFkYXRhlJOUKYGUfZQojARuYW1llIwHZGF0YXNldJSMC2Fubm90YXRpb25zlE51YnViYYwHb3V0cHV0c5RdlGgAjAZPdXRwdXSUk5QpgZR9lChoBYwGcmV0dXJulGgijAlESVJFQ1RPUlmUaC5oMCmBlH2UKGgzjAZyZXR1cm6UaDVOdWJ1YmFoLmgwKYGUfZQoaDOMFmV4dHJhY3RfdHJhbnNmb3JtX2xvYWSUaDVOdWJ1Yi4= + - gASVigAAAAAAAABdlIwKcGlybGliLnBpcpSMCkdyYXBoSW5wdXSUk5QpgZR9lCiMAmlklIwHZGF0YXNldJSMBmlvdHlwZZSMCURJUkVDVE9SWZSMBG1ldGGUaAGMCE1ldGFkYXRhlJOUKYGUfZQojARuYW1llIwHZGF0YXNldJSMC2Fubm90YXRpb25zlE51YnViYS4= + volumeMounts: + - name: node-outputs + mountPath: /mnt/node_outputs + - name: dataset + mountPath: /mnt/graph_inputs/dataset + volumes: + - name: node-outputs + nfs: + server: k8s-master.cm.cluster + path: /home/maaz/pirlib/examples/etl_pipeline/outputs + readOnly: no + - name: dataset + nfs: + server: k8s-master.cm.cluster + path: /home/maaz/pirlib/examples/etl_pipeline/inputs + readOnly: yes + - name: etl-pipeline-template + container: + image: maaz112233/pirlib + command: + - python + - -m + - 
"""
Python Extract Transform Load Pipeline Example
"""
from dataclasses import asdict
import requests
import pandas as pd
from pirlib.iotypes import DirectoryPath, FilePath
from pirlib.pipeline import pipeline
from pirlib.task import task
from pyspark.sql import SparkSession


@task
def create_spark_session(dataset: DirectoryPath) -> DirectoryPath:
    """Smoke-test that a Spark session can be created inside the task image.

    Builds a tiny in-memory DataFrame, prints it, and shuts the session down.
    The input directory is not read; it is returned unchanged.

    Args:
        dataset: Input directory handed to the task by the pipeline.

    Returns:
        The same ``dataset`` path that was passed in.
    """
    spark = SparkSession.builder.appName("Simple Spark Session").getOrCreate()
    try:
        # Confirm the session is usable before doing any work with it.
        print("Spark version:", spark.version)
        rows = [("Alice", 34), ("Bob", 45), ("Charlie", 29)]
        people = spark.createDataFrame(rows, ["Name", "Age"])
        people.show()
    finally:
        # Always release the session, even if the operations above fail.
        spark.stop()
    return dataset


@task
def extract_transform_load(dataset: DirectoryPath) -> DirectoryPath:
    """Download US universities, keep the Californian ones, and write a TSV.

    Extract: query http://universities.hipolabs.com for United States entries.
    Transform: keep rows whose name contains "California" and flatten the
    list-valued ``domains`` / ``web_pages`` columns into comma-joined strings.
    Load: write the result as tab-separated ``file.csv`` in the task's
    output directory.

    Args:
        dataset: Input directory handed to the task; not read by this task.

    Returns:
        The task output directory containing ``file.csv``.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    API_URL = "http://universities.hipolabs.com/search?country=United+States"
    # A timeout keeps the workflow pod from hanging forever on a dead API,
    # and raise_for_status() avoids JSON-decoding an HTTP error page.
    response = requests.get(API_URL, timeout=30)
    response.raise_for_status()
    data = response.json()

    df = pd.DataFrame(data)
    print(f"Total Number of universities from API {len(data)}")

    # .copy() so the column assignments below modify an independent frame
    # instead of a slice of the original (avoids SettingWithCopyWarning).
    df = df[df["name"].str.contains("California")].copy()
    print(f"Number of universities in california {len(df)}")

    for column in ("domains", "web_pages"):
        df[column] = [",".join(map(str, values)) for values in df[column]]
    df = df.reset_index(drop=True)
    df = df[["domains", "country", "web_pages", "name"]]

    outdir = task.context().output
    file_name = outdir / "file.csv"
    df.to_csv(file_name, sep="\t", encoding="utf-8")

    return outdir


@pipeline
def etl_pipeline(dataset: DirectoryPath) -> DirectoryPath:
    """Run the Spark smoke test and the ETL task on the same input directory.

    The Spark task's return value is not used; the pipeline output is the
    directory produced by ``extract_transform_load``.

    Args:
        dataset: Input directory forwarded to both tasks.

    Returns:
        The output directory of ``extract_transform_load``.
    """
    create_spark_session(dataset)
    data = extract_transform_load(dataset)
    return data
#!/usr/bin/env bash
# Package the ETL example into a Docker image, generate the Argo workflow
# for it, and submit that workflow to the cluster.
#
# The script relies on bash-only syntax ([[ ... ]]), hence the bash shebang.
# Fail fast: without this, a failed packaging or generation step would still
# submit whatever stale argo-train.yml happens to be on disk.
set -euo pipefail

EXAMPLEDIR=$(dirname "$0")
ROOTDIR=$EXAMPLEDIR/../..

### Module 1: Docker_Packaging
python "$ROOTDIR/bin/pircli" dockerize \
    "$ROOTDIR" \
    --auto \
    --pipeline examples.etl_pipeline.etl:etl_pipeline \
    --output "$EXAMPLEDIR/package_argo.yml" \
    --flatten \
    --docker_base_image godatadriven/pyspark:latest

# Convert EXAMPLEDIR to absolute path since docker can't bind-mount relative paths.
EXAMPLEDIR=$([[ $EXAMPLEDIR = /* ]] && echo "$EXAMPLEDIR" || echo "$PWD/${EXAMPLEDIR#./}")

### Module 2: Argoize_Module
# INPUT_dataset / OUTPUT / NFS_SERVER are passed to pircli through the
# environment of this single command only.
INPUT_dataset="$EXAMPLEDIR/inputs" \
OUTPUT="$EXAMPLEDIR/outputs" \
NFS_SERVER=k8s-master.cm.cluster \
python "$ROOTDIR/bin/pircli" generate "$EXAMPLEDIR/package_argo.yml" \
    --target pirlib.backends.argo_batch:ArgoBatchBackend \
    --output "$EXAMPLEDIR/argo-train.yml"

# Run the Argo workflow
argo submit -n argo --watch "$EXAMPLEDIR/argo-train.yml"
gASVVQIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMBWNsZWFulIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjCVleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOmNsZWFulIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwRbWFhejExMjIzMy9waXJsaWKUdWJzjAlmcmFtZXdvcmuUTowGY29uZmlnlH2UjAV0aW1lcpSJc4wGaW5wdXRzlF2UaACMBUlucHV0lJOUKYGUfZQoaAWMB2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjAZzb3VyY2WUaACMCkRhdGFTb3VyY2WUk5QpgZR9lCiMB25vZGVfaWSUTowLc3ViZ3JhcGhfaWSUTowJb3V0cHV0X2lklE6MDmdyYXBoX2lucHV0X2lklIwNdHJhaW5fZGF0YXNldJR1YowEbWV0YZRoAIwITWV0YWRhdGGUk5QpgZR9lCiMBG5hbWWUjAdkYXRhc2V0lIwLYW5ub3RhdGlvbnOUTnVidWJhjAdvdXRwdXRzlF2UaACMBk91dHB1dJSTlCmBlH2UKGgFjAZyZXR1cm6UaCKMCURJUkVDVE9SWZRoLmgwKYGUfZQoaDOMBnJldHVybpRoNU51YnViYWguaDApgZR9lChoM4wFY2xlYW6UaDVOdWJ1Yi4= - gASVJAEAAAAAAABdlCiMCnBpcmxpYi5waXKUjApHcmFwaElucHV0lJOUKYGUfZQojAJpZJSMDXRyYWluX2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjARtZXRhlGgBjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMDXRyYWluX2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmgDKYGUfZQoaAaMD3RyYW5zbGF0ZV9tb2RlbJRoCIwERklMRZRoCmgMKYGUfZQoaA+MD3RyYW5zbGF0ZV9tb2RlbJRoEU51YnViaAMpgZR9lChoBowJc2VudGVuY2VzlGgIjAlESVJFQ1RPUlmUaApoDCmBlH2UKGgPjAlzZW50ZW5jZXOUaBFOdWJ1YmUu volumeMounts: - name: node-outputs @@ -24,22 +24,22 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: train-dataset nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs/train_dataset + path: /home/maaz/pirlib/examples/multi_backends/inputs/train_dataset readOnly: yes - name: train-template container: - image: nilabhra/pirlib + image: maaz112233/pirlib command: - python - -m - pirlib.backends.argo_batch - node - - 
gASVoQIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMBXRyYWlulIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjCVleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOnRyYWlulIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwPbmlsYWJocmEvcGlybGlilHVic4wJZnJhbWV3b3JrlGgAjAlGcmFtZXdvcmuUk5QpgZR9lCiMBG5hbWWUjAdhZGFwdGRslGgOTowGY29uZmlnlE51YmgefZQojBRhZGFwdGRsL21pbl9yZXBsaWNhc5RLAYwUYWRhcHRkbC9tYXhfcmVwbGljYXOUSwR1jAZpbnB1dHOUXZRoAIwFSW5wdXSUk5QpgZR9lChoBYwHZGF0YXNldJSMBmlvdHlwZZSMCURJUkVDVE9SWZSMBnNvdXJjZZRoAIwKRGF0YVNvdXJjZZSTlCmBlH2UKIwHbm9kZV9pZJSMBWNsZWFulIwLc3ViZ3JhcGhfaWSUTowJb3V0cHV0X2lklIwGcmV0dXJulIwOZ3JhcGhfaW5wdXRfaWSUTnVijARtZXRhlGgAjAhNZXRhZGF0YZSTlCmBlH2UKGgcjAdkYXRhc2V0lIwLYW5ub3RhdGlvbnOUTnVidWJhjAdvdXRwdXRzlF2UaACMBk91dHB1dJSTlCmBlH2UKGgFjAZyZXR1cm6UaCmMBEZJTEWUaDZoOCmBlH2UKGgcjAZyZXR1cm6UaDxOdWJ1YmFoNmg4KYGUfZQoaByMBXRyYWlulGg8TnVidWIu + - gASVrAIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMBXRyYWlulIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjCVleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOnRyYWlulIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwRbWFhejExMjIzMy9waXJsaWKUdWJzjAlmcmFtZXdvcmuUaACMCUZyYW1ld29ya5STlCmBlH2UKIwEbmFtZZSMB2FkYXB0ZGyUaA5OjAZjb25maWeUTnViaB59lCiMFGFkYXB0ZGwvbWluX3JlcGxpY2FzlEsBjBRhZGFwdGRsL21heF9yZXBsaWNhc5RLBIwFdGltZXKUiXWMBmlucHV0c5RdlGgAjAVJbnB1dJSTlCmBlH2UKGgFjAdkYXRhc2V0lIwGaW90eXBllIwJRElSRUNUT1JZlIwGc291cmNllGgAjApEYXRhU291cmNllJOUKYGUfZQojAdub2RlX2lklIwFY2xlYW6UjAtzdWJncmFwaF9pZJROjAlvdXRwdXRfaWSUjAZyZXR1cm6UjA5ncmFwaF9pbnB1dF9pZJROdWKMBG1ldGGUaACMCE1ldGFkYXRhlJOUKYGUfZQoaByMB2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmGMB291dHB1dHOUXZRoAIwGT3V0cHV0lJOUKYGUfZQoaAWMBnJldHVybpRoKowERklMRZRoN2g5KYGUfZQoaByMBnJldHVybpRoPU51YnViYWg3aDkpgZR9lChoHIwFdHJhaW6UaD1OdWJ1Yi4= - 
gASVJAEAAAAAAABdlCiMCnBpcmxpYi5waXKUjApHcmFwaElucHV0lJOUKYGUfZQojAJpZJSMDXRyYWluX2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjARtZXRhlGgBjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMDXRyYWluX2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmgDKYGUfZQoaAaMD3RyYW5zbGF0ZV9tb2RlbJRoCIwERklMRZRoCmgMKYGUfZQoaA+MD3RyYW5zbGF0ZV9tb2RlbJRoEU51YnViaAMpgZR9lChoBowJc2VudGVuY2VzlGgIjAlESVJFQ1RPUlmUaApoDCmBlH2UKGgPjAlzZW50ZW5jZXOUaBFOdWJ1YmUu volumeMounts: - name: node-outputs @@ -48,17 +48,17 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: evaluate-template container: - image: nilabhra/pirlib + image: maaz112233/pirlib command: - python - -m - pirlib.backends.argo_batch - node - - gASV9gIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMCGV2YWx1YXRllIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjChleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOmV2YWx1YXRllIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwPbmlsYWJocmEvcGlybGlilHVic4wJZnJhbWV3b3JrlE6MBmNvbmZpZ5R9lIwGaW5wdXRzlF2UKGgAjAVJbnB1dJSTlCmBlH2UKGgFjBNrd2FyZ3MudGVzdF9kYXRhc2V0lIwGaW90eXBllIwJRElSRUNUT1JZlIwGc291cmNllGgAjApEYXRhU291cmNllJOUKYGUfZQojAdub2RlX2lklE6MC3N1YmdyYXBoX2lklE6MCW91dHB1dF9pZJROjA5ncmFwaF9pbnB1dF9pZJSMCXNlbnRlbmNlc5R1YowEbWV0YZRoAIwITWV0YWRhdGGUk5QpgZR9lCiMBG5hbWWUjBNrd2FyZ3MudGVzdF9kYXRhc2V0lIwLYW5ub3RhdGlvbnOUTnVidWJoHSmBlH2UKGgFjBJrd2FyZ3MucHJlZGljdGlvbnOUaCGMCURJUkVDVE9SWZRoI2glKYGUfZQoaCiMGGluZmVyX3BpcGVsaW5lLnNlbnRpbWVudJRoKU5oKowGcmV0dXJulGgrTnViaC1oLymBlH2UKGgyjBJrd2FyZ3MucHJlZGljdGlvbnOUaDROdWJ1YmWMB291dHB1dHOUXZRoAIwGT3V0cHV0lJOUKYGUfZQoaAWMBnJldHVybpRoIYwJREFUQUZSQU1FlGgtaC8pgZR9lChoMowGcmV0dXJulGg0TnVidWJhaC1oLymBlH2UKGgyjAhldmFsdWF0ZZRoNE51YnViLg== + - 
gASVAgMAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMCGV2YWx1YXRllIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjChleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOmV2YWx1YXRllIwHcnVudGltZZSMCnB5dGhvbjozLjiUjAdjb2RldXJslE6MBWltYWdllIwRbWFhejExMjIzMy9waXJsaWKUdWJzjAlmcmFtZXdvcmuUTowGY29uZmlnlH2UjAV0aW1lcpSJc4wGaW5wdXRzlF2UKGgAjAVJbnB1dJSTlCmBlH2UKGgFjBNrd2FyZ3MudGVzdF9kYXRhc2V0lIwGaW90eXBllIwJRElSRUNUT1JZlIwGc291cmNllGgAjApEYXRhU291cmNllJOUKYGUfZQojAdub2RlX2lklE6MC3N1YmdyYXBoX2lklE6MCW91dHB1dF9pZJROjA5ncmFwaF9pbnB1dF9pZJSMCXNlbnRlbmNlc5R1YowEbWV0YZRoAIwITWV0YWRhdGGUk5QpgZR9lCiMBG5hbWWUjBNrd2FyZ3MudGVzdF9kYXRhc2V0lIwLYW5ub3RhdGlvbnOUTnVidWJoHimBlH2UKGgFjBJrd2FyZ3MucHJlZGljdGlvbnOUaCKMCURJUkVDVE9SWZRoJGgmKYGUfZQoaCmMGGluZmVyX3BpcGVsaW5lLnNlbnRpbWVudJRoKk5oK4wGcmV0dXJulGgsTnViaC5oMCmBlH2UKGgzjBJrd2FyZ3MucHJlZGljdGlvbnOUaDVOdWJ1YmWMB291dHB1dHOUXZRoAIwGT3V0cHV0lJOUKYGUfZQoaAWMBnJldHVybpRoIowJREFUQUZSQU1FlGguaDApgZR9lChoM4wGcmV0dXJulGg1TnVidWJhaC5oMCmBlH2UKGgzjAhldmFsdWF0ZZRoNU51YnViLg== - gASVJAEAAAAAAABdlCiMCnBpcmxpYi5waXKUjApHcmFwaElucHV0lJOUKYGUfZQojAJpZJSMDXRyYWluX2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjARtZXRhlGgBjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMDXRyYWluX2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmgDKYGUfZQoaAaMD3RyYW5zbGF0ZV9tb2RlbJRoCIwERklMRZRoCmgMKYGUfZQoaA+MD3RyYW5zbGF0ZV9tb2RlbJRoEU51YnViaAMpgZR9lChoBowJc2VudGVuY2VzlGgIjAlESVJFQ1RPUlmUaApoDCmBlH2UKGgPjAlzZW50ZW5jZXOUaBFOdWJ1YmUu volumeMounts: - name: node-outputs @@ -69,22 +69,22 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: sentences nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs/sentences + path: /home/maaz/pirlib/examples/multi_backends/inputs/sentences readOnly: yes - name: infer-pipeline-translate-1-template container: - image: nilabhra/pirlib + image: 
maaz112233/pirlib command: - python - -m - pirlib.backends.argo_batch - node - - gASV0wIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMGmluZmVyX3BpcGVsaW5lLnRyYW5zbGF0ZV8xlIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjClleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOnRyYW5zbGF0ZZSMB3J1bnRpbWWUjApweXRob246My44lIwHY29kZXVybJROjAVpbWFnZZSMD25pbGFiaHJhL3BpcmxpYpR1YnOMCWZyYW1ld29ya5ROjAZjb25maWeUfZSMA2tleZSMBXZhbHVllHOMBmlucHV0c5RdlChoAIwFSW5wdXSUk5QpgZR9lChoBYwGYXJncy4wlIwGaW90eXBllIwERklMRZSMBnNvdXJjZZRoAIwKRGF0YVNvdXJjZZSTlCmBlH2UKIwHbm9kZV9pZJROjAtzdWJncmFwaF9pZJROjAlvdXRwdXRfaWSUTowOZ3JhcGhfaW5wdXRfaWSUjA90cmFuc2xhdGVfbW9kZWyUdWKMBG1ldGGUaACMCE1ldGFkYXRhlJOUKYGUfZQojARuYW1llIwGYXJncy4wlIwLYW5ub3RhdGlvbnOUTnVidWJoHymBlH2UKGgFjAZhcmdzLjGUaCOMCURJUkVDVE9SWZRoJWgnKYGUfZQoaCpOaCtOaCxOaC2MCXNlbnRlbmNlc5R1YmgvaDEpgZR9lChoNIwGYXJncy4xlGg2TnVidWJljAdvdXRwdXRzlF2UaACMBk91dHB1dJSTlCmBlH2UKGgFjAZyZXR1cm6UaCOMCURJUkVDVE9SWZRoL2gxKYGUfZQoaDSMBnJldHVybpRoNk51YnViYWgvaDEpgZR9lChoNIwLdHJhbnNsYXRlXzGUaDZOdWJ1Yi4= + - 
gASV3wIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMGmluZmVyX3BpcGVsaW5lLnRyYW5zbGF0ZV8xlIwLZW50cnlwb2ludHOUfZSMBG1haW6UaACMCkVudHJ5cG9pbnSUk5QpgZR9lCiMB3ZlcnNpb26UjAJ2MZSMB2hhbmRsZXKUjClleGFtcGxlcy5tdWx0aV9iYWNrZW5kcy5leGFtcGxlOnRyYW5zbGF0ZZSMB3J1bnRpbWWUjApweXRob246My44lIwHY29kZXVybJROjAVpbWFnZZSMEW1hYXoxMTIyMzMvcGlybGlilHVic4wJZnJhbWV3b3JrlE6MBmNvbmZpZ5R9lCiMBXRpbWVylImMA2tleZSMBXZhbHVllHWMBmlucHV0c5RdlChoAIwFSW5wdXSUk5QpgZR9lChoBYwGYXJncy4wlIwGaW90eXBllIwERklMRZSMBnNvdXJjZZRoAIwKRGF0YVNvdXJjZZSTlCmBlH2UKIwHbm9kZV9pZJROjAtzdWJncmFwaF9pZJROjAlvdXRwdXRfaWSUTowOZ3JhcGhfaW5wdXRfaWSUjA90cmFuc2xhdGVfbW9kZWyUdWKMBG1ldGGUaACMCE1ldGFkYXRhlJOUKYGUfZQojARuYW1llIwGYXJncy4wlIwLYW5ub3RhdGlvbnOUTnVidWJoICmBlH2UKGgFjAZhcmdzLjGUaCSMCURJUkVDVE9SWZRoJmgoKYGUfZQoaCtOaCxOaC1OaC6MCXNlbnRlbmNlc5R1YmgwaDIpgZR9lChoNYwGYXJncy4xlGg3TnVidWJljAdvdXRwdXRzlF2UaACMBk91dHB1dJSTlCmBlH2UKGgFjAZyZXR1cm6UaCSMCURJUkVDVE9SWZRoMGgyKYGUfZQoaDWMBnJldHVybpRoN051YnViYWgwaDIpgZR9lChoNYwLdHJhbnNsYXRlXzGUaDdOdWJ1Yi4= - gASVJAEAAAAAAABdlCiMCnBpcmxpYi5waXKUjApHcmFwaElucHV0lJOUKYGUfZQojAJpZJSMDXRyYWluX2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjARtZXRhlGgBjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMDXRyYWluX2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmgDKYGUfZQoaAaMD3RyYW5zbGF0ZV9tb2RlbJRoCIwERklMRZRoCmgMKYGUfZQoaA+MD3RyYW5zbGF0ZV9tb2RlbJRoEU51YnViaAMpgZR9lChoBowJc2VudGVuY2VzlGgIjAlESVJFQ1RPUlmUaApoDCmBlH2UKGgPjAlzZW50ZW5jZXOUaBFOdWJ1YmUu volumeMounts: - name: node-outputs @@ -98,27 +98,27 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: translate-model nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs + path: /home/maaz/pirlib/examples/multi_backends/inputs readOnly: yes - name: sentences nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs/sentences + path: 
/home/maaz/pirlib/examples/multi_backends/inputs/sentences readOnly: yes - name: infer-pipeline-sentiment-template container: - image: nilabhra/pirlib + image: maaz112233/pirlib command: - python - -m - pirlib.backends.argo_batch - node - - gASV2wIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMGGluZmVyX3BpcGVsaW5lLnNlbnRpbWVudJSMC2VudHJ5cG9pbnRzlH2UjARtYWlulGgAjApFbnRyeXBvaW50lJOUKYGUfZQojAd2ZXJzaW9ulIwCdjGUjAdoYW5kbGVylIwpZXhhbXBsZXMubXVsdGlfYmFja2VuZHMuZXhhbXBsZTpzZW50aW1lbnSUjAdydW50aW1llIwKcHl0aG9uOjMuOJSMB2NvZGV1cmyUTowFaW1hZ2WUjA9uaWxhYmhyYS9waXJsaWKUdWJzjAlmcmFtZXdvcmuUTowGY29uZmlnlH2UjAZpbnB1dHOUXZQoaACMBUlucHV0lJOUKYGUfZQoaAWMBW1vZGVslIwGaW90eXBllIwERklMRZSMBnNvdXJjZZRoAIwKRGF0YVNvdXJjZZSTlCmBlH2UKIwHbm9kZV9pZJSMBXRyYWlulIwLc3ViZ3JhcGhfaWSUTowJb3V0cHV0X2lklIwGcmV0dXJulIwOZ3JhcGhfaW5wdXRfaWSUTnVijARtZXRhlGgAjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMBW1vZGVslIwLYW5ub3RhdGlvbnOUTnVidWJoHSmBlH2UKGgFjAlzZW50ZW5jZXOUaCGMCURJUkVDVE9SWZRoI2glKYGUfZQoaCiMGmluZmVyX3BpcGVsaW5lLnRyYW5zbGF0ZV8xlGgqTmgrjAZyZXR1cm6UaC1OdWJoLmgwKYGUfZQoaDOMCXNlbnRlbmNlc5RoNU51YnViZYwHb3V0cHV0c5RdlGgAjAZPdXRwdXSUk5QpgZR9lChoBYwGcmV0dXJulGghjAlESVJFQ1RPUlmUaC5oMCmBlH2UKGgzjAZyZXR1cm6UaDVOdWJ1YmFoLmgwKYGUfZQoaDOMCXNlbnRpbWVudJRoNU51YnViLg== + - 
gASV5wIAAAAAAACMCnBpcmxpYi5waXKUjAROb2RllJOUKYGUfZQojAJpZJSMGGluZmVyX3BpcGVsaW5lLnNlbnRpbWVudJSMC2VudHJ5cG9pbnRzlH2UjARtYWlulGgAjApFbnRyeXBvaW50lJOUKYGUfZQojAd2ZXJzaW9ulIwCdjGUjAdoYW5kbGVylIwpZXhhbXBsZXMubXVsdGlfYmFja2VuZHMuZXhhbXBsZTpzZW50aW1lbnSUjAdydW50aW1llIwKcHl0aG9uOjMuOJSMB2NvZGV1cmyUTowFaW1hZ2WUjBFtYWF6MTEyMjMzL3BpcmxpYpR1YnOMCWZyYW1ld29ya5ROjAZjb25maWeUfZSMBXRpbWVylIlzjAZpbnB1dHOUXZQoaACMBUlucHV0lJOUKYGUfZQoaAWMBW1vZGVslIwGaW90eXBllIwERklMRZSMBnNvdXJjZZRoAIwKRGF0YVNvdXJjZZSTlCmBlH2UKIwHbm9kZV9pZJSMBXRyYWlulIwLc3ViZ3JhcGhfaWSUTowJb3V0cHV0X2lklIwGcmV0dXJulIwOZ3JhcGhfaW5wdXRfaWSUTnVijARtZXRhlGgAjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMBW1vZGVslIwLYW5ub3RhdGlvbnOUTnVidWJoHimBlH2UKGgFjAlzZW50ZW5jZXOUaCKMCURJUkVDVE9SWZRoJGgmKYGUfZQoaCmMGmluZmVyX3BpcGVsaW5lLnRyYW5zbGF0ZV8xlGgrTmgsjAZyZXR1cm6UaC5OdWJoL2gxKYGUfZQoaDSMCXNlbnRlbmNlc5RoNk51YnViZYwHb3V0cHV0c5RdlGgAjAZPdXRwdXSUk5QpgZR9lChoBYwGcmV0dXJulGgijAlESVJFQ1RPUlmUaC9oMSmBlH2UKGg0jAZyZXR1cm6UaDZOdWJ1YmFoL2gxKYGUfZQoaDSMCXNlbnRpbWVudJRoNk51YnViLg== - gASVJAEAAAAAAABdlCiMCnBpcmxpYi5waXKUjApHcmFwaElucHV0lJOUKYGUfZQojAJpZJSMDXRyYWluX2RhdGFzZXSUjAZpb3R5cGWUjAlESVJFQ1RPUlmUjARtZXRhlGgBjAhNZXRhZGF0YZSTlCmBlH2UKIwEbmFtZZSMDXRyYWluX2RhdGFzZXSUjAthbm5vdGF0aW9uc5ROdWJ1YmgDKYGUfZQoaAaMD3RyYW5zbGF0ZV9tb2RlbJRoCIwERklMRZRoCmgMKYGUfZQoaA+MD3RyYW5zbGF0ZV9tb2RlbJRoEU51YnViaAMpgZR9lChoBowJc2VudGVuY2VzlGgIjAlESVJFQ1RPUlmUaApoDCmBlH2UKGgPjAlzZW50ZW5jZXOUaBFOdWJ1YmUu volumeMounts: - name: node-outputs @@ -127,11 +127,11 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: train-pipeline-template container: - image: nilabhra/pirlib + image: maaz112233/pirlib command: - python - -m @@ -154,27 +154,27 @@ spec: - name: node-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: 
no - name: train-dataset nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs/train_dataset + path: /home/maaz/pirlib/examples/multi_backends/inputs/train_dataset readOnly: yes - name: translate-model nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs + path: /home/maaz/pirlib/examples/multi_backends/inputs readOnly: yes - name: sentences nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/inputs/sentences + path: /home/maaz/pirlib/examples/multi_backends/inputs/sentences readOnly: yes - name: graph-outputs nfs: server: k8s-master.cm.cluster - path: /home/nilabhra/pirlib/examples/multi_backends/outputs + path: /home/maaz/pirlib/examples/multi_backends/outputs readOnly: no - name: DAG-train-pipeline dag: diff --git a/examples/multi_backends/run_argo.sh b/examples/multi_backends/run_argo.sh index 68afd8c..872ec78 100644 --- a/examples/multi_backends/run_argo.sh +++ b/examples/multi_backends/run_argo.sh @@ -7,7 +7,8 @@ python $ROOTDIR/bin/pircli dockerize \ --auto \ --pipeline examples.multi_backends.example:train_pipeline \ --output $EXAMPLEDIR/package_argo.yml \ - --flatten + --flatten \ + --docker_base_image python:3 # Convert EXAMPLEDIR to absolute path since docker can't bind-mount relative paths. 
EXAMPLEDIR=$([[ $EXAMPLEDIR = /* ]] && echo "$EXAMPLEDIR" || echo "$PWD/${EXAMPLEDIR#./}") diff --git a/pirlib/cli/dockerize.py b/pirlib/cli/dockerize.py index 2041fa5..c338d45 100644 --- a/pirlib/cli/dockerize.py +++ b/pirlib/cli/dockerize.py @@ -42,6 +42,9 @@ def config_dockerize_parser(parser: argparse.ArgumentParser) -> None: help="path to output file (or - for stdout)", ) parser.add_argument("--flatten", action="store_true", help="flatten pipeline(s)") + + parser.add_argument("--docker_base_image", help="base docker image to be used.") + parser.set_defaults(parser=parser, handler=_dockerize_handler) @@ -54,7 +57,7 @@ def _dockerize_handler(parser: argparse.ArgumentParser, args: argparse.Namespace print("=========== BEGIN INFERRED CONDA ENV ===========") print(conda_env.strip()) print("=========== END INFERRED CONDA ENV ===========") - dockerfile = _generate_dockerfile(args.path) + dockerfile = _generate_dockerfile(args.path, args.docker_base_image) print("========== BEGIN GENERATED DOCKERFILE ==========") print(dockerfile.strip()) print("========== END GENERATED DOCKERFILE ==========") @@ -100,14 +103,18 @@ def _dockerize_handler(parser: argparse.ArgumentParser, args: argparse.Namespace yaml.dump(dataclasses.asdict(package), args.output, sort_keys=False) -def _generate_dockerfile(context_path: pathlib.Path) -> str: +def _generate_dockerfile(context_path: pathlib.Path, docker_base_image: str) -> str: workdir = "/pircli/workdir" miniconda3 = "/opt/conda" pythonpath = _infer_pythonpath(context_path, workdir) + base_image = docker_base_image + + print("=========== BASE DOCKER IMAGE ===========") + print(base_image) return "\n".join( [ - "FROM continuumio/miniconda3:4.12.0", + "FROM continuumio/miniconda3:4.12.0 AS base", "ARG CONDA_ENV_B64", "RUN echo $CONDA_ENV_B64 | base64 -d > /tmp/environment.yml", "RUN conda env create -n pircli -f /tmp/environment.yml", @@ -115,6 +122,15 @@ def _generate_dockerfile(context_path: pathlib.Path) -> str: f"WORKDIR {workdir}", 
f"ENV PYTHONPATH={pythonpath}", f'ENV PATH="{miniconda3}/envs/pircli/bin":$PATH', + "", + f"FROM {base_image} AS final", + "COPY --from=base /opt/conda/ /opt/conda/", + "COPY --from=base /tmp/environment.yml /tmp/environment.yml", + f"COPY --from=base {workdir} {workdir}", + # "RUN conda env update -n pircli -f /tmp/environment.yml --prune", + f"WORKDIR {workdir}", + f"ENV PYTHONPATH={pythonpath}", + f'ENV PATH="{miniconda3}/envs/pircli/bin":$PATH', ] )