Skip to content

Commit

Permalink
Merge pull request #1006 from nextstrain/wdl/ingest_squash
Browse files Browse the repository at this point in the history
feat: WDL Script Overhaul and Squash
  • Loading branch information
Jennifer Chang authored Sep 28, 2022
2 parents fd4220c + cb290ac commit 55ae46f
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 83 deletions.
22 changes: 19 additions & 3 deletions .dockstore.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
version: 1.2
workflows:
- subclass: WDL
primaryDescriptorPath: /workflow/wdl/workflow.wdl
primaryDescriptorPath: /workflow/wdl/ncov_workflow.wdl
testParameterFiles:
- /workflow/wdl/input_ncov.json
- /workflow/wdl/ncov_workflow.json
name: ncov
authors:
- name: Nextstrain
- name: Nextstrain
- subclass: WDL
primaryDescriptorPath: /workflow/wdl/gisaid_ingest.wdl
testParameterFiles:
- /workflow/wdl/gisaid_ingest.json
name: gisaid_ingest
authors:
- name: Nextstrain
- subclass: WDL
primaryDescriptorPath: /workflow/wdl/genbank_ingest.wdl
testParameterFiles:
- /workflow/wdl/genbank_ingest.json
name: genbank_ingest
authors:
- name: Nextstrain

3 changes: 3 additions & 0 deletions workflow/wdl/genbank_ingest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"GENBANK_INGEST.cache_nextclade_old":"${workspace.genbank_nextclade_tsv}"
}
35 changes: 35 additions & 0 deletions workflow/wdl/genbank_ingest.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
version 1.0

import "tasks/ncov_ingest.wdl" as ncov_ingest

workflow GENBANK_INGEST {
    # Thin wrapper workflow: runs the `genbank_ingest` task from the imported
    # `ncov_ingest` namespace and re-exports the task's results as workflow outputs.
    # Every input is optional and is forwarded to the task unchanged.
    input {
        # Runtime resources forwarded to the task
        Int? cpu
        Int? memory     # in GiB
        Int? disk_size

        # Ingest options
        File? cache_nextclade_old
        String? filter  # e.g. "region:Africa" passed to tsv-filters
    }

    # The alias `ingest` is kept as-is: it is the name that the outputs below
    # (and any task-level input overrides) refer to.
    call ncov_ingest.genbank_ingest as ingest {
        input:
            # ingest options
            filter = filter,
            cache_nextclade_old = cache_nextclade_old,

            # runtime resources
            disk_size = disk_size,
            memory = memory,
            cpu = cpu
    }

    output {
        # Files and run marker produced by the ingest task, passed straight through
        File metadata_tsv = ingest.metadata_tsv
        File sequences_fasta = ingest.sequences_fasta
        File nextclade_tsv = ingest.nextclade_tsv
        String last_run = ingest.last_run
    }
}
6 changes: 6 additions & 0 deletions workflow/wdl/gisaid_ingest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"GISAID_INGEST.GISAID_API_ENDPOINT":"${workspace.GISAID_API_ENDPOINT}",
"GISAID_INGEST.GISAID_USERNAME_AND_PASSWORD":"${workspace.GISAID_USERNAME_AND_PASSWORD}",
"GISAID_INGEST.cache_nextclade_old":"${workspace.gisaid_nextclade_tsv}",
"GISAID_INGEST.ingest.giturl":"https://github.com/nextstrain/ncov-ingest/archive/refs/heads/master.zip"
}
42 changes: 42 additions & 0 deletions workflow/wdl/gisaid_ingest.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
version 1.0

import "tasks/ncov_ingest.wdl" as ncov_ingest

workflow GISAID_INGEST {
    # Thin wrapper workflow: runs the `gisaid_ingest` task from the imported
    # `ncov_ingest` namespace and re-exports the task's results as workflow outputs.
    input {
        # Required GISAID connection settings
        String GISAID_USERNAME_AND_PASSWORD
        String GISAID_API_ENDPOINT

        # Optional runtime resources forwarded to the task
        Int? cpu
        Int? memory     # in GiB
        Int? disk_size

        # Optional ingest settings
        File? cache_nextclade_old
        String? filter  # e.g. "region:Africa" passed to tsv-filters
    }

    # The alias `ingest` must stay stable: input JSON files address task-level
    # inputs through it (e.g. "GISAID_INGEST.ingest.giturl").
    call ncov_ingest.gisaid_ingest as ingest {
        input:
            # required
            GISAID_USERNAME_AND_PASSWORD = GISAID_USERNAME_AND_PASSWORD,
            GISAID_API_ENDPOINT = GISAID_API_ENDPOINT,

            # optional ingest settings
            filter = filter,
            cache_nextclade_old = cache_nextclade_old,

            # optional runtime resources
            disk_size = disk_size,
            memory = memory,
            cpu = cpu
    }

    output {
        # Files and run marker produced by the ingest task, passed straight through
        File metadata_tsv = ingest.metadata_tsv
        File sequences_fasta = ingest.sequences_fasta
        File nextclade_tsv = ingest.nextclade_tsv
        String last_run = ingest.last_run
    }
}
5 changes: 0 additions & 5 deletions workflow/wdl/input_ncov.json

This file was deleted.

4 changes: 4 additions & 0 deletions workflow/wdl/ncov_workflow.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"Nextstrain_WRKFLW.metadata_tsv":"${this.metadata}",
"Nextstrain_WRKFLW.sequence_fasta":"${this.sequences}"
}
21 changes: 9 additions & 12 deletions workflow/wdl/workflow.wdl → workflow/wdl/ncov_workflow.wdl
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
version 1.0

import "tasks/nextstrain.wdl" as nextstrain # <= modular method
# import "tasks/ncov_ingest.wdl" as ncov_ingest
import "tasks/nextstrain.wdl" as nextstrain

workflow Nextstrain_WRKFLW {
input {
# ncov
# Option 1: Pass in a sequence and metadata files, create a configfile_yaml
File? sequence_fasta
File? metadata_tsv
File? context_targz #<= optional contextual seqs in a tar.gz file
File? context_targz #<= optional contextual seqs in a tarball
String? build_name
# Option 2: Use a custom config file (e.g. builds.yaml) with https or s3 sequence or metadata files
File? configfile_yaml
File? custom_zip # optional modifier: add a my_profiles.zip folder for my_auspice_config.json
String? active_builds # optional modifier: specify "Wisconsin,Minnesota,Iowa"
# Option 3? GISAID augur zip?
# File? gisaid_zip # tarball
# Optional Keys for deployment
String? s3deploy
String? AWS_ACCESS_KEY_ID
String? AWS_SECRET_ACCESS_KEY
# By default, run the ncov workflow (can swap it for zika or something else)
String pathogen_giturl = "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip"
String docker_path = "nextstrain/base:latest"
Int? cpu
Int? memory # in GiB
Int? disk_size
Expand All @@ -39,26 +38,24 @@ workflow Nextstrain_WRKFLW {
metadata_tsv = metadata_tsv,
context_targz = context_targz,
build_name = build_name,
# Option 2
configfile_yaml = configfile_yaml,
custom_zip = custom_zip,
active_builds = active_builds,
# Optional deploy to s3 site
s3deploy = s3deploy,
AWS_ACCESS_KEY_ID = AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY = AWS_SECRET_ACCESS_KEY,
pathogen_giturl = pathogen_giturl,
dockerImage = docker_path,
cpu = cpu,
memory = memory,
disk_size = disk_size
}
output {
#Array[File] json_files = build.json_files
File auspice_zip = build.auspice_zip
File results_zip = build.results_zip
}
Expand Down
Loading

0 comments on commit 55ae46f

Please sign in to comment.