Skip to content

Commit

Permalink
Pick curl or wget based on availability
Browse files — browse the repository at this point in the history
  • Loading branch information
j23414 authored and j23414 committed Apr 17, 2023
1 parent: 1116f0c; commit: 075828e
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 55 deletions.
23 changes: 15 additions & 8 deletions ingest/workflow/snakemake_rules/fetch_sequences.smk
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,15 +31,22 @@ rule fetch_from_genbank:
csv_to_ndjson_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/csv-to-ndjson",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/csv-to-ndjson ]]; then
cd bin
wget {params.csv_to_ndjson_url}
chmod 755 *
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/csv-to-ndjson ]] || $download_cmd bin/csv-to-ndjson {params.csv_to_ndjson_url}
chmod +x bin/*
# (3) Fetch sequences from GenBank
./bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson}
"""

Expand Down
45 changes: 29 additions & 16 deletions ingest/workflow/snakemake_rules/slack_notifications.smk
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,16 +31,22 @@ rule notify_on_genbank_record_change:
notify_on_record_change_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-record-change",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/notify-on-record-change ]]; then
cd bin
wget {params.notify_on_record_change_url}
chmod 755
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/notify-on-record-change ]] || $download_cmd bin/notify-on-record-change {params.notify_on_record_change_url}
chmod +x bin/*
# (3) Run the script
./bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/genbank.ndjson.xz Genbank
"""

Expand All @@ -55,15 +61,22 @@ rule notify_on_metadata_diff:
notify_on_diff_url = "https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-diff",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/notify-on-diff ]]; then
cd bin
wget {params.notify_on_diff_url}
chmod 755
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/notify-on-diff ]] || $download_cmd bin/notify-on-diff {params.notify_on_diff_url}
chmod +x bin/*
# (3) Run the script
./bin/notify-on-diff {input.metadata} {params.s3_src:q}/metadata.tsv.gz
"""

Expand Down
49 changes: 34 additions & 15 deletions ingest/workflow/snakemake_rules/transform.smk
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,18 @@ rule fetch_general_geolocation_rules:
geolocation_rules_url=config["transform"]["geolocation_rules_url"],
shell:
"""
curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Fetch general geolocation rules
$download_cmd {output.general_geolocation_rules} {params.geolocation_rules_url}
"""


Expand Down Expand Up @@ -73,22 +84,30 @@ rule transform:
ndjson_to_tsv_and_fasta_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/ndjson-to-tsv-and-fasta",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f transform-field-names ]] || wget {params.transform_field_names_url}
[[ -f transform-string-fields ]] || wget {params.transform_string_fields_url}
[[ -f transform-strain-names ]] || wget {params.transform_strain_names_url}
[[ -f transform-date-fields ]] || wget {params.transform_date_fields_url}
[[ -f transform-genbank-location ]] || wget {params.transform_genbank_location_url}
[[ -f transform-authors ]] || wget {params.transform_authors_url}
[[ -f apply-geolocation-rules ]] || wget {params.apply_geolocation_rules_url}
[[ -f merge-user-metadata ]] || wget {params.merge_user_metadata_url}
[[ -f ndjson-to-tsv-and-fasta ]] || wget {params.ndjson_to_tsv_and_fasta_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/transform-field-names ]] || $download_cmd bin/transform-field-names {params.transform_field_names_url}
[[ -f bin/transform-string-fields ]] || $download_cmd bin/transform-string-fields {params.transform_string_fields_url}
[[ -f bin/transform-strain-names ]] || $download_cmd bin/transform-strain-names {params.transform_strain_names_url}
[[ -f bin/transform-date-fields ]] || $download_cmd bin/transform-date-fields {params.transform_date_fields_url}
[[ -f bin/transform-genbank-location ]] || $download_cmd bin/transform-genbank-location {params.transform_genbank_location_url}
[[ -f bin/transform-authors ]] || $download_cmd bin/transform-authors {params.transform_authors_url}
[[ -f bin/apply-geolocation-rules ]] || $download_cmd bin/apply-geolocation-rules {params.apply_geolocation_rules_url}
[[ -f bin/merge-user-metadata ]] || $download_cmd bin/merge-user-metadata {params.merge_user_metadata_url}
[[ -f bin/ndjson-to-tsv-and-fasta ]] || $download_cmd bin/ndjson-to-tsv-and-fasta {params.ndjson_to_tsv_and_fasta_url}
chmod +x bin/*
# (3) Transform the sequences
(cat {input.sequences_ndjson} \
| ./bin/transform-field-names \
--field-map {params.field_map} \
Expand Down
23 changes: 15 additions & 8 deletions ingest/workflow/snakemake_rules/trigger_rebuild.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@ rule trigger_build:
trigger_on_new_data_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/trigger-on-new-data"
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/trigger-on-new-data ]]; then
cd bin
wget {params.trigger_on_new_data_url}
chmod 755 *
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/trigger-on-new-data ]] || $download_cmd bin/trigger-on-new-data {params.trigger_on_new_data_url}
chmod +x bin/*
# (3) Trigger the build
./bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload}
"""
25 changes: 17 additions & 8 deletions ingest/workflow/snakemake_rules/upload.smk
Original file line number | Diff line number | Diff line change
Expand Up @@ -59,15 +59,24 @@ rule upload_to_s3:
cloudfront_invalidate_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/cloudfront-invalidate"
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f upload-to-s3 ]] || wget {params.upload_to_s3_url}
[[ -f sha256sum ]] || wget {params.sha256sum_url}
[[ -f cloudfront-invalidate ]] || wget {params.cloudfront_invalidate_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/upload-to-s3 ]] || $download_cmd bin/upload-to-s3 {params.upload_to_s3_url}
[[ -f bin/sha256sum ]] || $download_cmd bin/sha256sum {params.sha256sum_url}
[[ -f bin/cloudfront-invalidate ]] || $download_cmd bin/cloudfront-invalidate {params.cloudfront_invalidate_url}
chmod +x bin/*
# (3) Run the upload script
./bin/upload-to-s3 \
{params.quiet} \
{input.file_to_upload:q} \
Expand Down

0 comments on commit 075828e

Please sign in to comment.