From f20d4a1ab7b20f74db97e1ebcb397836aa35de12 Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 28 Jun 2024 15:05:14 -0700 Subject: [PATCH] Include frequencies for N450 build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit produces tip-frequencies JSON files for both N450 and genome analyses, but only displays the frequencies panel for N450. A start date of 2000 and an end date of 6 months before the present were chosen based on data availability. I increased the bandwidths by about 2.5x to account for data quantity relative to temporal spread (compared to the 2y flu-like defaults). --- phylogenetic/Snakefile | 5 ++-- .../defaults/auspice_config_N450.json | 6 ++++ phylogenetic/defaults/config.yaml | 5 ++++ phylogenetic/rules/annotate_phylogeny.smk | 29 +++++++++++++++++++ phylogenetic/rules/export.smk | 10 +++++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index b4521ff..0acf1a7 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -1,10 +1,11 @@ genes = ['N450', 'genome'] -configfile: "defaults/config.yaml" +configfile: "defaults/config.yaml" rule all: input: - auspice_json = expand("auspice/measles_{gene}.json", gene=genes) + auspice_json = expand("auspice/measles_{gene}.json", gene=genes), + tip_frequencies_json = expand("auspice/measles_{gene}_tip-frequencies.json", gene=genes) include: "rules/prepare_sequences.smk" include: "rules/prepare_sequences_N450.smk" diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json index b7e8d01..00a350c 100644 --- a/phylogenetic/defaults/auspice_config_N450.json +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -62,6 +62,12 @@ "country", "author" ], + "panels": [ + "tree", + "map", + "entropy", + "frequencies" + ], "metadata_columns": [ "author" ] diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index aa785ee..d7818ff 100644
--- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -25,5 +25,10 @@ refine: clock_filter_iqd: 4 ancestral: inference: "joint" +tip_frequencies: + min_date: "2000-01-01" + max_date: "6M" + narrow_bandwidth: 0.2 + wide_bandwidth: 0.6 export: metadata_columns: "strain division location" diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 61e94b8..d87627c 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -39,3 +39,32 @@ rule translate: --reference-sequence {input.reference} \ --output {output.node_data} \ """ + +rule tip_frequencies: + """ + Estimating KDE frequencies for tips + """ + input: + tree = "results/{gene}/tree.nwk", + metadata = "data/metadata.tsv" + params: + strain_id = config["strain_id_field"], + min_date = config["tip_frequencies"]["min_date"], + max_date = config["tip_frequencies"]["max_date"], + narrow_bandwidth = config["tip_frequencies"]["narrow_bandwidth"], + wide_bandwidth = config["tip_frequencies"]["wide_bandwidth"] + output: + tip_freq = "results/{gene}/tip-frequencies.json" + shell: + """ + augur frequencies \ + --method kde \ + --tree {input.tree} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.strain_id} \ + --min-date {params.min_date} \ + --max-date {params.max_date} \ + --narrow-bandwidth {params.narrow_bandwidth} \ + --wide-bandwidth {params.wide_bandwidth} \ + --output {output.tip_freq} + """ diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 96dfed5..32cef95 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -33,3 +33,13 @@ rule export: --include-root-sequence-inline \ --output {output.auspice_json} """ + +rule final_tip_frequencies: + input: + tip_freq = "results/{gene}/tip-frequencies.json", + output: + tip_freq = "auspice/measles_{gene}_tip-frequencies.json" + shell: + """ + cp -f {input.tip_freq} {output.tip_freq} + 
"""